@@ -289,8 +289,8 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 derefer
289
289
; must be a multiple of element size.
290
290
; TODO: Could bitcast around this limitation.
291
291
292
- define <4 x i32 > @gep01_bitcast_load_i32_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
293
- ; CHECK-LABEL: @gep01_bitcast_load_i32_insert_v4i32 (
292
+ define <4 x i32 > @gep01_bitcast_load_i32_from_v16i8_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
293
+ ; CHECK-LABEL: @gep01_bitcast_load_i32_from_v16i8_insert_v4i32 (
294
294
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
295
295
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
296
296
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
@@ -302,6 +302,84 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl
302
302
ret <4 x i32 > %r
303
303
}
304
304
305
; Scalar i64 load through element 1 of a <16 x i8> GEP (byte offset 1), inserted
; into lane 0 of an undef <2 x i64>. The 8-byte load covers bytes 1..8, inside the
; 16 dereferenceable bytes, but byte offset 1 is not a multiple of the 8-byte
; loaded element. The autogenerated assertions expect the IR to stay unchanged.
+ define <2 x i64 > @gep01_bitcast_load_i64_from_v16i8_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
306
+ ; CHECK-LABEL: @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(
307
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
308
+ ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
309
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
310
+ ; CHECK-NEXT: ret <2 x i64> [[R]]
311
+ ;
312
+ %gep = getelementptr inbounds <16 x i8 >, ptr %p , i64 0 , i64 1
313
+ %s = load i64 , ptr %gep , align 1
314
+ %r = insertelement <2 x i64 > undef , i64 %s , i64 0
315
+ ret <2 x i64 > %r
316
+ }
317
+
318
; Scalar i32 load through element 11 of a <16 x i8> GEP (byte offset 11), inserted
; into lane 0 of an undef <4 x i32>. The 4-byte load covers bytes 11..14, inside
; the 16 dereferenceable bytes, but byte offset 11 is not a multiple of the 4-byte
; loaded element. The autogenerated assertions expect the IR to stay unchanged.
+ define <4 x i32 > @gep11_bitcast_load_i32_from_v16i8_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
319
+ ; CHECK-LABEL: @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(
320
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 11
321
+ ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
322
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
323
+ ; CHECK-NEXT: ret <4 x i32> [[R]]
324
+ ;
325
+ %gep = getelementptr inbounds <16 x i8 >, ptr %p , i64 0 , i64 11
326
+ %s = load i32 , ptr %gep , align 1
327
+ %r = insertelement <4 x i32 > undef , i32 %s , i64 0
328
+ ret <4 x i32 > %r
329
+ }
330
+
331
; Scalar i32 load through element 1 of an <8 x i16> GEP (byte offset 2), inserted
; into lane 0 of an undef <4 x i32>. The 4-byte load covers bytes 2..5, inside the
; 16 dereferenceable bytes, but byte offset 2 is not a multiple of the 4-byte
; loaded element. The autogenerated assertions expect the IR to stay unchanged.
+ define <4 x i32 > @gep01_bitcast_load_i32_from_v8i16_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
332
+ ; CHECK-LABEL: @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(
333
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
334
+ ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
335
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
336
+ ; CHECK-NEXT: ret <4 x i32> [[R]]
337
+ ;
338
+ %gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 1
339
+ %s = load i32 , ptr %gep , align 1
340
+ %r = insertelement <4 x i32 > undef , i32 %s , i64 0
341
+ ret <4 x i32 > %r
342
+ }
343
+
344
; Scalar i64 load through element 1 of an <8 x i16> GEP (byte offset 2), inserted
; into lane 0 of an undef <2 x i64>. The 8-byte load covers bytes 2..9, inside the
; 16 dereferenceable bytes, but byte offset 2 is not a multiple of the 8-byte
; loaded element. The autogenerated assertions expect the IR to stay unchanged.
+ define <2 x i64 > @gep01_bitcast_load_i64_from_v8i16_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
345
+ ; CHECK-LABEL: @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(
346
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
347
+ ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
348
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
349
+ ; CHECK-NEXT: ret <2 x i64> [[R]]
350
+ ;
351
+ %gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 1
352
+ %s = load i64 , ptr %gep , align 1
353
+ %r = insertelement <2 x i64 > undef , i64 %s , i64 0
354
+ ret <2 x i64 > %r
355
+ }
356
+
357
; Scalar i32 load through element 5 of an <8 x i16> GEP (byte offset 10), inserted
; into lane 0 of an undef <4 x i32>. The 4-byte load covers bytes 10..13, inside
; the 16 dereferenceable bytes, but byte offset 10 is not a multiple of the 4-byte
; loaded element. The autogenerated assertions expect the IR to stay unchanged.
+ define <4 x i32 > @gep05_bitcast_load_i32_from_v8i16_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
358
+ ; CHECK-LABEL: @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(
359
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 5
360
+ ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
361
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
362
+ ; CHECK-NEXT: ret <4 x i32> [[R]]
363
+ ;
364
+ %gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 5
365
+ %s = load i32 , ptr %gep , align 1
366
+ %r = insertelement <4 x i32 > undef , i32 %s , i64 0
367
+ ret <4 x i32 > %r
368
+ }
369
+
370
; Scalar i64 load through element 1 of a <4 x i32> GEP (byte offset 4), inserted
; into lane 0 of an undef <2 x i64>. The 8-byte load covers bytes 4..11, inside
; the 16 dereferenceable bytes, but byte offset 4 is not a multiple of the 8-byte
; loaded element. The autogenerated assertions expect the IR to stay unchanged.
; NOTE(review): the function name says "load_i32" but the body loads i64; the
; sibling tests encode the loaded type in that position - confirm whether the
; name should read "load_i64".
+ define <2 x i64 > @gep01_bitcast_load_i32_from_v4i32_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
371
+ ; CHECK-LABEL: @gep01_bitcast_load_i32_from_v4i32_insert_v2i64(
372
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x i32>, ptr [[P:%.*]], i64 0, i64 1
373
+ ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
374
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
375
+ ; CHECK-NEXT: ret <2 x i64> [[R]]
376
+ ;
377
+ %gep = getelementptr inbounds <4 x i32 >, ptr %p , i64 0 , i64 1
378
+ %s = load i64 , ptr %gep , align 1
379
+ %r = insertelement <2 x i64 > undef , i64 %s , i64 0
380
+ ret <2 x i64 > %r
381
+ }
382
+
305
383
define <4 x i32 > @gep012_bitcast_load_i32_insert_v4i32 (ptr align 1 dereferenceable (20 ) %p ) nofree nosync {
306
384
; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
307
385
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
@@ -331,6 +409,58 @@ define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceab
331
409
ret <4 x i32 > %r
332
410
}
333
411
412
; Scalar i32 load through element 7 of an <8 x i16> GEP (byte offset 14), inserted
; into lane 0 of an undef <4 x i32>. The 4-byte load covers bytes 14..17, which
; extends past the 16 bytes (0..15) declared dereferenceable on %p.
; The autogenerated assertions expect the IR to stay unchanged.
+ define <4 x i32 > @gep07_bitcast_load_i32_from_v8i16_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
413
+ ; CHECK-LABEL: @gep07_bitcast_load_i32_from_v8i16_insert_v4i32(
414
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 7
415
+ ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
416
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
417
+ ; CHECK-NEXT: ret <4 x i32> [[R]]
418
+ ;
419
+ %gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 7
420
+ %s = load i32 , ptr %gep , align 1
421
+ %r = insertelement <4 x i32 > undef , i32 %s , i64 0
422
+ ret <4 x i32 > %r
423
+ }
424
+
425
; Scalar i64 load through element 3 of a <4 x i32> GEP (byte offset 12), inserted
; into lane 0 of an undef <2 x i64>. The 8-byte load covers bytes 12..19, which
; extends past the 16 bytes (0..15) declared dereferenceable on %p.
; The autogenerated assertions expect the IR to stay unchanged.
; NOTE(review): the function name says "load_i32" but the body loads i64; the
; sibling tests encode the loaded type in that position - confirm whether the
; name should read "load_i64".
+ define <2 x i64 > @gep03_bitcast_load_i32_from_v4i32_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
426
+ ; CHECK-LABEL: @gep03_bitcast_load_i32_from_v4i32_insert_v2i64(
427
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x i32>, ptr [[P:%.*]], i64 0, i64 3
428
+ ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
429
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
430
+ ; CHECK-NEXT: ret <2 x i64> [[R]]
431
+ ;
432
+ %gep = getelementptr inbounds <4 x i32 >, ptr %p , i64 0 , i64 3
433
+ %s = load i64 , ptr %gep , align 1
434
+ %r = insertelement <2 x i64 > undef , i64 %s , i64 0
435
+ ret <2 x i64 > %r
436
+ }
437
+
438
; Scalar i64 load through element 9 of a <16 x i8> GEP (byte offset 9), inserted
; into lane 0 of an undef <2 x i64>. The 8-byte load covers bytes 9..16, which
; extends one byte past the 16 bytes (0..15) declared dereferenceable on %p.
; The autogenerated assertions expect the IR to stay unchanged.
; NOTE(review): this is the only test in this group that references attribute
; group #0; its definition is not visible in this hunk - confirm that it exists
; in the file and that the attribute is intended here.
+ define <2 x i64 > @gep09_bitcast_load_i64_from_v16i8_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) #0 {
439
+ ; CHECK-LABEL: @gep09_bitcast_load_i64_from_v16i8_insert_v2i64(
440
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 9
441
+ ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
442
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
443
+ ; CHECK-NEXT: ret <2 x i64> [[R]]
444
+ ;
445
+ %gep = getelementptr inbounds <16 x i8 >, ptr %p , i64 0 , i64 9
446
+ %s = load i64 , ptr %gep , align 1
447
+ %r = insertelement <2 x i64 > undef , i64 %s , i64 0
448
+ ret <2 x i64 > %r
449
+ }
450
+
451
; Scalar i64 load through element 5 of an <8 x i16> GEP (byte offset 10), inserted
; into lane 0 of an undef <2 x i64>. The 8-byte load covers bytes 10..17, which
; extends past the 16 bytes (0..15) declared dereferenceable on %p.
; The autogenerated assertions expect the IR to stay unchanged.
+ define <2 x i64 > @gep05_bitcast_load_i64_from_v8i16_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
452
+ ; CHECK-LABEL: @gep05_bitcast_load_i64_from_v8i16_insert_v2i64(
453
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 5
454
+ ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
455
+ ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
456
+ ; CHECK-NEXT: ret <2 x i64> [[R]]
457
+ ;
458
+ %gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 5
459
+ %s = load i64 , ptr %gep , align 1
460
+ %r = insertelement <2 x i64 > undef , i64 %s , i64 0
461
+ ret <2 x i64 > %r
462
+ }
463
+
334
464
; If there are enough dereferenceable bytes, we can offset the vector load.
335
465
336
466
define <8 x i16 > @gep10_load_i16_insert_v8i16 (ptr align 16 dereferenceable (32 ) %p ) nofree nosync {
0 commit comments