@@ -420,8 +420,7 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
420
420
; CHECK-LABEL: @ins_bo_ext_ext(
421
421
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
422
422
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423
- ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
424
- ; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
423
+ ; CHECK-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
425
424
; CHECK-NEXT: ret <4 x float> [[V3]]
426
425
;
427
426
%a2 = extractelement <4 x float > %a , i32 2
@@ -435,13 +434,21 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
435
434
; but it is likely that extracting from index 3 is the better option.
436
435
437
436
define <4 x float > @ins_bo_ext_ext_uses (<4 x float > %a , <4 x float > %b ) {
438
- ; CHECK-LABEL: @ins_bo_ext_ext_uses(
439
- ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
440
- ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
441
- ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
442
- ; CHECK-NEXT: call void @use_f32(float [[A23]])
443
- ; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
444
- ; CHECK-NEXT: ret <4 x float> [[V3]]
437
+ ; SSE-LABEL: @ins_bo_ext_ext_uses(
438
+ ; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
439
+ ; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
440
+ ; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
441
+ ; SSE-NEXT: call void @use_f32(float [[A23]])
442
+ ; SSE-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
443
+ ; SSE-NEXT: ret <4 x float> [[V3]]
444
+ ;
445
+ ; AVX-LABEL: @ins_bo_ext_ext_uses(
446
+ ; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
447
+ ; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
448
+ ; AVX-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
449
+ ; AVX-NEXT: call void @use_f32(float [[A23]])
450
+ ; AVX-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
451
+ ; AVX-NEXT: ret <4 x float> [[V3]]
445
452
;
446
453
%a2 = extractelement <4 x float > %a , i32 2
447
454
%a3 = extractelement <4 x float > %a , i32 3
@@ -452,22 +459,34 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
452
459
}
453
460
454
461
define <4 x float > @PR34724 (<4 x float > %a , <4 x float > %b ) {
455
- ; CHECK-LABEL: @PR34724(
456
- ; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
457
- ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
458
- ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
459
- ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
460
- ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
461
- ; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
462
- ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
463
- ; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
464
- ; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
465
- ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
466
- ; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
467
- ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> poison, float [[A23]], i32 1
468
- ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
469
- ; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
470
- ; CHECK-NEXT: ret <4 x float> [[V3]]
462
+ ; SSE-LABEL: @PR34724(
463
+ ; SSE-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
464
+ ; SSE-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
465
+ ; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
466
+ ; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
467
+ ; SSE-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
468
+ ; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
469
+ ; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
470
+ ; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
471
+ ; SSE-NEXT: [[V1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 6, i32 2, i32 3>
472
+ ; SSE-NEXT: [[V2:%.*]] = shufflevector <4 x float> [[V1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
473
+ ; SSE-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
474
+ ; SSE-NEXT: ret <4 x float> [[V3]]
475
+ ;
476
+ ; AVX-LABEL: @PR34724(
477
+ ; AVX-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
478
+ ; AVX-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
479
+ ; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
480
+ ; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
481
+ ; AVX-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
482
+ ; AVX-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
483
+ ; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
484
+ ; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
485
+ ; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
486
+ ; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 6, i32 2, i32 3>
487
+ ; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
488
+ ; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
489
+ ; AVX-NEXT: ret <4 x float> [[V3]]
471
490
;
472
491
%a0 = extractelement <4 x float > %a , i32 0
473
492
%a1 = extractelement <4 x float > %a , i32 1
0 commit comments