@@ -434,16 +434,22 @@ define i32 @reduce_add(ptr %src) {
434
434
}
435
435
436
436
define float @reduce_fadd (ptr %src ) {
437
- ; CHECK-LABEL: @reduce_fadd(
438
- ; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
439
- ; CHECK-NEXT: [[L_SRC_0:%.*]] = load float, ptr [[GEP_SRC_0]], align 4
440
- ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 1
441
- ; CHECK-NEXT: [[L_SRC_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
442
- ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2
443
- ; CHECK-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
444
- ; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast float [[L_SRC_0]], [[L_SRC_1]]
445
- ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[L_SRC_2]]
446
- ; CHECK-NEXT: ret float [[ADD_1]]
437
+ ; NON-POW2-LABEL: @reduce_fadd(
438
+ ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
439
+ ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4
440
+ ; NON-POW2-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float -0.000000e+00, <3 x float> [[TMP1]])
441
+ ; NON-POW2-NEXT: ret float [[TMP2]]
442
+ ;
443
+ ; POW2-ONLY-LABEL: @reduce_fadd(
444
+ ; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
445
+ ; POW2-ONLY-NEXT: [[L_SRC_0:%.*]] = load float, ptr [[GEP_SRC_0]], align 4
446
+ ; POW2-ONLY-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 1
447
+ ; POW2-ONLY-NEXT: [[L_SRC_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
448
+ ; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2
449
+ ; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
450
+ ; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast float [[L_SRC_0]], [[L_SRC_1]]
451
+ ; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[L_SRC_2]]
452
+ ; POW2-ONLY-NEXT: ret float [[ADD_1]]
447
453
;
448
454
%gep.src.0 = getelementptr inbounds float , ptr %src , i32 0
449
455
%l.src.0 = load float , ptr %gep.src.0 , align 4
@@ -458,19 +464,26 @@ define float @reduce_fadd(ptr %src) {
458
464
}
459
465
460
466
define i32 @reduce_add_after_mul (ptr %src ) {
461
- ; CHECK-LABEL: @reduce_add_after_mul(
462
- ; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
463
- ; CHECK-NEXT: [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4
464
- ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1
465
- ; CHECK-NEXT: [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
466
- ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
467
- ; CHECK-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
468
- ; CHECK-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_0]], 10
469
- ; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1]], 10
470
- ; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10
471
- ; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
472
- ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]]
473
- ; CHECK-NEXT: ret i32 [[ADD_1]]
467
+ ; NON-POW2-LABEL: @reduce_add_after_mul(
468
+ ; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
469
+ ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4
470
+ ; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP1]], <i32 10, i32 10, i32 10>
471
+ ; NON-POW2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP2]])
472
+ ; NON-POW2-NEXT: ret i32 [[TMP3]]
473
+ ;
474
+ ; POW2-ONLY-LABEL: @reduce_add_after_mul(
475
+ ; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
476
+ ; POW2-ONLY-NEXT: [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4
477
+ ; POW2-ONLY-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1
478
+ ; POW2-ONLY-NEXT: [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
479
+ ; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
480
+ ; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
481
+ ; POW2-ONLY-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_0]], 10
482
+ ; POW2-ONLY-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1]], 10
483
+ ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10
484
+ ; POW2-ONLY-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
485
+ ; POW2-ONLY-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]]
486
+ ; POW2-ONLY-NEXT: ret i32 [[ADD_1]]
474
487
;
475
488
%gep.src.0 = getelementptr inbounds i32 , ptr %src , i32 0
476
489
%l.src.0 = load i32 , ptr %gep.src.0 , align 4
@@ -489,25 +502,34 @@ define i32 @reduce_add_after_mul(ptr %src) {
489
502
}
490
503
491
504
define i32 @dot_product_i32 (ptr %a , ptr %b ) {
492
- ; CHECK-LABEL: @dot_product_i32(
493
- ; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
494
- ; CHECK-NEXT: [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4
495
- ; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
496
- ; CHECK-NEXT: [[L_A_1:%.*]] = load i32, ptr [[GEP_A_1]], align 4
497
- ; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
498
- ; CHECK-NEXT: [[L_A_2:%.*]] = load i32, ptr [[GEP_A_2]], align 4
499
- ; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0
500
- ; CHECK-NEXT: [[L_B_0:%.*]] = load i32, ptr [[GEP_B_0]], align 4
501
- ; CHECK-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1
502
- ; CHECK-NEXT: [[L_B_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4
503
- ; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2
504
- ; CHECK-NEXT: [[L_B_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4
505
- ; CHECK-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_A_0]], [[L_B_0]]
506
- ; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_A_1]], [[L_B_1]]
507
- ; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_A_2]], [[L_B_2]]
508
- ; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
509
- ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]]
510
- ; CHECK-NEXT: ret i32 [[ADD_1]]
505
+ ; NON-POW2-LABEL: @dot_product_i32(
506
+ ; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
507
+ ; NON-POW2-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0
508
+ ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_A_0]], align 4
509
+ ; NON-POW2-NEXT: [[TMP2:%.*]] = load <3 x i32>, ptr [[GEP_B_0]], align 4
510
+ ; NON-POW2-NEXT: [[TMP3:%.*]] = mul nsw <3 x i32> [[TMP1]], [[TMP2]]
511
+ ; NON-POW2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP3]])
512
+ ; NON-POW2-NEXT: ret i32 [[TMP4]]
513
+ ;
514
+ ; POW2-ONLY-LABEL: @dot_product_i32(
515
+ ; POW2-ONLY-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
516
+ ; POW2-ONLY-NEXT: [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4
517
+ ; POW2-ONLY-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
518
+ ; POW2-ONLY-NEXT: [[L_A_1:%.*]] = load i32, ptr [[GEP_A_1]], align 4
519
+ ; POW2-ONLY-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
520
+ ; POW2-ONLY-NEXT: [[L_A_2:%.*]] = load i32, ptr [[GEP_A_2]], align 4
521
+ ; POW2-ONLY-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0
522
+ ; POW2-ONLY-NEXT: [[L_B_0:%.*]] = load i32, ptr [[GEP_B_0]], align 4
523
+ ; POW2-ONLY-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1
524
+ ; POW2-ONLY-NEXT: [[L_B_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4
525
+ ; POW2-ONLY-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2
526
+ ; POW2-ONLY-NEXT: [[L_B_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4
527
+ ; POW2-ONLY-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_A_0]], [[L_B_0]]
528
+ ; POW2-ONLY-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_A_1]], [[L_B_1]]
529
+ ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_A_2]], [[L_B_2]]
530
+ ; POW2-ONLY-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
531
+ ; POW2-ONLY-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]]
532
+ ; POW2-ONLY-NEXT: ret i32 [[ADD_1]]
511
533
;
512
534
%gep.a.0 = getelementptr inbounds i32 , ptr %a , i32 0
513
535
%l.a.0 = load i32 , ptr %gep.a.0 , align 4
@@ -533,22 +555,31 @@ define i32 @dot_product_i32(ptr %a, ptr %b) {
533
555
}
534
556
535
557
define float @dot_product_fp32 (ptr %a , ptr %b ) {
536
- ; CHECK-LABEL: @dot_product_fp32(
537
- ; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
538
- ; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
539
- ; CHECK-NEXT: [[L_A_2:%.*]] = load float, ptr [[GEP_A_2]], align 4
540
- ; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0
541
- ; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
542
- ; CHECK-NEXT: [[L_B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
543
- ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_A_0]], align 4
544
- ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[GEP_B_0]], align 4
545
- ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP2]]
546
- ; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast float [[L_A_2]], [[L_B_2]]
547
- ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
548
- ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
549
- ; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
550
- ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
551
- ; CHECK-NEXT: ret float [[ADD_1]]
558
+ ; NON-POW2-LABEL: @dot_product_fp32(
559
+ ; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
560
+ ; NON-POW2-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0
561
+ ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x float>, ptr [[GEP_A_0]], align 4
562
+ ; NON-POW2-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr [[GEP_B_0]], align 4
563
+ ; NON-POW2-NEXT: [[TMP3:%.*]] = fmul fast <3 x float> [[TMP1]], [[TMP2]]
564
+ ; NON-POW2-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float -0.000000e+00, <3 x float> [[TMP3]])
565
+ ; NON-POW2-NEXT: ret float [[TMP4]]
566
+ ;
567
+ ; POW2-ONLY-LABEL: @dot_product_fp32(
568
+ ; POW2-ONLY-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
569
+ ; POW2-ONLY-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
570
+ ; POW2-ONLY-NEXT: [[L_A_2:%.*]] = load float, ptr [[GEP_A_2]], align 4
571
+ ; POW2-ONLY-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0
572
+ ; POW2-ONLY-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
573
+ ; POW2-ONLY-NEXT: [[L_B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
574
+ ; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_A_0]], align 4
575
+ ; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[GEP_B_0]], align 4
576
+ ; POW2-ONLY-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP2]]
577
+ ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = fmul fast float [[L_A_2]], [[L_B_2]]
578
+ ; POW2-ONLY-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
579
+ ; POW2-ONLY-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
580
+ ; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
581
+ ; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
582
+ ; POW2-ONLY-NEXT: ret float [[ADD_1]]
552
583
;
553
584
%gep.a.0 = getelementptr inbounds float , ptr %a , i32 0
554
585
%l.a.0 = load float , ptr %gep.a.0 , align 4
@@ -574,22 +605,31 @@ define float @dot_product_fp32(ptr %a, ptr %b) {
574
605
}
575
606
576
607
define double @dot_product_fp64 (ptr %a , ptr %b ) {
577
- ; CHECK-LABEL: @dot_product_fp64(
578
- ; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
579
- ; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds double, ptr [[A]], i32 2
580
- ; CHECK-NEXT: [[L_A_2:%.*]] = load double, ptr [[GEP_A_2]], align 4
581
- ; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0
582
- ; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds double, ptr [[B]], i32 2
583
- ; CHECK-NEXT: [[L_B_2:%.*]] = load double, ptr [[GEP_B_2]], align 4
584
- ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[GEP_A_0]], align 4
585
- ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[GEP_B_0]], align 4
586
- ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP2]]
587
- ; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast double [[L_A_2]], [[L_B_2]]
588
- ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
589
- ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
590
- ; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast double [[TMP4]], [[TMP5]]
591
- ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast double [[ADD_0]], [[MUL_2]]
592
- ; CHECK-NEXT: ret double [[ADD_1]]
608
+ ; NON-POW2-LABEL: @dot_product_fp64(
609
+ ; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
610
+ ; NON-POW2-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0
611
+ ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x double>, ptr [[GEP_A_0]], align 4
612
+ ; NON-POW2-NEXT: [[TMP2:%.*]] = load <3 x double>, ptr [[GEP_B_0]], align 4
613
+ ; NON-POW2-NEXT: [[TMP3:%.*]] = fmul fast <3 x double> [[TMP1]], [[TMP2]]
614
+ ; NON-POW2-NEXT: [[TMP4:%.*]] = call fast double @llvm.vector.reduce.fadd.v3f64(double -0.000000e+00, <3 x double> [[TMP3]])
615
+ ; NON-POW2-NEXT: ret double [[TMP4]]
616
+ ;
617
+ ; POW2-ONLY-LABEL: @dot_product_fp64(
618
+ ; POW2-ONLY-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
619
+ ; POW2-ONLY-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds double, ptr [[A]], i32 2
620
+ ; POW2-ONLY-NEXT: [[L_A_2:%.*]] = load double, ptr [[GEP_A_2]], align 4
621
+ ; POW2-ONLY-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0
622
+ ; POW2-ONLY-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds double, ptr [[B]], i32 2
623
+ ; POW2-ONLY-NEXT: [[L_B_2:%.*]] = load double, ptr [[GEP_B_2]], align 4
624
+ ; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[GEP_A_0]], align 4
625
+ ; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[GEP_B_0]], align 4
626
+ ; POW2-ONLY-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP2]]
627
+ ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = fmul fast double [[L_A_2]], [[L_B_2]]
628
+ ; POW2-ONLY-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
629
+ ; POW2-ONLY-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
630
+ ; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast double [[TMP4]], [[TMP5]]
631
+ ; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast double [[ADD_0]], [[MUL_2]]
632
+ ; POW2-ONLY-NEXT: ret double [[ADD_1]]
593
633
;
594
634
%gep.a.0 = getelementptr inbounds double , ptr %a , i32 0
595
635
%l.a.0 = load double , ptr %gep.a.0 , align 4
0 commit comments