@@ -587,13 +587,11 @@ define <4 x float> @asin_4x(ptr %a) {
587
587
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
588
588
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
589
589
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
590
- ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
591
- ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
592
- ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
593
- ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
594
- ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
595
- ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
596
- ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
590
+ ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
591
+ ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP3]])
592
+ ; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
593
+ ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
594
+ ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
597
595
;
598
596
entry:
599
597
%0 = load <4 x float >, ptr %a , align 16
@@ -666,13 +664,11 @@ define <4 x float> @acos_4x(ptr %a) {
666
664
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
667
665
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
668
666
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
669
- ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
670
- ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
671
- ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
672
- ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
673
- ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
674
- ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
675
- ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
667
+ ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
668
+ ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP3]])
669
+ ; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
670
+ ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
671
+ ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
676
672
;
677
673
entry:
678
674
%0 = load <4 x float >, ptr %a , align 16
@@ -745,13 +741,11 @@ define <4 x float> @atan_4x(ptr %a) {
745
741
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
746
742
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
747
743
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
748
- ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
749
- ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
750
- ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
751
- ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
752
- ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
753
- ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
754
- ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
744
+ ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
745
+ ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP3]])
746
+ ; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
747
+ ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
748
+ ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
755
749
;
756
750
entry:
757
751
%0 = load <4 x float >, ptr %a , align 16
@@ -824,13 +818,11 @@ define <4 x float> @sinh_4x(ptr %a) {
824
818
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
825
819
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
826
820
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
827
- ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
828
- ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
829
- ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
830
- ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
831
- ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
832
- ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
833
- ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
821
+ ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
822
+ ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP3]])
823
+ ; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
824
+ ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
825
+ ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
834
826
;
835
827
entry:
836
828
%0 = load <4 x float >, ptr %a , align 16
@@ -903,13 +895,11 @@ define <4 x float> @cosh_4x(ptr %a) {
903
895
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
904
896
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
905
897
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
906
- ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
907
- ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
908
- ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
909
- ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
910
- ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
911
- ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
912
- ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
898
+ ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
899
+ ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP3]])
900
+ ; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
901
+ ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
902
+ ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
913
903
;
914
904
entry:
915
905
%0 = load <4 x float >, ptr %a , align 16
@@ -982,13 +972,11 @@ define <4 x float> @tanh_4x(ptr %a) {
982
972
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
983
973
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
984
974
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
985
- ; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
986
- ; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
987
- ; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
988
- ; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
989
- ; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
990
- ; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
991
- ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
975
+ ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
976
+ ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP3]])
977
+ ; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
978
+ ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
979
+ ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
992
980
;
993
981
entry:
994
982
%0 = load <4 x float >, ptr %a , align 16
0 commit comments