Skip to content

Commit 86e6eb7

Browse files
committed
[AArch64] Regenerate accelerate-vector-functions.ll tests
1 parent 5560892 commit 86e6eb7

File tree

2 files changed: +60 -84 lines changed

2 files changed: +60 -84 lines changed

llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll

Lines changed: 30 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -627,13 +627,11 @@ define <4 x float> @int_asin_4x(ptr %a) {
627627
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
628628
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
629629
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
630-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
631-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
632-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
633-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
634-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
635-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
636-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
630+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
631+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP3]])
632+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
633+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
634+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
637635
;
638636
entry:
639637
%0 = load <4 x float>, ptr %a, align 16
@@ -708,13 +706,11 @@ define <4 x float> @int_acos_4x(ptr %a) {
708706
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
709707
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
710708
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
711-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
712-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
713-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
714-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
715-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
716-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
717-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
709+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
710+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP3]])
711+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
712+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
713+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
718714
;
719715
entry:
720716
%0 = load <4 x float>, ptr %a, align 16
@@ -789,13 +785,11 @@ define <4 x float> @int_atan_4x(ptr %a) {
789785
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
790786
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
791787
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
792-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
793-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
794-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
795-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
796-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
797-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
798-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
788+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
789+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP3]])
790+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
791+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
792+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
799793
;
800794
entry:
801795
%0 = load <4 x float>, ptr %a, align 16
@@ -870,13 +864,11 @@ define <4 x float> @int_sinh_4x(ptr %a) {
870864
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
871865
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
872866
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
873-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
874-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
875-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
876-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
877-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
878-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
879-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
867+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
868+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP3]])
869+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
870+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
871+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
880872
;
881873
entry:
882874
%0 = load <4 x float>, ptr %a, align 16
@@ -951,13 +943,11 @@ define <4 x float> @int_cosh_4x(ptr %a) {
951943
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
952944
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
953945
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
954-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
955-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
956-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
957-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
958-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
959-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
960-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
946+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
947+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP3]])
948+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
949+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
950+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
961951
;
962952
entry:
963953
%0 = load <4 x float>, ptr %a, align 16
@@ -1032,13 +1022,11 @@ define <4 x float> @int_tanh_4x(ptr %a) {
10321022
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
10331023
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
10341024
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1035-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1036-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
1037-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1038-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1039-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
1040-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1041-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
1025+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
1026+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP3]])
1027+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
1028+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1029+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
10421030
;
10431031
entry:
10441032
%0 = load <4 x float>, ptr %a, align 16

llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll

Lines changed: 30 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -627,13 +627,11 @@ define <4 x float> @int_asin_4x(ptr %a) {
627627
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
628628
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
629629
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
630-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
631-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
632-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
633-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
634-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
635-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
636-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
630+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
631+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP3]])
632+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
633+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
634+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
637635
;
638636
entry:
639637
%0 = load <4 x float>, ptr %a, align 16
@@ -708,13 +706,11 @@ define <4 x float> @int_acos_4x(ptr %a) {
708706
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
709707
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
710708
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
711-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
712-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
713-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
714-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
715-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
716-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
717-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
709+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
710+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP3]])
711+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
712+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
713+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
718714
;
719715
entry:
720716
%0 = load <4 x float>, ptr %a, align 16
@@ -789,13 +785,11 @@ define <4 x float> @int_atan_4x(ptr %a) {
789785
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
790786
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
791787
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
792-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
793-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
794-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
795-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
796-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
797-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
798-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
788+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
789+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP3]])
790+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
791+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
792+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
799793
;
800794
entry:
801795
%0 = load <4 x float>, ptr %a, align 16
@@ -870,13 +864,11 @@ define <4 x float> @int_sinh_4x(ptr %a) {
870864
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
871865
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
872866
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
873-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
874-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
875-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
876-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
877-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
878-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
879-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
867+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
868+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP3]])
869+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
870+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
871+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
880872
;
881873
entry:
882874
%0 = load <4 x float>, ptr %a, align 16
@@ -951,13 +943,11 @@ define <4 x float> @int_cosh_4x(ptr %a) {
951943
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
952944
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
953945
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
954-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
955-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
956-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
957-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
958-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
959-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
960-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
946+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
947+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP3]])
948+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
949+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
950+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
961951
;
962952
entry:
963953
%0 = load <4 x float>, ptr %a, align 16
@@ -1032,13 +1022,11 @@ define <4 x float> @int_tanh_4x(ptr %a) {
10321022
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
10331023
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
10341024
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1035-
; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1036-
; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
1037-
; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1038-
; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1039-
; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
1040-
; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1041-
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
1025+
; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
1026+
; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP3]])
1027+
; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
1028+
; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1029+
; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
10421030
;
10431031
entry:
10441032
%0 = load <4 x float>, ptr %a, align 16

0 commit comments

Comments
 (0)