Skip to content

Commit bf570f1

Browse files
author
git apple-llvm automerger
committed
Merge commit '53283cc2f1dd' from llvm.org/main into apple/main
2 parents 8c427a4 + 53283cc commit bf570f1

File tree

2 files changed

+29
-43
lines changed

2 files changed

+29
-43
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37134,6 +37134,8 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
3713437134
break;
3713537135
LLVM_FALLTHROUGH;
3713637136
}
37137+
case X86ISD::MOVSD:
37138+
case X86ISD::MOVSS:
3713737139
case X86ISD::BLENDI:
3713837140
case X86ISD::SHUFP:
3713937141
case X86ISD::UNPCKH:

llvm/test/CodeGen/X86/horizontal-sum.ll

Lines changed: 27 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -20,18 +20,9 @@ define <4 x float> @pair_sum_v4f32_v4f32(<4 x float> %0, <4 x float> %1, <4 x fl
2020
; SSSE3-SLOW-LABEL: pair_sum_v4f32_v4f32:
2121
; SSSE3-SLOW: # %bb.0:
2222
; SSSE3-SLOW-NEXT: haddps %xmm1, %xmm0
23-
; SSSE3-SLOW-NEXT: movaps %xmm0, %xmm1
24-
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
25-
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3,1,3]
26-
; SSSE3-SLOW-NEXT: addps %xmm1, %xmm0
27-
; SSSE3-SLOW-NEXT: haddps %xmm2, %xmm2
28-
; SSSE3-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
29-
; SSSE3-SLOW-NEXT: addps %xmm2, %xmm1
30-
; SSSE3-SLOW-NEXT: haddps %xmm3, %xmm3
31-
; SSSE3-SLOW-NEXT: movshdup {{.*#+}} xmm2 = xmm3[1,1,3,3]
32-
; SSSE3-SLOW-NEXT: addps %xmm3, %xmm2
33-
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
34-
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
23+
; SSSE3-SLOW-NEXT: haddps %xmm2, %xmm3
24+
; SSSE3-SLOW-NEXT: haddps %xmm3, %xmm0
25+
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,3,2]
3526
; SSSE3-SLOW-NEXT: retq
3627
;
3728
; SSSE3-FAST-LABEL: pair_sum_v4f32_v4f32:
@@ -106,13 +97,11 @@ define <4 x i32> @pair_sum_v4i32_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2,
10697
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,1,3]
10798
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,1,3]
10899
; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm0
109-
; SSSE3-SLOW-NEXT: phaddd %xmm2, %xmm2
110-
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1]
111-
; SSSE3-SLOW-NEXT: paddd %xmm2, %xmm1
112-
; SSSE3-SLOW-NEXT: phaddd %xmm3, %xmm3
113-
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,1,1]
100+
; SSSE3-SLOW-NEXT: phaddd %xmm2, %xmm3
101+
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
102+
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,1,0,1]
103+
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[1,1]
114104
; SSSE3-SLOW-NEXT: paddd %xmm3, %xmm2
115-
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
116105
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
117106
; SSSE3-SLOW-NEXT: retq
118107
;
@@ -699,23 +688,20 @@ define <4 x i32> @sequential_sum_v4i32_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i3
699688
; SSSE3-SLOW-NEXT: phaddd %xmm1, %xmm4
700689
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
701690
; SSSE3-SLOW-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
702-
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
703-
; SSSE3-SLOW-NEXT: paddd %xmm4, %xmm1
704-
; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm0
691+
; SSSE3-SLOW-NEXT: paddd %xmm0, %xmm4
705692
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,1,1]
706-
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm4 = xmm2[3,3,3,3]
707693
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,1,0,1]
708694
; SSSE3-SLOW-NEXT: paddd %xmm2, %xmm5
709-
; SSSE3-SLOW-NEXT: paddd %xmm4, %xmm5
710695
; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm5
711696
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
712-
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
713-
; SSSE3-SLOW-NEXT: paddd %xmm3, %xmm2
714-
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
715-
; SSSE3-SLOW-NEXT: paddd %xmm2, %xmm3
716-
; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm3
717-
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm5[2,3]
718-
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,0]
697+
; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm6 = xmm3[2,3,2,3]
698+
; SSSE3-SLOW-NEXT: paddd %xmm3, %xmm6
699+
; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm6
700+
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,1],xmm5[2,3]
701+
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,1],xmm6[2,0]
702+
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm2[3,3]
703+
; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm3[2,0]
704+
; SSSE3-SLOW-NEXT: paddd %xmm4, %xmm0
719705
; SSSE3-SLOW-NEXT: retq
720706
;
721707
; SSSE3-FAST-LABEL: sequential_sum_v4i32_v4i32:
@@ -724,21 +710,19 @@ define <4 x i32> @sequential_sum_v4i32_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i3
724710
; SSSE3-FAST-NEXT: phaddd %xmm1, %xmm4
725711
; SSSE3-FAST-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
726712
; SSSE3-FAST-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
727-
; SSSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
728-
; SSSE3-FAST-NEXT: paddd %xmm4, %xmm1
729-
; SSSE3-FAST-NEXT: paddd %xmm1, %xmm0
713+
; SSSE3-FAST-NEXT: paddd %xmm0, %xmm4
730714
; SSSE3-FAST-NEXT: movdqa %xmm2, %xmm1
731715
; SSSE3-FAST-NEXT: phaddd %xmm2, %xmm1
732-
; SSSE3-FAST-NEXT: pshufd {{.*#+}} xmm4 = xmm2[3,3,3,3]
733-
; SSSE3-FAST-NEXT: paddd %xmm1, %xmm4
734-
; SSSE3-FAST-NEXT: paddd %xmm2, %xmm4
735-
; SSSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
736-
; SSSE3-FAST-NEXT: pshufd {{.*#+}} xmm2 = xmm3[3,3,3,3]
737-
; SSSE3-FAST-NEXT: phaddd %xmm3, %xmm3
738-
; SSSE3-FAST-NEXT: paddd %xmm3, %xmm2
739-
; SSSE3-FAST-NEXT: paddd %xmm1, %xmm2
740-
; SSSE3-FAST-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3]
741-
; SSSE3-FAST-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
716+
; SSSE3-FAST-NEXT: paddd %xmm2, %xmm1
717+
; SSSE3-FAST-NEXT: movdqa %xmm3, %xmm5
718+
; SSSE3-FAST-NEXT: phaddd %xmm3, %xmm5
719+
; SSSE3-FAST-NEXT: pshufd {{.*#+}} xmm6 = xmm3[2,3,2,3]
720+
; SSSE3-FAST-NEXT: paddd %xmm5, %xmm6
721+
; SSSE3-FAST-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,1],xmm1[2,3]
722+
; SSSE3-FAST-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,1],xmm6[2,0]
723+
; SSSE3-FAST-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm2[3,3]
724+
; SSSE3-FAST-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm3[2,0]
725+
; SSSE3-FAST-NEXT: paddd %xmm4, %xmm0
742726
; SSSE3-FAST-NEXT: retq
743727
;
744728
; AVX1-SLOW-LABEL: sequential_sum_v4i32_v4i32:

0 commit comments

Comments
 (0)