Skip to content

Commit 0b9583c

Browse files
author
git apple-llvm automerger
committed
Merge commit 'de04ad7a8d19' from apple/master into swift/master-next
2 parents 5d2b4d0 + de04ad7 commit 0b9583c

File tree

5 files changed

+273
-254
lines changed

5 files changed

+273
-254
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34750,6 +34750,23 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3475034750
return true;
3475134751
}
3475234752

34753+
// If we don't demand all elements, then attempt to combine to a simpler
34754+
// shuffle.
34755+
// TODO: Handle other depths, but first we need to handle the fact that
34756+
// it might combine to the same shuffle.
34757+
if (!DemandedElts.isAllOnesValue() && Depth == 0) {
34758+
SmallVector<int, 64> DemandedMask(NumElts, SM_SentinelUndef);
34759+
for (int i = 0; i != NumElts; ++i)
34760+
if (DemandedElts[i])
34761+
DemandedMask[i] = i;
34762+
34763+
SDValue NewShuffle = combineX86ShufflesRecursively(
34764+
{Op}, 0, Op, DemandedMask, {}, Depth, /*HasVarMask*/ false,
34765+
/*AllowVarMask*/ true, TLO.DAG, Subtarget);
34766+
if (NewShuffle)
34767+
return TLO.CombineTo(Op, NewShuffle);
34768+
}
34769+
3475334770
return false;
3475434771
}
3475534772

llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8320,7 +8320,7 @@ define float @test_mm512_reduce_max_ps(<16 x float> %__W) {
83208320
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
83218321
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
83228322
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
8323-
; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
8323+
; X86-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
83248324
; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm0
83258325
; X86-NEXT: vmovss %xmm0, (%esp)
83268326
; X86-NEXT: flds (%esp)
@@ -8337,7 +8337,7 @@ define float @test_mm512_reduce_max_ps(<16 x float> %__W) {
83378337
; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0
83388338
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
83398339
; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0
8340-
; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
8340+
; X64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
83418341
; X64-NEXT: vmaxss %xmm1, %xmm0, %xmm0
83428342
; X64-NEXT: vzeroupper
83438343
; X64-NEXT: retq
@@ -8446,7 +8446,7 @@ define float @test_mm512_reduce_min_ps(<16 x float> %__W) {
84468446
; X86-NEXT: vminps %xmm1, %xmm0, %xmm0
84478447
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
84488448
; X86-NEXT: vminps %xmm1, %xmm0, %xmm0
8449-
; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
8449+
; X86-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
84508450
; X86-NEXT: vminss %xmm1, %xmm0, %xmm0
84518451
; X86-NEXT: vmovss %xmm0, (%esp)
84528452
; X86-NEXT: flds (%esp)
@@ -8463,7 +8463,7 @@ define float @test_mm512_reduce_min_ps(<16 x float> %__W) {
84638463
; X64-NEXT: vminps %xmm1, %xmm0, %xmm0
84648464
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
84658465
; X64-NEXT: vminps %xmm1, %xmm0, %xmm0
8466-
; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
8466+
; X64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
84678467
; X64-NEXT: vminss %xmm1, %xmm0, %xmm0
84688468
; X64-NEXT: vzeroupper
84698469
; X64-NEXT: retq
@@ -8624,7 +8624,7 @@ define float @test_mm512_mask_reduce_max_ps(i16 zeroext %__M, <16 x float> %__W)
86248624
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
86258625
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
86268626
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
8627-
; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
8627+
; X86-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
86288628
; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm0
86298629
; X86-NEXT: vmovss %xmm0, (%esp)
86308630
; X86-NEXT: flds (%esp)
@@ -8644,7 +8644,7 @@ define float @test_mm512_mask_reduce_max_ps(i16 zeroext %__M, <16 x float> %__W)
86448644
; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0
86458645
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
86468646
; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0
8647-
; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
8647+
; X64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
86488648
; X64-NEXT: vmaxss %xmm1, %xmm0, %xmm0
86498649
; X64-NEXT: vzeroupper
86508650
; X64-NEXT: retq
@@ -8809,7 +8809,7 @@ define float @test_mm512_mask_reduce_min_ps(i16 zeroext %__M, <16 x float> %__W)
88098809
; X86-NEXT: vminps %xmm1, %xmm0, %xmm0
88108810
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
88118811
; X86-NEXT: vminps %xmm1, %xmm0, %xmm0
8812-
; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
8812+
; X86-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
88138813
; X86-NEXT: vminss %xmm1, %xmm0, %xmm0
88148814
; X86-NEXT: vmovss %xmm0, (%esp)
88158815
; X86-NEXT: flds (%esp)
@@ -8829,7 +8829,7 @@ define float @test_mm512_mask_reduce_min_ps(i16 zeroext %__M, <16 x float> %__W)
88298829
; X64-NEXT: vminps %xmm1, %xmm0, %xmm0
88308830
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
88318831
; X64-NEXT: vminps %xmm1, %xmm0, %xmm0
8832-
; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
8832+
; X64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
88338833
; X64-NEXT: vminss %xmm1, %xmm0, %xmm0
88348834
; X64-NEXT: vzeroupper
88358835
; X64-NEXT: retq

llvm/test/CodeGen/X86/shrink_vmul.ll

Lines changed: 104 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -2085,85 +2085,88 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
20852085
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
20862086
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
20872087
; X86-SSE-NEXT: movdqa (%eax), %xmm5
2088-
; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2088+
; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
20892089
; X86-SSE-NEXT: movdqa (%ecx), %xmm2
20902090
; X86-SSE-NEXT: movdqa 16(%ecx), %xmm6
2091-
; X86-SSE-NEXT: pxor %xmm0, %xmm0
2092-
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2091+
; X86-SSE-NEXT: pxor %xmm1, %xmm1
2092+
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
20932093
; X86-SSE-NEXT: movdqa %xmm5, %xmm4
2094-
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
2095-
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
2096-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm5[3,1,2,3]
2097-
; X86-SSE-NEXT: movd %xmm0, %eax
2098-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm6[3,1,2,3]
2099-
; X86-SSE-NEXT: movd %xmm0, %esi
2094+
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3]
2095+
; X86-SSE-NEXT: movdqa %xmm5, %xmm3
2096+
; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
2097+
; X86-SSE-NEXT: movdqa %xmm5, %xmm1
2098+
; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2099+
; X86-SSE-NEXT: movd %xmm1, %eax
2100+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm6[3,1,2,3]
2101+
; X86-SSE-NEXT: movd %xmm1, %esi
21002102
; X86-SSE-NEXT: xorl %edx, %edx
21012103
; X86-SSE-NEXT: divl %esi
2102-
; X86-SSE-NEXT: movd %edx, %xmm0
2103-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[2,3,0,1]
2104-
; X86-SSE-NEXT: movd %xmm3, %eax
2105-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1]
2106-
; X86-SSE-NEXT: movd %xmm3, %esi
2104+
; X86-SSE-NEXT: movd %edx, %xmm1
2105+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
2106+
; X86-SSE-NEXT: movd %xmm7, %eax
2107+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,0,1]
2108+
; X86-SSE-NEXT: movd %xmm7, %esi
21072109
; X86-SSE-NEXT: xorl %edx, %edx
21082110
; X86-SSE-NEXT: divl %esi
21092111
; X86-SSE-NEXT: movd %edx, %xmm7
2110-
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1]
2111-
; X86-SSE-NEXT: movd %xmm5, %eax
2112+
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm1[0],xmm7[1],xmm1[1]
2113+
; X86-SSE-NEXT: movd %xmm3, %eax
21122114
; X86-SSE-NEXT: movd %xmm6, %esi
21132115
; X86-SSE-NEXT: xorl %edx, %edx
21142116
; X86-SSE-NEXT: divl %esi
2117+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
2118+
; X86-SSE-NEXT: movd %xmm3, %eax
21152119
; X86-SSE-NEXT: movd %edx, %xmm3
2116-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3]
2117-
; X86-SSE-NEXT: movd %xmm5, %eax
2118-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3]
2119-
; X86-SSE-NEXT: movd %xmm5, %esi
2120-
; X86-SSE-NEXT: xorl %edx, %edx
2121-
; X86-SSE-NEXT: divl %esi
2122-
; X86-SSE-NEXT: movd %edx, %xmm5
2123-
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
2124-
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
2125-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3]
2126-
; X86-SSE-NEXT: movd %xmm6, %eax
2127-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3]
2120+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,2,3]
21282121
; X86-SSE-NEXT: movd %xmm6, %esi
21292122
; X86-SSE-NEXT: xorl %edx, %edx
21302123
; X86-SSE-NEXT: divl %esi
21312124
; X86-SSE-NEXT: movd %edx, %xmm6
2132-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1]
2125+
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1]
2126+
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
2127+
; X86-SSE-NEXT: movdqa %xmm5, %xmm7
2128+
; X86-SSE-NEXT: psrld $16, %xmm7
21332129
; X86-SSE-NEXT: movd %xmm7, %eax
2134-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1]
2130+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,2,3]
21352131
; X86-SSE-NEXT: movd %xmm7, %esi
21362132
; X86-SSE-NEXT: xorl %edx, %edx
21372133
; X86-SSE-NEXT: divl %esi
21382134
; X86-SSE-NEXT: movd %edx, %xmm7
2139-
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
21402135
; X86-SSE-NEXT: movd %xmm4, %eax
21412136
; X86-SSE-NEXT: movd %xmm2, %esi
21422137
; X86-SSE-NEXT: xorl %edx, %edx
21432138
; X86-SSE-NEXT: divl %esi
2144-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3]
2139+
; X86-SSE-NEXT: psrlq $48, %xmm5
2140+
; X86-SSE-NEXT: movd %xmm5, %eax
2141+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm2[3,1,2,3]
2142+
; X86-SSE-NEXT: movd %xmm5, %esi
2143+
; X86-SSE-NEXT: movd %edx, %xmm5
2144+
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
2145+
; X86-SSE-NEXT: xorl %edx, %edx
2146+
; X86-SSE-NEXT: divl %esi
2147+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
21452148
; X86-SSE-NEXT: movd %xmm4, %eax
21462149
; X86-SSE-NEXT: movd %edx, %xmm4
2147-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
2150+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
21482151
; X86-SSE-NEXT: movd %xmm2, %esi
21492152
; X86-SSE-NEXT: xorl %edx, %edx
21502153
; X86-SSE-NEXT: divl %esi
21512154
; X86-SSE-NEXT: movd %edx, %xmm2
2155+
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
2156+
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0]
2157+
; X86-SSE-NEXT: movd %xmm0, %eax
2158+
; X86-SSE-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,0],xmm4[0,0]
2159+
; X86-SSE-NEXT: movdqa {{.*#+}} xmm0 = [8199,8199,8199,8199]
2160+
; X86-SSE-NEXT: pmuludq %xmm0, %xmm7
2161+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm7[0,2,2,3]
2162+
; X86-SSE-NEXT: pmuludq %xmm0, %xmm5
2163+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[0,2,2,3]
21522164
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
2153-
; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
2154-
; X86-SSE-NEXT: movd %xmm1, %eax
2155-
; X86-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0]
2156-
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
2157-
; X86-SSE-NEXT: pmuludq %xmm1, %xmm4
2158-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
2159-
; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
2160-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2161-
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
2162-
; X86-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm0[0,0]
2163-
; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
2164-
; X86-SSE-NEXT: pmuludq %xmm1, %xmm5
2165+
; X86-SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,0],xmm1[0,0]
2166+
; X86-SSE-NEXT: pmuludq %xmm0, %xmm3
2167+
; X86-SSE-NEXT: pmuludq %xmm0, %xmm6
21652168
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
2166-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3]
2169+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,2,2,3]
21672170
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
21682171
; X86-SSE-NEXT: xorl %edx, %edx
21692172
; X86-SSE-NEXT: divl 32(%ecx)
@@ -2324,92 +2327,95 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
23242327
; X64-SSE-LABEL: PR34947:
23252328
; X64-SSE: # %bb.0:
23262329
; X64-SSE-NEXT: movdqa (%rdi), %xmm5
2327-
; X64-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2330+
; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
23282331
; X64-SSE-NEXT: movdqa (%rsi), %xmm2
23292332
; X64-SSE-NEXT: movdqa 16(%rsi), %xmm6
2330-
; X64-SSE-NEXT: pxor %xmm0, %xmm0
2331-
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2333+
; X64-SSE-NEXT: pxor %xmm1, %xmm1
2334+
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
23322335
; X64-SSE-NEXT: movdqa %xmm5, %xmm3
2333-
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
2334-
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
2335-
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm5[3,1,2,3]
2336-
; X64-SSE-NEXT: movd %xmm0, %eax
2337-
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm6[3,1,2,3]
2338-
; X64-SSE-NEXT: movd %xmm0, %ecx
2336+
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
2337+
; X64-SSE-NEXT: movdqa %xmm5, %xmm7
2338+
; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4],xmm1[4],xmm7[5],xmm1[5],xmm7[6],xmm1[6],xmm7[7],xmm1[7]
2339+
; X64-SSE-NEXT: movdqa %xmm5, %xmm1
2340+
; X64-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2341+
; X64-SSE-NEXT: movd %xmm1, %eax
2342+
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm6[3,1,2,3]
2343+
; X64-SSE-NEXT: movd %xmm1, %ecx
23392344
; X64-SSE-NEXT: xorl %edx, %edx
23402345
; X64-SSE-NEXT: divl %ecx
23412346
; X64-SSE-NEXT: movd %edx, %xmm8
2342-
; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,0,1]
2343-
; X64-SSE-NEXT: movd %xmm4, %eax
2344-
; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm6[2,3,0,1]
2345-
; X64-SSE-NEXT: movd %xmm4, %ecx
2347+
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm7[2,3,0,1]
2348+
; X64-SSE-NEXT: movd %xmm1, %eax
2349+
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm6[2,3,0,1]
2350+
; X64-SSE-NEXT: movd %xmm1, %ecx
23462351
; X64-SSE-NEXT: xorl %edx, %edx
23472352
; X64-SSE-NEXT: divl %ecx
2348-
; X64-SSE-NEXT: movd %edx, %xmm7
2349-
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1]
2350-
; X64-SSE-NEXT: movd %xmm5, %eax
2353+
; X64-SSE-NEXT: movd %edx, %xmm1
2354+
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm8[0],xmm1[1],xmm8[1]
2355+
; X64-SSE-NEXT: movd %xmm7, %eax
23512356
; X64-SSE-NEXT: movd %xmm6, %ecx
23522357
; X64-SSE-NEXT: xorl %edx, %edx
23532358
; X64-SSE-NEXT: divl %ecx
23542359
; X64-SSE-NEXT: movd %edx, %xmm4
2355-
; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3]
2356-
; X64-SSE-NEXT: movd %xmm5, %eax
2357-
; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3]
2358-
; X64-SSE-NEXT: movd %xmm5, %ecx
2359-
; X64-SSE-NEXT: xorl %edx, %edx
2360-
; X64-SSE-NEXT: divl %ecx
2361-
; X64-SSE-NEXT: movd %edx, %xmm5
2362-
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
2363-
; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
2364-
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3]
2365-
; X64-SSE-NEXT: movd %xmm6, %eax
2366-
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3]
2360+
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,2,3]
2361+
; X64-SSE-NEXT: movd %xmm7, %eax
2362+
; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,2,3]
23672363
; X64-SSE-NEXT: movd %xmm6, %ecx
23682364
; X64-SSE-NEXT: xorl %edx, %edx
23692365
; X64-SSE-NEXT: divl %ecx
23702366
; X64-SSE-NEXT: movd %edx, %xmm6
2371-
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
2372-
; X64-SSE-NEXT: movd %xmm7, %eax
2373-
; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1]
2374-
; X64-SSE-NEXT: movd %xmm7, %ecx
2367+
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
2368+
; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm1[0]
2369+
; X64-SSE-NEXT: movdqa %xmm5, %xmm1
2370+
; X64-SSE-NEXT: psrld $16, %xmm1
2371+
; X64-SSE-NEXT: movd %xmm1, %eax
2372+
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
2373+
; X64-SSE-NEXT: movd %xmm1, %ecx
23752374
; X64-SSE-NEXT: xorl %edx, %edx
23762375
; X64-SSE-NEXT: divl %ecx
23772376
; X64-SSE-NEXT: movd %edx, %xmm7
2378-
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
23792377
; X64-SSE-NEXT: movd %xmm3, %eax
23802378
; X64-SSE-NEXT: movd %xmm2, %ecx
23812379
; X64-SSE-NEXT: xorl %edx, %edx
23822380
; X64-SSE-NEXT: divl %ecx
2383-
; X64-SSE-NEXT: movd %edx, %xmm0
2384-
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
2381+
; X64-SSE-NEXT: movd %edx, %xmm1
2382+
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1]
2383+
; X64-SSE-NEXT: psrlq $48, %xmm5
2384+
; X64-SSE-NEXT: movd %xmm5, %eax
2385+
; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm2[3,1,2,3]
2386+
; X64-SSE-NEXT: movd %xmm5, %ecx
2387+
; X64-SSE-NEXT: xorl %edx, %edx
2388+
; X64-SSE-NEXT: divl %ecx
2389+
; X64-SSE-NEXT: movd %edx, %xmm5
2390+
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
23852391
; X64-SSE-NEXT: movd %xmm3, %eax
2386-
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
2392+
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
23872393
; X64-SSE-NEXT: movd %xmm2, %ecx
23882394
; X64-SSE-NEXT: xorl %edx, %edx
23892395
; X64-SSE-NEXT: divl %ecx
23902396
; X64-SSE-NEXT: movd %edx, %xmm2
2391-
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2392-
; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm7[0]
2393-
; X64-SSE-NEXT: movd %xmm1, %eax
2397+
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
2398+
; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2399+
; X64-SSE-NEXT: movd %xmm0, %eax
23942400
; X64-SSE-NEXT: xorl %edx, %edx
23952401
; X64-SSE-NEXT: divl 32(%rsi)
2396-
; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
2397-
; X64-SSE-NEXT: pmuludq %xmm1, %xmm0
2398-
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2399-
; X64-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0]
2400-
; X64-SSE-NEXT: pmuludq %xmm1, %xmm2
2401-
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2402-
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2403-
; X64-SSE-NEXT: pmuludq %xmm1, %xmm4
2402+
; X64-SSE-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,0],xmm5[0,0]
2403+
; X64-SSE-NEXT: movdqa {{.*#+}} xmm0 = [8199,8199,8199,8199]
2404+
; X64-SSE-NEXT: pmuludq %xmm0, %xmm7
2405+
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm7[0,2,2,3]
2406+
; X64-SSE-NEXT: pmuludq %xmm0, %xmm1
2407+
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2408+
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
2409+
; X64-SSE-NEXT: pmuludq %xmm0, %xmm4
24042410
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3]
2405-
; X64-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm8[0,0]
2406-
; X64-SSE-NEXT: pmuludq %xmm1, %xmm5
2407-
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3]
2408-
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2411+
; X64-SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,0],xmm8[0,0]
2412+
; X64-SSE-NEXT: pmuludq %xmm0, %xmm6
2413+
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,2,2,3]
2414+
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
24092415
; X64-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007
24102416
; X64-SSE-NEXT: movl %eax, (%rax)
24112417
; X64-SSE-NEXT: movdqa %xmm2, (%rax)
2412-
; X64-SSE-NEXT: movdqa %xmm0, (%rax)
2418+
; X64-SSE-NEXT: movdqa %xmm1, (%rax)
24132419
; X64-SSE-NEXT: retq
24142420
;
24152421
; X64-AVX1-LABEL: PR34947:

0 commit comments

Comments
 (0)