@@ -2085,85 +2085,88 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
2085
2085
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
2086
2086
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
2087
2087
; X86-SSE-NEXT: movdqa (%eax), %xmm5
2088
- ; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2088
+ ; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2089
2089
; X86-SSE-NEXT: movdqa (%ecx), %xmm2
2090
2090
; X86-SSE-NEXT: movdqa 16(%ecx), %xmm6
2091
- ; X86-SSE-NEXT: pxor %xmm0 , %xmm0
2092
- ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0], xmm0[0],xmm1[1 ],xmm0[1],xmm1[2 ],xmm0[2],xmm1[3 ],xmm0[3]
2091
+ ; X86-SSE-NEXT: pxor %xmm1 , %xmm1
2092
+ ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0 ],xmm0[1],xmm1[1 ],xmm0[2],xmm1[2 ],xmm0[3],xmm1 [3]
2093
2093
; X86-SSE-NEXT: movdqa %xmm5, %xmm4
2094
- ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
2095
- ; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
2096
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm5[3,1,2,3]
2097
- ; X86-SSE-NEXT: movd %xmm0, %eax
2098
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm6[3,1,2,3]
2099
- ; X86-SSE-NEXT: movd %xmm0, %esi
2094
+ ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3]
2095
+ ; X86-SSE-NEXT: movdqa %xmm5, %xmm3
2096
+ ; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
2097
+ ; X86-SSE-NEXT: movdqa %xmm5, %xmm1
2098
+ ; X86-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2099
+ ; X86-SSE-NEXT: movd %xmm1, %eax
2100
+ ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm6[3,1,2,3]
2101
+ ; X86-SSE-NEXT: movd %xmm1, %esi
2100
2102
; X86-SSE-NEXT: xorl %edx, %edx
2101
2103
; X86-SSE-NEXT: divl %esi
2102
- ; X86-SSE-NEXT: movd %edx, %xmm0
2103
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5 [2,3,0,1]
2104
- ; X86-SSE-NEXT: movd %xmm3 , %eax
2105
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1]
2106
- ; X86-SSE-NEXT: movd %xmm3 , %esi
2104
+ ; X86-SSE-NEXT: movd %edx, %xmm1
2105
+ ; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3 [2,3,0,1]
2106
+ ; X86-SSE-NEXT: movd %xmm7 , %eax
2107
+ ; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm6[2,3,0,1]
2108
+ ; X86-SSE-NEXT: movd %xmm7 , %esi
2107
2109
; X86-SSE-NEXT: xorl %edx, %edx
2108
2110
; X86-SSE-NEXT: divl %esi
2109
2111
; X86-SSE-NEXT: movd %edx, %xmm7
2110
- ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm0 [0],xmm7[1],xmm0 [1]
2111
- ; X86-SSE-NEXT: movd %xmm5 , %eax
2112
+ ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm1 [0],xmm7[1],xmm1 [1]
2113
+ ; X86-SSE-NEXT: movd %xmm3 , %eax
2112
2114
; X86-SSE-NEXT: movd %xmm6, %esi
2113
2115
; X86-SSE-NEXT: xorl %edx, %edx
2114
2116
; X86-SSE-NEXT: divl %esi
2117
+ ; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
2118
+ ; X86-SSE-NEXT: movd %xmm3, %eax
2115
2119
; X86-SSE-NEXT: movd %edx, %xmm3
2116
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3]
2117
- ; X86-SSE-NEXT: movd %xmm5, %eax
2118
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3]
2119
- ; X86-SSE-NEXT: movd %xmm5, %esi
2120
- ; X86-SSE-NEXT: xorl %edx, %edx
2121
- ; X86-SSE-NEXT: divl %esi
2122
- ; X86-SSE-NEXT: movd %edx, %xmm5
2123
- ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
2124
- ; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
2125
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3]
2126
- ; X86-SSE-NEXT: movd %xmm6, %eax
2127
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3]
2120
+ ; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,2,3]
2128
2121
; X86-SSE-NEXT: movd %xmm6, %esi
2129
2122
; X86-SSE-NEXT: xorl %edx, %edx
2130
2123
; X86-SSE-NEXT: divl %esi
2131
2124
; X86-SSE-NEXT: movd %edx, %xmm6
2132
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1]
2125
+ ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1]
2126
+ ; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
2127
+ ; X86-SSE-NEXT: movdqa %xmm5, %xmm7
2128
+ ; X86-SSE-NEXT: psrld $16, %xmm7
2133
2129
; X86-SSE-NEXT: movd %xmm7, %eax
2134
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1 ]
2130
+ ; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,2,3 ]
2135
2131
; X86-SSE-NEXT: movd %xmm7, %esi
2136
2132
; X86-SSE-NEXT: xorl %edx, %edx
2137
2133
; X86-SSE-NEXT: divl %esi
2138
2134
; X86-SSE-NEXT: movd %edx, %xmm7
2139
- ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
2140
2135
; X86-SSE-NEXT: movd %xmm4, %eax
2141
2136
; X86-SSE-NEXT: movd %xmm2, %esi
2142
2137
; X86-SSE-NEXT: xorl %edx, %edx
2143
2138
; X86-SSE-NEXT: divl %esi
2144
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3]
2139
+ ; X86-SSE-NEXT: psrlq $48, %xmm5
2140
+ ; X86-SSE-NEXT: movd %xmm5, %eax
2141
+ ; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm2[3,1,2,3]
2142
+ ; X86-SSE-NEXT: movd %xmm5, %esi
2143
+ ; X86-SSE-NEXT: movd %edx, %xmm5
2144
+ ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
2145
+ ; X86-SSE-NEXT: xorl %edx, %edx
2146
+ ; X86-SSE-NEXT: divl %esi
2147
+ ; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
2145
2148
; X86-SSE-NEXT: movd %xmm4, %eax
2146
2149
; X86-SSE-NEXT: movd %edx, %xmm4
2147
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1, 2,3]
2150
+ ; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1 ]
2148
2151
; X86-SSE-NEXT: movd %xmm2, %esi
2149
2152
; X86-SSE-NEXT: xorl %edx, %edx
2150
2153
; X86-SSE-NEXT: divl %esi
2151
2154
; X86-SSE-NEXT: movd %edx, %xmm2
2155
+ ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
2156
+ ; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0]
2157
+ ; X86-SSE-NEXT: movd %xmm0, %eax
2158
+ ; X86-SSE-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,0],xmm4[0,0]
2159
+ ; X86-SSE-NEXT: movdqa {{.*#+}} xmm0 = [8199,8199,8199,8199]
2160
+ ; X86-SSE-NEXT: pmuludq %xmm0, %xmm7
2161
+ ; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm7[0,2,2,3]
2162
+ ; X86-SSE-NEXT: pmuludq %xmm0, %xmm5
2163
+ ; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[0,2,2,3]
2152
2164
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
2153
- ; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
2154
- ; X86-SSE-NEXT: movd %xmm1, %eax
2155
- ; X86-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0]
2156
- ; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199]
2157
- ; X86-SSE-NEXT: pmuludq %xmm1, %xmm4
2158
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
2159
- ; X86-SSE-NEXT: pmuludq %xmm1, %xmm2
2160
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2161
- ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
2162
- ; X86-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm0[0,0]
2163
- ; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
2164
- ; X86-SSE-NEXT: pmuludq %xmm1, %xmm5
2165
+ ; X86-SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,0],xmm1[0,0]
2166
+ ; X86-SSE-NEXT: pmuludq %xmm0, %xmm3
2167
+ ; X86-SSE-NEXT: pmuludq %xmm0, %xmm6
2165
2168
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
2166
- ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5 [0,2,2,3]
2169
+ ; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm6 [0,2,2,3]
2167
2170
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2168
2171
; X86-SSE-NEXT: xorl %edx, %edx
2169
2172
; X86-SSE-NEXT: divl 32(%ecx)
@@ -2324,92 +2327,95 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
2324
2327
; X64-SSE-LABEL: PR34947:
2325
2328
; X64-SSE: # %bb.0:
2326
2329
; X64-SSE-NEXT: movdqa (%rdi), %xmm5
2327
- ; X64-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2330
+ ; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2328
2331
; X64-SSE-NEXT: movdqa (%rsi), %xmm2
2329
2332
; X64-SSE-NEXT: movdqa 16(%rsi), %xmm6
2330
- ; X64-SSE-NEXT: pxor %xmm0 , %xmm0
2331
- ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0], xmm0[0],xmm1[1 ],xmm0[1],xmm1[2 ],xmm0[2],xmm1[3 ],xmm0[3]
2333
+ ; X64-SSE-NEXT: pxor %xmm1 , %xmm1
2334
+ ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0 ],xmm0[1],xmm1[1 ],xmm0[2],xmm1[2 ],xmm0[3],xmm1 [3]
2332
2335
; X64-SSE-NEXT: movdqa %xmm5, %xmm3
2333
- ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
2334
- ; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
2335
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm5[3,1,2,3]
2336
- ; X64-SSE-NEXT: movd %xmm0, %eax
2337
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm6[3,1,2,3]
2338
- ; X64-SSE-NEXT: movd %xmm0, %ecx
2336
+ ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
2337
+ ; X64-SSE-NEXT: movdqa %xmm5, %xmm7
2338
+ ; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4],xmm1[4],xmm7[5],xmm1[5],xmm7[6],xmm1[6],xmm7[7],xmm1[7]
2339
+ ; X64-SSE-NEXT: movdqa %xmm5, %xmm1
2340
+ ; X64-SSE-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2341
+ ; X64-SSE-NEXT: movd %xmm1, %eax
2342
+ ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm6[3,1,2,3]
2343
+ ; X64-SSE-NEXT: movd %xmm1, %ecx
2339
2344
; X64-SSE-NEXT: xorl %edx, %edx
2340
2345
; X64-SSE-NEXT: divl %ecx
2341
2346
; X64-SSE-NEXT: movd %edx, %xmm8
2342
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5 [2,3,0,1]
2343
- ; X64-SSE-NEXT: movd %xmm4 , %eax
2344
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm6[2,3,0,1]
2345
- ; X64-SSE-NEXT: movd %xmm4 , %ecx
2347
+ ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm7 [2,3,0,1]
2348
+ ; X64-SSE-NEXT: movd %xmm1 , %eax
2349
+ ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm6[2,3,0,1]
2350
+ ; X64-SSE-NEXT: movd %xmm1 , %ecx
2346
2351
; X64-SSE-NEXT: xorl %edx, %edx
2347
2352
; X64-SSE-NEXT: divl %ecx
2348
- ; X64-SSE-NEXT: movd %edx, %xmm7
2349
- ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7 [0],xmm8[0],xmm7 [1],xmm8[1]
2350
- ; X64-SSE-NEXT: movd %xmm5 , %eax
2353
+ ; X64-SSE-NEXT: movd %edx, %xmm1
2354
+ ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1 [0],xmm8[0],xmm1 [1],xmm8[1]
2355
+ ; X64-SSE-NEXT: movd %xmm7 , %eax
2351
2356
; X64-SSE-NEXT: movd %xmm6, %ecx
2352
2357
; X64-SSE-NEXT: xorl %edx, %edx
2353
2358
; X64-SSE-NEXT: divl %ecx
2354
2359
; X64-SSE-NEXT: movd %edx, %xmm4
2355
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3]
2356
- ; X64-SSE-NEXT: movd %xmm5, %eax
2357
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3]
2358
- ; X64-SSE-NEXT: movd %xmm5, %ecx
2359
- ; X64-SSE-NEXT: xorl %edx, %edx
2360
- ; X64-SSE-NEXT: divl %ecx
2361
- ; X64-SSE-NEXT: movd %edx, %xmm5
2362
- ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
2363
- ; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
2364
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3]
2365
- ; X64-SSE-NEXT: movd %xmm6, %eax
2366
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3]
2360
+ ; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,2,3]
2361
+ ; X64-SSE-NEXT: movd %xmm7, %eax
2362
+ ; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,2,3]
2367
2363
; X64-SSE-NEXT: movd %xmm6, %ecx
2368
2364
; X64-SSE-NEXT: xorl %edx, %edx
2369
2365
; X64-SSE-NEXT: divl %ecx
2370
2366
; X64-SSE-NEXT: movd %edx, %xmm6
2371
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1]
2372
- ; X64-SSE-NEXT: movd %xmm7, %eax
2373
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1]
2374
- ; X64-SSE-NEXT: movd %xmm7, %ecx
2367
+ ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
2368
+ ; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm1[0]
2369
+ ; X64-SSE-NEXT: movdqa %xmm5, %xmm1
2370
+ ; X64-SSE-NEXT: psrld $16, %xmm1
2371
+ ; X64-SSE-NEXT: movd %xmm1, %eax
2372
+ ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
2373
+ ; X64-SSE-NEXT: movd %xmm1, %ecx
2375
2374
; X64-SSE-NEXT: xorl %edx, %edx
2376
2375
; X64-SSE-NEXT: divl %ecx
2377
2376
; X64-SSE-NEXT: movd %edx, %xmm7
2378
- ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
2379
2377
; X64-SSE-NEXT: movd %xmm3, %eax
2380
2378
; X64-SSE-NEXT: movd %xmm2, %ecx
2381
2379
; X64-SSE-NEXT: xorl %edx, %edx
2382
2380
; X64-SSE-NEXT: divl %ecx
2383
- ; X64-SSE-NEXT: movd %edx, %xmm0
2384
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
2381
+ ; X64-SSE-NEXT: movd %edx, %xmm1
2382
+ ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1]
2383
+ ; X64-SSE-NEXT: psrlq $48, %xmm5
2384
+ ; X64-SSE-NEXT: movd %xmm5, %eax
2385
+ ; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm2[3,1,2,3]
2386
+ ; X64-SSE-NEXT: movd %xmm5, %ecx
2387
+ ; X64-SSE-NEXT: xorl %edx, %edx
2388
+ ; X64-SSE-NEXT: divl %ecx
2389
+ ; X64-SSE-NEXT: movd %edx, %xmm5
2390
+ ; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
2385
2391
; X64-SSE-NEXT: movd %xmm3, %eax
2386
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1, 2,3]
2392
+ ; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1 ]
2387
2393
; X64-SSE-NEXT: movd %xmm2, %ecx
2388
2394
; X64-SSE-NEXT: xorl %edx, %edx
2389
2395
; X64-SSE-NEXT: divl %ecx
2390
2396
; X64-SSE-NEXT: movd %edx, %xmm2
2391
- ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0 [0],xmm2 [0],xmm0 [1],xmm2 [1]
2392
- ; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0 [0],xmm7 [0]
2393
- ; X64-SSE-NEXT: movd %xmm1 , %eax
2397
+ ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2 [0],xmm5 [0],xmm2 [1],xmm5 [1]
2398
+ ; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1 [0],xmm2 [0]
2399
+ ; X64-SSE-NEXT: movd %xmm0 , %eax
2394
2400
; X64-SSE-NEXT: xorl %edx, %edx
2395
2401
; X64-SSE-NEXT: divl 32(%rsi)
2396
- ; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199 ]
2397
- ; X64-SSE-NEXT: pmuludq %xmm1, % xmm0
2398
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2399
- ; X64-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2 [0,0],xmm6[0,0 ]
2400
- ; X64-SSE-NEXT: pmuludq %xmm1 , %xmm2
2401
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2 [0,2,2,3]
2402
- ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0 [0],xmm2[0],xmm0 [1],xmm2[1]
2403
- ; X64-SSE-NEXT: pmuludq %xmm1 , %xmm4
2402
+ ; X64-SSE-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,0],xmm5[0,0 ]
2403
+ ; X64-SSE-NEXT: movdqa {{.*#+}} xmm0 = [8199,8199,8199,8199]
2404
+ ; X64-SSE-NEXT: pmuludq % xmm0, %xmm7
2405
+ ; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm7 [0,2,2,3 ]
2406
+ ; X64-SSE-NEXT: pmuludq %xmm0 , %xmm1
2407
+ ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1 [0,2,2,3]
2408
+ ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1 [0],xmm2[0],xmm1 [1],xmm2[1]
2409
+ ; X64-SSE-NEXT: pmuludq %xmm0 , %xmm4
2404
2410
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3]
2405
- ; X64-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5 [0,0],xmm8[0,0]
2406
- ; X64-SSE-NEXT: pmuludq %xmm1 , %xmm5
2407
- ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5 [0,2,2,3]
2408
- ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1 [0],xmm2[1],xmm1 [1]
2411
+ ; X64-SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6 [0,0],xmm8[0,0]
2412
+ ; X64-SSE-NEXT: pmuludq %xmm0 , %xmm6
2413
+ ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm6 [0,2,2,3]
2414
+ ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0 [0],xmm2[1],xmm0 [1]
2409
2415
; X64-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007
2410
2416
; X64-SSE-NEXT: movl %eax, (%rax)
2411
2417
; X64-SSE-NEXT: movdqa %xmm2, (%rax)
2412
- ; X64-SSE-NEXT: movdqa %xmm0 , (%rax)
2418
+ ; X64-SSE-NEXT: movdqa %xmm1 , (%rax)
2413
2419
; X64-SSE-NEXT: retq
2414
2420
;
2415
2421
; X64-AVX1-LABEL: PR34947:
0 commit comments