1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
- ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32 -SSE2
2
+ ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2
3
3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=X64,X64-SSSE3
4
4
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
5
5
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
6
6
7
7
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
8
8
9
9
define i32 @t (ptr %val ) nounwind {
10
- ; X32 -SSE2-LABEL: t:
11
- ; X32 -SSE2: # %bb.0:
12
- ; X32 -SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
13
- ; X32 -SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,2,3]
14
- ; X32 -SSE2-NEXT: movd %xmm0, %eax
15
- ; X32 -SSE2-NEXT: retl
10
+ ; X86-SSE2-LABEL: t:
11
+ ; X86-SSE2: # %bb.0:
12
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
13
+ ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,2,3]
14
+ ; X86-SSE2-NEXT: movd %xmm0, %eax
15
+ ; X86-SSE2-NEXT: retl
16
16
;
17
17
; X64-SSSE3-LABEL: t:
18
18
; X64-SSSE3: # %bb.0:
@@ -33,9 +33,9 @@ define i32 @t(ptr %val) nounwind {
33
33
; Case where extractelement of load ends up as undef.
34
34
; (Making sure this doesn't crash.)
35
35
define i32 @t2 (ptr %xp ) {
36
- ; X32 -SSE2-LABEL: t2:
37
- ; X32 -SSE2: # %bb.0:
38
- ; X32 -SSE2-NEXT: retl
36
+ ; X86-SSE2-LABEL: t2:
37
+ ; X86-SSE2: # %bb.0:
38
+ ; X86-SSE2-NEXT: retl
39
39
;
40
40
; X64-LABEL: t2:
41
41
; X64: # %bb.0:
@@ -51,12 +51,12 @@ define i32 @t2(ptr %xp) {
51
51
; narrow load.
52
52
53
53
define void @t3 (ptr %a0 ) {
54
- ; X32 -SSE2-LABEL: t3:
55
- ; X32 -SSE2: # %bb.0: # %bb
56
- ; X32 -SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
57
- ; X32 -SSE2-NEXT: movups (%eax), %xmm0
58
- ; X32 -SSE2-NEXT: movhps %xmm0, (%eax)
59
- ; X32 -SSE2-NEXT: retl
54
+ ; X86-SSE2-LABEL: t3:
55
+ ; X86-SSE2: # %bb.0: # %bb
56
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
57
+ ; X86-SSE2-NEXT: movups (%eax), %xmm0
58
+ ; X86-SSE2-NEXT: movhps %xmm0, (%eax)
59
+ ; X86-SSE2-NEXT: retl
60
60
;
61
61
; X64-SSSE3-LABEL: t3:
62
62
; X64-SSSE3: # %bb.0: # %bb
81
81
; This is testing for an assertion - the extraction was assuming that the undef
82
82
; second shuffle operand was a post-bitcast type instead of a pre-bitcast type.
83
83
define i64 @t4 (ptr %a ) {
84
- ; X32 -SSE2-LABEL: t4:
85
- ; X32 -SSE2: # %bb.0:
86
- ; X32 -SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
87
- ; X32 -SSE2-NEXT: movdqa (%eax), %xmm0
88
- ; X32 -SSE2-NEXT: movd %xmm0, %eax
89
- ; X32 -SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
90
- ; X32 -SSE2-NEXT: movd %xmm0, %edx
91
- ; X32 -SSE2-NEXT: retl
84
+ ; X86-SSE2-LABEL: t4:
85
+ ; X86-SSE2: # %bb.0:
86
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
87
+ ; X86-SSE2-NEXT: movdqa (%eax), %xmm0
88
+ ; X86-SSE2-NEXT: movd %xmm0, %eax
89
+ ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
90
+ ; X86-SSE2-NEXT: movd %xmm0, %edx
91
+ ; X86-SSE2-NEXT: retl
92
92
;
93
93
; X64-LABEL: t4:
94
94
; X64: # %bb.0:
@@ -103,13 +103,13 @@ define i64 @t4(ptr %a) {
103
103
104
104
; Don't extract from a volatile.
105
105
define void @t5 (ptr %a0 , ptr %a1 ) {
106
- ; X32 -SSE2-LABEL: t5:
107
- ; X32 -SSE2: # %bb.0:
108
- ; X32 -SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
109
- ; X32 -SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
110
- ; X32 -SSE2-NEXT: movaps (%ecx), %xmm0
111
- ; X32 -SSE2-NEXT: movhps %xmm0, (%eax)
112
- ; X32 -SSE2-NEXT: retl
106
+ ; X86-SSE2-LABEL: t5:
107
+ ; X86-SSE2: # %bb.0:
108
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
109
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
110
+ ; X86-SSE2-NEXT: movaps (%ecx), %xmm0
111
+ ; X86-SSE2-NEXT: movhps %xmm0, (%eax)
112
+ ; X86-SSE2-NEXT: retl
113
113
;
114
114
; X64-SSSE3-LABEL: t5:
115
115
; X64-SSSE3: # %bb.0:
@@ -130,24 +130,24 @@ define void @t5(ptr%a0, ptr%a1) {
130
130
131
131
; Check for multiuse.
132
132
define float @t6 (ptr %a0 ) {
133
- ; X32 -SSE2-LABEL: t6:
134
- ; X32 -SSE2: # %bb.0:
135
- ; X32 -SSE2-NEXT: pushl %eax
136
- ; X32 -SSE2-NEXT: .cfi_def_cfa_offset 8
137
- ; X32 -SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
138
- ; X32 -SSE2-NEXT: movaps (%eax), %xmm0
139
- ; X32 -SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
140
- ; X32 -SSE2-NEXT: xorps %xmm1, %xmm1
141
- ; X32 -SSE2-NEXT: cmpeqss %xmm0, %xmm1
142
- ; X32 -SSE2-NEXT: movss {{.*#+}} xmm2 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
143
- ; X32 -SSE2-NEXT: andps %xmm1, %xmm2
144
- ; X32 -SSE2-NEXT: andnps %xmm0, %xmm1
145
- ; X32 -SSE2-NEXT: orps %xmm2, %xmm1
146
- ; X32 -SSE2-NEXT: movss %xmm1, (%esp)
147
- ; X32 -SSE2-NEXT: flds (%esp)
148
- ; X32 -SSE2-NEXT: popl %eax
149
- ; X32 -SSE2-NEXT: .cfi_def_cfa_offset 4
150
- ; X32 -SSE2-NEXT: retl
133
+ ; X86-SSE2-LABEL: t6:
134
+ ; X86-SSE2: # %bb.0:
135
+ ; X86-SSE2-NEXT: pushl %eax
136
+ ; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
137
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
138
+ ; X86-SSE2-NEXT: movaps (%eax), %xmm0
139
+ ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
140
+ ; X86-SSE2-NEXT: xorps %xmm1, %xmm1
141
+ ; X86-SSE2-NEXT: cmpeqss %xmm0, %xmm1
142
+ ; X86-SSE2-NEXT: movss {{.*#+}} xmm2 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
143
+ ; X86-SSE2-NEXT: andps %xmm1, %xmm2
144
+ ; X86-SSE2-NEXT: andnps %xmm0, %xmm1
145
+ ; X86-SSE2-NEXT: orps %xmm2, %xmm1
146
+ ; X86-SSE2-NEXT: movss %xmm1, (%esp)
147
+ ; X86-SSE2-NEXT: flds (%esp)
148
+ ; X86-SSE2-NEXT: popl %eax
149
+ ; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
150
+ ; X86-SSE2-NEXT: retl
151
151
;
152
152
; X64-SSSE3-LABEL: t6:
153
153
; X64-SSSE3: # %bb.0:
@@ -184,20 +184,20 @@ define float @t6(ptr%a0) {
184
184
}
185
185
186
186
define void @PR43971 (ptr %a0 , ptr %a1 ) {
187
- ; X32 -SSE2-LABEL: PR43971:
188
- ; X32 -SSE2: # %bb.0: # %entry
189
- ; X32 -SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
190
- ; X32 -SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
191
- ; X32 -SSE2-NEXT: movaps 16(%ecx), %xmm0
192
- ; X32 -SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
193
- ; X32 -SSE2-NEXT: xorps %xmm1, %xmm1
194
- ; X32 -SSE2-NEXT: cmpltss %xmm0, %xmm1
195
- ; X32 -SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
196
- ; X32 -SSE2-NEXT: andps %xmm1, %xmm2
197
- ; X32 -SSE2-NEXT: andnps %xmm0, %xmm1
198
- ; X32 -SSE2-NEXT: orps %xmm2, %xmm1
199
- ; X32 -SSE2-NEXT: movss %xmm1, (%eax)
200
- ; X32 -SSE2-NEXT: retl
187
+ ; X86-SSE2-LABEL: PR43971:
188
+ ; X86-SSE2: # %bb.0: # %entry
189
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
190
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
191
+ ; X86-SSE2-NEXT: movaps 16(%ecx), %xmm0
192
+ ; X86-SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
193
+ ; X86-SSE2-NEXT: xorps %xmm1, %xmm1
194
+ ; X86-SSE2-NEXT: cmpltss %xmm0, %xmm1
195
+ ; X86-SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
196
+ ; X86-SSE2-NEXT: andps %xmm1, %xmm2
197
+ ; X86-SSE2-NEXT: andnps %xmm0, %xmm1
198
+ ; X86-SSE2-NEXT: orps %xmm2, %xmm1
199
+ ; X86-SSE2-NEXT: movss %xmm1, (%eax)
200
+ ; X86-SSE2-NEXT: retl
201
201
;
202
202
; X64-SSSE3-LABEL: PR43971:
203
203
; X64-SSSE3: # %bb.0: # %entry
@@ -231,22 +231,22 @@ entry:
231
231
}
232
232
233
233
define float @PR43971_1 (ptr %a0 ) nounwind {
234
- ; X32 -SSE2-LABEL: PR43971_1:
235
- ; X32 -SSE2: # %bb.0: # %entry
236
- ; X32 -SSE2-NEXT: pushl %eax
237
- ; X32 -SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
238
- ; X32 -SSE2-NEXT: movaps (%eax), %xmm0
239
- ; X32 -SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
240
- ; X32 -SSE2-NEXT: xorps %xmm1, %xmm1
241
- ; X32 -SSE2-NEXT: cmpeqss %xmm0, %xmm1
242
- ; X32 -SSE2-NEXT: movss {{.*#+}} xmm2 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
243
- ; X32 -SSE2-NEXT: andps %xmm1, %xmm2
244
- ; X32 -SSE2-NEXT: andnps %xmm0, %xmm1
245
- ; X32 -SSE2-NEXT: orps %xmm2, %xmm1
246
- ; X32 -SSE2-NEXT: movss %xmm1, (%esp)
247
- ; X32 -SSE2-NEXT: flds (%esp)
248
- ; X32 -SSE2-NEXT: popl %eax
249
- ; X32 -SSE2-NEXT: retl
234
+ ; X86-SSE2-LABEL: PR43971_1:
235
+ ; X86-SSE2: # %bb.0: # %entry
236
+ ; X86-SSE2-NEXT: pushl %eax
237
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
238
+ ; X86-SSE2-NEXT: movaps (%eax), %xmm0
239
+ ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
240
+ ; X86-SSE2-NEXT: xorps %xmm1, %xmm1
241
+ ; X86-SSE2-NEXT: cmpeqss %xmm0, %xmm1
242
+ ; X86-SSE2-NEXT: movss {{.*#+}} xmm2 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
243
+ ; X86-SSE2-NEXT: andps %xmm1, %xmm2
244
+ ; X86-SSE2-NEXT: andnps %xmm0, %xmm1
245
+ ; X86-SSE2-NEXT: orps %xmm2, %xmm1
246
+ ; X86-SSE2-NEXT: movss %xmm1, (%esp)
247
+ ; X86-SSE2-NEXT: flds (%esp)
248
+ ; X86-SSE2-NEXT: popl %eax
249
+ ; X86-SSE2-NEXT: retl
250
250
;
251
251
; X64-SSSE3-LABEL: PR43971_1:
252
252
; X64-SSSE3: # %bb.0: # %entry
@@ -285,15 +285,15 @@ entry:
285
285
286
286
; Test for bad extractions from a VBROADCAST_LOAD of the <2 x i16> non-uniform constant bitcast as <4 x i32>.
287
287
define void @subextract_broadcast_load_constant (ptr nocapture %0 , ptr nocapture %1 , ptr nocapture %2 ) nounwind {
288
- ; X32 -SSE2-LABEL: subextract_broadcast_load_constant:
289
- ; X32 -SSE2: # %bb.0:
290
- ; X32 -SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
291
- ; X32 -SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
292
- ; X32 -SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
293
- ; X32 -SSE2-NEXT: movl $-1583308898, (%edx) # imm = 0xA1A09F9E
294
- ; X32 -SSE2-NEXT: movw $-24674, (%ecx) # imm = 0x9F9E
295
- ; X32 -SSE2-NEXT: movw $-24160, (%eax) # imm = 0xA1A0
296
- ; X32 -SSE2-NEXT: retl
288
+ ; X86-SSE2-LABEL: subextract_broadcast_load_constant:
289
+ ; X86-SSE2: # %bb.0:
290
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
291
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
292
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
293
+ ; X86-SSE2-NEXT: movl $-1583308898, (%edx) # imm = 0xA1A09F9E
294
+ ; X86-SSE2-NEXT: movw $-24674, (%ecx) # imm = 0x9F9E
295
+ ; X86-SSE2-NEXT: movw $-24160, (%eax) # imm = 0xA1A0
296
+ ; X86-SSE2-NEXT: retl
297
297
;
298
298
; X64-LABEL: subextract_broadcast_load_constant:
299
299
; X64: # %bb.0:
@@ -319,15 +319,15 @@ define void @subextract_broadcast_load_constant(ptr nocapture %0, ptr nocapture
319
319
; A scalar load is favored over a XMM->GPR register transfer in this example.
320
320
321
321
define i32 @multi_use_load_scalarization (ptr %p ) nounwind {
322
- ; X32 -SSE2-LABEL: multi_use_load_scalarization:
323
- ; X32 -SSE2: # %bb.0:
324
- ; X32 -SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
325
- ; X32 -SSE2-NEXT: movl (%ecx), %eax
326
- ; X32 -SSE2-NEXT: movdqu (%ecx), %xmm0
327
- ; X32 -SSE2-NEXT: pcmpeqd %xmm1, %xmm1
328
- ; X32 -SSE2-NEXT: psubd %xmm1, %xmm0
329
- ; X32 -SSE2-NEXT: movdqa %xmm0, (%ecx)
330
- ; X32 -SSE2-NEXT: retl
322
+ ; X86-SSE2-LABEL: multi_use_load_scalarization:
323
+ ; X86-SSE2: # %bb.0:
324
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
325
+ ; X86-SSE2-NEXT: movl (%ecx), %eax
326
+ ; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
327
+ ; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
328
+ ; X86-SSE2-NEXT: psubd %xmm1, %xmm0
329
+ ; X86-SSE2-NEXT: movdqa %xmm0, (%ecx)
330
+ ; X86-SSE2-NEXT: retl
331
331
;
332
332
; X64-SSSE3-LABEL: multi_use_load_scalarization:
333
333
; X64-SSSE3: # %bb.0:
@@ -354,15 +354,15 @@ define i32 @multi_use_load_scalarization(ptr %p) nounwind {
354
354
}
355
355
356
356
define i32 @multi_use_volatile_load_scalarization (ptr %p ) nounwind {
357
- ; X32 -SSE2-LABEL: multi_use_volatile_load_scalarization:
358
- ; X32 -SSE2: # %bb.0:
359
- ; X32 -SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
360
- ; X32 -SSE2-NEXT: movdqu (%ecx), %xmm0
361
- ; X32 -SSE2-NEXT: pcmpeqd %xmm1, %xmm1
362
- ; X32 -SSE2-NEXT: movd %xmm0, %eax
363
- ; X32 -SSE2-NEXT: psubd %xmm1, %xmm0
364
- ; X32 -SSE2-NEXT: movdqa %xmm0, (%ecx)
365
- ; X32 -SSE2-NEXT: retl
357
+ ; X86-SSE2-LABEL: multi_use_volatile_load_scalarization:
358
+ ; X86-SSE2: # %bb.0:
359
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
360
+ ; X86-SSE2-NEXT: movdqu (%ecx), %xmm0
361
+ ; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
362
+ ; X86-SSE2-NEXT: movd %xmm0, %eax
363
+ ; X86-SSE2-NEXT: psubd %xmm1, %xmm0
364
+ ; X86-SSE2-NEXT: movdqa %xmm0, (%ecx)
365
+ ; X86-SSE2-NEXT: retl
366
366
;
367
367
; X64-SSSE3-LABEL: multi_use_volatile_load_scalarization:
368
368
; X64-SSSE3: # %bb.0:
@@ -398,41 +398,41 @@ define i32 @multi_use_volatile_load_scalarization(ptr %p) nounwind {
398
398
@zero = internal unnamed_addr global <8 x i32 > zeroinitializer , align 32
399
399
400
400
define i32 @main () nounwind {
401
- ; X32 -SSE2-LABEL: main:
402
- ; X32 -SSE2: # %bb.0:
403
- ; X32 -SSE2-NEXT: pushl %ebp
404
- ; X32 -SSE2-NEXT: movl %esp, %ebp
405
- ; X32 -SSE2-NEXT: pushl %esi
406
- ; X32 -SSE2-NEXT: andl $-32, %esp
407
- ; X32 -SSE2-NEXT: subl $64, %esp
408
- ; X32 -SSE2-NEXT: movdqa zero, %xmm0
409
- ; X32 -SSE2-NEXT: movaps n1+16, %xmm1
410
- ; X32 -SSE2-NEXT: movaps n1, %xmm2
411
- ; X32 -SSE2-NEXT: movaps %xmm2, zero
412
- ; X32 -SSE2-NEXT: movaps %xmm1, zero+16
413
- ; X32 -SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,2,2,2]
414
- ; X32 -SSE2-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
415
- ; X32 -SSE2-NEXT: movaps %xmm1, (%esp)
416
- ; X32 -SSE2-NEXT: movdqa (%esp), %xmm1
417
- ; X32 -SSE2-NEXT: movaps {{[0-9]+}}(%esp), %xmm2
418
- ; X32 -SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
419
- ; X32 -SSE2-NEXT: movd %xmm2, %eax
420
- ; X32 -SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
421
- ; X32 -SSE2-NEXT: movd %xmm2, %ecx
422
- ; X32 -SSE2-NEXT: xorl %edx, %edx
423
- ; X32 -SSE2-NEXT: divl %ecx
424
- ; X32 -SSE2-NEXT: movl %eax, %ecx
425
- ; X32 -SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
426
- ; X32 -SSE2-NEXT: movd %xmm0, %eax
427
- ; X32 -SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
428
- ; X32 -SSE2-NEXT: movd %xmm0, %esi
429
- ; X32 -SSE2-NEXT: xorl %edx, %edx
430
- ; X32 -SSE2-NEXT: divl %esi
431
- ; X32 -SSE2-NEXT: addl %ecx, %eax
432
- ; X32 -SSE2-NEXT: leal -4(%ebp), %esp
433
- ; X32 -SSE2-NEXT: popl %esi
434
- ; X32 -SSE2-NEXT: popl %ebp
435
- ; X32 -SSE2-NEXT: retl
401
+ ; X86-SSE2-LABEL: main:
402
+ ; X86-SSE2: # %bb.0:
403
+ ; X86-SSE2-NEXT: pushl %ebp
404
+ ; X86-SSE2-NEXT: movl %esp, %ebp
405
+ ; X86-SSE2-NEXT: pushl %esi
406
+ ; X86-SSE2-NEXT: andl $-32, %esp
407
+ ; X86-SSE2-NEXT: subl $64, %esp
408
+ ; X86-SSE2-NEXT: movdqa zero, %xmm0
409
+ ; X86-SSE2-NEXT: movaps n1+16, %xmm1
410
+ ; X86-SSE2-NEXT: movaps n1, %xmm2
411
+ ; X86-SSE2-NEXT: movaps %xmm2, zero
412
+ ; X86-SSE2-NEXT: movaps %xmm1, zero+16
413
+ ; X86-SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,2,2,2]
414
+ ; X86-SSE2-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
415
+ ; X86-SSE2-NEXT: movaps %xmm1, (%esp)
416
+ ; X86-SSE2-NEXT: movdqa (%esp), %xmm1
417
+ ; X86-SSE2-NEXT: movaps {{[0-9]+}}(%esp), %xmm2
418
+ ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
419
+ ; X86-SSE2-NEXT: movd %xmm2, %eax
420
+ ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
421
+ ; X86-SSE2-NEXT: movd %xmm2, %ecx
422
+ ; X86-SSE2-NEXT: xorl %edx, %edx
423
+ ; X86-SSE2-NEXT: divl %ecx
424
+ ; X86-SSE2-NEXT: movl %eax, %ecx
425
+ ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
426
+ ; X86-SSE2-NEXT: movd %xmm0, %eax
427
+ ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
428
+ ; X86-SSE2-NEXT: movd %xmm0, %esi
429
+ ; X86-SSE2-NEXT: xorl %edx, %edx
430
+ ; X86-SSE2-NEXT: divl %esi
431
+ ; X86-SSE2-NEXT: addl %ecx, %eax
432
+ ; X86-SSE2-NEXT: leal -4(%ebp), %esp
433
+ ; X86-SSE2-NEXT: popl %esi
434
+ ; X86-SSE2-NEXT: popl %ebp
435
+ ; X86-SSE2-NEXT: retl
436
436
;
437
437
; X64-SSSE3-LABEL: main:
438
438
; X64-SSSE3: # %bb.0:
0 commit comments