@@ -88,22 +88,48 @@ define <4 x i32> @combine_pmaddwd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
   ret <4 x i32> %4
 }
 
+; TODO
 define i32 @combine_pmaddwd_constant() {
-; CHECK-LABEL: combine_pmaddwd_constant:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl $-155, %eax
-; CHECK-NEXT:    retq
+; SSE-LABEL: combine_pmaddwd_constant:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pmovsxbw {{.*#+}} xmm0 = [65535,2,3,65532,65531,6,7,65528]
+; SSE-NEXT:    pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [65531,7,65527,65525,13,65521,17,65517]
+; SSE-NEXT:    pextrd $2, %xmm0, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_pmaddwd_constant:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpmovsxbw {{.*#+}} xmm0 = [65535,2,3,65532,65531,6,7,65528]
+; AVX-NEXT:    vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [65531,7,65527,65525,13,65521,17,65517]
+; AVX-NEXT:    vpextrd $2, %xmm0, %eax
+; AVX-NEXT:    retq
   %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> <i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8>, <8 x i16> <i16 -5, i16 7, i16 -9, i16 -11, i16 13, i16 -15, i16 17, i16 -19>)
   %2 = extractelement <4 x i32> %1, i32 2 ; (-5*13)+(6*-15) = -155
   ret i32 %2
 }
 
 ; ensure we don't assume pmaddwd performs add nsw
 define i32 @combine_pmaddwd_constant_nsw() {
-; CHECK-LABEL: combine_pmaddwd_constant_nsw:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
-; CHECK-NEXT:    retq
+; SSE-LABEL: combine_pmaddwd_constant_nsw:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE-NEXT:    pmaddwd %xmm0, %xmm0
+; SSE-NEXT:    movd %xmm0, %eax
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: combine_pmaddwd_constant_nsw:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; AVX1-NEXT:    vpmaddwd %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_pmaddwd_constant_nsw:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; AVX2-NEXT:    vpmaddwd %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vmovd %xmm0, %eax
+; AVX2-NEXT:    retq
   %1 = insertelement <8 x i16> undef, i16 32768, i32 0
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
   %3 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %2, <8 x i16> %2)
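
Both pmaddwd tests in the hunk above previously constant-folded to a single immediate (the deleted CHECK lines); the new SSE/AVX lines show the multiply-add surviving to run time, which the added TODO presumably marks for re-folding. As a reference for the expected values, here is a minimal C sketch of the pmaddwd semantics being tested; pmaddwd_model is an illustrative helper name of mine, not an LLVM or Intel API:

/* Scalar model of pmaddwd: each i32 lane is a[2i]*b[2i] + a[2i+1]*b[2i+1],
 * with the i16 inputs sign-extended and the final add wrapping (no nsw). */
#include <stdint.h>
#include <stdio.h>

static void pmaddwd_model(const int16_t a[8], const int16_t b[8], int32_t r[4]) {
    for (int i = 0; i < 4; i++) {
        int32_t p0 = (int32_t)a[2 * i] * b[2 * i];         /* exact in i32 */
        int32_t p1 = (int32_t)a[2 * i + 1] * b[2 * i + 1]; /* exact in i32 */
        r[i] = (int32_t)((uint32_t)p0 + (uint32_t)p1);     /* wrapping add */
    }
}

int main(void) {
    /* combine_pmaddwd_constant: lane 2 = (-5*13)+(6*-15) = -155 */
    const int16_t a[8] = {-1, 2, 3, -4, -5, 6, 7, -8};
    const int16_t b[8] = {-5, 7, -9, -11, 13, -15, 17, -19};
    int32_t r[4];
    pmaddwd_model(a, b, r);
    printf("%d\n", r[2]); /* -155 */

    /* combine_pmaddwd_constant_nsw: i16 0x8000 is -32768, so every lane is
     * 2*(-32768*-32768) = 2^31, which wraps to INT32_MIN; a fold must not
     * assume the lane sum is nsw. */
    int16_t splat[8];
    for (int i = 0; i < 8; i++) splat[i] = -32768;
    pmaddwd_model(splat, splat, r);
    printf("%d\n", r[0]); /* -2147483648 */
    return 0;
}
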
@@ -193,25 +219,51 @@ define <8 x i16> @combine_pmaddubsw_demandedelts(<16 x i8> %a0, <16 x i8> %a1) {
   ret <8 x i16> %4
 }
 
+; TODO
 define i32 @combine_pmaddubsw_constant() {
-; CHECK-LABEL: combine_pmaddubsw_constant:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl $1694, %eax # imm = 0x69E
-; CHECK-NEXT:    retq
+; SSE-LABEL: combine_pmaddubsw_constant:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
+; SSE-NEXT:    pmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
+; SSE-NEXT:    pextrw $3, %xmm0, %eax
+; SSE-NEXT:    cwtl
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_pmaddubsw_constant:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
+; AVX-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
+; AVX-NEXT:    vpextrw $3, %xmm0, %eax
+; AVX-NEXT:    cwtl
+; AVX-NEXT:    retq
   %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 -6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>)
   %2 = extractelement <8 x i16> %1, i32 3 ; ((uint8_t)-6*7)+(7*-8) = (250*7)+(7*-8) = 1694
   %3 = sext i16 %2 to i32
   ret i32 %3
 }
 
+; TODO
 define i32 @combine_pmaddubsw_constant_sat() {
-; CHECK-LABEL: combine_pmaddubsw_constant_sat:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl $-32768, %eax # imm = 0x8000
-; CHECK-NEXT:    retq
+; SSE-LABEL: combine_pmaddubsw_constant_sat:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
+; SSE-NEXT:    pmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,128,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
+; SSE-NEXT:    movd %xmm0, %eax
+; SSE-NEXT:    cwtl
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_pmaddubsw_constant_sat:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [255,255,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
+; AVX-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,128,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    cwtl
+; AVX-NEXT:    retq
   %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> <i8 -1, i8 -1, i8 2, i8 3, i8 4, i8 5, i8 -6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> <i8 -128, i8 -128, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>)
   %2 = extractelement <8 x i16> %1, i32 0 ; add_sat_i16(((uint8_t)-1*-128),((uint8_t)-1*-128)) = add_sat_i16((255*-128),(255*-128)) = sat_i16(-65280) = -32768
   %3 = sext i16 %2 to i32
   ret i32 %3
 }
 
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
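
The pmaddubsw tests follow the same pattern: the deleted CHECK lines folded to immediates, while the new SSE/AVX lines execute the instruction, with the TODO marking the missing fold. A minimal C sketch of the semantics, covering both the unsigned-times-signed mixing and the saturating add; pmaddubsw_model and sat_i16 are illustrative helper names, not a real API:

/* Scalar model of pmaddubsw: a is read as unsigned i8, b as signed i8, and
 * each i16 lane is the signed-saturating sum a[2i]*b[2i] + a[2i+1]*b[2i+1]. */
#include <stdint.h>
#include <stdio.h>

static int16_t sat_i16(int32_t v) {
    if (v > INT16_MAX) return INT16_MAX; /* clamp high */
    if (v < INT16_MIN) return INT16_MIN; /* clamp low */
    return (int16_t)v;
}

static void pmaddubsw_model(const uint8_t a[16], const int8_t b[16], int16_t r[8]) {
    for (int i = 0; i < 8; i++) {
        int32_t sum = (int32_t)a[2 * i] * b[2 * i]
                    + (int32_t)a[2 * i + 1] * b[2 * i + 1];
        r[i] = sat_i16(sum); /* saturating, unlike pmaddwd's wrapping add */
    }
}

int main(void) {
    /* combine_pmaddubsw_constant: i8 -6 reads back as u8 250, so lane 3 is
     * (250*7)+(7*-8) = 1694 (no saturation needed). */
    const uint8_t a[16] = {0, 1, 2, 3, 4, 5, 250, 7, 8, 9, 10, 11, 12, 13, 14, 15};
    const int8_t  b[16] = {1, 2, 3, 4, 5, 6, 7, -8, 9, 10, 11, 12, 13, 14, 15, 16};
    int16_t r[8];
    pmaddubsw_model(a, b, r);
    printf("%d\n", r[3]); /* 1694 */

    /* combine_pmaddubsw_constant_sat: lane 0 is 255*-128 + 255*-128 = -65280,
     * which saturates to -32768 instead of wrapping. */
    const uint8_t c[16] = {255, 255, 2, 3, 4, 5, 250, 7, 8, 9, 10, 11, 12, 13, 14, 15};
    const int8_t  d[16] = {-128, -128, 3, 4, 5, 6, 7, -8, 9, 10, 11, 12, 13, 14, 15, 16};
    pmaddubsw_model(c, d, r);
    printf("%d\n", r[0]); /* -32768 */
    return 0;
}
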