@@ -63,6 +63,31 @@ define <8 x i32> @combine_pmaddwd_concat(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>
63
63
ret <8 x i32 > %3
64
64
}
65
65
66
; Demanded-elements test for the SSE2 pmaddwd intrinsic.
; The final shuffle (%4, zeroinitializer mask) splats lane 0 of the pmaddwd
; result, so no other result lane is read. Both input shuffles keep i16 lanes
; 0-3 in place and only rewrite lanes 4-7 (%a0: every lane becomes lane 4,
; %a1: every lane becomes lane 7).
; The assertions for all three run prefixes expect a bare (v)pmaddwd followed
; by the splat, i.e. neither input shuffle is expected in the generated code.
; NOTE(review): presumably this works because result lane 0 depends only on
; input lanes 0-1 (pmaddwd sums adjacent i16 products) - confirm against the
; intrinsic's documentation.
+ define <4 x i32 > @combine_pmaddwd_demandedelts (<8 x i16 > %a0 , <8 x i16 > %a1 ) {
67
+ ; SSE-LABEL: combine_pmaddwd_demandedelts:
68
+ ; SSE: # %bb.0:
69
+ ; SSE-NEXT: pmaddwd %xmm1, %xmm0
70
+ ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
71
+ ; SSE-NEXT: retq
72
+ ;
73
+ ; AVX1-LABEL: combine_pmaddwd_demandedelts:
74
+ ; AVX1: # %bb.0:
75
+ ; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
76
+ ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
77
+ ; AVX1-NEXT: retq
78
+ ;
79
+ ; AVX2-LABEL: combine_pmaddwd_demandedelts:
80
+ ; AVX2: # %bb.0:
81
+ ; AVX2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
82
+ ; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
83
+ ; AVX2-NEXT: retq
84
+ %1 = shufflevector <8 x i16 > %a0 , <8 x i16 > poison, <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 4 , i32 4 , i32 4 >
85
+ %2 = shufflevector <8 x i16 > %a1 , <8 x i16 > poison, <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 7 , i32 7 , i32 7 , i32 7 >
86
+ %3 = call <4 x i32 > @llvm.x86.sse2.pmadd.wd (<8 x i16 > %1 , <8 x i16 > %2 )
87
+ %4 = shufflevector <4 x i32 > %3 , <4 x i32 > poison, <4 x i32 > zeroinitializer
88
+ ret <4 x i32 > %4
89
+ }
90
+
66
91
define i32 @combine_pmaddwd_constant () {
67
92
; CHECK-LABEL: combine_pmaddwd_constant:
68
93
; CHECK: # %bb.0:
@@ -130,6 +155,38 @@ define <16 x i16> @combine_pmaddubsw_concat(<16 x i8> %a0, <16 x i8> %a1, <16 x
130
155
ret <16 x i16 > %3
131
156
}
132
157
158
+ ; TODO: Missing SimplifyDemandedVectorElts support
; Demanded-elements test for the SSSE3 pmaddubsw intrinsic.
; The final shuffle (%4, mask <0,1,0,1,0,1,0,1>) reads only i16 lanes 0 and 1
; of the pmaddubsw result. Both input shuffles keep bytes 0-7 in place and
; only rewrite bytes 8-15 (%a0: every byte becomes byte 8, %a1: every byte
; becomes byte 15).
; The assertions for all three run prefixes still contain both (v)pshufb input
; shuffles ahead of (v)pmaddubsw; presumably removing them is the missing
; simplification the TODO above refers to.
159
+ define <8 x i16 > @combine_pmaddubsw_demandedelts (<16 x i8 > %a0 , <16 x i8 > %a1 ) {
160
+ ; SSE-LABEL: combine_pmaddubsw_demandedelts:
161
+ ; SSE: # %bb.0:
162
+ ; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,8,8,8,8,8,8,8]
163
+ ; SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,15,15,15,15,15,15,15,15]
164
+ ; SSE-NEXT: pmaddubsw %xmm1, %xmm0
165
+ ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
166
+ ; SSE-NEXT: retq
167
+ ;
168
+ ; AVX1-LABEL: combine_pmaddubsw_demandedelts:
169
+ ; AVX1: # %bb.0:
170
+ ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,8,8,8,8,8,8,8]
171
+ ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,15,15,15,15,15,15,15,15]
172
+ ; AVX1-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
173
+ ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
174
+ ; AVX1-NEXT: retq
175
+ ;
176
+ ; AVX2-LABEL: combine_pmaddubsw_demandedelts:
177
+ ; AVX2: # %bb.0:
178
+ ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,8,8,8,8,8,8,8]
179
+ ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,15,15,15,15,15,15,15,15]
180
+ ; AVX2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
181
+ ; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
182
+ ; AVX2-NEXT: retq
183
+ %1 = shufflevector <16 x i8 > %a0 , <16 x i8 > poison, <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 8 , i32 8 , i32 8 , i32 8 , i32 8 , i32 8 , i32 8 >
184
+ %2 = shufflevector <16 x i8 > %a1 , <16 x i8 > poison, <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 15 , i32 15 , i32 15 , i32 15 , i32 15 , i32 15 , i32 15 , i32 15 >
185
+ %3 = call <8 x i16 > @llvm.x86.ssse3.pmadd.ub.sw.128 (<16 x i8 > %1 , <16 x i8 > %2 )
186
+ %4 = shufflevector <8 x i16 > %3 , <8 x i16 > poison, <8 x i32 > <i32 0 , i32 1 , i32 0 , i32 1 , i32 0 , i32 1 , i32 0 , i32 1 >
187
+ ret <8 x i16 > %4
188
+ }
189
+
133
190
define i32 @combine_pmaddubsw_constant () {
134
191
; CHECK-LABEL: combine_pmaddubsw_constant:
135
192
; CHECK: # %bb.0:
0 commit comments