Skip to content

Commit 2dea00b

Browse files
committed
[X86] combine-pmadd.ll - add demandedelts tests for pmaddwd/pmaddubsw
pmaddwd is already handled by SimplifyDemandedVectorElts, but pmaddubsw support is still missing (see the TODO on the new pmaddubsw test).
1 parent dc5d541 commit 2dea00b

File tree

1 file changed

+57
-0
lines changed

1 file changed

+57
-0
lines changed

llvm/test/CodeGen/X86/combine-pmadd.ll

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,31 @@ define <8 x i32> @combine_pmaddwd_concat(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>
6363
ret <8 x i32> %3
6464
}
6565

66+
; Demanded-elements test for PMADDWD.
; Only element 0 of the multiply-add result is consumed (%4 splats it), so the
; shuffles applied to both operands beforehand should not survive codegen.
; The expected output for every run configuration below shows the multiply-add
; operating directly on the incoming arguments, followed only by the splat.
define <4 x i32> @combine_pmaddwd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
67+
; SSE-LABEL: combine_pmaddwd_demandedelts:
68+
; SSE: # %bb.0:
69+
; SSE-NEXT: pmaddwd %xmm1, %xmm0
70+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
71+
; SSE-NEXT: retq
72+
;
73+
; AVX1-LABEL: combine_pmaddwd_demandedelts:
74+
; AVX1: # %bb.0:
75+
; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
76+
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
77+
; AVX1-NEXT: retq
78+
;
79+
; AVX2-LABEL: combine_pmaddwd_demandedelts:
80+
; AVX2: # %bb.0:
81+
; AVX2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
82+
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
83+
; AVX2-NEXT: retq
84+
; Lanes 0-3 of each operand pass through unchanged; lanes 4-7 are filled with
; a repeated lane (lane 4 of %a0 here, lane 7 of %a1 on the next shuffle).
%1 = shufflevector <8 x i16> %a0, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
85+
%2 = shufflevector <8 x i16> %a1, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 7, i32 7>
86+
%3 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %1, <8 x i16> %2)
87+
; Broadcast result element 0 to all four lanes; no other element of %3 is used.
%4 = shufflevector <4 x i32> %3, <4 x i32> poison, <4 x i32> zeroinitializer
88+
ret <4 x i32> %4
89+
}
90+
6691
define i32 @combine_pmaddwd_constant() {
6792
; CHECK-LABEL: combine_pmaddwd_constant:
6893
; CHECK: # %bb.0:
@@ -130,6 +155,38 @@ define <16 x i16> @combine_pmaddubsw_concat(<16 x i8> %a0, <16 x i8> %a1, <16 x
130155
ret <16 x i16> %3
131156
}
132157

158+
; TODO: Missing SimplifyDemandedVectorElts support
; Demanded-elements test for PMADDUBSW.
; Only i16 elements 0 and 1 of the multiply-add result are consumed (%4 repeats
; that pair), yet the expected output below still contains a byte shuffle on
; each operand before the multiply-add. These check lines record the current,
; unoptimized codegen and should shrink once the TODO above is addressed.
159+
define <8 x i16> @combine_pmaddubsw_demandedelts(<16 x i8> %a0, <16 x i8> %a1) {
160+
; SSE-LABEL: combine_pmaddubsw_demandedelts:
161+
; SSE: # %bb.0:
162+
; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,8,8,8,8,8,8,8]
163+
; SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,15,15,15,15,15,15,15,15]
164+
; SSE-NEXT: pmaddubsw %xmm1, %xmm0
165+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
166+
; SSE-NEXT: retq
167+
;
168+
; AVX1-LABEL: combine_pmaddubsw_demandedelts:
169+
; AVX1: # %bb.0:
170+
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,8,8,8,8,8,8,8]
171+
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,15,15,15,15,15,15,15,15]
172+
; AVX1-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
173+
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
174+
; AVX1-NEXT: retq
175+
;
176+
; AVX2-LABEL: combine_pmaddubsw_demandedelts:
177+
; AVX2: # %bb.0:
178+
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,8,8,8,8,8,8,8]
179+
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,15,15,15,15,15,15,15,15]
180+
; AVX2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
181+
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
182+
; AVX2-NEXT: retq
183+
; Bytes 0-7 of each operand pass through unchanged; bytes 8-15 are filled with
; a repeated byte (byte 8 of %a0 here, byte 15 of %a1 on the next shuffle).
%1 = shufflevector <16 x i8> %a0, <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
184+
%2 = shufflevector <16 x i8> %a1, <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
185+
%3 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %1, <16 x i8> %2)
186+
; Repeat the i16 pair {0,1} across the vector; elements 2-7 of %3 are never used.
%4 = shufflevector <8 x i16> %3, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
187+
ret <8 x i16> %4
188+
}
189+
133190
define i32 @combine_pmaddubsw_constant() {
134191
; CHECK-LABEL: combine_pmaddubsw_constant:
135192
; CHECK: # %bb.0:

0 commit comments

Comments
 (0)