Skip to content

Commit 88f010a

Browse files
committed
[X86] Improve test coverage for concat(pmaddubsw(),pmaddubsw()) -> pmaddubsw(concat(),concat())
Ensure we have tests for both beneficial/non-beneficial concatenation cases
1 parent 94a62b3 commit 88f010a

File tree

1 file changed

+34
-3
lines changed

1 file changed

+34
-3
lines changed

llvm/test/CodeGen/X86/combine-pmadd.ll

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,22 +179,53 @@ define <8 x i16> @combine_pmaddubsw_zero_commute(<16 x i8> %a0, <16 x i8> %a1) {
179179
ret <8 x i16> %1
180180
}
181181

182-
define <16 x i16> @combine_pmaddubsw_concat(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 x i8> %a3) {
182+
define <16 x i16> @combine_pmaddubsw_concat(<32 x i8> %a0, <32 x i8> %a1) {
; Beneficial concatenation case: %a0 and %a1 are each split into their low
; (elements 0-15) and high (elements 16-31) 128-bit halves, each pair of halves
; is fed to @llvm.x86.ssse3.pmadd.ub.sw.128, and the two <8 x i16> results are
; concatenated into a single <16 x i16>.
; Per the checks below, AVX2 emits one 256-bit vpmaddubsw on the original ymm
; inputs, while SSE and AVX1 still emit two 128-bit (v)pmaddubsw instructions.
183183
; SSE-LABEL: combine_pmaddubsw_concat:
184184
; SSE: # %bb.0:
185+
; SSE-NEXT: pmaddubsw %xmm2, %xmm0
186+
; SSE-NEXT: pmaddubsw %xmm3, %xmm1
187+
; SSE-NEXT: retq
188+
;
189+
; AVX1-LABEL: combine_pmaddubsw_concat:
190+
; AVX1: # %bb.0:
191+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
192+
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
193+
; AVX1-NEXT: vpmaddubsw %xmm3, %xmm2, %xmm2
194+
; AVX1-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
195+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
196+
; AVX1-NEXT: retq
197+
;
198+
; AVX2-LABEL: combine_pmaddubsw_concat:
199+
; AVX2: # %bb.0:
200+
; AVX2-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0
201+
; AVX2-NEXT: retq
202+
%lo0 = shufflevector <32 x i8> %a0, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
203+
%lo1 = shufflevector <32 x i8> %a1, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
204+
%hi0 = shufflevector <32 x i8> %a0, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
205+
%hi1 = shufflevector <32 x i8> %a1, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
206+
%lo = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %lo0, <16 x i8> %lo1)
207+
%hi = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %hi0, <16 x i8> %hi1)
208+
%res = shufflevector <8 x i16> %lo, <8 x i16> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
209+
ret <16 x i16> %res
210+
}
211+
212+
; TODO: Not beneficial to concatenate both inputs just to create a 256-bit pmaddubsw
213+
define <16 x i16> @combine_pmaddubsw_concat_unecessary(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 x i8> %a3) {
214+
; SSE-LABEL: combine_pmaddubsw_concat_unecessary:
215+
; SSE: # %bb.0:
185216
; SSE-NEXT: pmaddubsw %xmm1, %xmm0
186217
; SSE-NEXT: pmaddubsw %xmm3, %xmm2
187218
; SSE-NEXT: movdqa %xmm2, %xmm1
188219
; SSE-NEXT: retq
189220
;
190-
; AVX1-LABEL: combine_pmaddubsw_concat:
221+
; AVX1-LABEL: combine_pmaddubsw_concat_unecessary:
191222
; AVX1: # %bb.0:
192223
; AVX1-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
193224
; AVX1-NEXT: vpmaddubsw %xmm3, %xmm2, %xmm1
194225
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
195226
; AVX1-NEXT: retq
196227
;
197-
; AVX2-LABEL: combine_pmaddubsw_concat:
228+
; AVX2-LABEL: combine_pmaddubsw_concat_unecessary:
198229
; AVX2: # %bb.0:
199230
; AVX2-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
200231
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0

0 commit comments

Comments
 (0)