@@ -34,6 +34,26 @@ define <4 x i32> @combine_pmaddwd_zero_commute(<8 x i16> %a0, <8 x i16> %a1) {
34
34
ret <4 x i32 > %1
35
35
}
36
36
37
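+ ; TODO: pmaddwd knownbits handling is missing - the expected output below still contains an unfolded pmaddwd even though both operands are constants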
38
+ define i32 @combine_pmaddwd_constant () { ; returns lane 2 of a pmaddwd whose two operands are both constant vectors
39
+ ; SSE-LABEL: combine_pmaddwd_constant:
40
+ ; SSE: # %bb.0:
41
+ ; SSE-NEXT: pmovsxbw {{.*#+}} xmm0 = [65535,2,3,65532,65531,6,7,65528]
42
+ ; SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [65531,7,65527,65525,13,65521,17,65517]
43
+ ; SSE-NEXT: pextrd $2, %xmm0, %eax
44
+ ; SSE-NEXT: retq
45
+ ;
46
+ ; AVX-LABEL: combine_pmaddwd_constant:
47
+ ; AVX: # %bb.0:
48
+ ; AVX-NEXT: vpmovsxbw {{.*#+}} xmm0 = [65535,2,3,65532,65531,6,7,65528]
49
+ ; AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [65531,7,65527,65525,13,65521,17,65517]
50
+ ; AVX-NEXT: vpextrd $2, %xmm0, %eax
51
+ ; AVX-NEXT: retq
52
+ %1 = call <4 x i32 > @llvm.x86.sse2.pmadd.wd (<8 x i16 > <i16 -1 , i16 2 , i16 3 , i16 -4 , i16 -5 , i16 6 , i16 7 , i16 -8 >, <8 x i16 > <i16 -5 , i16 7 , i16 -9 , i16 -11 , i16 13 , i16 -15 , i16 17 , i16 -19 >) ; no runtime inputs: every element of both operands is a literal
53
+ %2 = extractelement <4 x i32 > %1 , i32 2 ; lane 2 sums the products of elements 4 and 5 of the operands: (-5*13)+(6*-15) = -155
54
+ ret i32 %2
55
+ }
56
+
37
57
define <8 x i16 > @combine_pmaddubsw_zero (<16 x i8 > %a0 , <16 x i8 > %a1 ) {
38
58
; SSE-LABEL: combine_pmaddubsw_zero:
39
59
; SSE: # %bb.0:
0 commit comments