@@ -36,6 +36,22 @@ define <16 x i8> @permute_packss_packus_128(<4 x i32> %a0, <4 x i32> %a1, <4 x i
   ret <16 x i8> %4
 }
 
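+; Permute of nested phadd results: the rotate-by-two-i16 shuffle is equivalent to a dword rotate, so it lowers to a single vpshufd.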
+define <8 x i16> @permute_phadd_phadd_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
+; CHECK-LABEL: permute_phadd_phadd_128:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vphaddw %xmm3, %xmm2, %xmm1
+; CHECK-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
+; CHECK-NEXT:    ret{{[l|q]}}
+  %1 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
+  %2 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a2, <8 x i16> %a3)
+  %3 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %1, <8 x i16> %2)
+  %4 = shufflevector <8 x i16> %3, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
+  ret <8 x i16> %4
+}
+
 ;
 ; 256-bit Vectors
 ;
@@ -55,9 +71,48 @@ define <8 x float> @permute_hadd_hadd_256(<8 x float> %a0, <8 x float> %a1, <8 x
   ret <8 x float> %4
 }
 
-declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>)
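+; 256-bit version of the same pattern: the rotate happens within each 128-bit lane, so it still lowers to a single vpshufd.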
+define <16 x i16> @permute_phadd_phadd_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2, <16 x i16> %a3) {
+; CHECK-LABEL: permute_phadd_phadd_256:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    vphaddw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vphaddw %ymm3, %ymm2, %ymm1
+; CHECK-NEXT:    vphaddw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
+; CHECK-NEXT:    ret{{[l|q]}}
+  %1 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1)
+  %2 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a2, <16 x i16> %a3)
+  %3 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %1, <16 x i16> %2)
+  %4 = shufflevector <16 x i16> %3, <16 x i16> poison, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9>
+  ret <16 x i16> %4
+}
+
+declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>)
+
+declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>)
 
 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>)
 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
 declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)
+
+declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>)
+declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>)
+declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>)
+declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>)
+
+declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>)
+declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>)
+declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>)
+declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>)
+
+declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>)
+declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>)
+declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>)
+declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>)