10 | 10 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64,X64-AVX512FP16
11 | 11 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512VLDQ
12 | 12 |
| 13 | +;
| 14 | +; 128-bit Vectors
| 15 | +;
| 16 | +
13 | 17 | define <2 x double> @fabs_v2f64(<2 x double> %p) {
14 | 18 | ; X86-AVX-LABEL: fabs_v2f64:
15 | 19 | ; X86-AVX: # %bb.0:
@@ -92,6 +96,49 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) {
92 | 96 | }
93 | 97 | declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
94 | 98 |
| 99 | +define <8 x half> @fabs_v8f16(ptr %p) {
| 100 | +; X86-AVX1-LABEL: fabs_v8f16:
| 101 | +; X86-AVX1: # %bb.0:
| 102 | +; X86-AVX1-NEXT: movl 4(%esp), [[ADDRREG:%.*]]
| 103 | +; X86-AVX1-NEXT: vmovaps ([[ADDRREG]]), %xmm0
| 104 | +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
| 105 | +; X86-AVX1-NEXT: retl
| 106 | +
| 107 | +; X86-AVX2-LABEL: fabs_v8f16:
| 108 | +; X86-AVX2: # %bb.0:
| 109 | +; X86-AVX2-NEXT: movl 4(%esp), [[REG:%.*]]
| 110 | +; X86-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
| 111 | +; X86-AVX2-NEXT: vpand ([[REG]]), %xmm0, %xmm0
| 112 | +; X86-AVX2-NEXT: retl
| 113 | +
| 114 | +; X64-AVX512VL-LABEL: fabs_v8f16:
| 115 | +; X64-AVX512VL: # %bb.0:
| 116 | +; X64-AVX512VL-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
| 117 | +; X64-AVX512VL-NEXT: vpand (%rdi), %xmm0, %xmm0
| 118 | +; X64-AVX512VL-NEXT: retq
| 119 | +
| 120 | +; X64-AVX1-LABEL: fabs_v8f16:
| 121 | +; X64-AVX1: # %bb.0:
| 122 | +; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0
| 123 | +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
| 124 | +; X64-AVX1-NEXT: retq
| 125 | +
| 126 | +; X64-AVX2-LABEL: fabs_v8f16:
| 127 | +; X64-AVX2: # %bb.0:
| 128 | +; X64-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
| 129 | +; X64-AVX2-NEXT: vpand (%rdi), %xmm0, %xmm0
| 130 | +; X64-AVX2-NEXT: retq
| 131 | +
| 132 | + %v = load <8 x half>, ptr %p, align 16
| 133 | + %nnv = call <8 x half> @llvm.fabs.v8f16(<8 x half> %v)
| 134 | + ret <8 x half> %nnv
| 135 | +}
| 136 | +declare <8 x half> @llvm.fabs.v8f16(<8 x half> %p)
| 137 | +
| 138 | +;
| 139 | +; 256-bit Vectors
| 140 | +;
| 141 | +
95 | 142 | define <4 x double> @fabs_v4f64(<4 x double> %p) {
96 | 143 | ; X86-AVX1-LABEL: fabs_v4f64:
97 | 144 | ; X86-AVX1: # %bb.0:
@@ -139,86 +186,6 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) {
139 | 186 | }
140 | 187 | declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
141 | 188 |
142 | | -define <8 x half> @fabs_v8f16(ptr %p) {
143 | | -; X86-AVX1-LABEL: fabs_v8f16:
144 | | -; X86-AVX1: # %bb.0:
145 | | -; X86-AVX1-NEXT: movl 4(%esp), [[ADDRREG:%.*]]
146 | | -; X86-AVX1-NEXT: vmovaps ([[ADDRREG]]), %xmm0
147 | | -; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
148 | | -; X86-AVX1-NEXT: retl
149 | | -
150 | | -; X86-AVX2-LABEL: fabs_v8f16:
151 | | -; X86-AVX2: # %bb.0:
152 | | -; X86-AVX2-NEXT: movl 4(%esp), [[REG:%.*]]
153 | | -; X86-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
154 | | -; X86-AVX2-NEXT: vpand ([[REG]]), %xmm0, %xmm0
155 | | -; X86-AVX2-NEXT: retl
156 | | -
157 | | -; X64-AVX512VL-LABEL: fabs_v8f16:
158 | | -; X64-AVX512VL: # %bb.0:
159 | | -; X64-AVX512VL-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
160 | | -; X64-AVX512VL-NEXT: vpand (%rdi), %xmm0, %xmm0
161 | | -; X64-AVX512VL-NEXT: retq
162 | | -
163 | | -; X64-AVX1-LABEL: fabs_v8f16:
164 | | -; X64-AVX1: # %bb.0:
165 | | -; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0
166 | | -; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
167 | | -; X64-AVX1-NEXT: retq
168 | | -
169 | | -; X64-AVX2-LABEL: fabs_v8f16:
170 | | -; X64-AVX2: # %bb.0:
171 | | -; X64-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
172 | | -; X64-AVX2-NEXT: vpand (%rdi), %xmm0, %xmm0
173 | | -; X64-AVX2-NEXT: retq
174 | | -
175 | | - %v = load <8 x half>, ptr %p, align 16
176 | | - %nnv = call <8 x half> @llvm.fabs.v8f16(<8 x half> %v)
177 | | - ret <8 x half> %nnv
178 | | -}
179 | | -declare <8 x half> @llvm.fabs.v8f16(<8 x half> %p)
180 | | -
181 | | -define <16 x half> @fabs_v16f16(ptr %p) {
182 | | -; X86-AVX512FP16-LABEL: fabs_v16f16:
183 | | -; X86-AVX512FP16: # %bb.0:
184 | | -; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]]
185 | | -; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[YMM:%ymm[0-9]+]]
186 | | -; X86-AVX512FP16-NEXT: vpand ([[REG]]), [[YMM]], [[YMM]]
187 | | -; X86-AVX512FP16-NEXT: retl
188 | | -
189 | | -; X64-AVX512FP16-LABEL: fabs_v16f16:
190 | | -; X64-AVX512FP16: # %bb.0:
191 | | -; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[YMM:%ymm[0-9]+]]
192 | | -; X64-AVX512FP16-NEXT: vpand (%rdi), [[YMM]], [[YMM]]
193 | | -; X64-AVX512FP16-NEXT: retq
194 | | -;
195 | | - %v = load <16 x half>, ptr %p, align 32
196 | | - %nnv = call <16 x half> @llvm.fabs.v16f16(<16 x half> %v)
197 | | - ret <16 x half> %nnv
198 | | -}
199 | | -declare <16 x half> @llvm.fabs.v16f16(<16 x half> %p)
200 | | -
201 | | -define <32 x half> @fabs_v32f16(ptr %p) {
202 | | -; X86-AVX512FP16-LABEL: fabs_v32f16:
203 | | -; X86-AVX512FP16: # %bb.0:
204 | | -; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]]
205 | | -; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[ZMM:%zmm[0-9]+]]
206 | | -; X86-AVX512FP16-NEXT: vpandq ([[REG]]), [[ZMM]], [[ZMM]]
207 | | -; X86-AVX512FP16-NEXT: retl
208 | | -
209 | | -; X64-AVX512FP16-LABEL: fabs_v32f16:
210 | | -; X64-AVX512FP16: # %bb.0:
211 | | -; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[ZMM:%zmm[0-9]+]]
212 | | -; X64-AVX512FP16-NEXT: vpandq (%rdi), [[ZMM]], [[ZMM]]
213 | | -; X64-AVX512FP16-NEXT: retq
214 | | -
215 | | - %v = load <32 x half>, ptr %p, align 64
216 | | - %nnv = call <32 x half> @llvm.fabs.v32f16(<32 x half> %v)
217 | | - ret <32 x half> %nnv
218 | | -}
219 | | -declare <32 x half> @llvm.fabs.v32f16(<32 x half> %p)
220 | | -
221 | | -
222 | 189 | define <8 x float> @fabs_v8f32(<8 x float> %p) {
223 | 190 | ; X86-AVX1-LABEL: fabs_v8f32:
224 | 191 | ; X86-AVX1: # %bb.0:
@@ -266,6 +233,30 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) {
266 | 233 | }
267 | 234 | declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
268 | 235 |
| 236 | +define <16 x half> @fabs_v16f16(ptr %p) {
| 237 | +; X86-AVX512FP16-LABEL: fabs_v16f16:
| 238 | +; X86-AVX512FP16: # %bb.0:
| 239 | +; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]]
| 240 | +; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[YMM:%ymm[0-9]+]]
| 241 | +; X86-AVX512FP16-NEXT: vpand ([[REG]]), [[YMM]], [[YMM]]
| 242 | +; X86-AVX512FP16-NEXT: retl
| 243 | +
| 244 | +; X64-AVX512FP16-LABEL: fabs_v16f16:
| 245 | +; X64-AVX512FP16: # %bb.0:
| 246 | +; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[YMM:%ymm[0-9]+]]
| 247 | +; X64-AVX512FP16-NEXT: vpand (%rdi), [[YMM]], [[YMM]]
| 248 | +; X64-AVX512FP16-NEXT: retq
| 249 | +;
| 250 | + %v = load <16 x half>, ptr %p, align 32
| 251 | + %nnv = call <16 x half> @llvm.fabs.v16f16(<16 x half> %v)
| 252 | + ret <16 x half> %nnv
| 253 | +}
| 254 | +declare <16 x half> @llvm.fabs.v16f16(<16 x half> %p)
| 255 | +
| 256 | +;
| 257 | +; 512-bit Vectors
| 258 | +;
| 259 | +
269 | 260 | define <8 x double> @fabs_v8f64(<8 x double> %p) {
270 | 261 | ; X86-AVX-LABEL: fabs_v8f64:
271 | 262 | ; X86-AVX: # %bb.0:
@@ -344,6 +335,26 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) {
344 | 335 | }
345 | 336 | declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
346 | 337 |
| 338 | +define <32 x half> @fabs_v32f16(ptr %p) {
| 339 | +; X86-AVX512FP16-LABEL: fabs_v32f16:
| 340 | +; X86-AVX512FP16: # %bb.0:
| 341 | +; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]]
| 342 | +; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[ZMM:%zmm[0-9]+]]
| 343 | +; X86-AVX512FP16-NEXT: vpandq ([[REG]]), [[ZMM]], [[ZMM]]
| 344 | +; X86-AVX512FP16-NEXT: retl
| 345 | +
| 346 | +; X64-AVX512FP16-LABEL: fabs_v32f16:
| 347 | +; X64-AVX512FP16: # %bb.0:
| 348 | +; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[ZMM:%zmm[0-9]+]]
| 349 | +; X64-AVX512FP16-NEXT: vpandq (%rdi), [[ZMM]], [[ZMM]]
| 350 | +; X64-AVX512FP16-NEXT: retq
| 351 | +
| 352 | + %v = load <32 x half>, ptr %p, align 64
| 353 | + %nnv = call <32 x half> @llvm.fabs.v32f16(<32 x half> %v)
| 354 | + ret <32 x half> %nnv
| 355 | +}
| 356 | +declare <32 x half> @llvm.fabs.v32f16(<32 x half> %p)
| 357 | +
347 | 358 | ; PR20354: when generating code for a vector fabs op,
348 | 359 | ; make sure that we're only turning off the sign bit of each float value.
349 | 360 | ; No constant pool loads or vector ops are needed for the fabs of a