Skip to content

Commit a66085c

Browse files
committed
[X86] vec_fabs.ll - sort tests into 128/256/512-bit vector types
1 parent 915f6c3 commit a66085c

File tree

1 file changed

+91
-80
lines changed

1 file changed

+91
-80
lines changed

llvm/test/CodeGen/X86/vec_fabs.ll

Lines changed: 91 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,10 @@
10 10
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64,X64-AVX512FP16
11 11
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512VLDQ
12 12

13+
;
14+
; 128-bit Vectors
15+
;
16+
13 17
define <2 x double> @fabs_v2f64(<2 x double> %p) {
14 18
; X86-AVX-LABEL: fabs_v2f64:
15 19
; X86-AVX: # %bb.0:
@@ -92,6 +96,49 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) {
92 96
}
93 97
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
94 98

99+
define <8 x half> @fabs_v8f16(ptr %p) {
100+
; X86-AVX1-LABEL: fabs_v8f16:
101+
; X86-AVX1: # %bb.0:
102+
; X86-AVX1-NEXT: movl 4(%esp), [[ADDRREG:%.*]]
103+
; X86-AVX1-NEXT: vmovaps ([[ADDRREG]]), %xmm0
104+
; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
105+
; X86-AVX1-NEXT: retl
106+
107+
; X86-AVX2-LABEL: fabs_v8f16:
108+
; X86-AVX2: # %bb.0:
109+
; X86-AVX2-NEXT: movl 4(%esp), [[REG:%.*]]
110+
; X86-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
111+
; X86-AVX2-NEXT: vpand ([[REG]]), %xmm0, %xmm0
112+
; X86-AVX2-NEXT: retl
113+
114+
; X64-AVX512VL-LABEL: fabs_v8f16:
115+
; X64-AVX512VL: # %bb.0:
116+
; X64-AVX512VL-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
117+
; X64-AVX512VL-NEXT: vpand (%rdi), %xmm0, %xmm0
118+
; X64-AVX512VL-NEXT: retq
119+
120+
; X64-AVX1-LABEL: fabs_v8f16:
121+
; X64-AVX1: # %bb.0:
122+
; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0
123+
; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
124+
; X64-AVX1-NEXT: retq
125+
126+
; X64-AVX2-LABEL: fabs_v8f16:
127+
; X64-AVX2: # %bb.0:
128+
; X64-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
129+
; X64-AVX2-NEXT: vpand (%rdi), %xmm0, %xmm0
130+
; X64-AVX2-NEXT: retq
131+
132+
%v = load <8 x half>, ptr %p, align 16
133+
%nnv = call <8 x half> @llvm.fabs.v8f16(<8 x half> %v)
134+
ret <8 x half> %nnv
135+
}
136+
declare <8 x half> @llvm.fabs.v8f16(<8 x half> %p)
137+
138+
;
139+
; 256-bit Vectors
140+
;
141+
95 142
define <4 x double> @fabs_v4f64(<4 x double> %p) {
96 143
; X86-AVX1-LABEL: fabs_v4f64:
97 144
; X86-AVX1: # %bb.0:
@@ -139,86 +186,6 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) {
139 186
}
140 187
declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
141 188

142-
define <8 x half> @fabs_v8f16(ptr %p) {
143-
; X86-AVX1-LABEL: fabs_v8f16:
144-
; X86-AVX1: # %bb.0:
145-
; X86-AVX1-NEXT: movl 4(%esp), [[ADDRREG:%.*]]
146-
; X86-AVX1-NEXT: vmovaps ([[ADDRREG]]), %xmm0
147-
; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
148-
; X86-AVX1-NEXT: retl
149-
150-
; X86-AVX2-LABEL: fabs_v8f16:
151-
; X86-AVX2: # %bb.0:
152-
; X86-AVX2-NEXT: movl 4(%esp), [[REG:%.*]]
153-
; X86-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
154-
; X86-AVX2-NEXT: vpand ([[REG]]), %xmm0, %xmm0
155-
; X86-AVX2-NEXT: retl
156-
157-
; X64-AVX512VL-LABEL: fabs_v8f16:
158-
; X64-AVX512VL: # %bb.0:
159-
; X64-AVX512VL-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
160-
; X64-AVX512VL-NEXT: vpand (%rdi), %xmm0, %xmm0
161-
; X64-AVX512VL-NEXT: retq
162-
163-
; X64-AVX1-LABEL: fabs_v8f16:
164-
; X64-AVX1: # %bb.0:
165-
; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0
166-
; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
167-
; X64-AVX1-NEXT: retq
168-
169-
; X64-AVX2-LABEL: fabs_v8f16:
170-
; X64-AVX2: # %bb.0:
171-
; X64-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
172-
; X64-AVX2-NEXT: vpand (%rdi), %xmm0, %xmm0
173-
; X64-AVX2-NEXT: retq
174-
175-
%v = load <8 x half>, ptr %p, align 16
176-
%nnv = call <8 x half> @llvm.fabs.v8f16(<8 x half> %v)
177-
ret <8 x half> %nnv
178-
}
179-
declare <8 x half> @llvm.fabs.v8f16(<8 x half> %p)
180-
181-
define <16 x half> @fabs_v16f16(ptr %p) {
182-
; X86-AVX512FP16-LABEL: fabs_v16f16:
183-
; X86-AVX512FP16: # %bb.0:
184-
; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]]
185-
; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[YMM:%ymm[0-9]+]]
186-
; X86-AVX512FP16-NEXT: vpand ([[REG]]), [[YMM]], [[YMM]]
187-
; X86-AVX512FP16-NEXT: retl
188-
189-
; X64-AVX512FP16-LABEL: fabs_v16f16:
190-
; X64-AVX512FP16: # %bb.0:
191-
; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[YMM:%ymm[0-9]+]]
192-
; X64-AVX512FP16-NEXT: vpand (%rdi), [[YMM]], [[YMM]]
193-
; X64-AVX512FP16-NEXT: retq
194-
;
195-
%v = load <16 x half>, ptr %p, align 32
196-
%nnv = call <16 x half> @llvm.fabs.v16f16(<16 x half> %v)
197-
ret <16 x half> %nnv
198-
}
199-
declare <16 x half> @llvm.fabs.v16f16(<16 x half> %p)
200-
201-
define <32 x half> @fabs_v32f16(ptr %p) {
202-
; X86-AVX512FP16-LABEL: fabs_v32f16:
203-
; X86-AVX512FP16: # %bb.0:
204-
; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]]
205-
; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[ZMM:%zmm[0-9]+]]
206-
; X86-AVX512FP16-NEXT: vpandq ([[REG]]), [[ZMM]], [[ZMM]]
207-
; X86-AVX512FP16-NEXT: retl
208-
209-
; X64-AVX512FP16-LABEL: fabs_v32f16:
210-
; X64-AVX512FP16: # %bb.0:
211-
; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[ZMM:%zmm[0-9]+]]
212-
; X64-AVX512FP16-NEXT: vpandq (%rdi), [[ZMM]], [[ZMM]]
213-
; X64-AVX512FP16-NEXT: retq
214-
215-
%v = load <32 x half>, ptr %p, align 64
216-
%nnv = call <32 x half> @llvm.fabs.v32f16(<32 x half> %v)
217-
ret <32 x half> %nnv
218-
}
219-
declare <32 x half> @llvm.fabs.v32f16(<32 x half> %p)
220-
221-
222 189
define <8 x float> @fabs_v8f32(<8 x float> %p) {
223 190
; X86-AVX1-LABEL: fabs_v8f32:
224 191
; X86-AVX1: # %bb.0:
@@ -266,6 +233,30 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) {
266 233
}
267 234
declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
268 235

236+
define <16 x half> @fabs_v16f16(ptr %p) {
237+
; X86-AVX512FP16-LABEL: fabs_v16f16:
238+
; X86-AVX512FP16: # %bb.0:
239+
; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]]
240+
; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[YMM:%ymm[0-9]+]]
241+
; X86-AVX512FP16-NEXT: vpand ([[REG]]), [[YMM]], [[YMM]]
242+
; X86-AVX512FP16-NEXT: retl
243+
244+
; X64-AVX512FP16-LABEL: fabs_v16f16:
245+
; X64-AVX512FP16: # %bb.0:
246+
; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[YMM:%ymm[0-9]+]]
247+
; X64-AVX512FP16-NEXT: vpand (%rdi), [[YMM]], [[YMM]]
248+
; X64-AVX512FP16-NEXT: retq
249+
;
250+
%v = load <16 x half>, ptr %p, align 32
251+
%nnv = call <16 x half> @llvm.fabs.v16f16(<16 x half> %v)
252+
ret <16 x half> %nnv
253+
}
254+
declare <16 x half> @llvm.fabs.v16f16(<16 x half> %p)
255+
256+
;
257+
; 512-bit Vectors
258+
;
259+
269 260
define <8 x double> @fabs_v8f64(<8 x double> %p) {
270 261
; X86-AVX-LABEL: fabs_v8f64:
271 262
; X86-AVX: # %bb.0:
@@ -344,6 +335,26 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) {
344 335
}
345 336
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
346 337

338+
define <32 x half> @fabs_v32f16(ptr %p) {
339+
; X86-AVX512FP16-LABEL: fabs_v32f16:
340+
; X86-AVX512FP16: # %bb.0:
341+
; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]]
342+
; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[ZMM:%zmm[0-9]+]]
343+
; X86-AVX512FP16-NEXT: vpandq ([[REG]]), [[ZMM]], [[ZMM]]
344+
; X86-AVX512FP16-NEXT: retl
345+
346+
; X64-AVX512FP16-LABEL: fabs_v32f16:
347+
; X64-AVX512FP16: # %bb.0:
348+
; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[ZMM:%zmm[0-9]+]]
349+
; X64-AVX512FP16-NEXT: vpandq (%rdi), [[ZMM]], [[ZMM]]
350+
; X64-AVX512FP16-NEXT: retq
351+
352+
%v = load <32 x half>, ptr %p, align 64
353+
%nnv = call <32 x half> @llvm.fabs.v32f16(<32 x half> %v)
354+
ret <32 x half> %nnv
355+
}
356+
declare <32 x half> @llvm.fabs.v32f16(<32 x half> %p)
357+
347 358
; PR20354: when generating code for a vector fabs op,
348 359
; make sure that we're only turning off the sign bit of each float value.
349 360
; No constant pool loads or vector ops are needed for the fabs of a

0 commit comments

Comments
 (0)