Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 52b6c2d

Browse files
committed
R600: Expand vector fceil
Move fp64 fceil tests to fceil64.ll

v2: rebase

Signed-off-by: Jan Vesely <[email protected]>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211194 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 0c57bab commit 52b6c2d

File tree

3 files changed

+217
-85
lines changed

3 files changed

+217
-85
lines changed

lib/Target/R600/AMDGPUISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -317,6 +317,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
317317
for (MVT VT : FloatVectorTypes) {
318318
setOperationAction(ISD::FABS, VT, Expand);
319319
setOperationAction(ISD::FADD, VT, Expand);
320+
setOperationAction(ISD::FCEIL, VT, Expand);
320321
setOperationAction(ISD::FCOS, VT, Expand);
321322
setOperationAction(ISD::FDIV, VT, Expand);
322323
setOperationAction(ISD::FPOW, VT, Expand);

test/CodeGen/R600/fceil.ll

Lines changed: 113 additions & 85 deletions
Original file line number | Diff line number | Diff line change
@@ -1,103 +1,131 @@
1-
; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
21
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2+
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
33

4-
declare double @llvm.ceil.f64(double) nounwind readnone
5-
declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
6-
declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone
7-
declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
8-
declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
9-
declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
4+
declare float @llvm.ceil.f32(float) nounwind readnone
5+
declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone
6+
declare <3 x float> @llvm.ceil.v3f32(<3 x float>) nounwind readnone
7+
declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
8+
declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone
9+
declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone
1010

11-
; FUNC-LABEL: @fceil_f64:
12-
; CI: V_CEIL_F64_e32
13-
; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
14-
; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
15-
; SI: S_LSHR_B64
16-
; SI: S_NOT_B64
17-
; SI: S_AND_B64
18-
; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
19-
; SI: CMP_LT_I32
20-
; SI: CNDMASK_B32
21-
; SI: CNDMASK_B32
22-
; SI: CMP_GT_I32
23-
; SI: CNDMASK_B32
24-
; SI: CNDMASK_B32
25-
; SI: CMP_GT_F64
26-
; SI: CNDMASK_B32
27-
; SI: CMP_NE_I32
28-
; SI: CNDMASK_B32
29-
; SI: CNDMASK_B32
30-
; SI: V_ADD_F64
31-
define void @fceil_f64(double addrspace(1)* %out, double %x) {
32-
%y = call double @llvm.ceil.f64(double %x) nounwind readnone
33-
store double %y, double addrspace(1)* %out
11+
; FUNC-LABEL: @fceil_f32:
12+
; SI: V_CEIL_F32_e32
13+
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
14+
; EG: CEIL {{\*? *}}[[RESULT]]
15+
define void @fceil_f32(float addrspace(1)* %out, float %x) {
16+
%y = call float @llvm.ceil.f32(float %x) nounwind readnone
17+
store float %y, float addrspace(1)* %out
3418
ret void
3519
}
3620

37-
; FUNC-LABEL: @fceil_v2f64:
38-
; CI: V_CEIL_F64_e32
39-
; CI: V_CEIL_F64_e32
40-
define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
41-
%y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone
42-
store <2 x double> %y, <2 x double> addrspace(1)* %out
21+
; FUNC-LABEL: @fceil_v2f32:
22+
; SI: V_CEIL_F32_e32
23+
; SI: V_CEIL_F32_e32
24+
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
25+
; EG: CEIL {{\*? *}}[[RESULT]]
26+
; EG: CEIL {{\*? *}}[[RESULT]]
27+
define void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
28+
%y = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x) nounwind readnone
29+
store <2 x float> %y, <2 x float> addrspace(1)* %out
4330
ret void
4431
}
4532

46-
; FIXME-FUNC-LABEL: @fceil_v3f64:
47-
; FIXME-CI: V_CEIL_F64_e32
48-
; FIXME-CI: V_CEIL_F64_e32
49-
; FIXME-CI: V_CEIL_F64_e32
50-
; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
51-
; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone
52-
; store <3 x double> %y, <3 x double> addrspace(1)* %out
53-
; ret void
54-
; }
33+
; FUNC-LABEL: @fceil_v3f32:
34+
; FIXME-SI: V_CEIL_F32_e32
35+
; FIXME-SI: V_CEIL_F32_e32
36+
; FIXME-SI: V_CEIL_F32_e32
37+
; FIXME-EG: v3 is treated as v2 and v1, hence 2 stores
38+
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
39+
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
40+
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
41+
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
42+
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
43+
define void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
44+
%y = call <3 x float> @llvm.ceil.v3f32(<3 x float> %x) nounwind readnone
45+
store <3 x float> %y, <3 x float> addrspace(1)* %out
46+
ret void
47+
}
5548

56-
; FUNC-LABEL: @fceil_v4f64:
57-
; CI: V_CEIL_F64_e32
58-
; CI: V_CEIL_F64_e32
59-
; CI: V_CEIL_F64_e32
60-
; CI: V_CEIL_F64_e32
61-
define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
62-
%y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
63-
store <4 x double> %y, <4 x double> addrspace(1)* %out
49+
; FUNC-LABEL: @fceil_v4f32:
50+
; SI: V_CEIL_F32_e32
51+
; SI: V_CEIL_F32_e32
52+
; SI: V_CEIL_F32_e32
53+
; SI: V_CEIL_F32_e32
54+
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
55+
; EG: CEIL {{\*? *}}[[RESULT]]
56+
; EG: CEIL {{\*? *}}[[RESULT]]
57+
; EG: CEIL {{\*? *}}[[RESULT]]
58+
; EG: CEIL {{\*? *}}[[RESULT]]
59+
define void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
60+
%y = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
61+
store <4 x float> %y, <4 x float> addrspace(1)* %out
6462
ret void
6563
}
6664

67-
; FUNC-LABEL: @fceil_v8f64:
68-
; CI: V_CEIL_F64_e32
69-
; CI: V_CEIL_F64_e32
70-
; CI: V_CEIL_F64_e32
71-
; CI: V_CEIL_F64_e32
72-
; CI: V_CEIL_F64_e32
73-
; CI: V_CEIL_F64_e32
74-
; CI: V_CEIL_F64_e32
75-
; CI: V_CEIL_F64_e32
76-
define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
77-
%y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone
78-
store <8 x double> %y, <8 x double> addrspace(1)* %out
65+
; FUNC-LABEL: @fceil_v8f32:
66+
; SI: V_CEIL_F32_e32
67+
; SI: V_CEIL_F32_e32
68+
; SI: V_CEIL_F32_e32
69+
; SI: V_CEIL_F32_e32
70+
; SI: V_CEIL_F32_e32
71+
; SI: V_CEIL_F32_e32
72+
; SI: V_CEIL_F32_e32
73+
; SI: V_CEIL_F32_e32
74+
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
75+
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
76+
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
77+
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
78+
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
79+
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
80+
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
81+
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
82+
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
83+
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
84+
define void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
85+
%y = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) nounwind readnone
86+
store <8 x float> %y, <8 x float> addrspace(1)* %out
7987
ret void
8088
}
8189

82-
; FUNC-LABEL: @fceil_v16f64:
83-
; CI: V_CEIL_F64_e32
84-
; CI: V_CEIL_F64_e32
85-
; CI: V_CEIL_F64_e32
86-
; CI: V_CEIL_F64_e32
87-
; CI: V_CEIL_F64_e32
88-
; CI: V_CEIL_F64_e32
89-
; CI: V_CEIL_F64_e32
90-
; CI: V_CEIL_F64_e32
91-
; CI: V_CEIL_F64_e32
92-
; CI: V_CEIL_F64_e32
93-
; CI: V_CEIL_F64_e32
94-
; CI: V_CEIL_F64_e32
95-
; CI: V_CEIL_F64_e32
96-
; CI: V_CEIL_F64_e32
97-
; CI: V_CEIL_F64_e32
98-
; CI: V_CEIL_F64_e32
99-
define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
100-
%y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone
101-
store <16 x double> %y, <16 x double> addrspace(1)* %out
90+
; FUNC-LABEL: @fceil_v16f32:
91+
; SI: V_CEIL_F32_e32
92+
; SI: V_CEIL_F32_e32
93+
; SI: V_CEIL_F32_e32
94+
; SI: V_CEIL_F32_e32
95+
; SI: V_CEIL_F32_e32
96+
; SI: V_CEIL_F32_e32
97+
; SI: V_CEIL_F32_e32
98+
; SI: V_CEIL_F32_e32
99+
; SI: V_CEIL_F32_e32
100+
; SI: V_CEIL_F32_e32
101+
; SI: V_CEIL_F32_e32
102+
; SI: V_CEIL_F32_e32
103+
; SI: V_CEIL_F32_e32
104+
; SI: V_CEIL_F32_e32
105+
; SI: V_CEIL_F32_e32
106+
; SI: V_CEIL_F32_e32
107+
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
108+
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
109+
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT3:T[0-9]+]]{{\.[XYZW]}}
110+
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT4:T[0-9]+]]{{\.[XYZW]}}
111+
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
112+
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
113+
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
114+
; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
115+
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
116+
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
117+
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
118+
; EG-DAG: CEIL {{\*? *}}[[RESULT2]]
119+
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
120+
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
121+
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
122+
; EG-DAG: CEIL {{\*? *}}[[RESULT3]]
123+
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
124+
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
125+
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
126+
; EG-DAG: CEIL {{\*? *}}[[RESULT4]]
127+
define void @fceil_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) {
128+
%y = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) nounwind readnone
129+
store <16 x float> %y, <16 x float> addrspace(1)* %out
102130
ret void
103131
}

test/CodeGen/R600/fceil64.ll

Lines changed: 103 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,103 @@
1+
; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
2+
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3+
4+
declare double @llvm.ceil.f64(double) nounwind readnone
5+
declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
6+
declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone
7+
declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
8+
declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
9+
declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
10+
11+
; FUNC-LABEL: @fceil_f64:
12+
; CI: V_CEIL_F64_e32
13+
; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
14+
; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
15+
; SI: S_LSHR_B64
16+
; SI: S_NOT_B64
17+
; SI: S_AND_B64
18+
; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
19+
; SI: CMP_LT_I32
20+
; SI: CNDMASK_B32
21+
; SI: CNDMASK_B32
22+
; SI: CMP_GT_I32
23+
; SI: CNDMASK_B32
24+
; SI: CNDMASK_B32
25+
; SI: CMP_GT_F64
26+
; SI: CNDMASK_B32
27+
; SI: CMP_NE_I32
28+
; SI: CNDMASK_B32
29+
; SI: CNDMASK_B32
30+
; SI: V_ADD_F64
31+
define void @fceil_f64(double addrspace(1)* %out, double %x) {
32+
%y = call double @llvm.ceil.f64(double %x) nounwind readnone
33+
store double %y, double addrspace(1)* %out
34+
ret void
35+
}
36+
37+
; FUNC-LABEL: @fceil_v2f64:
38+
; CI: V_CEIL_F64_e32
39+
; CI: V_CEIL_F64_e32
40+
define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
41+
%y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone
42+
store <2 x double> %y, <2 x double> addrspace(1)* %out
43+
ret void
44+
}
45+
46+
; FIXME-FUNC-LABEL: @fceil_v3f64:
47+
; FIXME-CI: V_CEIL_F64_e32
48+
; FIXME-CI: V_CEIL_F64_e32
49+
; FIXME-CI: V_CEIL_F64_e32
50+
; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
51+
; %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone
52+
; store <3 x double> %y, <3 x double> addrspace(1)* %out
53+
; ret void
54+
; }
55+
56+
; FUNC-LABEL: @fceil_v4f64:
57+
; CI: V_CEIL_F64_e32
58+
; CI: V_CEIL_F64_e32
59+
; CI: V_CEIL_F64_e32
60+
; CI: V_CEIL_F64_e32
61+
define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
62+
%y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
63+
store <4 x double> %y, <4 x double> addrspace(1)* %out
64+
ret void
65+
}
66+
67+
; FUNC-LABEL: @fceil_v8f64:
68+
; CI: V_CEIL_F64_e32
69+
; CI: V_CEIL_F64_e32
70+
; CI: V_CEIL_F64_e32
71+
; CI: V_CEIL_F64_e32
72+
; CI: V_CEIL_F64_e32
73+
; CI: V_CEIL_F64_e32
74+
; CI: V_CEIL_F64_e32
75+
; CI: V_CEIL_F64_e32
76+
define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
77+
%y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone
78+
store <8 x double> %y, <8 x double> addrspace(1)* %out
79+
ret void
80+
}
81+
82+
; FUNC-LABEL: @fceil_v16f64:
83+
; CI: V_CEIL_F64_e32
84+
; CI: V_CEIL_F64_e32
85+
; CI: V_CEIL_F64_e32
86+
; CI: V_CEIL_F64_e32
87+
; CI: V_CEIL_F64_e32
88+
; CI: V_CEIL_F64_e32
89+
; CI: V_CEIL_F64_e32
90+
; CI: V_CEIL_F64_e32
91+
; CI: V_CEIL_F64_e32
92+
; CI: V_CEIL_F64_e32
93+
; CI: V_CEIL_F64_e32
94+
; CI: V_CEIL_F64_e32
95+
; CI: V_CEIL_F64_e32
96+
; CI: V_CEIL_F64_e32
97+
; CI: V_CEIL_F64_e32
98+
; CI: V_CEIL_F64_e32
99+
define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
100+
%y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone
101+
store <16 x double> %y, <16 x double> addrspace(1)* %out
102+
ret void
103+
}

0 commit comments

Comments
 (0)