1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
1
2
; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | FileCheck --check-prefixes=CHECK %s
2
3
; RUN: %if ptxas-11.8 %{ llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | %ptxas-verify -arch=sm_80 %}
3
4
@@ -6,36 +7,48 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
6
7
; Intrinsic declarations called by test_sin and test_cos below; both take and
; return <2 x bfloat>.
; NOTE(review): the ".f16" name suffix does not match the <2 x bfloat> operand
; type; presumably the IR parser remangles the overloaded intrinsic name on
; load -- confirm, or rename to the matching suffix together with the call sites.
; Attribute group #0 is defined outside this view.
declare <2 x bfloat> @llvm.sin.f16 (<2 x bfloat> %a ) #0
7
8
declare <2 x bfloat> @llvm.cos.f16 (<2 x bfloat> %a ) #0
8
9
9
- ; CHECK-LABEL: test_sin(
10
- ; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_sin_param_0];
11
- ; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
12
- ; CHECK-DAG: cvt.f32.bf16 [[AF0:%f[0-9]+]], [[A0]];
13
- ; CHECK-DAG: cvt.f32.bf16 [[AF1:%f[0-9]+]], [[A1]];
14
- ; CHECK-DAG: sin.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
15
- ; CHECK-DAG: sin.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
16
- ; CHECK-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[RF0]];
17
- ; CHECK-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[RF1]];
18
- ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
19
- ; CHECK: st.param.b32 [func_retval0], [[R]];
20
- ; CHECK: ret;
21
10
; test_sin: applies the sin intrinsic to a <2 x bfloat> argument and returns
; the <2 x bfloat> result.
; The CHECK lines pin the expected PTX: the argument is loaded as one b32 and
; split into two b16 registers; each half is widened with cvt.f32.bf16, passed
; through sin.approx.f32, and narrowed with cvt.rn.bf16.f32; the two results
; are repacked into a b32 in the same lane order and stored to func_retval0.
; NOTE(review): the approximate form is presumably selected because the RUN
; lines pass --enable-unsafe-fp-math -- confirm.
; The CHECK lines are autogenerated (update_llc_test_checks.py); regenerate
; rather than hand-edit. Attribute groups #0 and #1 are defined outside this view.
define <2 x bfloat> @test_sin (<2 x bfloat> %a ) #0 #1 {
11
+ ; CHECK-LABEL: test_sin(
12
+ ; CHECK: {
13
+ ; CHECK-NEXT: .reg .b16 %rs<5>;
14
+ ; CHECK-NEXT: .reg .b32 %r<3>;
15
+ ; CHECK-NEXT: .reg .f32 %f<5>;
16
+ ; CHECK-EMPTY:
17
+ ; CHECK-NEXT: // %bb.0:
18
+ ; CHECK-NEXT: ld.param.b32 %r1, [test_sin_param_0];
19
+ ; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
20
+ ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs2;
21
+ ; CHECK-NEXT: sin.approx.f32 %f2, %f1;
22
+ ; CHECK-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
23
+ ; CHECK-NEXT: cvt.f32.bf16 %f3, %rs1;
24
+ ; CHECK-NEXT: sin.approx.f32 %f4, %f3;
25
+ ; CHECK-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
26
+ ; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
27
+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
28
+ ; CHECK-NEXT: ret;
22
29
%r = call <2 x bfloat> @llvm.sin.f16 (<2 x bfloat> %a )
23
30
ret <2 x bfloat> %r
24
31
}
25
32
26
- ; CHECK-LABEL: test_cos(
27
- ; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_cos_param_0];
28
- ; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
29
- ; CHECK-DAG: cvt.f32.bf16 [[AF0:%f[0-9]+]], [[A0]];
30
- ; CHECK-DAG: cvt.f32.bf16 [[AF1:%f[0-9]+]], [[A1]];
31
- ; CHECK-DAG: cos.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
32
- ; CHECK-DAG: cos.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
33
- ; CHECK-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[RF0]];
34
- ; CHECK-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[RF1]];
35
- ; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
36
- ; CHECK: st.param.b32 [func_retval0], [[R]];
37
- ; CHECK: ret;
38
33
; test_cos: applies the cos intrinsic to a <2 x bfloat> argument and returns
; the <2 x bfloat> result.
; The CHECK lines pin the expected PTX: the argument is loaded as one b32 and
; split into two b16 registers; each half is widened with cvt.f32.bf16, passed
; through cos.approx.f32, and narrowed with cvt.rn.bf16.f32; the two results
; are repacked into a b32 in the same lane order and stored to func_retval0.
; NOTE(review): the approximate form is presumably selected because the RUN
; lines pass --enable-unsafe-fp-math -- confirm.
; The CHECK lines are autogenerated (update_llc_test_checks.py); regenerate
; rather than hand-edit. Attribute groups #0 and #1 are defined outside this view.
define <2 x bfloat> @test_cos (<2 x bfloat> %a ) #0 #1 {
34
+ ; CHECK-LABEL: test_cos(
35
+ ; CHECK: {
36
+ ; CHECK-NEXT: .reg .b16 %rs<5>;
37
+ ; CHECK-NEXT: .reg .b32 %r<3>;
38
+ ; CHECK-NEXT: .reg .f32 %f<5>;
39
+ ; CHECK-EMPTY:
40
+ ; CHECK-NEXT: // %bb.0:
41
+ ; CHECK-NEXT: ld.param.b32 %r1, [test_cos_param_0];
42
+ ; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
43
+ ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs2;
44
+ ; CHECK-NEXT: cos.approx.f32 %f2, %f1;
45
+ ; CHECK-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
46
+ ; CHECK-NEXT: cvt.f32.bf16 %f3, %rs1;
47
+ ; CHECK-NEXT: cos.approx.f32 %f4, %f3;
48
+ ; CHECK-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
49
+ ; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
50
+ ; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
51
+ ; CHECK-NEXT: ret;
39
52
%r = call <2 x bfloat> @llvm.cos.f16 (<2 x bfloat> %a )
40
53
ret <2 x bfloat> %r
41
54
}
0 commit comments