|
5 | 5 | target triple = "nvptx64-nvidia-cuda"
|
6 | 6 | target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
7 | 7 |
|
; float magnitude (%a) with float sign source (%b): both operands already have
; the result type, so no conversion is expected. The expected PTX loads the two
; parameters and issues a single copysign.f32 whose first source is the value
; loaded from param_1 (%b) and whose second source is the one from param_0 (%a).
define float @fcopysign_f_f(float %a, float %b) {
; CHECK-LABEL: fcopysign_f_f(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [fcopysign_f_f_param_0];
; CHECK-NEXT: ld.param.f32 %f2, [fcopysign_f_f_param_1];
; CHECK-NEXT: copysign.f32 %f3, %f2, %f1;
; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3;
; CHECK-NEXT: ret;
  %res = call float @llvm.copysign.f32(float %a, float %b)
  ret float %res
}
|
22 | 22 |
|
; double magnitude (%a) with double sign source (%b): the f64 counterpart of
; the same-type case. The expected PTX is two f64 parameter loads followed by
; one copysign.f64 taking the param_1 value first and the param_0 value second.
define double @fcopysign_d_d(double %a, double %b) {
; CHECK-LABEL: fcopysign_d_d(
; CHECK: {
; CHECK-NEXT: .reg .f64 %fd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f64 %fd1, [fcopysign_d_d_param_0];
; CHECK-NEXT: ld.param.f64 %fd2, [fcopysign_d_d_param_1];
; CHECK-NEXT: copysign.f64 %fd3, %fd2, %fd1;
; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3;
; CHECK-NEXT: ret;
  %res = call double @llvm.copysign.f64(double %a, double %b)
  ret double %res
}
|
37 | 37 |
|
; float magnitude (%a) with a double sign source (%b): the sign operand is
; narrowed to float with fptrunc before the intrinsic call. The expected PTX
; keeps that narrowing as an explicit cvt.rn.f32.f64 and then applies
; copysign.f32 to the converted value and the float parameter.
define float @fcopysign_f_d(float %a, double %b) {
; CHECK-LABEL: fcopysign_f_d(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<4>;
; CHECK-NEXT: .reg .f64 %fd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [fcopysign_f_d_param_0];
; CHECK-NEXT: ld.param.f64 %fd1, [fcopysign_f_d_param_1];
; CHECK-NEXT: cvt.rn.f32.f64 %f2, %fd1;
; CHECK-NEXT: copysign.f32 %f3, %f2, %f1;
; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3;
; CHECK-NEXT: ret;
  %sign = fptrunc double %b to float
  %res = call float @llvm.copysign.f32(float %a, float %sign)
  ret float %res
}
| 55 | + |
; float magnitude (%a) with a half sign source (%b): the sign operand is
; widened to float with fpext before the intrinsic call. The expected PTX loads
; the half parameter as a 16-bit value into %rs1, converts it with cvt.f32.f16,
; and then applies copysign.f32.
define float @fcopysign_f_h(float %a, half %b) {
; CHECK-LABEL: fcopysign_f_h(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .f32 %f<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [fcopysign_f_h_param_0];
; CHECK-NEXT: ld.param.b16 %rs1, [fcopysign_f_h_param_1];
; CHECK-NEXT: cvt.f32.f16 %f2, %rs1;
; CHECK-NEXT: copysign.f32 %f3, %f2, %f1;
; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3;
; CHECK-NEXT: ret;
  %sign = fpext half %b to float
  %res = call float @llvm.copysign.f32(float %a, float %sign)
  ret float %res
}
| 73 | + |
; double magnitude (%a) with a float sign source (%b): the sign operand is
; widened to double with fpext before the intrinsic call. The expected PTX
; converts the float parameter with cvt.f64.f32 and then applies copysign.f64
; to the converted value and the double parameter.
define double @fcopysign_d_f(double %a, float %b) {
; CHECK-LABEL: fcopysign_d_f(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<2>;
; CHECK-NEXT: .reg .f64 %fd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f64 %fd1, [fcopysign_d_f_param_0];
; CHECK-NEXT: ld.param.f32 %f1, [fcopysign_d_f_param_1];
; CHECK-NEXT: cvt.f64.f32 %fd2, %f1;
; CHECK-NEXT: copysign.f64 %fd3, %fd2, %fd1;
; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3;
; CHECK-NEXT: ret;
  %sign = fpext float %b to double
  %res = call double @llvm.copysign.f64(double %a, double %sign)
  ret double %res
}
| 91 | + |
; double magnitude (%a) with a half sign source (%b): the sign operand is
; widened straight from half to double with fpext. The expected PTX loads the
; half parameter as a 16-bit value into %rs1, converts it with cvt.f64.f16,
; and then applies copysign.f64.
define double @fcopysign_d_h(double %a, half %b) {
; CHECK-LABEL: fcopysign_d_h(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .f64 %fd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f64 %fd1, [fcopysign_d_h_param_0];
; CHECK-NEXT: ld.param.b16 %rs1, [fcopysign_d_h_param_1];
; CHECK-NEXT: cvt.f64.f16 %fd2, %rs1;
; CHECK-NEXT: copysign.f64 %fd3, %fd2, %fd1;
; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3;
; CHECK-NEXT: ret;
  %sign = fpext half %b to double
  %res = call double @llvm.copysign.f64(double %a, double %sign)
  ret double %res
}
| 109 | + |
| 110 | + |
38 | 111 | declare float @llvm.copysign.f32(float, float)
|
39 | 112 | declare double @llvm.copysign.f64(double, double)
|
0 commit comments