Skip to content

Commit 17b39fd

Browse files
committed
Introduce NVPTXISD::FCOPYSIGN, whose operands must have matching types, and lower ISD::FCOPYSIGN into it
1 parent f13f2f6 commit 17b39fd

File tree

4 files changed

+110
-12
lines changed

4 files changed

+110
-12
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -838,8 +838,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
838838
setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
839839
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
840840
setOperationAction(ISD::FCOPYSIGN, MVT::v2bf16, Expand);
841-
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
842-
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
841+
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
842+
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
843843

844844
// These map to corresponding instructions for f32/f64. f16 must be
845845
// promoted to f32. v2f16 is expanded to f16, which is then promoted
@@ -964,6 +964,7 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
964964
MAKE_CASE(NVPTXISD::BFE)
965965
MAKE_CASE(NVPTXISD::BFI)
966966
MAKE_CASE(NVPTXISD::PRMT)
967+
MAKE_CASE(NVPTXISD::FCOPYSIGN)
967968
MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC)
968969
MAKE_CASE(NVPTXISD::SETP_F16X2)
969970
MAKE_CASE(NVPTXISD::SETP_BF16X2)
@@ -2560,6 +2561,23 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
25602561
}
25612562
}
25622563

2564+
/// Lower a generic ISD::FCOPYSIGN node to NVPTXISD::FCOPYSIGN, converting the
2565+
/// second operand to the result type first so that both operand types match.
2566+
SDValue NVPTXTargetLowering::LowerFCOPYSIGN(SDValue Op,
2567+
SelectionDAG &DAG) const {
2568+
EVT VT = Op.getValueType();
2569+
SDLoc DL(Op);
2570+
2571+
// Operand 0 is forwarded unchanged; only operand 1 may be converted below.
SDValue In1 = Op.getOperand(0);
2572+
SDValue In2 = Op.getOperand(1);
2573+
EVT SrcVT = In2.getValueType();
2574+
2575+
// If the second operand's type differs in bit width from the result type,
// FP-extend or FP-round it to VT so the target node sees matching types.
// NOTE(review): this presumably relies on the conversion preserving the sign
// of the second operand -- confirm this also holds for NaN inputs.
if (!SrcVT.bitsEq(VT))
2576+
In2 = DAG.getFPExtendOrRound(In2, DL, VT);
2577+
2578+
return DAG.getNode(NVPTXISD::FCOPYSIGN, DL, VT, In1, In2);
2579+
}
2580+
25632581
SDValue NVPTXTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
25642582
EVT VT = Op.getValueType();
25652583

@@ -2803,6 +2821,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
28032821
return LowerSelect(Op, DAG);
28042822
case ISD::FROUND:
28052823
return LowerFROUND(Op, DAG);
2824+
case ISD::FCOPYSIGN:
2825+
return LowerFCOPYSIGN(Op, DAG);
28062826
case ISD::SINT_TO_FP:
28072827
case ISD::UINT_TO_FP:
28082828
return LowerINT_TO_FP(Op, DAG);

llvm/lib/Target/NVPTX/NVPTXISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ enum NodeType : unsigned {
6161
BFE,
6262
BFI,
6363
PRMT,
64+
FCOPYSIGN,
6465
DYNAMIC_STACKALLOC,
6566
BrxStart,
6667
BrxItem,
@@ -623,6 +624,8 @@ class NVPTXTargetLowering : public TargetLowering {
623624
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
624625
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
625626

627+
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
628+
626629
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
627630
SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
628631
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -981,15 +981,17 @@ def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
981981
// copysign
982982
//
983983

984+
def fcopysign_nvptx : SDNode<"NVPTXISD::FCOPYSIGN", SDTFPBinOp>;
985+
984986
def COPYSIGN_F :
985987
NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src0, Float32Regs:$src1),
986988
"copysign.f32 \t$dst, $src0, $src1;",
987-
[(set Float32Regs:$dst, (fcopysign Float32Regs:$src1, Float32Regs:$src0))]>;
989+
[(set Float32Regs:$dst, (fcopysign_nvptx Float32Regs:$src1, Float32Regs:$src0))]>;
988990

989991
def COPYSIGN_D :
990992
NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src0, Float64Regs:$src1),
991993
"copysign.f64 \t$dst, $src0, $src1;",
992-
[(set Float64Regs:$dst, (fcopysign Float64Regs:$src1, Float64Regs:$src0))]>;
994+
[(set Float64Regs:$dst, (fcopysign_nvptx Float64Regs:$src1, Float64Regs:$src0))]>;
993995

994996
//
995997
// Abs, Neg bf16, bf16x2

llvm/test/CodeGen/NVPTX/copysign.ll

Lines changed: 81 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,35 +5,108 @@
55
target triple = "nvptx64-nvidia-cuda"
66
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
77

8-
define float @fcopysign_f(float %a, float %b) {
9-
; CHECK-LABEL: fcopysign_f(
8+
define float @fcopysign_f_f(float %a, float %b) {
9+
; CHECK-LABEL: fcopysign_f_f(
1010
; CHECK: {
1111
; CHECK-NEXT: .reg .f32 %f<4>;
1212
; CHECK-EMPTY:
1313
; CHECK-NEXT: // %bb.0:
14-
; CHECK-NEXT: ld.param.f32 %f1, [fcopysign_f_param_0];
15-
; CHECK-NEXT: ld.param.f32 %f2, [fcopysign_f_param_1];
14+
; CHECK-NEXT: ld.param.f32 %f1, [fcopysign_f_f_param_0];
15+
; CHECK-NEXT: ld.param.f32 %f2, [fcopysign_f_f_param_1];
1616
; CHECK-NEXT: copysign.f32 %f3, %f2, %f1;
1717
; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3;
1818
; CHECK-NEXT: ret;
1919
%val = call float @llvm.copysign.f32(float %a, float %b)
2020
ret float %val
2121
}
2222

23-
define double @fcopysign_d(double %a, double %b) {
24-
; CHECK-LABEL: fcopysign_d(
23+
define double @fcopysign_d_d(double %a, double %b) {
24+
; CHECK-LABEL: fcopysign_d_d(
2525
; CHECK: {
2626
; CHECK-NEXT: .reg .f64 %fd<4>;
2727
; CHECK-EMPTY:
2828
; CHECK-NEXT: // %bb.0:
29-
; CHECK-NEXT: ld.param.f64 %fd1, [fcopysign_d_param_0];
30-
; CHECK-NEXT: ld.param.f64 %fd2, [fcopysign_d_param_1];
29+
; CHECK-NEXT: ld.param.f64 %fd1, [fcopysign_d_d_param_0];
30+
; CHECK-NEXT: ld.param.f64 %fd2, [fcopysign_d_d_param_1];
3131
; CHECK-NEXT: copysign.f64 %fd3, %fd2, %fd1;
3232
; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3;
3333
; CHECK-NEXT: ret;
3434
%val = call double @llvm.copysign.f64(double %a, double %b)
3535
ret double %val
3636
}
3737

38+
; f32 copysign whose second argument is an f64 value truncated to f32.
; Expected PTX: cvt.rn.f32.f64 on the second parameter, then copysign.f32.
define float @fcopysign_f_d(float %a, double %b) {
39+
; CHECK-LABEL: fcopysign_f_d(
40+
; CHECK: {
41+
; CHECK-NEXT: .reg .f32 %f<4>;
42+
; CHECK-NEXT: .reg .f64 %fd<2>;
43+
; CHECK-EMPTY:
44+
; CHECK-NEXT: // %bb.0:
45+
; CHECK-NEXT: ld.param.f32 %f1, [fcopysign_f_d_param_0];
46+
; CHECK-NEXT: ld.param.f64 %fd1, [fcopysign_f_d_param_1];
47+
; CHECK-NEXT: cvt.rn.f32.f64 %f2, %fd1;
48+
; CHECK-NEXT: copysign.f32 %f3, %f2, %f1;
49+
; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3;
50+
; CHECK-NEXT: ret;
51+
%c = fptrunc double %b to float
52+
%val = call float @llvm.copysign.f32(float %a, float %c)
53+
ret float %val
54+
}
55+
56+
; f32 copysign whose second argument is a half value extended to f32.
; Expected PTX: cvt.f32.f16 on the second parameter, then copysign.f32.
define float @fcopysign_f_h(float %a, half %b) {
57+
; CHECK-LABEL: fcopysign_f_h(
58+
; CHECK: {
59+
; CHECK-NEXT: .reg .b16 %rs<2>;
60+
; CHECK-NEXT: .reg .f32 %f<4>;
61+
; CHECK-EMPTY:
62+
; CHECK-NEXT: // %bb.0:
63+
; CHECK-NEXT: ld.param.f32 %f1, [fcopysign_f_h_param_0];
64+
; CHECK-NEXT: ld.param.b16 %rs1, [fcopysign_f_h_param_1];
65+
; CHECK-NEXT: cvt.f32.f16 %f2, %rs1;
66+
; CHECK-NEXT: copysign.f32 %f3, %f2, %f1;
67+
; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3;
68+
; CHECK-NEXT: ret;
69+
%c = fpext half %b to float
70+
%val = call float @llvm.copysign.f32(float %a, float %c)
71+
ret float %val
72+
}
73+
74+
; f64 copysign whose second argument is an f32 value extended to f64.
; Expected PTX: cvt.f64.f32 on the second parameter, then copysign.f64.
define double @fcopysign_d_f(double %a, float %b) {
75+
; CHECK-LABEL: fcopysign_d_f(
76+
; CHECK: {
77+
; CHECK-NEXT: .reg .f32 %f<2>;
78+
; CHECK-NEXT: .reg .f64 %fd<4>;
79+
; CHECK-EMPTY:
80+
; CHECK-NEXT: // %bb.0:
81+
; CHECK-NEXT: ld.param.f64 %fd1, [fcopysign_d_f_param_0];
82+
; CHECK-NEXT: ld.param.f32 %f1, [fcopysign_d_f_param_1];
83+
; CHECK-NEXT: cvt.f64.f32 %fd2, %f1;
84+
; CHECK-NEXT: copysign.f64 %fd3, %fd2, %fd1;
85+
; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3;
86+
; CHECK-NEXT: ret;
87+
%c = fpext float %b to double
88+
%val = call double @llvm.copysign.f64(double %a, double %c)
89+
ret double %val
90+
}
91+
92+
; f64 copysign whose second argument is a half value extended to f64.
; Expected PTX: cvt.f64.f16 on the second parameter, then copysign.f64.
define double @fcopysign_d_h(double %a, half %b) {
93+
; CHECK-LABEL: fcopysign_d_h(
94+
; CHECK: {
95+
; CHECK-NEXT: .reg .b16 %rs<2>;
96+
; CHECK-NEXT: .reg .f64 %fd<4>;
97+
; CHECK-EMPTY:
98+
; CHECK-NEXT: // %bb.0:
99+
; CHECK-NEXT: ld.param.f64 %fd1, [fcopysign_d_h_param_0];
100+
; CHECK-NEXT: ld.param.b16 %rs1, [fcopysign_d_h_param_1];
101+
; CHECK-NEXT: cvt.f64.f16 %fd2, %rs1;
102+
; CHECK-NEXT: copysign.f64 %fd3, %fd2, %fd1;
103+
; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3;
104+
; CHECK-NEXT: ret;
105+
%c = fpext half %b to double
106+
%val = call double @llvm.copysign.f64(double %a, double %c)
107+
ret double %val
108+
}
109+
110+
38111
declare float @llvm.copysign.f32(float, float)
39112
declare double @llvm.copysign.f64(double, double)

0 commit comments

Comments
 (0)