Skip to content

Commit 9a1eded

Browse files
authored
[RISCV] Custom legalize f16/bf16 FCOPYSIGN with Zfhmin/Zfbfmin. (#107039)
The LegalizeDAG expansion will go through memory since i16 isn't a legal type. Avoid this by using FMV nodes, similar to what we did in #106886 for FNEG and FABS. Special care is needed to handle the Sign operand having a different type than the result.
1 parent 78abeca commit 9a1eded

File tree

6 files changed

+235
-430
lines changed

6 files changed

+235
-430
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
459459
setOperationAction(ISD::FREM, MVT::bf16, Promote);
460460
setOperationAction(ISD::FABS, MVT::bf16, Custom);
461461
setOperationAction(ISD::FNEG, MVT::bf16, Custom);
462-
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
462+
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Custom);
463463
}
464464

465465
if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
@@ -477,7 +477,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
477477
setOperationAction(Op, MVT::f16, Custom);
478478
setOperationAction(ISD::FABS, MVT::f16, Custom);
479479
setOperationAction(ISD::FNEG, MVT::f16, Custom);
480-
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
480+
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
481481
}
482482

483483
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
@@ -5964,6 +5964,69 @@ static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
59645964
return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
59655965
}
59665966

5967+
/// Custom-lower an ISD::FCOPYSIGN whose result type is f16 or bf16.
///
/// Both operands are moved into XLenVT integers, the sign bit of the Sign
/// operand is aligned with bit 15, combined with the low 15 bits of the
/// magnitude using integer AND/OR, and the result is moved back to the FP
/// type with FMV_H_X.
///
/// \param Op        The FCOPYSIGN node; operand 0 is the magnitude and
///                  operand 1 is the sign source, which may have a different
///                  size (16, 32, 64 or XLen bits) than the result.
/// \param DAG       The SelectionDAG used to create the replacement nodes.
/// \param Subtarget Queried for XLen and the XLen value type.
/// \returns The FMV_H_X node producing the f16/bf16 result.
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
5968+
const RISCVSubtarget &Subtarget) {
5969+
assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
5970+
5971+
MVT XLenVT = Subtarget.getXLenVT();
5972+
MVT VT = Op.getSimpleValueType();
5973+
assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
5974+
5975+
SDValue Mag = Op.getOperand(0);
5976+
SDValue Sign = Op.getOperand(1);
5977+
5978+
SDLoc DL(Op);
5979+
5980+
// Get the bits of the Sign operand into an XLenVT integer. The node used
// depends on the size of Sign, which need not match the result type.
SDValue SignAsInt;
5981+
unsigned SignSize = Sign.getValueSizeInBits();
5982+
if (SignSize == Subtarget.getXLen()) {
5983+
SignAsInt = DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
5984+
} else if (SignSize == 16) {
5985+
SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
5986+
} else if (SignSize == 32) {
5987+
SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
5988+
} else if (SignSize == 64) {
5989+
assert(XLenVT == MVT::i32 && "Unexpected type");
5990+
// Copy the upper word (result 1 of SplitF64) to integer.
5991+
SignAsInt = DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
5992+
.getValue(1);
5993+
// SignAsInt now holds a single 32-bit word, so use that width for the
// shift computation below.
SignSize = 32;
5994+
} else
5995+
llvm_unreachable("Unexpected sign size");
5996+
5997+
// Get the signbit at the right position for MagAsInt: shift by the
// difference between the sign width and the magnitude width.
// NOTE(review): with a 16-bit Mag and the sign sizes accepted above,
// ShiftAmount does not look like it can be negative - confirm before
// relying on the SHL path.
int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
5998+
if (ShiftAmount > 0) {
5999+
SignAsInt = DAG.getNode(ISD::SRL, DL, XLenVT, SignAsInt,
6000+
DAG.getConstant(ShiftAmount, DL, XLenVT));
6001+
} else if (ShiftAmount < 0) {
6002+
SignAsInt = DAG.getNode(ISD::SHL, DL, XLenVT, SignAsInt,
6003+
DAG.getConstant(-ShiftAmount, DL, XLenVT));
6004+
}
6005+
6006+
// Mask the sign bit (bit 15) and any bits above it. The extra bits will be
6007+
// dropped when we convert back to FP.
6008+
SDValue SignMask = DAG.getConstant(
6009+
APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
6010+
SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
6011+
6012+
// Transform Mag value to integer, and clear the sign bit. The mask keeps
// only the low 15 bits.
SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
6013+
SDValue ClearSignMask = DAG.getConstant(
6014+
APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
6015+
SDValue ClearedSign =
6016+
DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
6017+
6018+
// ClearedSign only has bits below bit 15 set and SignBit only has bit 15
// and above set, so the OR operands share no set bits.
SDNodeFlags Flags;
6019+
Flags.setDisjoint(true);
6020+
6021+
SDValue CopiedSign =
6022+
DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit, Flags);
6023+
6024+
// Move the combined integer back into the f16/bf16 result type.
return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
6025+
}
6029+
59676030
/// Get a RISC-V target specified VL op for a given SDNode.
59686031
static unsigned getRISCVVLOp(SDValue Op) {
59696032
#define OP_CASE(NODE) \
@@ -7164,6 +7227,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
71647227
case ISD::VSELECT:
71657228
return lowerFixedLengthVectorSelectToRVV(Op, DAG);
71667229
case ISD::FCOPYSIGN:
7230+
if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7231+
return lowerFCOPYSIGN(Op, DAG, Subtarget);
71677232
if (Op.getValueType() == MVT::nxv32f16 &&
71687233
(Subtarget.hasVInstructionsF16Minimal() &&
71697234
!Subtarget.hasVInstructionsF16()))

llvm/test/CodeGen/RISCV/bfloat-arith.ll

Lines changed: 28 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -73,42 +73,26 @@ declare bfloat @llvm.copysign.bf16(bfloat, bfloat)
7373
define bfloat @fsgnj_bf16(bfloat %a, bfloat %b) nounwind {
7474
; RV32IZFBFMIN-LABEL: fsgnj_bf16:
7575
; RV32IZFBFMIN: # %bb.0:
76-
; RV32IZFBFMIN-NEXT: addi sp, sp, -16
77-
; RV32IZFBFMIN-NEXT: fsh fa1, 12(sp)
78-
; RV32IZFBFMIN-NEXT: lbu a0, 13(sp)
76+
; RV32IZFBFMIN-NEXT: fmv.x.h a0, fa1
77+
; RV32IZFBFMIN-NEXT: lui a1, 1048568
78+
; RV32IZFBFMIN-NEXT: and a0, a0, a1
7979
; RV32IZFBFMIN-NEXT: fmv.x.h a1, fa0
8080
; RV32IZFBFMIN-NEXT: slli a1, a1, 17
81-
; RV32IZFBFMIN-NEXT: andi a2, a0, 128
82-
; RV32IZFBFMIN-NEXT: srli a0, a1, 17
83-
; RV32IZFBFMIN-NEXT: beqz a2, .LBB5_2
84-
; RV32IZFBFMIN-NEXT: # %bb.1:
85-
; RV32IZFBFMIN-NEXT: lui a1, 1048568
86-
; RV32IZFBFMIN-NEXT: or a0, a0, a1
87-
; RV32IZFBFMIN-NEXT: .LBB5_2:
88-
; RV32IZFBFMIN-NEXT: fmv.h.x fa5, a0
89-
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
90-
; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
91-
; RV32IZFBFMIN-NEXT: addi sp, sp, 16
81+
; RV32IZFBFMIN-NEXT: srli a1, a1, 17
82+
; RV32IZFBFMIN-NEXT: or a0, a1, a0
83+
; RV32IZFBFMIN-NEXT: fmv.h.x fa0, a0
9284
; RV32IZFBFMIN-NEXT: ret
9385
;
9486
; RV64IZFBFMIN-LABEL: fsgnj_bf16:
9587
; RV64IZFBFMIN: # %bb.0:
96-
; RV64IZFBFMIN-NEXT: addi sp, sp, -16
97-
; RV64IZFBFMIN-NEXT: fsh fa1, 8(sp)
98-
; RV64IZFBFMIN-NEXT: lbu a0, 9(sp)
88+
; RV64IZFBFMIN-NEXT: fmv.x.h a0, fa1
89+
; RV64IZFBFMIN-NEXT: lui a1, 1048568
90+
; RV64IZFBFMIN-NEXT: and a0, a0, a1
9991
; RV64IZFBFMIN-NEXT: fmv.x.h a1, fa0
10092
; RV64IZFBFMIN-NEXT: slli a1, a1, 49
101-
; RV64IZFBFMIN-NEXT: andi a2, a0, 128
102-
; RV64IZFBFMIN-NEXT: srli a0, a1, 49
103-
; RV64IZFBFMIN-NEXT: beqz a2, .LBB5_2
104-
; RV64IZFBFMIN-NEXT: # %bb.1:
105-
; RV64IZFBFMIN-NEXT: lui a1, 1048568
106-
; RV64IZFBFMIN-NEXT: or a0, a0, a1
107-
; RV64IZFBFMIN-NEXT: .LBB5_2:
108-
; RV64IZFBFMIN-NEXT: fmv.h.x fa5, a0
109-
; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
110-
; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
111-
; RV64IZFBFMIN-NEXT: addi sp, sp, 16
93+
; RV64IZFBFMIN-NEXT: srli a1, a1, 49
94+
; RV64IZFBFMIN-NEXT: or a0, a1, a0
95+
; RV64IZFBFMIN-NEXT: fmv.h.x fa0, a0
11296
; RV64IZFBFMIN-NEXT: ret
11397
%1 = call bfloat @llvm.copysign.bf16(bfloat %a, bfloat %b)
11498
ret bfloat %1
@@ -138,62 +122,36 @@ define i32 @fneg_bf16(bfloat %a, bfloat %b) nounwind {
138122
define bfloat @fsgnjn_bf16(bfloat %a, bfloat %b) nounwind {
139123
; RV32IZFBFMIN-LABEL: fsgnjn_bf16:
140124
; RV32IZFBFMIN: # %bb.0:
141-
; RV32IZFBFMIN-NEXT: addi sp, sp, -16
142125
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1
143126
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
144127
; RV32IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5
145128
; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
146-
; RV32IZFBFMIN-NEXT: fmv.x.h a1, fa5
147-
; RV32IZFBFMIN-NEXT: lui a0, 1048568
148-
; RV32IZFBFMIN-NEXT: xor a1, a1, a0
149-
; RV32IZFBFMIN-NEXT: fmv.h.x fa5, a1
150-
; RV32IZFBFMIN-NEXT: fsh fa5, 12(sp)
151-
; RV32IZFBFMIN-NEXT: lbu a1, 13(sp)
152-
; RV32IZFBFMIN-NEXT: fmv.x.h a2, fa0
153-
; RV32IZFBFMIN-NEXT: slli a2, a2, 17
154-
; RV32IZFBFMIN-NEXT: andi a3, a1, 128
155-
; RV32IZFBFMIN-NEXT: srli a1, a2, 17
156-
; RV32IZFBFMIN-NEXT: bnez a3, .LBB7_2
157-
; RV32IZFBFMIN-NEXT: # %bb.1:
158-
; RV32IZFBFMIN-NEXT: fmv.h.x fa5, a1
159-
; RV32IZFBFMIN-NEXT: j .LBB7_3
160-
; RV32IZFBFMIN-NEXT: .LBB7_2:
129+
; RV32IZFBFMIN-NEXT: fmv.x.h a0, fa5
130+
; RV32IZFBFMIN-NEXT: not a0, a0
131+
; RV32IZFBFMIN-NEXT: lui a1, 1048568
132+
; RV32IZFBFMIN-NEXT: and a0, a0, a1
133+
; RV32IZFBFMIN-NEXT: fmv.x.h a1, fa0
134+
; RV32IZFBFMIN-NEXT: slli a1, a1, 17
135+
; RV32IZFBFMIN-NEXT: srli a1, a1, 17
161136
; RV32IZFBFMIN-NEXT: or a0, a1, a0
162-
; RV32IZFBFMIN-NEXT: fmv.h.x fa5, a0
163-
; RV32IZFBFMIN-NEXT: .LBB7_3:
164-
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
165-
; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
166-
; RV32IZFBFMIN-NEXT: addi sp, sp, 16
137+
; RV32IZFBFMIN-NEXT: fmv.h.x fa0, a0
167138
; RV32IZFBFMIN-NEXT: ret
168139
;
169140
; RV64IZFBFMIN-LABEL: fsgnjn_bf16:
170141
; RV64IZFBFMIN: # %bb.0:
171-
; RV64IZFBFMIN-NEXT: addi sp, sp, -16
172142
; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1
173143
; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
174144
; RV64IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5
175145
; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
176-
; RV64IZFBFMIN-NEXT: fmv.x.h a1, fa5
177-
; RV64IZFBFMIN-NEXT: lui a0, 1048568
178-
; RV64IZFBFMIN-NEXT: xor a1, a1, a0
179-
; RV64IZFBFMIN-NEXT: fmv.h.x fa5, a1
180-
; RV64IZFBFMIN-NEXT: fsh fa5, 8(sp)
181-
; RV64IZFBFMIN-NEXT: lbu a1, 9(sp)
182-
; RV64IZFBFMIN-NEXT: fmv.x.h a2, fa0
183-
; RV64IZFBFMIN-NEXT: slli a2, a2, 49
184-
; RV64IZFBFMIN-NEXT: andi a3, a1, 128
185-
; RV64IZFBFMIN-NEXT: srli a1, a2, 49
186-
; RV64IZFBFMIN-NEXT: bnez a3, .LBB7_2
187-
; RV64IZFBFMIN-NEXT: # %bb.1:
188-
; RV64IZFBFMIN-NEXT: fmv.h.x fa5, a1
189-
; RV64IZFBFMIN-NEXT: j .LBB7_3
190-
; RV64IZFBFMIN-NEXT: .LBB7_2:
146+
; RV64IZFBFMIN-NEXT: fmv.x.h a0, fa5
147+
; RV64IZFBFMIN-NEXT: not a0, a0
148+
; RV64IZFBFMIN-NEXT: lui a1, 1048568
149+
; RV64IZFBFMIN-NEXT: and a0, a0, a1
150+
; RV64IZFBFMIN-NEXT: fmv.x.h a1, fa0
151+
; RV64IZFBFMIN-NEXT: slli a1, a1, 49
152+
; RV64IZFBFMIN-NEXT: srli a1, a1, 49
191153
; RV64IZFBFMIN-NEXT: or a0, a1, a0
192-
; RV64IZFBFMIN-NEXT: fmv.h.x fa5, a0
193-
; RV64IZFBFMIN-NEXT: .LBB7_3:
194-
; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
195-
; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
196-
; RV64IZFBFMIN-NEXT: addi sp, sp, 16
154+
; RV64IZFBFMIN-NEXT: fmv.h.x fa0, a0
197155
; RV64IZFBFMIN-NEXT: ret
198156
%1 = fadd bfloat %a, %b
199157
%2 = fneg bfloat %1

llvm/test/CodeGen/RISCV/copysign-casts.ll

Lines changed: 42 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -503,54 +503,38 @@ define half @fold_demote_h_s(half %a, float %b) nounwind {
503503
;
504504
; RV32IFZFHMIN-LABEL: fold_demote_h_s:
505505
; RV32IFZFHMIN: # %bb.0:
506-
; RV32IFZFHMIN-NEXT: fmv.x.h a0, fa0
507-
; RV32IFZFHMIN-NEXT: slli a0, a0, 17
508-
; RV32IFZFHMIN-NEXT: fmv.x.w a1, fa1
509-
; RV32IFZFHMIN-NEXT: srli a0, a0, 17
510-
; RV32IFZFHMIN-NEXT: bgez a1, .LBB4_2
511-
; RV32IFZFHMIN-NEXT: # %bb.1:
512-
; RV32IFZFHMIN-NEXT: lui a1, 1048568
513-
; RV32IFZFHMIN-NEXT: or a0, a0, a1
514-
; RV32IFZFHMIN-NEXT: .LBB4_2:
515-
; RV32IFZFHMIN-NEXT: fmv.h.x fa5, a0
516-
; RV32IFZFHMIN-NEXT: fcvt.s.h fa5, fa5
517-
; RV32IFZFHMIN-NEXT: fcvt.h.s fa0, fa5
506+
; RV32IFZFHMIN-NEXT: fmv.x.w a0, fa1
507+
; RV32IFZFHMIN-NEXT: srli a0, a0, 31
508+
; RV32IFZFHMIN-NEXT: slli a0, a0, 15
509+
; RV32IFZFHMIN-NEXT: fmv.x.h a1, fa0
510+
; RV32IFZFHMIN-NEXT: slli a1, a1, 17
511+
; RV32IFZFHMIN-NEXT: srli a1, a1, 17
512+
; RV32IFZFHMIN-NEXT: or a0, a1, a0
513+
; RV32IFZFHMIN-NEXT: fmv.h.x fa0, a0
518514
; RV32IFZFHMIN-NEXT: ret
519515
;
520516
; RV32IFDZFHMIN-LABEL: fold_demote_h_s:
521517
; RV32IFDZFHMIN: # %bb.0:
522-
; RV32IFDZFHMIN-NEXT: fmv.x.h a0, fa0
523-
; RV32IFDZFHMIN-NEXT: slli a0, a0, 17
524-
; RV32IFDZFHMIN-NEXT: fmv.x.w a1, fa1
525-
; RV32IFDZFHMIN-NEXT: srli a0, a0, 17
526-
; RV32IFDZFHMIN-NEXT: bgez a1, .LBB4_2
527-
; RV32IFDZFHMIN-NEXT: # %bb.1:
528-
; RV32IFDZFHMIN-NEXT: lui a1, 1048568
529-
; RV32IFDZFHMIN-NEXT: or a0, a0, a1
530-
; RV32IFDZFHMIN-NEXT: .LBB4_2:
531-
; RV32IFDZFHMIN-NEXT: fmv.h.x fa5, a0
532-
; RV32IFDZFHMIN-NEXT: fcvt.s.h fa5, fa5
533-
; RV32IFDZFHMIN-NEXT: fcvt.h.s fa0, fa5
518+
; RV32IFDZFHMIN-NEXT: fmv.x.w a0, fa1
519+
; RV32IFDZFHMIN-NEXT: srli a0, a0, 31
520+
; RV32IFDZFHMIN-NEXT: slli a0, a0, 15
521+
; RV32IFDZFHMIN-NEXT: fmv.x.h a1, fa0
522+
; RV32IFDZFHMIN-NEXT: slli a1, a1, 17
523+
; RV32IFDZFHMIN-NEXT: srli a1, a1, 17
524+
; RV32IFDZFHMIN-NEXT: or a0, a1, a0
525+
; RV32IFDZFHMIN-NEXT: fmv.h.x fa0, a0
534526
; RV32IFDZFHMIN-NEXT: ret
535527
;
536528
; RV64IFDZFHMIN-LABEL: fold_demote_h_s:
537529
; RV64IFDZFHMIN: # %bb.0:
538-
; RV64IFDZFHMIN-NEXT: addi sp, sp, -16
539-
; RV64IFDZFHMIN-NEXT: fsw fa1, 8(sp)
540-
; RV64IFDZFHMIN-NEXT: lbu a0, 11(sp)
530+
; RV64IFDZFHMIN-NEXT: fmv.x.w a0, fa1
531+
; RV64IFDZFHMIN-NEXT: srli a0, a0, 31
532+
; RV64IFDZFHMIN-NEXT: slli a0, a0, 15
541533
; RV64IFDZFHMIN-NEXT: fmv.x.h a1, fa0
542534
; RV64IFDZFHMIN-NEXT: slli a1, a1, 49
543-
; RV64IFDZFHMIN-NEXT: andi a2, a0, 128
544-
; RV64IFDZFHMIN-NEXT: srli a0, a1, 49
545-
; RV64IFDZFHMIN-NEXT: beqz a2, .LBB4_2
546-
; RV64IFDZFHMIN-NEXT: # %bb.1:
547-
; RV64IFDZFHMIN-NEXT: lui a1, 1048568
548-
; RV64IFDZFHMIN-NEXT: or a0, a0, a1
549-
; RV64IFDZFHMIN-NEXT: .LBB4_2:
550-
; RV64IFDZFHMIN-NEXT: fmv.h.x fa5, a0
551-
; RV64IFDZFHMIN-NEXT: fcvt.s.h fa5, fa5
552-
; RV64IFDZFHMIN-NEXT: fcvt.h.s fa0, fa5
553-
; RV64IFDZFHMIN-NEXT: addi sp, sp, 16
535+
; RV64IFDZFHMIN-NEXT: srli a1, a1, 49
536+
; RV64IFDZFHMIN-NEXT: or a0, a1, a0
537+
; RV64IFDZFHMIN-NEXT: fmv.h.x fa0, a0
554538
; RV64IFDZFHMIN-NEXT: ret
555539
%c = fptrunc float %b to half
556540
%t = call half @llvm.copysign.f16(half %a, half %c)
@@ -646,60 +630,40 @@ define half @fold_demote_h_d(half %a, double %b) nounwind {
646630
;
647631
; RV32IFZFHMIN-LABEL: fold_demote_h_d:
648632
; RV32IFZFHMIN: # %bb.0:
649-
; RV32IFZFHMIN-NEXT: addi sp, sp, -16
650-
; RV32IFZFHMIN-NEXT: srli a1, a1, 16
651-
; RV32IFZFHMIN-NEXT: fmv.h.x fa5, a1
652-
; RV32IFZFHMIN-NEXT: fsh fa5, 12(sp)
653-
; RV32IFZFHMIN-NEXT: lbu a0, 13(sp)
654-
; RV32IFZFHMIN-NEXT: fmv.x.h a1, fa0
655-
; RV32IFZFHMIN-NEXT: slli a1, a1, 17
656-
; RV32IFZFHMIN-NEXT: andi a2, a0, 128
657-
; RV32IFZFHMIN-NEXT: srli a0, a1, 17
658-
; RV32IFZFHMIN-NEXT: beqz a2, .LBB5_2
659-
; RV32IFZFHMIN-NEXT: # %bb.1:
660-
; RV32IFZFHMIN-NEXT: lui a1, 1048568
633+
; RV32IFZFHMIN-NEXT: srli a1, a1, 31
634+
; RV32IFZFHMIN-NEXT: slli a1, a1, 15
635+
; RV32IFZFHMIN-NEXT: fmv.x.h a0, fa0
636+
; RV32IFZFHMIN-NEXT: slli a0, a0, 17
637+
; RV32IFZFHMIN-NEXT: srli a0, a0, 17
661638
; RV32IFZFHMIN-NEXT: or a0, a0, a1
662-
; RV32IFZFHMIN-NEXT: .LBB5_2:
663-
; RV32IFZFHMIN-NEXT: fmv.h.x fa5, a0
664-
; RV32IFZFHMIN-NEXT: fcvt.s.h fa5, fa5
665-
; RV32IFZFHMIN-NEXT: fcvt.h.s fa0, fa5
666-
; RV32IFZFHMIN-NEXT: addi sp, sp, 16
639+
; RV32IFZFHMIN-NEXT: fmv.h.x fa0, a0
667640
; RV32IFZFHMIN-NEXT: ret
668641
;
669642
; RV32IFDZFHMIN-LABEL: fold_demote_h_d:
670643
; RV32IFDZFHMIN: # %bb.0:
671644
; RV32IFDZFHMIN-NEXT: addi sp, sp, -16
672645
; RV32IFDZFHMIN-NEXT: fsd fa1, 8(sp)
673-
; RV32IFDZFHMIN-NEXT: lbu a0, 15(sp)
646+
; RV32IFDZFHMIN-NEXT: lw a0, 12(sp)
647+
; RV32IFDZFHMIN-NEXT: srli a0, a0, 31
648+
; RV32IFDZFHMIN-NEXT: slli a0, a0, 15
674649
; RV32IFDZFHMIN-NEXT: fmv.x.h a1, fa0
675650
; RV32IFDZFHMIN-NEXT: slli a1, a1, 17
676-
; RV32IFDZFHMIN-NEXT: andi a2, a0, 128
677-
; RV32IFDZFHMIN-NEXT: srli a0, a1, 17
678-
; RV32IFDZFHMIN-NEXT: beqz a2, .LBB5_2
679-
; RV32IFDZFHMIN-NEXT: # %bb.1:
680-
; RV32IFDZFHMIN-NEXT: lui a1, 1048568
681-
; RV32IFDZFHMIN-NEXT: or a0, a0, a1
682-
; RV32IFDZFHMIN-NEXT: .LBB5_2:
683-
; RV32IFDZFHMIN-NEXT: fmv.h.x fa5, a0
684-
; RV32IFDZFHMIN-NEXT: fcvt.s.h fa5, fa5
685-
; RV32IFDZFHMIN-NEXT: fcvt.h.s fa0, fa5
651+
; RV32IFDZFHMIN-NEXT: srli a1, a1, 17
652+
; RV32IFDZFHMIN-NEXT: or a0, a1, a0
653+
; RV32IFDZFHMIN-NEXT: fmv.h.x fa0, a0
686654
; RV32IFDZFHMIN-NEXT: addi sp, sp, 16
687655
; RV32IFDZFHMIN-NEXT: ret
688656
;
689657
; RV64IFDZFHMIN-LABEL: fold_demote_h_d:
690658
; RV64IFDZFHMIN: # %bb.0:
691-
; RV64IFDZFHMIN-NEXT: fmv.x.h a0, fa0
692-
; RV64IFDZFHMIN-NEXT: slli a0, a0, 49
693-
; RV64IFDZFHMIN-NEXT: fmv.x.d a1, fa1
694-
; RV64IFDZFHMIN-NEXT: srli a0, a0, 49
695-
; RV64IFDZFHMIN-NEXT: bgez a1, .LBB5_2
696-
; RV64IFDZFHMIN-NEXT: # %bb.1:
697-
; RV64IFDZFHMIN-NEXT: lui a1, 1048568
698-
; RV64IFDZFHMIN-NEXT: or a0, a0, a1
699-
; RV64IFDZFHMIN-NEXT: .LBB5_2:
700-
; RV64IFDZFHMIN-NEXT: fmv.h.x fa5, a0
701-
; RV64IFDZFHMIN-NEXT: fcvt.s.h fa5, fa5
702-
; RV64IFDZFHMIN-NEXT: fcvt.h.s fa0, fa5
659+
; RV64IFDZFHMIN-NEXT: fmv.x.d a0, fa1
660+
; RV64IFDZFHMIN-NEXT: srli a0, a0, 63
661+
; RV64IFDZFHMIN-NEXT: slli a0, a0, 15
662+
; RV64IFDZFHMIN-NEXT: fmv.x.h a1, fa0
663+
; RV64IFDZFHMIN-NEXT: slli a1, a1, 49
664+
; RV64IFDZFHMIN-NEXT: srli a1, a1, 49
665+
; RV64IFDZFHMIN-NEXT: or a0, a1, a0
666+
; RV64IFDZFHMIN-NEXT: fmv.h.x fa0, a0
703667
; RV64IFDZFHMIN-NEXT: ret
704668
%c = fptrunc double %b to half
705669
%t = call half @llvm.copysign.f16(half %a, half %c)

0 commit comments

Comments
 (0)