-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SystemZ] Add DAGCombine for FCOPYSIGN to remove rounding. #136131
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-systemz Author: Jonas Paulsson (JonPsson1) Changes: Add a DAGCombine for FCOPYSIGN that removes the rounding of the sign operand, which is never needed as the sign bit is already in the correct place. This helps in particular the rounding to f16 case, which needs a libcall. Also remove the roundings for other FP VTs and simplify the CPSDR patterns correspondingly. fp-copysign-03.ll test updated, now also covering the other FP VT combinations. @f12: ExpandFCOPYSIGN() calls getSignAsIntValue(), which does a bitcast via memory as i16 is not legal. So it stores the 2 f16 bytes, and then loads an i8 back from the same address. Is this the right byte order in memory for an fp value (sign bit in byte at lowest address)? Full diff: https://github.com/llvm/llvm-project/pull/136131.diff 6 Files Affected:
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 75cd5a319557d..4823756a2ec79 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -791,6 +791,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
ISD::SINT_TO_FP,
ISD::UINT_TO_FP,
ISD::STRICT_FP_EXTEND,
+ ISD::FCOPYSIGN,
ISD::BSWAP,
ISD::SETCC,
ISD::SRL,
@@ -8548,6 +8549,22 @@ SDValue SystemZTargetLowering::combineINT_TO_FP(
return SDValue();
}
+SDValue SystemZTargetLowering::combineFCOPYSIGN( // Target DAG combine for ISD::FCOPYSIGN nodes.
+ SDNode *N, DAGCombinerInfo &DCI) const { // N is the FCOPYSIGN node dispatched from PerformDAGCombine.
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0); // Result type of the original node; the rebuilt node keeps it.
+ SDValue ValOp = N->getOperand(0); // First operand, passed through unchanged.
+ SDValue SignOp = N->getOperand(1); // Second operand, inspected for an FP_ROUND.
+
+ // Drop an FP_ROUND feeding the sign operand: the rounding is not needed to obtain the sign.
+ if (SignOp.getOpcode() == ISD::FP_ROUND) {
+ SDValue WideOp = SignOp.getOperand(0); // The value before rounding.
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, ValOp, WideOp); // Same result type, unrounded sign source.
+ }
+
+ return SDValue(); // Sign operand is not an FP_ROUND: return an empty SDValue.
+}
+
SDValue SystemZTargetLowering::combineBSWAP(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -9137,6 +9154,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
+ case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
case ISD::SETCC: return combineSETCC(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index f438332c2dc4f..f3536a840fda8 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -777,6 +777,7 @@ class SystemZTargetLowering : public TargetLowering {
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineINT_TO_FP(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineFCOPYSIGN(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSETCC(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 7775f456bbdc1..50f28409ee682 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -109,10 +109,10 @@ let isCodeGenOnly = 1 in {
// The sign of an FP128 is in the high register.
let Predicates = [FeatureNoVectorEnhancements1] in
- def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 FP128:$src2)))),
+ def : Pat<(fcopysign FP32:$src1, (f128 FP128:$src2)),
(CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
let Predicates = [FeatureVectorEnhancements1] in
- def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 VR128:$src2)))),
+ def : Pat<(fcopysign FP32:$src1, (f128 VR128:$src2)),
(CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;
// fcopysign with an FP64 result.
@@ -124,10 +124,10 @@ def CPSDRdd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP64>;
// The sign of an FP128 is in the high register.
let Predicates = [FeatureNoVectorEnhancements1] in
- def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 FP128:$src2)))),
+ def : Pat<(fcopysign FP64:$src1, (f128 FP128:$src2)),
(CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
let Predicates = [FeatureVectorEnhancements1] in
- def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 VR128:$src2)))),
+ def : Pat<(fcopysign FP64:$src1, (f128 VR128:$src2)),
(CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;
// fcopysign with an FP128 result. Use "upper" as the high half and leave
diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll
index d2b6488008e6b..eee97b265a6af 100644
--- a/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll
@@ -1,4 +1,4 @@
-; Test copysign operations.
+; Test copysign libcalls.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll
index 178568ebb3bf9..86a4f21a6b594 100644
--- a/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll
@@ -1,4 +1,4 @@
-; Test f128 copysign operations on z14.
+; Test f128 copysign libcalls on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll
index 909519e8ace55..c2c5889a689e4 100644
--- a/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll
@@ -3,7 +3,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 \
; RUN: | FileCheck %s --check-prefixes=CHECK,Z16
;
-; Test copysign intrinsics with half.
+; Test copysign intrinsics.
declare half @llvm.copysign.f16(half, half)
declare float @llvm.copysign.f32(float, float)
@@ -43,53 +43,25 @@ define half @f2(half %a, double %b) {
}
; Test copysign with an f16 result and f128 sign argument.
-; TODO: Let the DAGCombiner remove the fp_round.
define half @f3(half %a, fp128 %b) {
; Z10-LABEL: f3:
; Z10: # %bb.0:
-; Z10-NEXT: stmg %r14, %r15, 112(%r15)
-; Z10-NEXT: .cfi_offset %r14, -48
-; Z10-NEXT: .cfi_offset %r15, -40
-; Z10-NEXT: aghi %r15, -184
-; Z10-NEXT: .cfi_def_cfa_offset 344
-; Z10-NEXT: std %f8, 176(%r15) # 8-byte Spill
-; Z10-NEXT: .cfi_offset %f8, -168
; Z10-NEXT: ld %f1, 0(%r2)
; Z10-NEXT: ld %f3, 8(%r2)
-; Z10-NEXT: ler %f8, %f0
-; Z10-NEXT: la %r2, 160(%r15)
-; Z10-NEXT: std %f1, 160(%r15)
-; Z10-NEXT: std %f3, 168(%r15)
-; Z10-NEXT: brasl %r14, __trunctfhf2@PLT
-; Z10-NEXT: cpsdr %f0, %f0, %f8
-; Z10-NEXT: ld %f8, 176(%r15) # 8-byte Reload
-; Z10-NEXT: lmg %r14, %r15, 296(%r15)
+; Z10-NEXT: cpsdr %f0, %f1, %f0
; Z10-NEXT: br %r14
;
; Z16-LABEL: f3:
; Z16: # %bb.0:
-; Z16-NEXT: stmg %r14, %r15, 112(%r15)
-; Z16-NEXT: .cfi_offset %r14, -48
-; Z16-NEXT: .cfi_offset %r15, -40
-; Z16-NEXT: aghi %r15, -184
-; Z16-NEXT: .cfi_def_cfa_offset 344
-; Z16-NEXT: std %f8, 176(%r15) # 8-byte Spill
-; Z16-NEXT: .cfi_offset %f8, -168
-; Z16-NEXT: ldr %f8, %f0
-; Z16-NEXT: vl %v0, 0(%r2), 3
-; Z16-NEXT: la %r2, 160(%r15)
-; Z16-NEXT: vst %v0, 160(%r15), 3
-; Z16-NEXT: brasl %r14, __trunctfhf2@PLT
-; Z16-NEXT: cpsdr %f0, %f0, %f8
-; Z16-NEXT: ld %f8, 176(%r15) # 8-byte Reload
-; Z16-NEXT: lmg %r14, %r15, 296(%r15)
+; Z16-NEXT: vl %v1, 0(%r2), 3
+; Z16-NEXT: cpsdr %f0, %f1, %f0
; Z16-NEXT: br %r14
%bh = fptrunc fp128 %b to half
%res = call half @llvm.copysign.f16(half %a, half %bh)
ret half %res
}
-; Test copysign with an f32 result and half sign argument.
+; Test copysign with an f32 result and f16 sign argument.
define float @f4(float %a, half %b) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0:
@@ -100,20 +72,100 @@ define float @f4(float %a, half %b) {
ret float %res
}
-; Test copysign with an f64 result and half sign argument.
-define double @f5(double %a, half %b) {
+; Test copysign with an f32 result and f32 sign argument.
+define float @f5(float %a, float %b) {
; CHECK-LABEL: f5:
; CHECK: # %bb.0:
; CHECK-NEXT: cpsdr %f0, %f2, %f0
+; CHECK-NEXT: br %r14
+ %res = call float @llvm.copysign.f32(float %a, float %b)
+ ret float %res
+}
+
+; Test copysign with an f32 result and f64 sign argument.
+define float @f6(float %a, double %b) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cpsdr %f0, %f2, %f0
+; CHECK-NEXT: br %r14
+ %bf = fptrunc double %b to float
+ %res = call float @llvm.copysign.f32(float %a, float %bf)
+ ret float %res
+}
+
+; Test copysign with an f32 result and f128 sign argument.
+define float @f7(float %a, fp128 %b) {
+; Z10-LABEL: f7:
+; Z10: # %bb.0:
+; Z10-NEXT: ld %f1, 0(%r2)
+; Z10-NEXT: ld %f3, 8(%r2)
+; Z10-NEXT: cpsdr %f0, %f1, %f0
+; Z10-NEXT: br %r14
+;
+; Z16-LABEL: f7:
+; Z16: # %bb.0:
+; Z16-NEXT: vl %v1, 0(%r2), 3
+; Z16-NEXT: cpsdr %f0, %f1, %f0
+; Z16-NEXT: br %r14
+ %bf = fptrunc fp128 %b to float
+ %res = call float @llvm.copysign.f32(float %a, float %bf)
+ ret float %res
+}
+
+; Test copysign with an f64 result and f16 sign argument.
+define double @f8(double %a, half %b) {
+; CHECK-LABEL: f8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cpsdr %f0, %f2, %f0
; CHECK-NEXT: br %r14
%bd = fpext half %b to double
%res = call double @llvm.copysign.f64(double %a, double %bd)
ret double %res
}
-; Test copysign with an f128 result and half sign argument.
-define fp128 @f6(fp128 %a, half %b) {
-; Z10-LABEL: f6:
+; Test copysign with an f64 result and f32 sign argument.
+define double @f9(double %a, float %b) {
+; CHECK-LABEL: f9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cpsdr %f0, %f2, %f0
+; CHECK-NEXT: br %r14
+ %bd = fpext float %b to double
+ %res = call double @llvm.copysign.f64(double %a, double %bd)
+ ret double %res
+}
+
+; Test copysign with an f64 result and f64 sign argument.
+define double @f10(double %a, double %b) {
+; CHECK-LABEL: f10:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cpsdr %f0, %f2, %f0
+; CHECK-NEXT: br %r14
+ %res = call double @llvm.copysign.f64(double %a, double %b)
+ ret double %res
+}
+
+; Test copysign with an f64 result and f128 sign argument.
+define double @f11(double %a, fp128 %b) {
+; Z10-LABEL: f11:
+; Z10: # %bb.0:
+; Z10-NEXT: ld %f1, 0(%r2)
+; Z10-NEXT: ld %f3, 8(%r2)
+; Z10-NEXT: cpsdr %f0, %f1, %f0
+; Z10-NEXT: br %r14
+;
+; Z16-LABEL: f11:
+; Z16: # %bb.0:
+; Z16-NEXT: vl %v1, 0(%r2), 3
+; Z16-NEXT: cpsdr %f0, %f1, %f0
+; Z16-NEXT: br %r14
+ %bd = fptrunc fp128 %b to double
+ %res = call double @llvm.copysign.f64(double %a, double %bd)
+ ret double %res
+}
+
+; Test copysign with an f128 result and f16 sign argument.
+define fp128 @f12(fp128 %a, half %b) {
+; Z10-LABEL: f12:
; Z10: # %bb.0:
; Z10-NEXT: ld %f1, 0(%r3)
; Z10-NEXT: ld %f3, 8(%r3)
@@ -122,24 +174,117 @@ define fp128 @f6(fp128 %a, half %b) {
; Z10-NEXT: std %f3, 8(%r2)
; Z10-NEXT: br %r14
;
-; Z16-LABEL: f6:
+; Z16-LABEL: f12:
; Z16: # %bb.0:
; Z16-NEXT: aghi %r15, -168
; Z16-NEXT: .cfi_def_cfa_offset 328
; Z16-NEXT: vl %v1, 0(%r3), 3
; Z16-NEXT: vsteh %v0, 164(%r15), 0
; Z16-NEXT: tm 164(%r15), 128
-; Z16-NEXT: je .LBB6_2
+; Z16-NEXT: je .LBB12_2
; Z16-NEXT: # %bb.1:
; Z16-NEXT: wflnxb %v0, %v1
-; Z16-NEXT: j .LBB6_3
-; Z16-NEXT: .LBB6_2:
+; Z16-NEXT: j .LBB12_3
+; Z16-NEXT: .LBB12_2:
; Z16-NEXT: wflpxb %v0, %v1
-; Z16-NEXT: .LBB6_3:
+; Z16-NEXT: .LBB12_3:
; Z16-NEXT: vst %v0, 0(%r2), 3
; Z16-NEXT: aghi %r15, 168
; Z16-NEXT: br %r14
- %bd = fpext half %b to fp128
- %res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %bd)
+ %b128 = fpext half %b to fp128
+ %res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b128)
+ ret fp128 %res
+}
+
+; Test copysign with an f128 result and f32 sign argument.
+define fp128 @f13(fp128 %a, float %b) {
+; Z10-LABEL: f13:
+; Z10: # %bb.0:
+; Z10-NEXT: ld %f1, 0(%r3)
+; Z10-NEXT: ld %f3, 8(%r3)
+; Z10-NEXT: cpsdr %f1, %f0, %f1
+; Z10-NEXT: std %f1, 0(%r2)
+; Z10-NEXT: std %f3, 8(%r2)
+; Z10-NEXT: br %r14
+;
+; Z16-LABEL: f13:
+; Z16: # %bb.0:
+; Z16-NEXT: vl %v1, 0(%r3), 3
+; Z16-NEXT: vlgvf %r0, %v0, 0
+; Z16-NEXT: tmlh %r0, 32768
+; Z16-NEXT: je .LBB13_2
+; Z16-NEXT: # %bb.1:
+; Z16-NEXT: wflnxb %v0, %v1
+; Z16-NEXT: vst %v0, 0(%r2), 3
+; Z16-NEXT: br %r14
+; Z16-NEXT: .LBB13_2:
+; Z16-NEXT: wflpxb %v0, %v1
+; Z16-NEXT: vst %v0, 0(%r2), 3
+; Z16-NEXT: br %r14
+ %b128 = fpext float %b to fp128
+ %res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b128)
+ ret fp128 %res
+}
+
+; Test copysign with an f128 result and f64 sign argument.
+define fp128 @f14(fp128 %a, double %b) {
+; Z10-LABEL: f14:
+; Z10: # %bb.0:
+; Z10-NEXT: ld %f1, 0(%r3)
+; Z10-NEXT: ld %f3, 8(%r3)
+; Z10-NEXT: cpsdr %f1, %f0, %f1
+; Z10-NEXT: std %f1, 0(%r2)
+; Z10-NEXT: std %f3, 8(%r2)
+; Z10-NEXT: br %r14
+;
+; Z16-LABEL: f14:
+; Z16: # %bb.0:
+; Z16-NEXT: vl %v1, 0(%r3), 3
+; Z16-NEXT: lgdr %r0, %f0
+; Z16-NEXT: tmhh %r0, 32768
+; Z16-NEXT: je .LBB14_2
+; Z16-NEXT: # %bb.1:
+; Z16-NEXT: wflnxb %v0, %v1
+; Z16-NEXT: vst %v0, 0(%r2), 3
+; Z16-NEXT: br %r14
+; Z16-NEXT: .LBB14_2:
+; Z16-NEXT: wflpxb %v0, %v1
+; Z16-NEXT: vst %v0, 0(%r2), 3
+; Z16-NEXT: br %r14
+ %b128 = fpext double %b to fp128
+ %res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b128)
+ ret fp128 %res
+}
+
+; Test copysign with an f128 result and f128 sign argument.
+define fp128 @f15(fp128 %a, fp128 %b) {
+; Z10-LABEL: f15:
+; Z10: # %bb.0:
+; Z10-NEXT: ld %f0, 0(%r3)
+; Z10-NEXT: ld %f2, 8(%r3)
+; Z10-NEXT: ld %f1, 0(%r4)
+; Z10-NEXT: ld %f3, 8(%r4)
+; Z10-NEXT: cpsdr %f0, %f1, %f0
+; Z10-NEXT: std %f0, 0(%r2)
+; Z10-NEXT: std %f2, 8(%r2)
+; Z10-NEXT: br %r14
+;
+; Z16-LABEL: f15:
+; Z16: # %bb.0:
+; Z16-NEXT: larl %r1, .LCPI15_0
+; Z16-NEXT: vl %v1, 0(%r4), 3
+; Z16-NEXT: vl %v2, 0(%r1), 3
+; Z16-NEXT: vl %v0, 0(%r3), 3
+; Z16-NEXT: vtm %v1, %v2
+; Z16-NEXT: je .LBB15_2
+; Z16-NEXT: # %bb.1:
+; Z16-NEXT: wflnxb %v0, %v0
+; Z16-NEXT: vst %v0, 0(%r2), 3
+; Z16-NEXT: br %r14
+; Z16-NEXT: .LBB15_2:
+; Z16-NEXT: wflpxb %v0, %v0
+; Z16-NEXT: vst %v0, 0(%r2), 3
+; Z16-NEXT: br %r14
+ %res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
ret fp128 %res
}
|
You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions cpp,h -- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp llvm/lib/Target/SystemZ/SystemZISelLowering.h
View the diff from clang-format here.
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 4823756a2..1bd967eb2 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -8549,8 +8549,8 @@ SDValue SystemZTargetLowering::combineINT_TO_FP(
return SDValue();
}
-SDValue SystemZTargetLowering::combineFCOPYSIGN(
- SDNode *N, DAGCombinerInfo &DCI) const {
+SDValue SystemZTargetLowering::combineFCOPYSIGN(SDNode *N,
+ DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
SDValue ValOp = N->getOperand(0);
@@ -9154,7 +9154,8 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
- case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
+ case ISD::FCOPYSIGN:
+ return combineFCOPYSIGN(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
case ISD::SETCC: return combineSETCC(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
|
Looking at CanCombineFCOPYSIGN_EXTEND_ROUND(), it seems that the reason the f128 case isn't handled in common code is that there is a specific guard against it because of x86. Maybe an option now would be to have a DAGCombine target hook that SystemZ would override, which would eventually be removed if x86 isel is improved per the comment. Or maybe just a comment there saying that the SystemZ handling could be removed if this changed? |
This is correct (the sign bit is in the MSB, which ends up at the lowest address on a big-endian system). It's still inefficient though ...
I guess it would be worthwhile discussing this. For now, I think this patch is fine - we can always remove the custom combiner if and when common code gets updated. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
LLVM Buildbot has detected a new failure on a builder (the builder name was not captured on this page). Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/16559 Here is the relevant piece of the build log for reference:
|
Add a DAGCombine for FCOPYSIGN that removes the rounding which is never needed as the sign bit is already in the correct place. This helps in particular the rounding to f16 case which needs a libcall. Also remove the roundings for other FP VTs and simplify the CPSDR patterns correspondingly. fp-copysign-03.ll test updated, now also covering the other FP VT combinations.
Add a DAGCombine for FCOPYSIGN that removes the rounding which is never needed as the sign bit is already in the correct place. This helps in particular the rounding to f16 case which needs a libcall. Also remove the roundings for other FP VTs and simplify the CPSDR patterns correspondingly. fp-copysign-03.ll test updated, now also covering the other FP VT combinations.
Add a DAGCombine for FCOPYSIGN that removes the rounding which is never needed as the sign bit is already in the correct place. This helps in particular the rounding to f16 case which needs a libcall. Also remove the roundings for other FP VTs and simplify the CPSDR patterns correspondingly. fp-copysign-03.ll test updated, now also covering the other FP VT combinations.
Add a DAGCombine for FCOPYSIGN that removes the rounding which is never needed as the sign bit is already in the correct place. This helps in particular the rounding to f16 case which needs a libcall. Also remove the roundings for other FP VTs and simplify the CPSDR patterns correspondingly. fp-copysign-03.ll test updated, now also covering the other FP VT combinations.
Add a DAGCombine for FCOPYSIGN that removes the rounding of the sign operand, which is never needed as the sign bit is already in the correct place. This helps in particular the rounding to f16 case, which needs a libcall.
Also remove the roundings for other FP VTs and simplify the CPSDR patterns correspondingly.
fp-copysign-03.ll test updated, now also covering the other FP VT combinations.
@f12: ExpandFCOPYSIGN() calls getSignAsIntValue() which does a bitcast via memory as i16 is not legal. So it stores the 2 f16 bytes, and then loads an i8 back from the same address. Is this the right byte order in memory for an fp value (sign bit in byte at lowest address)?