[RISCV] Pass f32/f64 directly without a bitcast for Zfinx/Zdinx. #107464
Conversation
With Zfinx/Zdinx, f32/f64 are legal types for a GPR, so we don't need a bitcast. This avoids turning fneg/fabs into bitwise operations purely because of these bitcasts. If the bitwise operations are faster for some reason on a Zfinx CPU, then that seems like it should be done for all fneg/fabs, not just the ones near function arguments/returns. I don't have much interest in Zfinx; this just makes the code more similar to what I proposed for Zhinx in llvm#107446.
@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Changes

With Zfinx/Zdinx, f32/f64 are legal types for a GPR, so we don't need a bitcast. This avoids turning fneg/fabs into bitwise operations purely because of these bitcasts. If the bitwise operations are faster for some reason on a Zfinx CPU, then that seems like it should be done for all fneg/fabs, not just the ones near function arguments/returns. I don't have much interest in Zfinx; this just makes the code more similar to what I proposed for Zhinx in #107446.

Patch is 64.83 KiB, truncated to 20.00 KiB below. Full version: https://github.com/llvm/llvm-project/pull/107464.diff

11 Files Affected:
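As a quick illustration of the codegen difference the test updates below capture, here is a minimal fneg function mirroring the test updated in llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll; the before/after assembly in the comments is taken from that diff, while the llc flags are an assumption rather than copied from the test's RUN lines.

```llvm
; Minimal reproducer for the fneg codegen change.
; Assumed command: llc -mtriple=riscv64 -mattr=+zdinx -target-abi=lp64
define double @fneg(double %a) nounwind {
  %1 = fneg double %a
  ret double %1
}
; Before this patch (sign bit flipped with integer ops):
;   li   a1, -1
;   slli a1, a1, 63
;   xor  a0, a0, a1
; After this patch (the value stays f64 in the GPR):
;   fneg.d a0, a0
```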
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6b4219b4623847..ad8a5b163ec315 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19227,6 +19227,16 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
+ const RISCVSubtarget &STI =
+ State.getMachineFunction().getSubtarget<RISCVSubtarget>();
+ if ((ValVT == MVT::f32 && XLen == 32 && STI.hasStdExtZfinx()) ||
+ (ValVT == MVT::f64 && XLen == 64 && STI.hasStdExtZdinx())) {
+ if (MCRegister Reg = State.AllocateReg(ArgGPRs)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::bf16 ||
(ValVT == MVT::f32 && XLen == 64))) {
MCRegister Reg = State.AllocateReg(ArgGPRs);
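(Aside: a minimal sketch of what the new check above changes; the function below is illustrative and not part of this patch. With RV64 and Zdinx under the lp64 ABI, bare f64 arguments are now given f64 locations in GPRs directly by CC_RISCV instead of being bitcast to i64 first.)

```llvm
; Illustrative only (assumed target: riscv64, -mattr=+zdinx, -target-abi=lp64).
; %a and %b are now assigned to argument GPRs as f64 values by CC_RISCV;
; previously the lowering bitcast them to i64 for argument passing and back
; to f64 before use.
define double @fadd_args(double %a, double %b) nounwind {
  %sum = fadd double %a, %b
  ret double %sum
}
```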
diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll
index ee54501fe59a8c..fa74dcb4810060 100644
--- a/llvm/test/CodeGen/RISCV/double-arith.ll
+++ b/llvm/test/CodeGen/RISCV/double-arith.ll
@@ -320,8 +320,7 @@ define double @fsgnjn_d(double %a, double %b) nounwind {
;
; RV64IZFINXZDINX-LABEL: fsgnjn_d:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: not a1, a1
-; RV64IZFINXZDINX-NEXT: fsgnj.d a0, a0, a1
+; RV64IZFINXZDINX-NEXT: fsgnjn.d a0, a0, a1
; RV64IZFINXZDINX-NEXT: ret
;
; RV32I-LABEL: fsgnjn_d:
@@ -852,9 +851,7 @@ define double @fnmadd_d_3(double %a, double %b, double %c) nounwind {
; RV64IZFINXZDINX-LABEL: fnmadd_d_3:
; RV64IZFINXZDINX: # %bb.0:
; RV64IZFINXZDINX-NEXT: fmadd.d a0, a0, a1, a2
-; RV64IZFINXZDINX-NEXT: li a1, -1
-; RV64IZFINXZDINX-NEXT: slli a1, a1, 63
-; RV64IZFINXZDINX-NEXT: xor a0, a0, a1
+; RV64IZFINXZDINX-NEXT: fneg.d a0, a0
; RV64IZFINXZDINX-NEXT: ret
;
; RV32I-LABEL: fnmadd_d_3:
@@ -900,10 +897,7 @@ define double @fnmadd_nsz(double %a, double %b, double %c) nounwind {
;
; RV64IZFINXZDINX-LABEL: fnmadd_nsz:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: fmadd.d a0, a0, a1, a2
-; RV64IZFINXZDINX-NEXT: li a1, -1
-; RV64IZFINXZDINX-NEXT: slli a1, a1, 63
-; RV64IZFINXZDINX-NEXT: xor a0, a0, a1
+; RV64IZFINXZDINX-NEXT: fnmadd.d a0, a0, a1, a2
; RV64IZFINXZDINX-NEXT: ret
;
; RV32I-LABEL: fnmadd_nsz:
diff --git a/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll
index 70d3291d3a8402..f7d57178b03d41 100644
--- a/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll
+++ b/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll
@@ -56,9 +56,7 @@ define double @fneg(double %a) nounwind {
;
; RV64IZFINXZDINX-LABEL: fneg:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: li a1, -1
-; RV64IZFINXZDINX-NEXT: slli a1, a1, 63
-; RV64IZFINXZDINX-NEXT: xor a0, a0, a1
+; RV64IZFINXZDINX-NEXT: fneg.d a0, a0
; RV64IZFINXZDINX-NEXT: ret
%1 = fneg double %a
ret double %1
@@ -99,8 +97,7 @@ define double @fabs(double %a) nounwind {
;
; RV64IZFINXZDINX-LABEL: fabs:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: slli a0, a0, 1
-; RV64IZFINXZDINX-NEXT: srli a0, a0, 1
+; RV64IZFINXZDINX-NEXT: fabs.d a0, a0
; RV64IZFINXZDINX-NEXT: ret
%1 = call double @llvm.fabs.f64(double %a)
ret double %1
@@ -165,8 +162,7 @@ define double @fcopysign_fneg(double %a, double %b) nounwind {
;
; RV64IZFINXZDINX-LABEL: fcopysign_fneg:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: not a1, a1
-; RV64IZFINXZDINX-NEXT: fsgnj.d a0, a0, a1
+; RV64IZFINXZDINX-NEXT: fsgnjn.d a0, a0, a1
; RV64IZFINXZDINX-NEXT: ret
%1 = fneg double %b
%2 = call double @llvm.copysign.f64(double %a, double %1)
diff --git a/llvm/test/CodeGen/RISCV/double-imm.ll b/llvm/test/CodeGen/RISCV/double-imm.ll
index 827f034f143fb5..2294171d95ab2c 100644
--- a/llvm/test/CodeGen/RISCV/double-imm.ll
+++ b/llvm/test/CodeGen/RISCV/double-imm.ll
@@ -115,8 +115,7 @@ define double @double_negative_zero(ptr %pd) nounwind {
;
; CHECKRV64ZDINX-LABEL: double_negative_zero:
; CHECKRV64ZDINX: # %bb.0:
-; CHECKRV64ZDINX-NEXT: li a0, -1
-; CHECKRV64ZDINX-NEXT: slli a0, a0, 63
+; CHECKRV64ZDINX-NEXT: fneg.d a0, zero
; CHECKRV64ZDINX-NEXT: ret
ret double -0.0
}
@@ -160,12 +159,11 @@ define dso_local double @negzero_sel(i16 noundef %a, double noundef %d) nounwind
; CHECKRV64ZDINX-LABEL: negzero_sel:
; CHECKRV64ZDINX: # %bb.0: # %entry
; CHECKRV64ZDINX-NEXT: slli a2, a0, 48
+; CHECKRV64ZDINX-NEXT: mv a0, a1
; CHECKRV64ZDINX-NEXT: beqz a2, .LBB4_2
; CHECKRV64ZDINX-NEXT: # %bb.1: # %entry
; CHECKRV64ZDINX-NEXT: fneg.d a0, zero
-; CHECKRV64ZDINX-NEXT: ret
-; CHECKRV64ZDINX-NEXT: .LBB4_2:
-; CHECKRV64ZDINX-NEXT: mv a0, a1
+; CHECKRV64ZDINX-NEXT: .LBB4_2: # %entry
; CHECKRV64ZDINX-NEXT: ret
entry:
%tobool.not = icmp eq i16 %a, 0
diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/double-intrinsics.ll
index 94b3b1f1b199c2..ea4af4cb60cd3d 100644
--- a/llvm/test/CodeGen/RISCV/double-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/double-intrinsics.ll
@@ -684,8 +684,7 @@ define double @fabs_f64(double %a) nounwind {
;
; RV64IZFINXZDINX-LABEL: fabs_f64:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: slli a0, a0, 1
-; RV64IZFINXZDINX-NEXT: srli a0, a0, 1
+; RV64IZFINXZDINX-NEXT: fabs.d a0, a0
; RV64IZFINXZDINX-NEXT: ret
;
; RV32I-LABEL: fabs_f64:
diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll
index 931f73a94170a6..3f32734db0ba71 100644
--- a/llvm/test/CodeGen/RISCV/float-arith.ll
+++ b/llvm/test/CodeGen/RISCV/float-arith.ll
@@ -4,9 +4,9 @@
; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
; RUN: -target-abi=lp64f | FileCheck -check-prefix=CHECKIF %s
; RUN: llc -mtriple=riscv32 -mattr=+zfinx -verify-machineinstrs < %s \
-; RUN: -target-abi=ilp32 | FileCheck -check-prefix=CHECKIZFINX %s
+; RUN: -target-abi=ilp32 | FileCheck -check-prefixes=CHECKIZFINX,RV32IZFINX %s
; RUN: llc -mtriple=riscv64 -mattr=+zfinx -verify-machineinstrs < %s \
-; RUN: -target-abi=lp64 | FileCheck -check-prefix=CHECKIZFINX %s
+; RUN: -target-abi=lp64 | FileCheck -check-prefixes=CHECKIZFINX,RV64IZFINX %s
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
@@ -706,12 +706,18 @@ define float @fnmadd_s_3(float %a, float %b, float %c) nounwind {
; CHECKIF-NEXT: fneg.s fa0, fa5
; CHECKIF-NEXT: ret
;
-; CHECKIZFINX-LABEL: fnmadd_s_3:
-; CHECKIZFINX: # %bb.0:
-; CHECKIZFINX-NEXT: fmadd.s a0, a0, a1, a2
-; CHECKIZFINX-NEXT: lui a1, 524288
-; CHECKIZFINX-NEXT: xor a0, a0, a1
-; CHECKIZFINX-NEXT: ret
+; RV32IZFINX-LABEL: fnmadd_s_3:
+; RV32IZFINX: # %bb.0:
+; RV32IZFINX-NEXT: fmadd.s a0, a0, a1, a2
+; RV32IZFINX-NEXT: fneg.s a0, a0
+; RV32IZFINX-NEXT: ret
+;
+; RV64IZFINX-LABEL: fnmadd_s_3:
+; RV64IZFINX: # %bb.0:
+; RV64IZFINX-NEXT: fmadd.s a0, a0, a1, a2
+; RV64IZFINX-NEXT: lui a1, 524288
+; RV64IZFINX-NEXT: xor a0, a0, a1
+; RV64IZFINX-NEXT: ret
;
; RV32I-LABEL: fnmadd_s_3:
; RV32I: # %bb.0:
@@ -755,12 +761,17 @@ define float @fnmadd_nsz(float %a, float %b, float %c) nounwind {
; CHECKIF-NEXT: fnmadd.s fa0, fa0, fa1, fa2
; CHECKIF-NEXT: ret
;
-; CHECKIZFINX-LABEL: fnmadd_nsz:
-; CHECKIZFINX: # %bb.0:
-; CHECKIZFINX-NEXT: fmadd.s a0, a0, a1, a2
-; CHECKIZFINX-NEXT: lui a1, 524288
-; CHECKIZFINX-NEXT: xor a0, a0, a1
-; CHECKIZFINX-NEXT: ret
+; RV32IZFINX-LABEL: fnmadd_nsz:
+; RV32IZFINX: # %bb.0:
+; RV32IZFINX-NEXT: fnmadd.s a0, a0, a1, a2
+; RV32IZFINX-NEXT: ret
+;
+; RV64IZFINX-LABEL: fnmadd_nsz:
+; RV64IZFINX: # %bb.0:
+; RV64IZFINX-NEXT: fmadd.s a0, a0, a1, a2
+; RV64IZFINX-NEXT: lui a1, 524288
+; RV64IZFINX-NEXT: xor a0, a0, a1
+; RV64IZFINX-NEXT: ret
;
; RV32I-LABEL: fnmadd_nsz:
; RV32I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
index 53df588bf1c374..2338219687ef75 100644
--- a/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
+++ b/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
@@ -33,8 +33,7 @@ define float @fneg(float %a) nounwind {
;
; RV32IZFINX-LABEL: fneg:
; RV32IZFINX: # %bb.0:
-; RV32IZFINX-NEXT: lui a1, 524288
-; RV32IZFINX-NEXT: xor a0, a0, a1
+; RV32IZFINX-NEXT: fneg.s a0, a0
; RV32IZFINX-NEXT: ret
;
; RV64I-LABEL: fneg:
@@ -75,8 +74,7 @@ define float @fabs(float %a) nounwind {
;
; RV32IZFINX-LABEL: fabs:
; RV32IZFINX: # %bb.0:
-; RV32IZFINX-NEXT: slli a0, a0, 1
-; RV32IZFINX-NEXT: srli a0, a0, 1
+; RV32IZFINX-NEXT: fabs.s a0, a0
; RV32IZFINX-NEXT: ret
;
; RV64I-LABEL: fabs:
@@ -128,8 +126,7 @@ define float @fcopysign_fneg(float %a, float %b) nounwind {
;
; RV32IZFINX-LABEL: fcopysign_fneg:
; RV32IZFINX: # %bb.0:
-; RV32IZFINX-NEXT: not a1, a1
-; RV32IZFINX-NEXT: fsgnj.s a0, a0, a1
+; RV32IZFINX-NEXT: fsgnjn.s a0, a0, a1
; RV32IZFINX-NEXT: ret
;
; RV64I-LABEL: fcopysign_fneg:
diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
index 9f1578ce158f95..52442026dab502 100644
--- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
@@ -688,8 +688,7 @@ define float @fabs_f32(float %a) nounwind {
;
; RV32IZFINX-LABEL: fabs_f32:
; RV32IZFINX: # %bb.0:
-; RV32IZFINX-NEXT: slli a0, a0, 1
-; RV32IZFINX-NEXT: srli a0, a0, 1
+; RV32IZFINX-NEXT: fabs.s a0, a0
; RV32IZFINX-NEXT: ret
;
; RV64IF-LABEL: fabs_f32:
diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
index 42ac20286a8920..198b18c75272a9 100644
--- a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
@@ -96,7 +96,6 @@ define i64 @test_floor_si64(float %x) nounwind {
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: mv s0, a0
; RV32IZFINX-NEXT: lui a0, 307200
; RV32IZFINX-NEXT: fabs.s a1, s0
@@ -109,33 +108,32 @@ define i64 @test_floor_si64(float %x) nounwind {
; RV32IZFINX-NEXT: .LBB1_2:
; RV32IZFINX-NEXT: lui a0, 913408
; RV32IZFINX-NEXT: fle.s s1, a0, s0
-; RV32IZFINX-NEXT: neg s2, s1
; RV32IZFINX-NEXT: mv a0, s0
; RV32IZFINX-NEXT: call __fixsfdi
-; RV32IZFINX-NEXT: and a0, s2, a0
-; RV32IZFINX-NEXT: lui a2, 389120
-; RV32IZFINX-NEXT: addi a2, a2, -1
-; RV32IZFINX-NEXT: flt.s a4, a2, s0
-; RV32IZFINX-NEXT: neg a2, a4
-; RV32IZFINX-NEXT: or a0, a2, a0
-; RV32IZFINX-NEXT: feq.s a2, s0, s0
-; RV32IZFINX-NEXT: neg a2, a2
-; RV32IZFINX-NEXT: lui a5, 524288
-; RV32IZFINX-NEXT: lui a3, 524288
+; RV32IZFINX-NEXT: lui a4, 524288
+; RV32IZFINX-NEXT: lui a2, 524288
; RV32IZFINX-NEXT: beqz s1, .LBB1_4
; RV32IZFINX-NEXT: # %bb.3:
-; RV32IZFINX-NEXT: mv a3, a1
+; RV32IZFINX-NEXT: mv a2, a1
; RV32IZFINX-NEXT: .LBB1_4:
-; RV32IZFINX-NEXT: and a0, a2, a0
-; RV32IZFINX-NEXT: beqz a4, .LBB1_6
+; RV32IZFINX-NEXT: lui a1, 389120
+; RV32IZFINX-NEXT: addi a1, a1, -1
+; RV32IZFINX-NEXT: flt.s a3, a1, s0
+; RV32IZFINX-NEXT: beqz a3, .LBB1_6
; RV32IZFINX-NEXT: # %bb.5:
-; RV32IZFINX-NEXT: addi a3, a5, -1
+; RV32IZFINX-NEXT: addi a2, a4, -1
; RV32IZFINX-NEXT: .LBB1_6:
-; RV32IZFINX-NEXT: and a1, a2, a3
+; RV32IZFINX-NEXT: feq.s a1, s0, s0
+; RV32IZFINX-NEXT: neg a4, a1
+; RV32IZFINX-NEXT: and a1, a4, a2
+; RV32IZFINX-NEXT: neg a2, s1
+; RV32IZFINX-NEXT: and a0, a2, a0
+; RV32IZFINX-NEXT: neg a2, a3
+; RV32IZFINX-NEXT: or a0, a2, a0
+; RV32IZFINX-NEXT: and a0, a4, a0
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: addi sp, sp, 16
; RV32IZFINX-NEXT: ret
;
@@ -356,7 +354,6 @@ define i64 @test_ceil_si64(float %x) nounwind {
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: mv s0, a0
; RV32IZFINX-NEXT: lui a0, 307200
; RV32IZFINX-NEXT: fabs.s a1, s0
@@ -369,33 +366,32 @@ define i64 @test_ceil_si64(float %x) nounwind {
; RV32IZFINX-NEXT: .LBB5_2:
; RV32IZFINX-NEXT: lui a0, 913408
; RV32IZFINX-NEXT: fle.s s1, a0, s0
-; RV32IZFINX-NEXT: neg s2, s1
; RV32IZFINX-NEXT: mv a0, s0
; RV32IZFINX-NEXT: call __fixsfdi
-; RV32IZFINX-NEXT: and a0, s2, a0
-; RV32IZFINX-NEXT: lui a2, 389120
-; RV32IZFINX-NEXT: addi a2, a2, -1
-; RV32IZFINX-NEXT: flt.s a4, a2, s0
-; RV32IZFINX-NEXT: neg a2, a4
-; RV32IZFINX-NEXT: or a0, a2, a0
-; RV32IZFINX-NEXT: feq.s a2, s0, s0
-; RV32IZFINX-NEXT: neg a2, a2
-; RV32IZFINX-NEXT: lui a5, 524288
-; RV32IZFINX-NEXT: lui a3, 524288
+; RV32IZFINX-NEXT: lui a4, 524288
+; RV32IZFINX-NEXT: lui a2, 524288
; RV32IZFINX-NEXT: beqz s1, .LBB5_4
; RV32IZFINX-NEXT: # %bb.3:
-; RV32IZFINX-NEXT: mv a3, a1
+; RV32IZFINX-NEXT: mv a2, a1
; RV32IZFINX-NEXT: .LBB5_4:
-; RV32IZFINX-NEXT: and a0, a2, a0
-; RV32IZFINX-NEXT: beqz a4, .LBB5_6
+; RV32IZFINX-NEXT: lui a1, 389120
+; RV32IZFINX-NEXT: addi a1, a1, -1
+; RV32IZFINX-NEXT: flt.s a3, a1, s0
+; RV32IZFINX-NEXT: beqz a3, .LBB5_6
; RV32IZFINX-NEXT: # %bb.5:
-; RV32IZFINX-NEXT: addi a3, a5, -1
+; RV32IZFINX-NEXT: addi a2, a4, -1
; RV32IZFINX-NEXT: .LBB5_6:
-; RV32IZFINX-NEXT: and a1, a2, a3
+; RV32IZFINX-NEXT: feq.s a1, s0, s0
+; RV32IZFINX-NEXT: neg a4, a1
+; RV32IZFINX-NEXT: and a1, a4, a2
+; RV32IZFINX-NEXT: neg a2, s1
+; RV32IZFINX-NEXT: and a0, a2, a0
+; RV32IZFINX-NEXT: neg a2, a3
+; RV32IZFINX-NEXT: or a0, a2, a0
+; RV32IZFINX-NEXT: and a0, a4, a0
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: addi sp, sp, 16
; RV32IZFINX-NEXT: ret
;
@@ -616,7 +612,6 @@ define i64 @test_trunc_si64(float %x) nounwind {
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: mv s0, a0
; RV32IZFINX-NEXT: lui a0, 307200
; RV32IZFINX-NEXT: fabs.s a1, s0
@@ -629,33 +624,32 @@ define i64 @test_trunc_si64(float %x) nounwind {
; RV32IZFINX-NEXT: .LBB9_2:
; RV32IZFINX-NEXT: lui a0, 913408
; RV32IZFINX-NEXT: fle.s s1, a0, s0
-; RV32IZFINX-NEXT: neg s2, s1
; RV32IZFINX-NEXT: mv a0, s0
; RV32IZFINX-NEXT: call __fixsfdi
-; RV32IZFINX-NEXT: and a0, s2, a0
-; RV32IZFINX-NEXT: lui a2, 389120
-; RV32IZFINX-NEXT: addi a2, a2, -1
-; RV32IZFINX-NEXT: flt.s a4, a2, s0
-; RV32IZFINX-NEXT: neg a2, a4
-; RV32IZFINX-NEXT: or a0, a2, a0
-; RV32IZFINX-NEXT: feq.s a2, s0, s0
-; RV32IZFINX-NEXT: neg a2, a2
-; RV32IZFINX-NEXT: lui a5, 524288
-; RV32IZFINX-NEXT: lui a3, 524288
+; RV32IZFINX-NEXT: lui a4, 524288
+; RV32IZFINX-NEXT: lui a2, 524288
; RV32IZFINX-NEXT: beqz s1, .LBB9_4
; RV32IZFINX-NEXT: # %bb.3:
-; RV32IZFINX-NEXT: mv a3, a1
+; RV32IZFINX-NEXT: mv a2, a1
; RV32IZFINX-NEXT: .LBB9_4:
-; RV32IZFINX-NEXT: and a0, a2, a0
-; RV32IZFINX-NEXT: beqz a4, .LBB9_6
+; RV32IZFINX-NEXT: lui a1, 389120
+; RV32IZFINX-NEXT: addi a1, a1, -1
+; RV32IZFINX-NEXT: flt.s a3, a1, s0
+; RV32IZFINX-NEXT: beqz a3, .LBB9_6
; RV32IZFINX-NEXT: # %bb.5:
-; RV32IZFINX-NEXT: addi a3, a5, -1
+; RV32IZFINX-NEXT: addi a2, a4, -1
; RV32IZFINX-NEXT: .LBB9_6:
-; RV32IZFINX-NEXT: and a1, a2, a3
+; RV32IZFINX-NEXT: feq.s a1, s0, s0
+; RV32IZFINX-NEXT: neg a4, a1
+; RV32IZFINX-NEXT: and a1, a4, a2
+; RV32IZFINX-NEXT: neg a2, s1
+; RV32IZFINX-NEXT: and a0, a2, a0
+; RV32IZFINX-NEXT: neg a2, a3
+; RV32IZFINX-NEXT: or a0, a2, a0
+; RV32IZFINX-NEXT: and a0, a4, a0
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: addi sp, sp, 16
; RV32IZFINX-NEXT: ret
;
@@ -876,7 +870,6 @@ define i64 @test_round_si64(float %x) nounwind {
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: mv s0, a0
; RV32IZFINX-NEXT: lui a0, 307200
; RV32IZFINX-NEXT: fabs.s a1, s0
@@ -889,33 +882,32 @@ define i64 @test_round_si64(float %x) nounwind {
; RV32IZFINX-NEXT: .LBB13_2:
; RV32IZFINX-NEXT: lui a0, 913408
; RV32IZFINX-NEXT: fle.s s1, a0, s0
-; RV32IZFINX-NEXT: neg s2, s1
; RV32IZFINX-NEXT: mv a0, s0
; RV32IZFINX-NEXT: call __fixsfdi
-; RV32IZFINX-NEXT: and a0, s2, a0
-; RV32IZFINX-NEXT: lui a2, 389120
-; RV32IZFINX-NEXT: addi a2, a2, -1
-; RV32IZFINX-NEXT: flt.s a4, a2, s0
-; RV32IZFINX-NEXT: neg a2, a4
-; RV32IZFINX-NEXT: or a0, a2, a0
-; RV32IZFINX-NEXT: feq.s a2, s0, s0
-; RV32IZFINX-NEXT: neg a2, a2
-; RV32IZFINX-NEXT: lui a5, 524288
-; RV32IZFINX-NEXT: lui a3, 524288
+; RV32IZFINX-NEXT: lui a4, 524288
+; RV32IZFINX-NEXT: lui a2, 524288
; RV32IZFINX-NEXT: beqz s1, .LBB13_4
; RV32IZFINX-NEXT: # %bb.3:
-; RV32IZFINX-NEXT: mv a3, a1
+; RV32IZFINX-NEXT: mv a2, a1
; RV32IZFINX-NEXT: .LBB13_4:
-; RV32IZFINX-NEXT: and a0, a2, a0
-; RV32IZFINX-NEXT: beqz a4, .LBB13_6
+; RV32IZFINX-NEXT: lui a1, 389120
+; RV32IZFINX-NEXT: addi a1, a1, -1
+; RV32IZFINX-NEXT: flt.s a3, a1, s0
+; RV32IZFINX-NEXT: beqz a3, .LBB13_6
; RV32IZFINX-NEXT: # %bb.5:
-; RV32IZFINX-NEXT: addi a3, a5, -1
+; RV32IZFINX-NEXT: addi a2, a4, -1
; RV32IZFINX-NEXT: .LBB13_6:
-; RV32IZFINX-NEXT: and a1, a2, a3
+; RV32IZFINX-NEXT: feq.s a1, s0, s0
+; RV32IZFINX-NEXT: neg a4, a1
+; RV32IZFINX-NEXT: and a1, a4, a2
+; RV32IZFINX-NEXT: neg a2, s1
+; RV32IZFINX-NEXT: and a0, a2, a0
+; RV32IZFINX-NEXT: neg a2, a3
+; RV32IZFINX-NEXT: or a0, a2, a0
+; RV32IZFINX-NEXT: and a0, a4, a0
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: addi sp, sp, 16
; RV32IZFINX-NEXT: ret
;
@@ -1136,7 +1128,6 @@ define i64 @test_roundeven_si64(float %x) nounwind {
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: mv s0, a0
; RV32IZFINX-NEXT: lui a0, 307200
; RV32IZFINX-NEXT: fabs.s a1, s0
@@ -1149,33 +1140,32 @@ define i64 @test_roundeven_si64(float %x) nounwind {
; RV32IZFINX-NEXT: .LBB17_2:
; RV32IZFINX-NEXT: lui a0, 913408
; RV32IZFINX-NEXT: fle.s s1, a0, s0
-; RV32IZFINX-NEXT: neg s2, s1
; RV32IZFINX-NEXT: mv a0, s0
; RV32IZFINX-NEXT: call __fixsfdi
-; RV32IZFINX-NEXT: and a0, s2, a0
-; RV32IZFINX-NEXT: lui a2, 389120
-; RV32IZFINX-NEXT: addi a2, a2, -1
-; RV32IZFINX-NEXT: flt.s a4, a2, s0
-; RV32IZFINX-NEXT: neg a2, a4
-; RV32IZFINX-NEXT: or a0, a2, a0
-; RV32IZFINX-NEXT: feq...
[truncated]
; ZHINX32-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
; ZHINX32-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
; ZHINX32-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
; ZHINX32-NEXT: addi sp, sp, -160
This is a regression, because we now have additional spills due to no scheduling.

We got lucky before because (f32 bitcast (i32 load)) was combined to (f32 load), which used the "IROrder" from the bitcast. That affected where 4 loads were placed when the SelectionDAG was linearized into MIR. Now we create an (f32 load) directly and it gets a different "IROrder". I'm not sure where the different IROrders originate from.
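(For reference, a minimal sketch of the fold described above; the IR below triggers the generic (f32 bitcast (i32 load)) -> (f32 load) combine directly, whereas in the affected test the pattern came from the old argument lowering, so treat it as illustrative only.)

```llvm
; Generic illustration: DAGCombiner turns (f32 bitcast (i32 load)) into
; (f32 load), and the combined node keeps the bitcast's IROrder. In the
; affected test the same pattern was produced by argument lowering rather
; than by explicit IR like this.
define float @load_f32(ptr %p) nounwind {
  %i = load i32, ptr %p
  %f = bitcast i32 %i to float
  ret float %f
}
```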
LGTM