[LoongArch] Set isAsCheapAsAMove on ADDI.D and {X}ORI instructions #94733
Conversation
@llvm/pr-subscribers-backend-loongarch

Author: hev (heiher)

Changes

Patch is 140.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/94733.diff

9 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index f72f46e39e2a4..f4bdd7d1209cf 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -734,7 +734,7 @@ def ADD_W : ALU_3R<0x00100000>;
def SUB_W : ALU_3R<0x00110000>;
def ADDI_W : ALU_2RI12<0x02800000, simm12_addlike>;
def ALSL_W : ALU_3RI2<0x00040000, uimm2_plus1>;
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def LU12I_W : ALU_1RI20<0x14000000, simm20_lu12iw>;
}
def SLT : ALU_3R<0x00120000>;
@@ -751,7 +751,7 @@ def XOR : ALU_3R<0x00158000>;
def ANDN : ALU_3R<0x00168000>;
def ORN : ALU_3R<0x00160000>;
def ANDI : ALU_2RI12<0x03400000, uimm12>;
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def ORI : ALU_2RI12<0x03800000, uimm12_ori>;
def XORI : ALU_2RI12<0x03c00000, uimm12>;
}
@@ -858,7 +858,7 @@ def ADD_D : ALU_3R<0x00108000>;
def SUB_D : ALU_3R<0x00118000>;
// ADDI_D isn't always rematerializable, but isReMaterializable will be used as
// a hint which is verified in isReallyTriviallyReMaterializable.
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def ADDI_D : ALU_2RI12<0x02c00000, simm12_addlike>;
}
def ADDU16I_D : ALU_2RI16<0x10000000, simm16>;
@@ -866,12 +866,12 @@ def ALSL_WU : ALU_3RI2<0x00060000, uimm2_plus1>;
def ALSL_D : ALU_3RI2<0x002c0000, uimm2_plus1>;
let Constraints = "$rd = $dst" in {
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
- isReMaterializable = 1 in
+ isReMaterializable = 1, isAsCheapAsAMove = 1 in
def LU32I_D : Fmt1RI20<0x16000000, (outs GPR:$dst),
(ins GPR:$rd, simm20_lu32id:$imm20),
"$rd, $imm20">;
}
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def LU52I_D : ALU_2RI12<0x03000000, simm12_lu52id>;
}
def PCADDU18I : ALU_1RI20<0x1e000000, simm20_pcaddu18i>;
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
index 5e89fd41d6a7c..cc6ba057019c6 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
@@ -28,9 +28,9 @@ define i64 @caller_float_in_fpr() nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; CHECK-NEXT: ori $a0, $zero, 1
; CHECK-NEXT: movgr2fr.w $fa0, $zero
; CHECK-NEXT: movgr2fr.d $fa1, $zero
+; CHECK-NEXT: ori $a0, $zero, 1
; CHECK-NEXT: bl %plt(callee_float_in_fpr)
; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 16
diff --git a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
index a3d85f6bd06d8..49155a3966a84 100644
--- a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
+++ b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
@@ -16,7 +16,6 @@ define dso_local noundef signext i32 @main() nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi.d $sp, $sp, -272
; CHECK-NEXT: st.d $ra, $sp, 264 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 256 # 8-byte Folded Spill
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_0)
; CHECK-NEXT: xvld $xr0, $a0, 0
@@ -33,25 +32,23 @@ define dso_local noundef signext i32 @main() nounwind {
; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_3)
; CHECK-NEXT: xvld $xr3, $a0, 0
; CHECK-NEXT: xvst $xr3, $sp, 0 # 32-byte Folded Spill
-; CHECK-NEXT: xvst $xr0, $sp, 128
-; CHECK-NEXT: xvst $xr1, $sp, 160
-; CHECK-NEXT: xvst $xr2, $sp, 192
-; CHECK-NEXT: xvst $xr3, $sp, 224
-; CHECK-NEXT: addi.d $fp, $sp, 128
-; CHECK-NEXT: move $a0, $fp
+; CHECK-NEXT: xvst $xr0, $sp, 136
+; CHECK-NEXT: xvst $xr1, $sp, 168
+; CHECK-NEXT: xvst $xr2, $sp, 200
+; CHECK-NEXT: xvst $xr3, $sp, 232
+; CHECK-NEXT: addi.d $a0, $sp, 136
; CHECK-NEXT: bl %plt(foo)
; CHECK-NEXT: xvld $xr0, $sp, 96 # 32-byte Folded Reload
-; CHECK-NEXT: xvst $xr0, $sp, 128
+; CHECK-NEXT: xvst $xr0, $sp, 136
; CHECK-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; CHECK-NEXT: xvst $xr0, $sp, 160
+; CHECK-NEXT: xvst $xr0, $sp, 168
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
-; CHECK-NEXT: xvst $xr0, $sp, 192
+; CHECK-NEXT: xvst $xr0, $sp, 200
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
-; CHECK-NEXT: xvst $xr0, $sp, 224
-; CHECK-NEXT: move $a0, $fp
+; CHECK-NEXT: xvst $xr0, $sp, 232
+; CHECK-NEXT: addi.d $a0, $sp, 136
; CHECK-NEXT: bl %plt(bar)
; CHECK-NEXT: move $a0, $zero
-; CHECK-NEXT: ld.d $fp, $sp, 256 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 264 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 272
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
index 81cc29419a0e0..ea110e8a90c13 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
@@ -297,95 +297,68 @@ define float @float_fmax_acquire(ptr %p) nounwind {
define double @double_fadd_acquire(ptr %p) nounwind {
; LA64F-LABEL: double_fadd_acquire:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -80
-; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -48
+; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
-; LA64F-NEXT: ld.d $s5, $a0, 0
-; LA64F-NEXT: lu52i.d $s0, $zero, 1023
-; LA64F-NEXT: ori $s1, $zero, 8
-; LA64F-NEXT: addi.d $s2, $sp, 8
-; LA64F-NEXT: addi.d $s3, $sp, 0
-; LA64F-NEXT: ori $s4, $zero, 2
+; LA64F-NEXT: ld.d $s0, $a0, 0
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB4_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT: move $a0, $s5
-; LA64F-NEXT: move $a1, $s0
+; LA64F-NEXT: lu52i.d $a1, $zero, 1023
+; LA64F-NEXT: move $a0, $s0
; LA64F-NEXT: bl %plt(__adddf3)
-; LA64F-NEXT: st.d $s5, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
-; LA64F-NEXT: move $a0, $s1
+; LA64F-NEXT: st.d $s0, $sp, 16
+; LA64F-NEXT: st.d $a0, $sp, 8
+; LA64F-NEXT: ori $a0, $zero, 8
+; LA64F-NEXT: addi.d $a2, $sp, 16
+; LA64F-NEXT: addi.d $a3, $sp, 8
+; LA64F-NEXT: ori $a4, $zero, 2
+; LA64F-NEXT: ori $a5, $zero, 2
; LA64F-NEXT: move $a1, $fp
-; LA64F-NEXT: move $a2, $s2
-; LA64F-NEXT: move $a3, $s3
-; LA64F-NEXT: move $a4, $s4
-; LA64F-NEXT: move $a5, $s4
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s5, $sp, 8
+; LA64F-NEXT: ld.d $s0, $sp, 16
; LA64F-NEXT: beqz $a0, .LBB4_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
-; LA64F-NEXT: move $a0, $s5
-; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 80
+; LA64F-NEXT: move $a0, $s0
+; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 48
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fadd_acquire:
; LA64D: # %bb.0:
-; LA64D-NEXT: addi.d $sp, $sp, -80
-; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
-; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
-; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
-; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
-; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
-; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
+; LA64D-NEXT: addi.d $sp, $sp, -48
+; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill
; LA64D-NEXT: move $fp, $a0
; LA64D-NEXT: fld.d $fa0, $a0, 0
; LA64D-NEXT: addi.d $a0, $zero, 1
; LA64D-NEXT: movgr2fr.d $fa1, $a0
; LA64D-NEXT: ffint.d.l $fs0, $fa1
-; LA64D-NEXT: ori $s0, $zero, 8
-; LA64D-NEXT: addi.d $s1, $sp, 16
-; LA64D-NEXT: addi.d $s2, $sp, 8
-; LA64D-NEXT: ori $s3, $zero, 2
; LA64D-NEXT: .p2align 4, , 16
; LA64D-NEXT: .LBB4_1: # %atomicrmw.start
; LA64D-NEXT: # =>This Inner Loop Header: Depth=1
; LA64D-NEXT: fadd.d $fa1, $fa0, $fs0
; LA64D-NEXT: fst.d $fa0, $sp, 16
; LA64D-NEXT: fst.d $fa1, $sp, 8
-; LA64D-NEXT: move $a0, $s0
+; LA64D-NEXT: ori $a0, $zero, 8
+; LA64D-NEXT: addi.d $a2, $sp, 16
+; LA64D-NEXT: addi.d $a3, $sp, 8
+; LA64D-NEXT: ori $a4, $zero, 2
+; LA64D-NEXT: ori $a5, $zero, 2
; LA64D-NEXT: move $a1, $fp
-; LA64D-NEXT: move $a2, $s1
-; LA64D-NEXT: move $a3, $s2
-; LA64D-NEXT: move $a4, $s3
-; LA64D-NEXT: move $a5, $s3
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
; LA64D-NEXT: fld.d $fa0, $sp, 16
; LA64D-NEXT: beqz $a0, .LBB4_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
-; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
-; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
-; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
-; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
-; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
-; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
-; LA64D-NEXT: addi.d $sp, $sp, 80
+; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64D-NEXT: addi.d $sp, $sp, 48
; LA64D-NEXT: ret
%v = atomicrmw fadd ptr %p, double 1.0 acquire, align 4
ret double %v
@@ -394,95 +367,68 @@ define double @double_fadd_acquire(ptr %p) nounwind {
define double @double_fsub_acquire(ptr %p) nounwind {
; LA64F-LABEL: double_fsub_acquire:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -80
-; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -48
+; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
-; LA64F-NEXT: ld.d $s5, $a0, 0
-; LA64F-NEXT: lu52i.d $s0, $zero, -1025
-; LA64F-NEXT: ori $s1, $zero, 8
-; LA64F-NEXT: addi.d $s2, $sp, 8
-; LA64F-NEXT: addi.d $s3, $sp, 0
-; LA64F-NEXT: ori $s4, $zero, 2
+; LA64F-NEXT: ld.d $s0, $a0, 0
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB5_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT: move $a0, $s5
-; LA64F-NEXT: move $a1, $s0
+; LA64F-NEXT: lu52i.d $a1, $zero, -1025
+; LA64F-NEXT: move $a0, $s0
; LA64F-NEXT: bl %plt(__adddf3)
-; LA64F-NEXT: st.d $s5, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
-; LA64F-NEXT: move $a0, $s1
+; LA64F-NEXT: st.d $s0, $sp, 16
+; LA64F-NEXT: st.d $a0, $sp, 8
+; LA64F-NEXT: ori $a0, $zero, 8
+; LA64F-NEXT: addi.d $a2, $sp, 16
+; LA64F-NEXT: addi.d $a3, $sp, 8
+; LA64F-NEXT: ori $a4, $zero, 2
+; LA64F-NEXT: ori $a5, $zero, 2
; LA64F-NEXT: move $a1, $fp
-; LA64F-NEXT: move $a2, $s2
-; LA64F-NEXT: move $a3, $s3
-; LA64F-NEXT: move $a4, $s4
-; LA64F-NEXT: move $a5, $s4
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s5, $sp, 8
+; LA64F-NEXT: ld.d $s0, $sp, 16
; LA64F-NEXT: beqz $a0, .LBB5_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
-; LA64F-NEXT: move $a0, $s5
-; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 80
+; LA64F-NEXT: move $a0, $s0
+; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 48
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fsub_acquire:
; LA64D: # %bb.0:
-; LA64D-NEXT: addi.d $sp, $sp, -80
-; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
-; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
-; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
-; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
-; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
-; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
+; LA64D-NEXT: addi.d $sp, $sp, -48
+; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill
; LA64D-NEXT: move $fp, $a0
; LA64D-NEXT: fld.d $fa0, $a0, 0
; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI5_0)
; LA64D-NEXT: fld.d $fs0, $a0, 0
-; LA64D-NEXT: ori $s0, $zero, 8
-; LA64D-NEXT: addi.d $s1, $sp, 16
-; LA64D-NEXT: addi.d $s2, $sp, 8
-; LA64D-NEXT: ori $s3, $zero, 2
; LA64D-NEXT: .p2align 4, , 16
; LA64D-NEXT: .LBB5_1: # %atomicrmw.start
; LA64D-NEXT: # =>This Inner Loop Header: Depth=1
; LA64D-NEXT: fadd.d $fa1, $fa0, $fs0
; LA64D-NEXT: fst.d $fa0, $sp, 16
; LA64D-NEXT: fst.d $fa1, $sp, 8
-; LA64D-NEXT: move $a0, $s0
+; LA64D-NEXT: ori $a0, $zero, 8
+; LA64D-NEXT: addi.d $a2, $sp, 16
+; LA64D-NEXT: addi.d $a3, $sp, 8
+; LA64D-NEXT: ori $a4, $zero, 2
+; LA64D-NEXT: ori $a5, $zero, 2
; LA64D-NEXT: move $a1, $fp
-; LA64D-NEXT: move $a2, $s1
-; LA64D-NEXT: move $a3, $s2
-; LA64D-NEXT: move $a4, $s3
-; LA64D-NEXT: move $a5, $s3
; LA64D-NEXT: bl %plt(__atomic_compare_exchange)
; LA64D-NEXT: fld.d $fa0, $sp, 16
; LA64D-NEXT: beqz $a0, .LBB5_1
; LA64D-NEXT: # %bb.2: # %atomicrmw.end
; LA64D-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
-; LA64D-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
-; LA64D-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
-; LA64D-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
-; LA64D-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
-; LA64D-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
-; LA64D-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
-; LA64D-NEXT: addi.d $sp, $sp, 80
+; LA64D-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64D-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64D-NEXT: addi.d $sp, $sp, 48
; LA64D-NEXT: ret
%v = atomicrmw fsub ptr %p, double 1.0 acquire, align 4
ret double %v
@@ -491,71 +437,48 @@ define double @double_fsub_acquire(ptr %p) nounwind {
define double @double_fmin_acquire(ptr %p) nounwind {
; LA64F-LABEL: double_fmin_acquire:
; LA64F: # %bb.0:
-; LA64F-NEXT: addi.d $sp, $sp, -80
-; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill
-; LA64F-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill
+; LA64F-NEXT: addi.d $sp, $sp, -48
+; LA64F-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64F-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
-; LA64F-NEXT: ld.d $s5, $a0, 0
-; LA64F-NEXT: lu52i.d $s0, $zero, 1023
-; LA64F-NEXT: ori $s1, $zero, 8
-; LA64F-NEXT: addi.d $s2, $sp, 8
-; LA64F-NEXT: addi.d $s3, $sp, 0
-; LA64F-NEXT: ori $s4, $zero, 2
+; LA64F-NEXT: ld.d $s0, $a0, 0
; LA64F-NEXT: .p2align 4, , 16
; LA64F-NEXT: .LBB6_1: # %atomicrmw.start
; LA64F-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64F-NEXT: move $a0, $s5
-; LA64F-NEXT: move $a1, $s0
+; LA64F-NEXT: lu52i.d $a1, $zero, 1023
+; LA64F-NEXT: move $a0, $s0
; LA64F-NEXT: bl %plt(fmin)
-; LA64F-NEXT: st.d $s5, $sp, 8
-; LA64F-NEXT: st.d $a0, $sp, 0
-; LA64F-NEXT: move $a0, $s1
+; LA64F-NEXT: st.d $s0, $sp, 16
+; LA64F-NEXT: st.d $a0, $sp, 8
+; LA64F-NEXT: ori $a0, $zero, 8
+; LA64F-NEXT: addi.d $a2, $sp, 16
+; LA64F-NEXT: addi.d $a3, $sp, 8
+; LA64F-NEXT: ori $a4, $zero, 2
+; LA64F-NEXT: ori $a5, $zero, 2
; LA64F-NEXT: move $a1, $fp
-; LA64F-NEXT: move $a2, $s2
-; LA64F-NEXT: move $a3, $s3
-; LA64F-NEXT: move $a4, $s4
-; LA64F-NEXT: move $a5, $s4
; LA64F-NEXT: bl %plt(__atomic_compare_exchange)
-; LA64F-NEXT: ld.d $s5, $sp, 8
+; LA64F-NEXT: ld.d $s0, $sp, 16
; LA64F-NEXT: beqz $a0, .LBB6_1
; LA64F-NEXT: # %bb.2: # %atomicrmw.end
-; LA64F-NEXT: move $a0, $s5
-; LA64F-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
-; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
-; LA64F-NEXT: addi.d $sp, $sp, 80
+; LA64F-NEXT: move $a0, $s0
+; LA64F-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64F-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64F-NEXT: addi.d $sp, $sp, 48
; LA64F-NEXT: ret
;
; LA64D-LABEL: double_fmin_acquire:
; LA64D: # %bb.0:
-; LA64D-NEXT: addi.d $sp, $sp, -80
-; LA64D-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
-; LA64D-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
-; LA64D-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
-; LA64D-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
-; LA64D-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
-; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
+; LA64D-NEXT: addi.d $sp, $sp, -48
+; LA64D-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64D-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill
; LA64D-NEXT: move $fp, $a0
; LA64D-NEXT: fld.d $fa0, $a0, 0
; LA64D-NEXT: addi.d $a0, $zero, 1
; LA64D-NEXT: movgr2fr.d $fa1, $a0
; LA64D-NEXT: ffint.d.l $fs0, $fa1
-; LA64D-NEXT: ori $s0, $zero, 8
-; LA64D-NEXT: addi.d $s1, $sp, 16
-; LA64D-NEXT: addi.d $s2, $sp, 8
-; LA64D-NEXT: ori $s3, $zero, 2
; LA64D-NEXT: .p2align...
[truncated]
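For context on what the flag change means downstream: in TableGen, `isAsCheapAsAMove = 1` sets the `MCID::CheapAsAMove` property on the instruction description, which `MachineInstr::isAsCheapAsAMove()` reports and which a target can further refine by overriding `TargetInstrInfo::isAsCheapAsAMove()`. The snippet below is only a sketch of the shape of such an override; the opcode list and the cost reasoning in the comments are assumptions for illustration, not code from this patch.

```cpp
// Illustrative sketch only: how a backend could refine the TableGen
// isAsCheapAsAMove hint in C++. Assumes the method is declared in
// LoongArchInstrInfo.h; the opcode cases are assumptions for the example,
// not part of this patch.
bool LoongArchInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  case LoongArch::ADDI_D:
  case LoongArch::ORI:
  case LoongArch::XORI:
    // Assume a register + 12-bit immediate ALU op costs no more than a
    // plain register move on typical LoongArch cores.
    return true;
  default:
    // Fall back to the MCID::CheapAsAMove property set in the .td files.
    return MI.isAsCheapAsAMove();
  }
}
```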
About the title: lu12i.w but not lu12i.d
It seems lu32i.d is 4x slower than a move (or).
Good catch! It seems that removing the suffix of |
Real tests do not show |
No description provided.