-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Revert "[RISCV] Enable TTI::shouldDropLSRSolutionIfLessProfitable by default" #98328
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
asb
merged 1 commit into
main
from
revert-89927-2024q2-lsr-riscv-drop-unprofitable-solutions-by-default
Jul 10, 2024
Merged
Revert "[RISCV] Enable TTI::shouldDropLSRSolutionIfLessProfitable by default" #98328
asb
merged 1 commit into
main
from
revert-89927-2024q2-lsr-riscv-drop-unprofitable-solutions-by-default
Jul 10, 2024
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Revert "[RISCV] Enable TTI::shouldDropLSRSolutionIfLessProfitable by default". This reverts commit af47a4e.
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-transforms — Author: Alex Bradbury (asb). Changes: Reverts llvm/llvm-project#89927 while we investigate performance regressions reported by @dtcxzyw. Patch is 67.95 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/98328.diff — 9 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 7b239b8fc17a3..9c37a4f6ec2d0 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -398,8 +398,6 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
return true;
}
- bool shouldDropLSRSolutionIfLessProfitable() const { return true; }
-
std::optional<unsigned> getMinPageSize() const { return 4096; }
};
diff --git a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
index 92639be0017e8..2b4b8e979f3d7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
@@ -86,29 +86,30 @@ declare i64 @llvm.vscale.i64()
define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
; NO-SINK-LABEL: sink_splat_add_scalable:
; NO-SINK: # %bb.0: # %entry
-; NO-SINK-NEXT: csrr a2, vlenb
-; NO-SINK-NEXT: srli a2, a2, 1
+; NO-SINK-NEXT: csrr a5, vlenb
+; NO-SINK-NEXT: srli a2, a5, 1
; NO-SINK-NEXT: li a3, 1024
; NO-SINK-NEXT: bgeu a3, a2, .LBB1_2
; NO-SINK-NEXT: # %bb.1:
; NO-SINK-NEXT: li a3, 0
; NO-SINK-NEXT: j .LBB1_5
; NO-SINK-NEXT: .LBB1_2: # %vector.ph
-; NO-SINK-NEXT: li a5, 0
; NO-SINK-NEXT: addi a3, a2, -1
; NO-SINK-NEXT: andi a4, a3, 1024
; NO-SINK-NEXT: xori a3, a4, 1024
; NO-SINK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; NO-SINK-NEXT: vmv.v.x v8, a1
+; NO-SINK-NEXT: slli a5, a5, 1
+; NO-SINK-NEXT: mv a6, a0
+; NO-SINK-NEXT: mv a7, a3
; NO-SINK-NEXT: .LBB1_3: # %vector.body
; NO-SINK-NEXT: # =>This Inner Loop Header: Depth=1
-; NO-SINK-NEXT: slli a6, a5, 2
-; NO-SINK-NEXT: add a6, a0, a6
; NO-SINK-NEXT: vl2re32.v v10, (a6)
; NO-SINK-NEXT: vadd.vv v10, v10, v8
-; NO-SINK-NEXT: add a5, a5, a2
; NO-SINK-NEXT: vs2r.v v10, (a6)
-; NO-SINK-NEXT: bne a5, a3, .LBB1_3
+; NO-SINK-NEXT: sub a7, a7, a2
+; NO-SINK-NEXT: add a6, a6, a5
+; NO-SINK-NEXT: bnez a7, .LBB1_3
; NO-SINK-NEXT: # %bb.4: # %middle.block
; NO-SINK-NEXT: beqz a4, .LBB1_7
; NO-SINK-NEXT: .LBB1_5: # %for.body.preheader
@@ -128,28 +129,29 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
;
; SINK-LABEL: sink_splat_add_scalable:
; SINK: # %bb.0: # %entry
-; SINK-NEXT: csrr a2, vlenb
-; SINK-NEXT: srli a2, a2, 1
+; SINK-NEXT: csrr a5, vlenb
+; SINK-NEXT: srli a2, a5, 1
; SINK-NEXT: li a3, 1024
; SINK-NEXT: bgeu a3, a2, .LBB1_2
; SINK-NEXT: # %bb.1:
; SINK-NEXT: li a3, 0
; SINK-NEXT: j .LBB1_5
; SINK-NEXT: .LBB1_2: # %vector.ph
-; SINK-NEXT: li a5, 0
; SINK-NEXT: addi a3, a2, -1
; SINK-NEXT: andi a4, a3, 1024
; SINK-NEXT: xori a3, a4, 1024
-; SINK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
+; SINK-NEXT: slli a5, a5, 1
+; SINK-NEXT: mv a6, a0
+; SINK-NEXT: mv a7, a3
+; SINK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; SINK-NEXT: .LBB1_3: # %vector.body
; SINK-NEXT: # =>This Inner Loop Header: Depth=1
-; SINK-NEXT: slli a6, a5, 2
-; SINK-NEXT: add a6, a0, a6
; SINK-NEXT: vl2re32.v v8, (a6)
; SINK-NEXT: vadd.vx v8, v8, a1
-; SINK-NEXT: add a5, a5, a2
; SINK-NEXT: vs2r.v v8, (a6)
-; SINK-NEXT: bne a5, a3, .LBB1_3
+; SINK-NEXT: sub a7, a7, a2
+; SINK-NEXT: add a6, a6, a5
+; SINK-NEXT: bnez a7, .LBB1_3
; SINK-NEXT: # %bb.4: # %middle.block
; SINK-NEXT: beqz a4, .LBB1_7
; SINK-NEXT: .LBB1_5: # %for.body.preheader
@@ -169,28 +171,29 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
;
; DEFAULT-LABEL: sink_splat_add_scalable:
; DEFAULT: # %bb.0: # %entry
-; DEFAULT-NEXT: csrr a2, vlenb
-; DEFAULT-NEXT: srli a2, a2, 1
+; DEFAULT-NEXT: csrr a5, vlenb
+; DEFAULT-NEXT: srli a2, a5, 1
; DEFAULT-NEXT: li a3, 1024
; DEFAULT-NEXT: bgeu a3, a2, .LBB1_2
; DEFAULT-NEXT: # %bb.1:
; DEFAULT-NEXT: li a3, 0
; DEFAULT-NEXT: j .LBB1_5
; DEFAULT-NEXT: .LBB1_2: # %vector.ph
-; DEFAULT-NEXT: li a5, 0
; DEFAULT-NEXT: addi a3, a2, -1
; DEFAULT-NEXT: andi a4, a3, 1024
; DEFAULT-NEXT: xori a3, a4, 1024
-; DEFAULT-NEXT: vsetvli a6, zero, e32, m2, ta, ma
+; DEFAULT-NEXT: slli a5, a5, 1
+; DEFAULT-NEXT: mv a6, a0
+; DEFAULT-NEXT: mv a7, a3
+; DEFAULT-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; DEFAULT-NEXT: .LBB1_3: # %vector.body
; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1
-; DEFAULT-NEXT: slli a6, a5, 2
-; DEFAULT-NEXT: add a6, a0, a6
; DEFAULT-NEXT: vl2re32.v v8, (a6)
; DEFAULT-NEXT: vadd.vx v8, v8, a1
-; DEFAULT-NEXT: add a5, a5, a2
; DEFAULT-NEXT: vs2r.v v8, (a6)
-; DEFAULT-NEXT: bne a5, a3, .LBB1_3
+; DEFAULT-NEXT: sub a7, a7, a2
+; DEFAULT-NEXT: add a6, a6, a5
+; DEFAULT-NEXT: bnez a7, .LBB1_3
; DEFAULT-NEXT: # %bb.4: # %middle.block
; DEFAULT-NEXT: beqz a4, .LBB1_7
; DEFAULT-NEXT: .LBB1_5: # %for.body.preheader
@@ -404,32 +407,32 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; NO-SINK-LABEL: sink_splat_fadd_scalable:
; NO-SINK: # %bb.0: # %entry
; NO-SINK-NEXT: csrr a1, vlenb
-; NO-SINK-NEXT: srli a1, a1, 2
-; NO-SINK-NEXT: li a2, 1024
-; NO-SINK-NEXT: bgeu a2, a1, .LBB4_2
+; NO-SINK-NEXT: srli a2, a1, 2
+; NO-SINK-NEXT: li a3, 1024
+; NO-SINK-NEXT: bgeu a3, a2, .LBB4_2
; NO-SINK-NEXT: # %bb.1:
-; NO-SINK-NEXT: li a2, 0
+; NO-SINK-NEXT: li a3, 0
; NO-SINK-NEXT: j .LBB4_5
; NO-SINK-NEXT: .LBB4_2: # %vector.ph
-; NO-SINK-NEXT: li a4, 0
-; NO-SINK-NEXT: addi a2, a1, -1
-; NO-SINK-NEXT: andi a3, a2, 1024
-; NO-SINK-NEXT: xori a2, a3, 1024
+; NO-SINK-NEXT: addi a3, a2, -1
+; NO-SINK-NEXT: andi a4, a3, 1024
+; NO-SINK-NEXT: xori a3, a4, 1024
; NO-SINK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; NO-SINK-NEXT: vfmv.v.f v8, fa0
+; NO-SINK-NEXT: mv a5, a0
+; NO-SINK-NEXT: mv a6, a3
; NO-SINK-NEXT: .LBB4_3: # %vector.body
; NO-SINK-NEXT: # =>This Inner Loop Header: Depth=1
-; NO-SINK-NEXT: slli a5, a4, 2
-; NO-SINK-NEXT: add a5, a0, a5
; NO-SINK-NEXT: vl1re32.v v9, (a5)
; NO-SINK-NEXT: vfadd.vv v9, v9, v8
-; NO-SINK-NEXT: add a4, a4, a1
; NO-SINK-NEXT: vs1r.v v9, (a5)
-; NO-SINK-NEXT: bne a4, a2, .LBB4_3
+; NO-SINK-NEXT: sub a6, a6, a2
+; NO-SINK-NEXT: add a5, a5, a1
+; NO-SINK-NEXT: bnez a6, .LBB4_3
; NO-SINK-NEXT: # %bb.4: # %middle.block
-; NO-SINK-NEXT: beqz a3, .LBB4_7
+; NO-SINK-NEXT: beqz a4, .LBB4_7
; NO-SINK-NEXT: .LBB4_5: # %for.body.preheader
-; NO-SINK-NEXT: slli a1, a2, 2
+; NO-SINK-NEXT: slli a1, a3, 2
; NO-SINK-NEXT: add a1, a0, a1
; NO-SINK-NEXT: lui a2, 1
; NO-SINK-NEXT: add a0, a0, a2
@@ -446,31 +449,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; SINK-LABEL: sink_splat_fadd_scalable:
; SINK: # %bb.0: # %entry
; SINK-NEXT: csrr a1, vlenb
-; SINK-NEXT: srli a1, a1, 2
-; SINK-NEXT: li a2, 1024
-; SINK-NEXT: bgeu a2, a1, .LBB4_2
+; SINK-NEXT: srli a2, a1, 2
+; SINK-NEXT: li a3, 1024
+; SINK-NEXT: bgeu a3, a2, .LBB4_2
; SINK-NEXT: # %bb.1:
-; SINK-NEXT: li a2, 0
+; SINK-NEXT: li a3, 0
; SINK-NEXT: j .LBB4_5
; SINK-NEXT: .LBB4_2: # %vector.ph
-; SINK-NEXT: li a4, 0
-; SINK-NEXT: addi a2, a1, -1
-; SINK-NEXT: andi a3, a2, 1024
-; SINK-NEXT: xori a2, a3, 1024
-; SINK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
+; SINK-NEXT: addi a3, a2, -1
+; SINK-NEXT: andi a4, a3, 1024
+; SINK-NEXT: xori a3, a4, 1024
+; SINK-NEXT: mv a5, a0
+; SINK-NEXT: mv a6, a3
+; SINK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; SINK-NEXT: .LBB4_3: # %vector.body
; SINK-NEXT: # =>This Inner Loop Header: Depth=1
-; SINK-NEXT: slli a5, a4, 2
-; SINK-NEXT: add a5, a0, a5
; SINK-NEXT: vl1re32.v v8, (a5)
; SINK-NEXT: vfadd.vf v8, v8, fa0
-; SINK-NEXT: add a4, a4, a1
; SINK-NEXT: vs1r.v v8, (a5)
-; SINK-NEXT: bne a4, a2, .LBB4_3
+; SINK-NEXT: sub a6, a6, a2
+; SINK-NEXT: add a5, a5, a1
+; SINK-NEXT: bnez a6, .LBB4_3
; SINK-NEXT: # %bb.4: # %middle.block
-; SINK-NEXT: beqz a3, .LBB4_7
+; SINK-NEXT: beqz a4, .LBB4_7
; SINK-NEXT: .LBB4_5: # %for.body.preheader
-; SINK-NEXT: slli a1, a2, 2
+; SINK-NEXT: slli a1, a3, 2
; SINK-NEXT: add a1, a0, a1
; SINK-NEXT: lui a2, 1
; SINK-NEXT: add a0, a0, a2
@@ -487,31 +490,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; DEFAULT-LABEL: sink_splat_fadd_scalable:
; DEFAULT: # %bb.0: # %entry
; DEFAULT-NEXT: csrr a1, vlenb
-; DEFAULT-NEXT: srli a1, a1, 2
-; DEFAULT-NEXT: li a2, 1024
-; DEFAULT-NEXT: bgeu a2, a1, .LBB4_2
+; DEFAULT-NEXT: srli a2, a1, 2
+; DEFAULT-NEXT: li a3, 1024
+; DEFAULT-NEXT: bgeu a3, a2, .LBB4_2
; DEFAULT-NEXT: # %bb.1:
-; DEFAULT-NEXT: li a2, 0
+; DEFAULT-NEXT: li a3, 0
; DEFAULT-NEXT: j .LBB4_5
; DEFAULT-NEXT: .LBB4_2: # %vector.ph
-; DEFAULT-NEXT: li a4, 0
-; DEFAULT-NEXT: addi a2, a1, -1
-; DEFAULT-NEXT: andi a3, a2, 1024
-; DEFAULT-NEXT: xori a2, a3, 1024
-; DEFAULT-NEXT: vsetvli a5, zero, e32, m1, ta, ma
+; DEFAULT-NEXT: addi a3, a2, -1
+; DEFAULT-NEXT: andi a4, a3, 1024
+; DEFAULT-NEXT: xori a3, a4, 1024
+; DEFAULT-NEXT: mv a5, a0
+; DEFAULT-NEXT: mv a6, a3
+; DEFAULT-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; DEFAULT-NEXT: .LBB4_3: # %vector.body
; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1
-; DEFAULT-NEXT: slli a5, a4, 2
-; DEFAULT-NEXT: add a5, a0, a5
; DEFAULT-NEXT: vl1re32.v v8, (a5)
; DEFAULT-NEXT: vfadd.vf v8, v8, fa0
-; DEFAULT-NEXT: add a4, a4, a1
; DEFAULT-NEXT: vs1r.v v8, (a5)
-; DEFAULT-NEXT: bne a4, a2, .LBB4_3
+; DEFAULT-NEXT: sub a6, a6, a2
+; DEFAULT-NEXT: add a5, a5, a1
+; DEFAULT-NEXT: bnez a6, .LBB4_3
; DEFAULT-NEXT: # %bb.4: # %middle.block
-; DEFAULT-NEXT: beqz a3, .LBB4_7
+; DEFAULT-NEXT: beqz a4, .LBB4_7
; DEFAULT-NEXT: .LBB4_5: # %for.body.preheader
-; DEFAULT-NEXT: slli a1, a2, 2
+; DEFAULT-NEXT: slli a1, a3, 2
; DEFAULT-NEXT: add a1, a0, a1
; DEFAULT-NEXT: lui a2, 1
; DEFAULT-NEXT: add a0, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll
index c95301809375c..3cb3c94d4e1f2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll
@@ -7,106 +7,143 @@
define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscale x 4 x i1> %arg.6, i64 %arg.7, i1 %arg.8, i64 %arg.9, i32 %arg.10) vscale_range(2,2) {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -48
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: addi sp, sp, -112
+; CHECK-NEXT: .cfi_def_cfa_offset 112
+; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s5, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s6, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s7, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s8, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s10, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s11, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
; CHECK-NEXT: .cfi_offset s1, -24
; CHECK-NEXT: .cfi_offset s2, -32
; CHECK-NEXT: .cfi_offset s3, -40
-; CHECK-NEXT: li a7, 0
-; CHECK-NEXT: ld s2, 48(sp)
+; CHECK-NEXT: .cfi_offset s4, -48
+; CHECK-NEXT: .cfi_offset s5, -56
+; CHECK-NEXT: .cfi_offset s6, -64
+; CHECK-NEXT: .cfi_offset s7, -72
+; CHECK-NEXT: .cfi_offset s8, -80
+; CHECK-NEXT: .cfi_offset s9, -88
+; CHECK-NEXT: .cfi_offset s10, -96
+; CHECK-NEXT: .cfi_offset s11, -104
+; CHECK-NEXT: li s2, 0
+; CHECK-NEXT: li a7, 8
+; CHECK-NEXT: li t0, 12
+; CHECK-NEXT: li s0, 4
+; CHECK-NEXT: li t1, 20
+; CHECK-NEXT: ld a1, 112(sp)
+; CHECK-NEXT: sd a1, 0(sp) # 8-byte Folded Spill
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: andi s3, a0, 1
-; CHECK-NEXT: andi t1, a2, 1
-; CHECK-NEXT: andi a6, a4, 1
+; CHECK-NEXT: andi t3, a4, 1
+; CHECK-NEXT: li t2, 4
; CHECK-NEXT: .LBB0_1: # %for.cond1.preheader.i
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_2 Depth 2
; CHECK-NEXT: # Child Loop BB0_3 Depth 3
; CHECK-NEXT: # Child Loop BB0_4 Depth 4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
-; CHECK-NEXT: li t0, 0
+; CHECK-NEXT: mv t4, t1
+; CHECK-NEXT: mv t5, t2
+; CHECK-NEXT: mv t6, t0
+; CHECK-NEXT: mv s3, a7
+; CHECK-NEXT: mv a6, s2
; CHECK-NEXT: .LBB0_2: # %for.cond5.preheader.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # => This Loop Header: Depth=2
; CHECK-NEXT: # Child Loop BB0_3 Depth 3
; CHECK-NEXT: # Child Loop BB0_4 Depth 4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
-; CHECK-NEXT: li t3, 0
-; CHECK-NEXT: add t2, t0, a7
+; CHECK-NEXT: mv s5, t4
+; CHECK-NEXT: mv s6, t5
+; CHECK-NEXT: mv s7, t6
+; CHECK-NEXT: mv s8, s3
+; CHECK-NEXT: mv s4, a6
; CHECK-NEXT: .LBB0_3: # %for.cond9.preheader.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # Parent Loop BB0_2 Depth=2
; CHECK-NEXT: # => This Loop Header: Depth=3
; CHECK-NEXT: # Child Loop BB0_4 Depth 4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
-; CHECK-NEXT: li t5, 0
-; CHECK-NEXT: add t4, t2, t3
+; CHECK-NEXT: mv s11, s5
+; CHECK-NEXT: mv a3, s6
+; CHECK-NEXT: mv ra, s7
+; CHECK-NEXT: mv a4, s8
+; CHECK-NEXT: mv s9, s4
; CHECK-NEXT: .LBB0_4: # %vector.ph.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # Parent Loop BB0_2 Depth=2
; CHECK-NEXT: # Parent Loop BB0_3 Depth=3
; CHECK-NEXT: # => This Loop Header: Depth=4
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
-; CHECK-NEXT: li s1, 0
-; CHECK-NEXT: add a2, t4, t5
-; CHECK-NEXT: addi a0, a2, 2
-; CHECK-NEXT: addi a3, a2, 3
-; CHECK-NEXT: addi a1, a2, 1
-; CHECK-NEXT: addi a4, a2, 5
-; CHECK-NEXT: li a5, 1
+; CHECK-NEXT: li a5, 0
; CHECK-NEXT: .LBB0_5: # %vector.body.i
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # Parent Loop BB0_2 Depth=2
; CHECK-NEXT: # Parent Loop BB0_3 Depth=3
; CHECK-NEXT: # Parent Loop BB0_4 Depth=4
; CHECK-NEXT: # => This Inner Loop Header: Depth=5
-; CHECK-NEXT: mv t6, s1
-; CHECK-NEXT: addi a2, a2, 1
-; CHECK-NEXT: addi a5, a5, 1
-; CHECK-NEXT: slli s1, a0, 2
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: addi a3, a3, 1
-; CHECK-NEXT: slli s0, a1, 2
-; CHECK-NEXT: addi a1, a1, 1
-; CHECK-NEXT: addi a4, a4, 1
-; CHECK-NEXT: vse32.v v8, (s1), v0.t
-; CHECK-NEXT: vse32.v v8, (s0), v0.t
-; CHECK-NEXT: addi s1, t6, 1
-; CHECK-NEXT: bnez t6, .LBB0_5
+; CHECK-NEXT: addi s1, a5, 4
+; CHECK-NEXT: add a1, a4, a5
+; CHECK-NEXT: vse32.v v8, (a1), v0.t
+; CHECK-NEXT: add a5, a5, a3
+; CHECK-NEXT: vse32.v v8, (a5), v0.t
+; CHECK-NEXT: mv a5, s1
+; CHECK-NEXT: bne s1, s0, .LBB0_5
; CHECK-NEXT: # %bb.6: # %for.cond.cleanup15.i
; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=4
-; CHECK-NEXT: addi t5, t5, 1
-; CHECK-NEXT: beqz s3, .LBB0_4
+; CHECK-NEXT: addi s9, s9, 4
+; CHECK-NEXT: addi a4, a4, 4
+; CHECK-NEXT: addi ra, ra, 4
+; CHECK-NEXT: addi a3, a3, 4
+; CHECK-NEXT: andi s10, a0, 1
+; CHECK-NEXT: addi s11, s11, 4
+; CHECK-NEXT: beqz s10, .LBB0_4
; CHECK-NEXT: # %bb.7: # %for.cond.cleanup11.i
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=3
-; CHECK-NEXT: addi t3, t3, 1
-; CHECK-NEXT: beqz t1, .LBB0_3
+; CHECK-NEXT: addi s4, s4, 4
+; CHECK-NEXT: addi s8, s8, 4
+; CHECK-NEXT: addi s7, s7, 4
+; CHECK-NEXT: addi s6, s6, 4
+; CHECK-NEXT: andi a1, a2, 1
+; CHECK-NEXT: addi s5, s5, 4
+; CHECK-NEXT: beqz a1, .LBB0_3
; CHECK-NEXT: # %bb.8: # %for.cond.cleanup7.i
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=2
-; CHECK-NEXT: addi t0, t0, 1
-; CHECK-NEXT: beqz a6, .LBB0_2
+; CHECK-NEXT: addi a6, a6, 4
+; CHECK-NEXT: addi s3, s3, 4
+; CHECK-NEXT: addi t6, t6, 4
+; CHECK-NEXT: addi t5, t5, 4
+; CHECK-NEXT: addi t4, t4, 4
+; CHECK-NEXT: beqz t3, .LBB0_2
; CHECK-NEXT: # %bb.9: # %for.cond.cleanup3.i
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: addi a7, a7, 1
-; CHECK-NEXT: beqz t1, .LBB0_1
+; CHECK-NEXT: addi s2, s2, 4
+; CHECK-NEXT: addi a7, a7, 4
+; CHECK-NEXT: addi t0, t0, 4
+; CHECK-NEXT: addi t2, t2, 4
+; CHECK-NEXT: addi t1, t1, 4
+; CHECK-NEXT: beqz a1, .LBB0_1
; CHECK-NEXT: # %bb.10: # %l.exit
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: jalr a0
-; CHECK-NEXT: beqz s3, .LBB0_12
+; CHECK-NEXT: beqz s10, .LBB0_12
; CHECK-NEXT: .LBB0_11: # %for.body7.us.14
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: j .LBB0_11
; CHECK-NEXT: .LBB0_12: # %for.body7.us.19
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmv.s.x v8, s2
+; CHECK-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vmv.v.i v16, 0
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v16, v8, 1
@@ -116,12 +153,20 @@ define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscal
; CHECK-NEXT: snez a0, a0
; CHECK-NEXT: sb a0, 0(zero)
; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 48
+; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s5, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s6, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s7, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s8, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s10, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s11, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 112
; CHECK-NEXT: ret
entry:
%0 = tail call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index 7742cfc7da640..d1c98f828e76d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -243,28 +243,29 @@ for.cond.cleanup: ; preds = %vector.body
define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_mul_scalable:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a2, a2, 1
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: srli a2, a5, 1
; CHECK-NEXT: li a3, 1024
; CHECK-NEXT: bgeu a3, a2, .LBB7_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB7_5
; CHECK-NEXT: .LBB7_2: # %vector.ph
-; CHECK-NEXT: li a5, 0
; CHECK-NEXT: addi a3, a2, -1
; CHECK-NEXT: andi a4, a3, 1024
; CHECK-NEXT: xori a3, a4, 1024
-; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
+; CHECK-NEXT: slli a5, a5, 1
+; CHECK-NEXT: mv a6, a0
+; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT:...
[truncated]
|
aaryanshukla
pushed a commit
to aaryanshukla/llvm-project
that referenced
this pull request
Jul 14, 2024
Revert "[RISCV] Enable TTI::shouldDropLSRSolutionIfLessProfitable by default" (llvm#98328). Reverts llvm#89927 while we investigate performance regressions reported by @dtcxzyw.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Reverts #89927 while we investigate performance regressions reported by @dtcxzyw.