Skip to content

Commit af47a4e

Browse files
authored
[RISCV] Enable TTI::shouldDropLSRSolutionIfLessProfitable by default (#89927)
This avoids some cases where LSR produces results that lead to very poor codegen. There's a chance we'll see minor degradations for some inputs in the case that our metrics say the found solution is worse, but in reality it's better than the starting point. Per the review thread, at least one vendor has been enabling this by default for some time and found overall it's an improvement. As such, we'll enable by default and aim to fix any as-yet-unknown regressions in-tree.
1 parent 1afd4b7 commit af47a4e

File tree

9 files changed

+425
-510
lines changed

9 files changed

+425
-510
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
398398
return true;
399399
}
400400

401+
bool shouldDropLSRSolutionIfLessProfitable() const { return true; }
402+
401403
std::optional<unsigned> getMinPageSize() const { return 4096; }
402404
};
403405

llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll

Lines changed: 67 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -86,30 +86,29 @@ declare i64 @llvm.vscale.i64()
8686
define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
8787
; NO-SINK-LABEL: sink_splat_add_scalable:
8888
; NO-SINK: # %bb.0: # %entry
89-
; NO-SINK-NEXT: csrr a5, vlenb
90-
; NO-SINK-NEXT: srli a2, a5, 1
89+
; NO-SINK-NEXT: csrr a2, vlenb
90+
; NO-SINK-NEXT: srli a2, a2, 1
9191
; NO-SINK-NEXT: li a3, 1024
9292
; NO-SINK-NEXT: bgeu a3, a2, .LBB1_2
9393
; NO-SINK-NEXT: # %bb.1:
9494
; NO-SINK-NEXT: li a3, 0
9595
; NO-SINK-NEXT: j .LBB1_5
9696
; NO-SINK-NEXT: .LBB1_2: # %vector.ph
97+
; NO-SINK-NEXT: li a5, 0
9798
; NO-SINK-NEXT: addi a3, a2, -1
9899
; NO-SINK-NEXT: andi a4, a3, 1024
99100
; NO-SINK-NEXT: xori a3, a4, 1024
100101
; NO-SINK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
101102
; NO-SINK-NEXT: vmv.v.x v8, a1
102-
; NO-SINK-NEXT: slli a5, a5, 1
103-
; NO-SINK-NEXT: mv a6, a0
104-
; NO-SINK-NEXT: mv a7, a3
105103
; NO-SINK-NEXT: .LBB1_3: # %vector.body
106104
; NO-SINK-NEXT: # =>This Inner Loop Header: Depth=1
105+
; NO-SINK-NEXT: slli a6, a5, 2
106+
; NO-SINK-NEXT: add a6, a0, a6
107107
; NO-SINK-NEXT: vl2re32.v v10, (a6)
108108
; NO-SINK-NEXT: vadd.vv v10, v10, v8
109+
; NO-SINK-NEXT: add a5, a5, a2
109110
; NO-SINK-NEXT: vs2r.v v10, (a6)
110-
; NO-SINK-NEXT: sub a7, a7, a2
111-
; NO-SINK-NEXT: add a6, a6, a5
112-
; NO-SINK-NEXT: bnez a7, .LBB1_3
111+
; NO-SINK-NEXT: bne a5, a3, .LBB1_3
113112
; NO-SINK-NEXT: # %bb.4: # %middle.block
114113
; NO-SINK-NEXT: beqz a4, .LBB1_7
115114
; NO-SINK-NEXT: .LBB1_5: # %for.body.preheader
@@ -129,29 +128,28 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
129128
;
130129
; SINK-LABEL: sink_splat_add_scalable:
131130
; SINK: # %bb.0: # %entry
132-
; SINK-NEXT: csrr a5, vlenb
133-
; SINK-NEXT: srli a2, a5, 1
131+
; SINK-NEXT: csrr a2, vlenb
132+
; SINK-NEXT: srli a2, a2, 1
134133
; SINK-NEXT: li a3, 1024
135134
; SINK-NEXT: bgeu a3, a2, .LBB1_2
136135
; SINK-NEXT: # %bb.1:
137136
; SINK-NEXT: li a3, 0
138137
; SINK-NEXT: j .LBB1_5
139138
; SINK-NEXT: .LBB1_2: # %vector.ph
139+
; SINK-NEXT: li a5, 0
140140
; SINK-NEXT: addi a3, a2, -1
141141
; SINK-NEXT: andi a4, a3, 1024
142142
; SINK-NEXT: xori a3, a4, 1024
143-
; SINK-NEXT: slli a5, a5, 1
144-
; SINK-NEXT: mv a6, a0
145-
; SINK-NEXT: mv a7, a3
146-
; SINK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
143+
; SINK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
147144
; SINK-NEXT: .LBB1_3: # %vector.body
148145
; SINK-NEXT: # =>This Inner Loop Header: Depth=1
146+
; SINK-NEXT: slli a6, a5, 2
147+
; SINK-NEXT: add a6, a0, a6
149148
; SINK-NEXT: vl2re32.v v8, (a6)
150149
; SINK-NEXT: vadd.vx v8, v8, a1
150+
; SINK-NEXT: add a5, a5, a2
151151
; SINK-NEXT: vs2r.v v8, (a6)
152-
; SINK-NEXT: sub a7, a7, a2
153-
; SINK-NEXT: add a6, a6, a5
154-
; SINK-NEXT: bnez a7, .LBB1_3
152+
; SINK-NEXT: bne a5, a3, .LBB1_3
155153
; SINK-NEXT: # %bb.4: # %middle.block
156154
; SINK-NEXT: beqz a4, .LBB1_7
157155
; SINK-NEXT: .LBB1_5: # %for.body.preheader
@@ -171,29 +169,28 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
171169
;
172170
; DEFAULT-LABEL: sink_splat_add_scalable:
173171
; DEFAULT: # %bb.0: # %entry
174-
; DEFAULT-NEXT: csrr a5, vlenb
175-
; DEFAULT-NEXT: srli a2, a5, 1
172+
; DEFAULT-NEXT: csrr a2, vlenb
173+
; DEFAULT-NEXT: srli a2, a2, 1
176174
; DEFAULT-NEXT: li a3, 1024
177175
; DEFAULT-NEXT: bgeu a3, a2, .LBB1_2
178176
; DEFAULT-NEXT: # %bb.1:
179177
; DEFAULT-NEXT: li a3, 0
180178
; DEFAULT-NEXT: j .LBB1_5
181179
; DEFAULT-NEXT: .LBB1_2: # %vector.ph
180+
; DEFAULT-NEXT: li a5, 0
182181
; DEFAULT-NEXT: addi a3, a2, -1
183182
; DEFAULT-NEXT: andi a4, a3, 1024
184183
; DEFAULT-NEXT: xori a3, a4, 1024
185-
; DEFAULT-NEXT: slli a5, a5, 1
186-
; DEFAULT-NEXT: mv a6, a0
187-
; DEFAULT-NEXT: mv a7, a3
188-
; DEFAULT-NEXT: vsetvli t0, zero, e32, m2, ta, ma
184+
; DEFAULT-NEXT: vsetvli a6, zero, e32, m2, ta, ma
189185
; DEFAULT-NEXT: .LBB1_3: # %vector.body
190186
; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1
187+
; DEFAULT-NEXT: slli a6, a5, 2
188+
; DEFAULT-NEXT: add a6, a0, a6
191189
; DEFAULT-NEXT: vl2re32.v v8, (a6)
192190
; DEFAULT-NEXT: vadd.vx v8, v8, a1
191+
; DEFAULT-NEXT: add a5, a5, a2
193192
; DEFAULT-NEXT: vs2r.v v8, (a6)
194-
; DEFAULT-NEXT: sub a7, a7, a2
195-
; DEFAULT-NEXT: add a6, a6, a5
196-
; DEFAULT-NEXT: bnez a7, .LBB1_3
193+
; DEFAULT-NEXT: bne a5, a3, .LBB1_3
197194
; DEFAULT-NEXT: # %bb.4: # %middle.block
198195
; DEFAULT-NEXT: beqz a4, .LBB1_7
199196
; DEFAULT-NEXT: .LBB1_5: # %for.body.preheader
@@ -407,32 +404,32 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
407404
; NO-SINK-LABEL: sink_splat_fadd_scalable:
408405
; NO-SINK: # %bb.0: # %entry
409406
; NO-SINK-NEXT: csrr a1, vlenb
410-
; NO-SINK-NEXT: srli a2, a1, 2
411-
; NO-SINK-NEXT: li a3, 1024
412-
; NO-SINK-NEXT: bgeu a3, a2, .LBB4_2
407+
; NO-SINK-NEXT: srli a1, a1, 2
408+
; NO-SINK-NEXT: li a2, 1024
409+
; NO-SINK-NEXT: bgeu a2, a1, .LBB4_2
413410
; NO-SINK-NEXT: # %bb.1:
414-
; NO-SINK-NEXT: li a3, 0
411+
; NO-SINK-NEXT: li a2, 0
415412
; NO-SINK-NEXT: j .LBB4_5
416413
; NO-SINK-NEXT: .LBB4_2: # %vector.ph
417-
; NO-SINK-NEXT: addi a3, a2, -1
418-
; NO-SINK-NEXT: andi a4, a3, 1024
419-
; NO-SINK-NEXT: xori a3, a4, 1024
414+
; NO-SINK-NEXT: li a4, 0
415+
; NO-SINK-NEXT: addi a2, a1, -1
416+
; NO-SINK-NEXT: andi a3, a2, 1024
417+
; NO-SINK-NEXT: xori a2, a3, 1024
420418
; NO-SINK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
421419
; NO-SINK-NEXT: vfmv.v.f v8, fa0
422-
; NO-SINK-NEXT: mv a5, a0
423-
; NO-SINK-NEXT: mv a6, a3
424420
; NO-SINK-NEXT: .LBB4_3: # %vector.body
425421
; NO-SINK-NEXT: # =>This Inner Loop Header: Depth=1
422+
; NO-SINK-NEXT: slli a5, a4, 2
423+
; NO-SINK-NEXT: add a5, a0, a5
426424
; NO-SINK-NEXT: vl1re32.v v9, (a5)
427425
; NO-SINK-NEXT: vfadd.vv v9, v9, v8
426+
; NO-SINK-NEXT: add a4, a4, a1
428427
; NO-SINK-NEXT: vs1r.v v9, (a5)
429-
; NO-SINK-NEXT: sub a6, a6, a2
430-
; NO-SINK-NEXT: add a5, a5, a1
431-
; NO-SINK-NEXT: bnez a6, .LBB4_3
428+
; NO-SINK-NEXT: bne a4, a2, .LBB4_3
432429
; NO-SINK-NEXT: # %bb.4: # %middle.block
433-
; NO-SINK-NEXT: beqz a4, .LBB4_7
430+
; NO-SINK-NEXT: beqz a3, .LBB4_7
434431
; NO-SINK-NEXT: .LBB4_5: # %for.body.preheader
435-
; NO-SINK-NEXT: slli a1, a3, 2
432+
; NO-SINK-NEXT: slli a1, a2, 2
436433
; NO-SINK-NEXT: add a1, a0, a1
437434
; NO-SINK-NEXT: lui a2, 1
438435
; NO-SINK-NEXT: add a0, a0, a2
@@ -449,31 +446,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
449446
; SINK-LABEL: sink_splat_fadd_scalable:
450447
; SINK: # %bb.0: # %entry
451448
; SINK-NEXT: csrr a1, vlenb
452-
; SINK-NEXT: srli a2, a1, 2
453-
; SINK-NEXT: li a3, 1024
454-
; SINK-NEXT: bgeu a3, a2, .LBB4_2
449+
; SINK-NEXT: srli a1, a1, 2
450+
; SINK-NEXT: li a2, 1024
451+
; SINK-NEXT: bgeu a2, a1, .LBB4_2
455452
; SINK-NEXT: # %bb.1:
456-
; SINK-NEXT: li a3, 0
453+
; SINK-NEXT: li a2, 0
457454
; SINK-NEXT: j .LBB4_5
458455
; SINK-NEXT: .LBB4_2: # %vector.ph
459-
; SINK-NEXT: addi a3, a2, -1
460-
; SINK-NEXT: andi a4, a3, 1024
461-
; SINK-NEXT: xori a3, a4, 1024
462-
; SINK-NEXT: mv a5, a0
463-
; SINK-NEXT: mv a6, a3
464-
; SINK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
456+
; SINK-NEXT: li a4, 0
457+
; SINK-NEXT: addi a2, a1, -1
458+
; SINK-NEXT: andi a3, a2, 1024
459+
; SINK-NEXT: xori a2, a3, 1024
460+
; SINK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
465461
; SINK-NEXT: .LBB4_3: # %vector.body
466462
; SINK-NEXT: # =>This Inner Loop Header: Depth=1
463+
; SINK-NEXT: slli a5, a4, 2
464+
; SINK-NEXT: add a5, a0, a5
467465
; SINK-NEXT: vl1re32.v v8, (a5)
468466
; SINK-NEXT: vfadd.vf v8, v8, fa0
467+
; SINK-NEXT: add a4, a4, a1
469468
; SINK-NEXT: vs1r.v v8, (a5)
470-
; SINK-NEXT: sub a6, a6, a2
471-
; SINK-NEXT: add a5, a5, a1
472-
; SINK-NEXT: bnez a6, .LBB4_3
469+
; SINK-NEXT: bne a4, a2, .LBB4_3
473470
; SINK-NEXT: # %bb.4: # %middle.block
474-
; SINK-NEXT: beqz a4, .LBB4_7
471+
; SINK-NEXT: beqz a3, .LBB4_7
475472
; SINK-NEXT: .LBB4_5: # %for.body.preheader
476-
; SINK-NEXT: slli a1, a3, 2
473+
; SINK-NEXT: slli a1, a2, 2
477474
; SINK-NEXT: add a1, a0, a1
478475
; SINK-NEXT: lui a2, 1
479476
; SINK-NEXT: add a0, a0, a2
@@ -490,31 +487,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
490487
; DEFAULT-LABEL: sink_splat_fadd_scalable:
491488
; DEFAULT: # %bb.0: # %entry
492489
; DEFAULT-NEXT: csrr a1, vlenb
493-
; DEFAULT-NEXT: srli a2, a1, 2
494-
; DEFAULT-NEXT: li a3, 1024
495-
; DEFAULT-NEXT: bgeu a3, a2, .LBB4_2
490+
; DEFAULT-NEXT: srli a1, a1, 2
491+
; DEFAULT-NEXT: li a2, 1024
492+
; DEFAULT-NEXT: bgeu a2, a1, .LBB4_2
496493
; DEFAULT-NEXT: # %bb.1:
497-
; DEFAULT-NEXT: li a3, 0
494+
; DEFAULT-NEXT: li a2, 0
498495
; DEFAULT-NEXT: j .LBB4_5
499496
; DEFAULT-NEXT: .LBB4_2: # %vector.ph
500-
; DEFAULT-NEXT: addi a3, a2, -1
501-
; DEFAULT-NEXT: andi a4, a3, 1024
502-
; DEFAULT-NEXT: xori a3, a4, 1024
503-
; DEFAULT-NEXT: mv a5, a0
504-
; DEFAULT-NEXT: mv a6, a3
505-
; DEFAULT-NEXT: vsetvli a7, zero, e32, m1, ta, ma
497+
; DEFAULT-NEXT: li a4, 0
498+
; DEFAULT-NEXT: addi a2, a1, -1
499+
; DEFAULT-NEXT: andi a3, a2, 1024
500+
; DEFAULT-NEXT: xori a2, a3, 1024
501+
; DEFAULT-NEXT: vsetvli a5, zero, e32, m1, ta, ma
506502
; DEFAULT-NEXT: .LBB4_3: # %vector.body
507503
; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1
504+
; DEFAULT-NEXT: slli a5, a4, 2
505+
; DEFAULT-NEXT: add a5, a0, a5
508506
; DEFAULT-NEXT: vl1re32.v v8, (a5)
509507
; DEFAULT-NEXT: vfadd.vf v8, v8, fa0
508+
; DEFAULT-NEXT: add a4, a4, a1
510509
; DEFAULT-NEXT: vs1r.v v8, (a5)
511-
; DEFAULT-NEXT: sub a6, a6, a2
512-
; DEFAULT-NEXT: add a5, a5, a1
513-
; DEFAULT-NEXT: bnez a6, .LBB4_3
510+
; DEFAULT-NEXT: bne a4, a2, .LBB4_3
514511
; DEFAULT-NEXT: # %bb.4: # %middle.block
515-
; DEFAULT-NEXT: beqz a4, .LBB4_7
512+
; DEFAULT-NEXT: beqz a3, .LBB4_7
516513
; DEFAULT-NEXT: .LBB4_5: # %for.body.preheader
517-
; DEFAULT-NEXT: slli a1, a3, 2
514+
; DEFAULT-NEXT: slli a1, a2, 2
518515
; DEFAULT-NEXT: add a1, a0, a1
519516
; DEFAULT-NEXT: lui a2, 1
520517
; DEFAULT-NEXT: add a0, a0, a2

0 commit comments

Comments
 (0)