Skip to content

Commit 904473d

Browse files
committed
[RISCV] Enable TTI::shouldDropLSRSolutionIfLessProfitable by default
This avoids some cases where LoopStrengthReduce (LSR) produces results that lead to very poor codegen. There's a chance we'll see minor degradations for some inputs in cases where our cost metrics say the found solution is worse than the starting point even though, in reality, it is better.
1 parent 3ac6a64 commit 904473d

File tree

8 files changed

+372
-412
lines changed

8 files changed

+372
-412
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
397397
// Unconditionally returns true for RISC-V.
// NOTE(review): judging by the name, this opts the target into folding the
// loop's terminating condition after LoopStrengthReduce (LSR) has run --
// confirm against the TargetTransformInfo hook documentation.
bool shouldFoldTerminatingConditionAfterLSR() const {
398398
return true;
399399
}
400+
401+
// Enables TTI::shouldDropLSRSolutionIfLessProfitable for RISC-V by always
// returning true. Rationale: this avoids some cases where LoopStrengthReduce
// (LSR) produces results that lead to very poor codegen. The trade-off is a
// possible minor degradation for inputs where the cost metrics rate the found
// solution as worse even though it is actually better than the starting point.
// NOTE(review): presumably LSR then keeps the original loop form whenever its
// chosen solution is costed as less profitable -- confirm against the LSR
// implementation.
bool shouldDropLSRSolutionIfLessProfitable() const { return true; }
400402
};
401403

402404
} // end namespace llvm

llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll

Lines changed: 67 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -86,30 +86,29 @@ declare i64 @llvm.vscale.i64()
8686
define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
8787
; NO-SINK-LABEL: sink_splat_add_scalable:
8888
; NO-SINK: # %bb.0: # %entry
89-
; NO-SINK-NEXT: csrr a5, vlenb
90-
; NO-SINK-NEXT: srli a2, a5, 1
89+
; NO-SINK-NEXT: csrr a2, vlenb
90+
; NO-SINK-NEXT: srli a2, a2, 1
9191
; NO-SINK-NEXT: li a3, 1024
9292
; NO-SINK-NEXT: bgeu a3, a2, .LBB1_2
9393
; NO-SINK-NEXT: # %bb.1:
9494
; NO-SINK-NEXT: li a3, 0
9595
; NO-SINK-NEXT: j .LBB1_5
9696
; NO-SINK-NEXT: .LBB1_2: # %vector.ph
97+
; NO-SINK-NEXT: li a5, 0
9798
; NO-SINK-NEXT: addi a3, a2, -1
9899
; NO-SINK-NEXT: andi a4, a3, 1024
99100
; NO-SINK-NEXT: xori a3, a4, 1024
100101
; NO-SINK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
101102
; NO-SINK-NEXT: vmv.v.x v8, a1
102-
; NO-SINK-NEXT: slli a5, a5, 1
103-
; NO-SINK-NEXT: mv a6, a0
104-
; NO-SINK-NEXT: mv a7, a3
105103
; NO-SINK-NEXT: .LBB1_3: # %vector.body
106104
; NO-SINK-NEXT: # =>This Inner Loop Header: Depth=1
105+
; NO-SINK-NEXT: slli a6, a5, 2
106+
; NO-SINK-NEXT: add a6, a0, a6
107107
; NO-SINK-NEXT: vl2re32.v v10, (a6)
108108
; NO-SINK-NEXT: vadd.vv v10, v10, v8
109+
; NO-SINK-NEXT: add a5, a5, a2
109110
; NO-SINK-NEXT: vs2r.v v10, (a6)
110-
; NO-SINK-NEXT: sub a7, a7, a2
111-
; NO-SINK-NEXT: add a6, a6, a5
112-
; NO-SINK-NEXT: bnez a7, .LBB1_3
111+
; NO-SINK-NEXT: bne a5, a3, .LBB1_3
113112
; NO-SINK-NEXT: # %bb.4: # %middle.block
114113
; NO-SINK-NEXT: beqz a4, .LBB1_7
115114
; NO-SINK-NEXT: .LBB1_5: # %for.body.preheader
@@ -129,29 +128,28 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
129128
;
130129
; SINK-LABEL: sink_splat_add_scalable:
131130
; SINK: # %bb.0: # %entry
132-
; SINK-NEXT: csrr a5, vlenb
133-
; SINK-NEXT: srli a2, a5, 1
131+
; SINK-NEXT: csrr a2, vlenb
132+
; SINK-NEXT: srli a2, a2, 1
134133
; SINK-NEXT: li a3, 1024
135134
; SINK-NEXT: bgeu a3, a2, .LBB1_2
136135
; SINK-NEXT: # %bb.1:
137136
; SINK-NEXT: li a3, 0
138137
; SINK-NEXT: j .LBB1_5
139138
; SINK-NEXT: .LBB1_2: # %vector.ph
139+
; SINK-NEXT: li a5, 0
140140
; SINK-NEXT: addi a3, a2, -1
141141
; SINK-NEXT: andi a4, a3, 1024
142142
; SINK-NEXT: xori a3, a4, 1024
143-
; SINK-NEXT: slli a5, a5, 1
144-
; SINK-NEXT: mv a6, a0
145-
; SINK-NEXT: mv a7, a3
146-
; SINK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
143+
; SINK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
147144
; SINK-NEXT: .LBB1_3: # %vector.body
148145
; SINK-NEXT: # =>This Inner Loop Header: Depth=1
146+
; SINK-NEXT: slli a6, a5, 2
147+
; SINK-NEXT: add a6, a0, a6
149148
; SINK-NEXT: vl2re32.v v8, (a6)
150149
; SINK-NEXT: vadd.vx v8, v8, a1
150+
; SINK-NEXT: add a5, a5, a2
151151
; SINK-NEXT: vs2r.v v8, (a6)
152-
; SINK-NEXT: sub a7, a7, a2
153-
; SINK-NEXT: add a6, a6, a5
154-
; SINK-NEXT: bnez a7, .LBB1_3
152+
; SINK-NEXT: bne a5, a3, .LBB1_3
155153
; SINK-NEXT: # %bb.4: # %middle.block
156154
; SINK-NEXT: beqz a4, .LBB1_7
157155
; SINK-NEXT: .LBB1_5: # %for.body.preheader
@@ -171,29 +169,28 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
171169
;
172170
; DEFAULT-LABEL: sink_splat_add_scalable:
173171
; DEFAULT: # %bb.0: # %entry
174-
; DEFAULT-NEXT: csrr a5, vlenb
175-
; DEFAULT-NEXT: srli a2, a5, 1
172+
; DEFAULT-NEXT: csrr a2, vlenb
173+
; DEFAULT-NEXT: srli a2, a2, 1
176174
; DEFAULT-NEXT: li a3, 1024
177175
; DEFAULT-NEXT: bgeu a3, a2, .LBB1_2
178176
; DEFAULT-NEXT: # %bb.1:
179177
; DEFAULT-NEXT: li a3, 0
180178
; DEFAULT-NEXT: j .LBB1_5
181179
; DEFAULT-NEXT: .LBB1_2: # %vector.ph
180+
; DEFAULT-NEXT: li a5, 0
182181
; DEFAULT-NEXT: addi a3, a2, -1
183182
; DEFAULT-NEXT: andi a4, a3, 1024
184183
; DEFAULT-NEXT: xori a3, a4, 1024
185-
; DEFAULT-NEXT: slli a5, a5, 1
186-
; DEFAULT-NEXT: mv a6, a0
187-
; DEFAULT-NEXT: mv a7, a3
188-
; DEFAULT-NEXT: vsetvli t0, zero, e32, m2, ta, ma
184+
; DEFAULT-NEXT: vsetvli a6, zero, e32, m2, ta, ma
189185
; DEFAULT-NEXT: .LBB1_3: # %vector.body
190186
; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1
187+
; DEFAULT-NEXT: slli a6, a5, 2
188+
; DEFAULT-NEXT: add a6, a0, a6
191189
; DEFAULT-NEXT: vl2re32.v v8, (a6)
192190
; DEFAULT-NEXT: vadd.vx v8, v8, a1
191+
; DEFAULT-NEXT: add a5, a5, a2
193192
; DEFAULT-NEXT: vs2r.v v8, (a6)
194-
; DEFAULT-NEXT: sub a7, a7, a2
195-
; DEFAULT-NEXT: add a6, a6, a5
196-
; DEFAULT-NEXT: bnez a7, .LBB1_3
193+
; DEFAULT-NEXT: bne a5, a3, .LBB1_3
197194
; DEFAULT-NEXT: # %bb.4: # %middle.block
198195
; DEFAULT-NEXT: beqz a4, .LBB1_7
199196
; DEFAULT-NEXT: .LBB1_5: # %for.body.preheader
@@ -407,32 +404,32 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
407404
; NO-SINK-LABEL: sink_splat_fadd_scalable:
408405
; NO-SINK: # %bb.0: # %entry
409406
; NO-SINK-NEXT: csrr a1, vlenb
410-
; NO-SINK-NEXT: srli a2, a1, 2
411-
; NO-SINK-NEXT: li a3, 1024
412-
; NO-SINK-NEXT: bgeu a3, a2, .LBB4_2
407+
; NO-SINK-NEXT: srli a1, a1, 2
408+
; NO-SINK-NEXT: li a2, 1024
409+
; NO-SINK-NEXT: bgeu a2, a1, .LBB4_2
413410
; NO-SINK-NEXT: # %bb.1:
414-
; NO-SINK-NEXT: li a3, 0
411+
; NO-SINK-NEXT: li a2, 0
415412
; NO-SINK-NEXT: j .LBB4_5
416413
; NO-SINK-NEXT: .LBB4_2: # %vector.ph
417-
; NO-SINK-NEXT: addi a3, a2, -1
418-
; NO-SINK-NEXT: andi a4, a3, 1024
419-
; NO-SINK-NEXT: xori a3, a4, 1024
414+
; NO-SINK-NEXT: li a4, 0
415+
; NO-SINK-NEXT: addi a2, a1, -1
416+
; NO-SINK-NEXT: andi a3, a2, 1024
417+
; NO-SINK-NEXT: xori a2, a3, 1024
420418
; NO-SINK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
421419
; NO-SINK-NEXT: vfmv.v.f v8, fa0
422-
; NO-SINK-NEXT: mv a5, a0
423-
; NO-SINK-NEXT: mv a6, a3
424420
; NO-SINK-NEXT: .LBB4_3: # %vector.body
425421
; NO-SINK-NEXT: # =>This Inner Loop Header: Depth=1
422+
; NO-SINK-NEXT: slli a5, a4, 2
423+
; NO-SINK-NEXT: add a5, a0, a5
426424
; NO-SINK-NEXT: vl1re32.v v9, (a5)
427425
; NO-SINK-NEXT: vfadd.vv v9, v9, v8
426+
; NO-SINK-NEXT: add a4, a4, a1
428427
; NO-SINK-NEXT: vs1r.v v9, (a5)
429-
; NO-SINK-NEXT: sub a6, a6, a2
430-
; NO-SINK-NEXT: add a5, a5, a1
431-
; NO-SINK-NEXT: bnez a6, .LBB4_3
428+
; NO-SINK-NEXT: bne a4, a2, .LBB4_3
432429
; NO-SINK-NEXT: # %bb.4: # %middle.block
433-
; NO-SINK-NEXT: beqz a4, .LBB4_7
430+
; NO-SINK-NEXT: beqz a3, .LBB4_7
434431
; NO-SINK-NEXT: .LBB4_5: # %for.body.preheader
435-
; NO-SINK-NEXT: slli a1, a3, 2
432+
; NO-SINK-NEXT: slli a1, a2, 2
436433
; NO-SINK-NEXT: add a1, a0, a1
437434
; NO-SINK-NEXT: lui a2, 1
438435
; NO-SINK-NEXT: add a0, a0, a2
@@ -449,31 +446,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
449446
; SINK-LABEL: sink_splat_fadd_scalable:
450447
; SINK: # %bb.0: # %entry
451448
; SINK-NEXT: csrr a1, vlenb
452-
; SINK-NEXT: srli a2, a1, 2
453-
; SINK-NEXT: li a3, 1024
454-
; SINK-NEXT: bgeu a3, a2, .LBB4_2
449+
; SINK-NEXT: srli a1, a1, 2
450+
; SINK-NEXT: li a2, 1024
451+
; SINK-NEXT: bgeu a2, a1, .LBB4_2
455452
; SINK-NEXT: # %bb.1:
456-
; SINK-NEXT: li a3, 0
453+
; SINK-NEXT: li a2, 0
457454
; SINK-NEXT: j .LBB4_5
458455
; SINK-NEXT: .LBB4_2: # %vector.ph
459-
; SINK-NEXT: addi a3, a2, -1
460-
; SINK-NEXT: andi a4, a3, 1024
461-
; SINK-NEXT: xori a3, a4, 1024
462-
; SINK-NEXT: mv a5, a0
463-
; SINK-NEXT: mv a6, a3
464-
; SINK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
456+
; SINK-NEXT: li a4, 0
457+
; SINK-NEXT: addi a2, a1, -1
458+
; SINK-NEXT: andi a3, a2, 1024
459+
; SINK-NEXT: xori a2, a3, 1024
460+
; SINK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
465461
; SINK-NEXT: .LBB4_3: # %vector.body
466462
; SINK-NEXT: # =>This Inner Loop Header: Depth=1
463+
; SINK-NEXT: slli a5, a4, 2
464+
; SINK-NEXT: add a5, a0, a5
467465
; SINK-NEXT: vl1re32.v v8, (a5)
468466
; SINK-NEXT: vfadd.vf v8, v8, fa0
467+
; SINK-NEXT: add a4, a4, a1
469468
; SINK-NEXT: vs1r.v v8, (a5)
470-
; SINK-NEXT: sub a6, a6, a2
471-
; SINK-NEXT: add a5, a5, a1
472-
; SINK-NEXT: bnez a6, .LBB4_3
469+
; SINK-NEXT: bne a4, a2, .LBB4_3
473470
; SINK-NEXT: # %bb.4: # %middle.block
474-
; SINK-NEXT: beqz a4, .LBB4_7
471+
; SINK-NEXT: beqz a3, .LBB4_7
475472
; SINK-NEXT: .LBB4_5: # %for.body.preheader
476-
; SINK-NEXT: slli a1, a3, 2
473+
; SINK-NEXT: slli a1, a2, 2
477474
; SINK-NEXT: add a1, a0, a1
478475
; SINK-NEXT: lui a2, 1
479476
; SINK-NEXT: add a0, a0, a2
@@ -490,31 +487,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
490487
; DEFAULT-LABEL: sink_splat_fadd_scalable:
491488
; DEFAULT: # %bb.0: # %entry
492489
; DEFAULT-NEXT: csrr a1, vlenb
493-
; DEFAULT-NEXT: srli a2, a1, 2
494-
; DEFAULT-NEXT: li a3, 1024
495-
; DEFAULT-NEXT: bgeu a3, a2, .LBB4_2
490+
; DEFAULT-NEXT: srli a1, a1, 2
491+
; DEFAULT-NEXT: li a2, 1024
492+
; DEFAULT-NEXT: bgeu a2, a1, .LBB4_2
496493
; DEFAULT-NEXT: # %bb.1:
497-
; DEFAULT-NEXT: li a3, 0
494+
; DEFAULT-NEXT: li a2, 0
498495
; DEFAULT-NEXT: j .LBB4_5
499496
; DEFAULT-NEXT: .LBB4_2: # %vector.ph
500-
; DEFAULT-NEXT: addi a3, a2, -1
501-
; DEFAULT-NEXT: andi a4, a3, 1024
502-
; DEFAULT-NEXT: xori a3, a4, 1024
503-
; DEFAULT-NEXT: mv a5, a0
504-
; DEFAULT-NEXT: mv a6, a3
505-
; DEFAULT-NEXT: vsetvli a7, zero, e32, m1, ta, ma
497+
; DEFAULT-NEXT: li a4, 0
498+
; DEFAULT-NEXT: addi a2, a1, -1
499+
; DEFAULT-NEXT: andi a3, a2, 1024
500+
; DEFAULT-NEXT: xori a2, a3, 1024
501+
; DEFAULT-NEXT: vsetvli a5, zero, e32, m1, ta, ma
506502
; DEFAULT-NEXT: .LBB4_3: # %vector.body
507503
; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1
504+
; DEFAULT-NEXT: slli a5, a4, 2
505+
; DEFAULT-NEXT: add a5, a0, a5
508506
; DEFAULT-NEXT: vl1re32.v v8, (a5)
509507
; DEFAULT-NEXT: vfadd.vf v8, v8, fa0
508+
; DEFAULT-NEXT: add a4, a4, a1
510509
; DEFAULT-NEXT: vs1r.v v8, (a5)
511-
; DEFAULT-NEXT: sub a6, a6, a2
512-
; DEFAULT-NEXT: add a5, a5, a1
513-
; DEFAULT-NEXT: bnez a6, .LBB4_3
510+
; DEFAULT-NEXT: bne a4, a2, .LBB4_3
514511
; DEFAULT-NEXT: # %bb.4: # %middle.block
515-
; DEFAULT-NEXT: beqz a4, .LBB4_7
512+
; DEFAULT-NEXT: beqz a3, .LBB4_7
516513
; DEFAULT-NEXT: .LBB4_5: # %for.body.preheader
517-
; DEFAULT-NEXT: slli a1, a3, 2
514+
; DEFAULT-NEXT: slli a1, a2, 2
518515
; DEFAULT-NEXT: add a1, a0, a1
519516
; DEFAULT-NEXT: lui a2, 1
520517
; DEFAULT-NEXT: add a0, a0, a2

0 commit comments

Comments
 (0)