Skip to content

Commit 6657d4b

Browse files
authored
[TTI][RISCV] Unconditionally break critical edges to sink ADDI (#108889)
This looks like a rather weird change, so let me explain why this isn't as unreasonable as it looks. Let's start with the problem it's solving. ``` define signext i32 @overlap_live_ranges(ptr %arg, i32 signext %arg1) { bb: %i = icmp eq i32 %arg1, 1 br i1 %i, label %bb2, label %bb5 bb2: ; preds = %bb %i3 = getelementptr inbounds nuw i8, ptr %arg, i64 4 %i4 = load i32, ptr %i3, align 4 br label %bb5 bb5: ; preds = %bb2, %bb %i6 = phi i32 [ %i4, %bb2 ], [ 13, %bb ] ret i32 %i6 } ``` Right now, we codegen this as: ``` li a3, 1 li a2, 13 bne a1, a3, .LBB0_2 lw a2, 4(a0) .LBB0_2: mv a0, a2 ret ``` In this example, we have two values which must be assigned to a0 per the ABI (%arg, and the return value). SelectionDAG ensures that all values used in a successor phi are defined before exiting the predecessor block. This creates an ADDI to materialize the immediate in the entry block. Currently, this ADDI is not sunk into the tail block because we'd have to split a critical edge to do so. Note that if our immediate was anything large enough to require two instructions we *would* split this critical edge. Looking at other targets, we notice that they don't seem to have this problem. They perform the sinking and tail duplication that we don't. Why? Well, it turns out for AArch64 that this is entirely an accident of the existence of the gpr32all register class. The immediate is materialized into the gpr32 class, and then copied into the gpr32all register class. The existence of that copy puts us right back into the two instruction case noted above. This change essentially just bypasses this emergent aspect of the AArch64 behavior, and implements the same "always sink immediates" behavior for RISCV as well.
1 parent 6633916 commit 6657d4b

File tree

14 files changed

+470
-428
lines changed

14 files changed

+470
-428
lines changed

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,12 @@ class TargetInstrInfo : public MCInstrInfo {
159159
return true;
160160
}
161161

162+
/// For a "cheap" instruction which doesn't enable additional sinking,
163+
/// should MachineSink break a critical edge to sink it anyways?
164+
virtual bool shouldBreakCriticalEdgeToSink(MachineInstr &MI) const {
165+
return false;
166+
}
167+
162168
protected:
163169
/// For instructions with opcodes for which the M_REMATERIALIZABLE flag is
164170
/// set, this hook lets the target specify whether the instruction is actually

llvm/lib/CodeGen/MachineSink.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -958,7 +958,9 @@ bool MachineSinking::isWorthBreakingCriticalEdge(
958958
}
959959
}
960960

961-
return false;
961+
// Let the target decide if it's worth breaking this
962+
// critical edge for a "cheap" instruction.
963+
return TII->shouldBreakCriticalEdgeToSink(MI);
962964
}
963965

964966
bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI,

llvm/lib/Target/RISCV/RISCVInstrInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,11 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
7878

7979
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
8080

81+
bool shouldBreakCriticalEdgeToSink(MachineInstr &MI) const override {
82+
return MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&
83+
MI.getOperand(1).getReg() == RISCV::X0;
84+
}
85+
8186
void copyPhysRegVector(MachineBasicBlock &MBB,
8287
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
8388
MCRegister DstReg, MCRegister SrcReg, bool KillSrc,

llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,23 +43,21 @@ define i32 @fcvt_wu_d(double %a) nounwind {
4343
define i32 @fcvt_wu_d_multiple_use(double %x, ptr %y) nounwind {
4444
; RV32IFD-LABEL: fcvt_wu_d_multiple_use:
4545
; RV32IFD: # %bb.0:
46-
; RV32IFD-NEXT: fcvt.wu.d a1, fa0, rtz
47-
; RV32IFD-NEXT: li a0, 1
48-
; RV32IFD-NEXT: beqz a1, .LBB4_2
46+
; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
47+
; RV32IFD-NEXT: bnez a0, .LBB4_2
4948
; RV32IFD-NEXT: # %bb.1:
50-
; RV32IFD-NEXT: mv a0, a1
49+
; RV32IFD-NEXT: li a0, 1
5150
; RV32IFD-NEXT: .LBB4_2:
5251
; RV32IFD-NEXT: ret
5352
;
5453
; RV64IFD-LABEL: fcvt_wu_d_multiple_use:
5554
; RV64IFD: # %bb.0:
56-
; RV64IFD-NEXT: fcvt.wu.d a1, fa0, rtz
57-
; RV64IFD-NEXT: slli a0, a1, 32
58-
; RV64IFD-NEXT: srli a2, a0, 32
59-
; RV64IFD-NEXT: li a0, 1
60-
; RV64IFD-NEXT: beqz a2, .LBB4_2
55+
; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz
56+
; RV64IFD-NEXT: slli a1, a0, 32
57+
; RV64IFD-NEXT: srli a1, a1, 32
58+
; RV64IFD-NEXT: bnez a1, .LBB4_2
6159
; RV64IFD-NEXT: # %bb.1:
62-
; RV64IFD-NEXT: mv a0, a1
60+
; RV64IFD-NEXT: li a0, 1
6361
; RV64IFD-NEXT: .LBB4_2:
6462
; RV64IFD-NEXT: ret
6563
%a = fptoui double %x to i32

llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,23 +27,21 @@ define i32 @fcvt_wu_s(float %a) nounwind {
2727
define i32 @fcvt_wu_s_multiple_use(float %x, ptr %y) nounwind {
2828
; RV32IF-LABEL: fcvt_wu_s_multiple_use:
2929
; RV32IF: # %bb.0:
30-
; RV32IF-NEXT: fcvt.wu.s a1, fa0, rtz
31-
; RV32IF-NEXT: li a0, 1
32-
; RV32IF-NEXT: beqz a1, .LBB2_2
30+
; RV32IF-NEXT: fcvt.wu.s a0, fa0, rtz
31+
; RV32IF-NEXT: bnez a0, .LBB2_2
3332
; RV32IF-NEXT: # %bb.1:
34-
; RV32IF-NEXT: mv a0, a1
33+
; RV32IF-NEXT: li a0, 1
3534
; RV32IF-NEXT: .LBB2_2:
3635
; RV32IF-NEXT: ret
3736
;
3837
; RV64IF-LABEL: fcvt_wu_s_multiple_use:
3938
; RV64IF: # %bb.0:
40-
; RV64IF-NEXT: fcvt.wu.s a1, fa0, rtz
41-
; RV64IF-NEXT: slli a0, a1, 32
42-
; RV64IF-NEXT: srli a2, a0, 32
43-
; RV64IF-NEXT: li a0, 1
44-
; RV64IF-NEXT: beqz a2, .LBB2_2
39+
; RV64IF-NEXT: fcvt.wu.s a0, fa0, rtz
40+
; RV64IF-NEXT: slli a1, a0, 32
41+
; RV64IF-NEXT: srli a1, a1, 32
42+
; RV64IF-NEXT: bnez a1, .LBB2_2
4543
; RV64IF-NEXT: # %bb.1:
46-
; RV64IF-NEXT: mv a0, a1
44+
; RV64IF-NEXT: li a0, 1
4745
; RV64IF-NEXT: .LBB2_2:
4846
; RV64IF-NEXT: ret
4947
%a = fptoui float %x to i32

llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll

Lines changed: 37 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -196,11 +196,9 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
196196
define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
197197
; RV64I-LABEL: findLastSet_i32:
198198
; RV64I: # %bb.0:
199-
; RV64I-NEXT: addi sp, sp, -32
200-
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
201-
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
202-
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
203-
; RV64I-NEXT: li s0, -1
199+
; RV64I-NEXT: addi sp, sp, -16
200+
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
201+
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
204202
; RV64I-NEXT: slli a1, a0, 32
205203
; RV64I-NEXT: srliw a2, a0, 1
206204
; RV64I-NEXT: lui a3, 349525
@@ -227,36 +225,37 @@ define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
227225
; RV64I-NEXT: srli a2, a0, 4
228226
; RV64I-NEXT: add a0, a2, a0
229227
; RV64I-NEXT: lui a2, 4112
230-
; RV64I-NEXT: srli s1, a1, 32
228+
; RV64I-NEXT: srli s0, a1, 32
231229
; RV64I-NEXT: addiw a1, a3, -241
232230
; RV64I-NEXT: and a0, a0, a1
233231
; RV64I-NEXT: addiw a1, a2, 257
234232
; RV64I-NEXT: call __muldi3
235-
; RV64I-NEXT: beqz s1, .LBB3_2
233+
; RV64I-NEXT: beqz s0, .LBB3_2
236234
; RV64I-NEXT: # %bb.1:
237235
; RV64I-NEXT: srliw a0, a0, 24
238236
; RV64I-NEXT: li a1, 32
239237
; RV64I-NEXT: subw a1, a1, a0
240-
; RV64I-NEXT: xori s0, a1, 31
238+
; RV64I-NEXT: xori a0, a1, 31
239+
; RV64I-NEXT: j .LBB3_3
241240
; RV64I-NEXT: .LBB3_2:
242-
; RV64I-NEXT: mv a0, s0
243-
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
244-
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
245-
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
246-
; RV64I-NEXT: addi sp, sp, 32
241+
; RV64I-NEXT: li a0, -1
242+
; RV64I-NEXT: .LBB3_3:
243+
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
244+
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
245+
; RV64I-NEXT: addi sp, sp, 16
247246
; RV64I-NEXT: ret
248247
;
249248
; RV64ZBB-LABEL: findLastSet_i32:
250249
; RV64ZBB: # %bb.0:
251250
; RV64ZBB-NEXT: slli a1, a0, 32
252-
; RV64ZBB-NEXT: srli a2, a1, 32
253-
; RV64ZBB-NEXT: li a1, -1
254-
; RV64ZBB-NEXT: beqz a2, .LBB3_2
251+
; RV64ZBB-NEXT: srli a1, a1, 32
252+
; RV64ZBB-NEXT: beqz a1, .LBB3_2
255253
; RV64ZBB-NEXT: # %bb.1:
256254
; RV64ZBB-NEXT: clzw a0, a0
257-
; RV64ZBB-NEXT: xori a1, a0, 31
255+
; RV64ZBB-NEXT: xori a0, a0, 31
256+
; RV64ZBB-NEXT: ret
258257
; RV64ZBB-NEXT: .LBB3_2:
259-
; RV64ZBB-NEXT: mv a0, a1
258+
; RV64ZBB-NEXT: li a0, -1
260259
; RV64ZBB-NEXT: ret
261260
%1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
262261
%2 = xor i32 31, %1
@@ -493,14 +492,12 @@ define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
493492
define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
494493
; RV64I-LABEL: findFirstSet_i32:
495494
; RV64I: # %bb.0:
496-
; RV64I-NEXT: addi sp, sp, -32
497-
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
498-
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
499-
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
500-
; RV64I-NEXT: mv s1, a0
501-
; RV64I-NEXT: li s0, -1
495+
; RV64I-NEXT: addi sp, sp, -16
496+
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
497+
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
498+
; RV64I-NEXT: mv s0, a0
502499
; RV64I-NEXT: not a0, a0
503-
; RV64I-NEXT: addi a1, s1, -1
500+
; RV64I-NEXT: addi a1, s0, -1
504501
; RV64I-NEXT: lui a2, 349525
505502
; RV64I-NEXT: and a0, a0, a1
506503
; RV64I-NEXT: addiw a1, a2, 1365
@@ -521,29 +518,30 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
521518
; RV64I-NEXT: and a0, a0, a2
522519
; RV64I-NEXT: addiw a1, a1, 257
523520
; RV64I-NEXT: call __muldi3
524-
; RV64I-NEXT: slli s1, s1, 32
525-
; RV64I-NEXT: srli s1, s1, 32
526-
; RV64I-NEXT: beqz s1, .LBB8_2
521+
; RV64I-NEXT: slli s0, s0, 32
522+
; RV64I-NEXT: srli s0, s0, 32
523+
; RV64I-NEXT: beqz s0, .LBB8_2
527524
; RV64I-NEXT: # %bb.1:
528-
; RV64I-NEXT: srliw s0, a0, 24
525+
; RV64I-NEXT: srliw a0, a0, 24
526+
; RV64I-NEXT: j .LBB8_3
529527
; RV64I-NEXT: .LBB8_2:
530-
; RV64I-NEXT: mv a0, s0
531-
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
532-
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
533-
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
534-
; RV64I-NEXT: addi sp, sp, 32
528+
; RV64I-NEXT: li a0, -1
529+
; RV64I-NEXT: .LBB8_3:
530+
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
531+
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
532+
; RV64I-NEXT: addi sp, sp, 16
535533
; RV64I-NEXT: ret
536534
;
537535
; RV64ZBB-LABEL: findFirstSet_i32:
538536
; RV64ZBB: # %bb.0:
539537
; RV64ZBB-NEXT: slli a1, a0, 32
540-
; RV64ZBB-NEXT: srli a2, a1, 32
541-
; RV64ZBB-NEXT: li a1, -1
542-
; RV64ZBB-NEXT: beqz a2, .LBB8_2
538+
; RV64ZBB-NEXT: srli a1, a1, 32
539+
; RV64ZBB-NEXT: beqz a1, .LBB8_2
543540
; RV64ZBB-NEXT: # %bb.1:
544-
; RV64ZBB-NEXT: ctzw a1, a0
541+
; RV64ZBB-NEXT: ctzw a0, a0
542+
; RV64ZBB-NEXT: ret
545543
; RV64ZBB-NEXT: .LBB8_2:
546-
; RV64ZBB-NEXT: mv a0, a1
544+
; RV64ZBB-NEXT: li a0, -1
547545
; RV64ZBB-NEXT: ret
548546
%1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
549547
%2 = icmp eq i32 %a, 0

llvm/test/CodeGen/RISCV/aext-to-sext.ll

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,14 @@ bar:
7878
define i64 @sext_phi_constants(i32 signext %c) {
7979
; RV64I-LABEL: sext_phi_constants:
8080
; RV64I: # %bb.0:
81-
; RV64I-NEXT: li a1, -1
82-
; RV64I-NEXT: bnez a0, .LBB2_2
83-
; RV64I-NEXT: # %bb.1: # %iffalse
84-
; RV64I-NEXT: li a1, -2
85-
; RV64I-NEXT: .LBB2_2: # %merge
86-
; RV64I-NEXT: slli a0, a1, 32
81+
; RV64I-NEXT: beqz a0, .LBB2_2
82+
; RV64I-NEXT: # %bb.1:
83+
; RV64I-NEXT: li a0, -1
84+
; RV64I-NEXT: j .LBB2_3
85+
; RV64I-NEXT: .LBB2_2: # %iffalse
86+
; RV64I-NEXT: li a0, -2
87+
; RV64I-NEXT: .LBB2_3: # %merge
88+
; RV64I-NEXT: slli a0, a0, 32
8789
; RV64I-NEXT: srli a0, a0, 32
8890
; RV64I-NEXT: ret
8991
%a = icmp ne i32 %c, 0

0 commit comments

Comments
 (0)