Skip to content

Commit 7afa44f

Browse files
mohammed-nurulhoque and kito-cheng
authored and committed
[RISCV] Add more sign-extending ops to MIR sext.w pass.
This patch adds single-bit and bit-counting ops to the list of sign-extending ops. A single-bit write preserves sign-extendedness as long as the written bit is not one of the sign bits (bit 31 or above). Bit extraction and bit counting always output a small non-negative number, so their results are sign-extended. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D121152
1 parent 53491e4 commit 7afa44f

File tree

2 files changed

+118
-15
lines changed

2 files changed

+118
-15
lines changed

llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ static bool isSignExtendingOpW(const MachineInstr &MI) {
9191
case RISCV::FCVT_WU_D:
9292
case RISCV::FMV_X_W:
9393
// The following aren't W instructions, but are either sign extended from a
94-
// smaller size or put zeros in bits 63:31.
94+
// smaller size, always output a small integer, or put zeros in bits 63:31.
9595
case RISCV::LBU:
9696
case RISCV::LHU:
9797
case RISCV::LB:
@@ -104,6 +104,11 @@ static bool isSignExtendingOpW(const MachineInstr &MI) {
104104
case RISCV::SEXT_H:
105105
case RISCV::ZEXT_H_RV64:
106106
case RISCV::FMV_X_H:
107+
case RISCV::BEXT:
108+
case RISCV::BEXTI:
109+
case RISCV::CLZ:
110+
case RISCV::CPOP:
111+
case RISCV::CTZ:
107112
return true;
108113
// shifting right sufficiently makes the value 32-bit sign-extended
109114
case RISCV::SRAI:
@@ -167,14 +172,21 @@ static bool isSignExtendedW(const MachineInstr &OrigMI,
167172
Worklist.push_back(SrcMI);
168173
break;
169174
}
175+
176+
// For these, provided the modified bit index is below 31, we just need to check if the 1st operand is sign extended.
177+
case RISCV::BCLRI:
178+
case RISCV::BINVI:
179+
case RISCV::BSETI:
180+
if (MI->getOperand(2).getImm() >= 31)
181+
return false;
182+
LLVM_FALLTHROUGH;
170183
case RISCV::REM:
171184
case RISCV::ANDI:
172185
case RISCV::ORI:
173186
case RISCV::XORI: {
174187
// |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
175188
// DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
176-
// Logical operations use a sign extended 12-bit immediate. We just need
177-
// to check if the other operand is sign extended.
189+
// Logical operations use a sign extended 12-bit immediate.
178190
Register SrcReg = MI->getOperand(1).getReg();
179191
if (!SrcReg.isVirtual())
180192
return false;

llvm/test/CodeGen/RISCV/sextw-removal.ll

Lines changed: 103 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -315,17 +315,108 @@ bb7: ; preds = %bb2
315315
}
316316
declare float @baz(i32 signext %i3)
317317

318+
define void @test7(i32 signext %arg, i32 signext %arg1) nounwind {
319+
; RV64I-LABEL: test7:
320+
; RV64I: # %bb.0: # %bb
321+
; RV64I-NEXT: addi sp, sp, -48
322+
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
323+
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
324+
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
325+
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
326+
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
327+
; RV64I-NEXT: lui a2, %hi(.LCPI6_0)
328+
; RV64I-NEXT: ld s0, %lo(.LCPI6_0)(a2)
329+
; RV64I-NEXT: lui a2, %hi(.LCPI6_1)
330+
; RV64I-NEXT: ld s1, %lo(.LCPI6_1)(a2)
331+
; RV64I-NEXT: lui a2, %hi(.LCPI6_2)
332+
; RV64I-NEXT: ld s2, %lo(.LCPI6_2)(a2)
333+
; RV64I-NEXT: lui a2, %hi(.LCPI6_3)
334+
; RV64I-NEXT: ld s3, %lo(.LCPI6_3)(a2)
335+
; RV64I-NEXT: sraw a0, a0, a1
336+
; RV64I-NEXT: .LBB6_1: # %bb2
337+
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
338+
; RV64I-NEXT: call foo@plt
339+
; RV64I-NEXT: srli a1, a0, 1
340+
; RV64I-NEXT: and a1, a1, s0
341+
; RV64I-NEXT: sub a0, a0, a1
342+
; RV64I-NEXT: and a1, a0, s1
343+
; RV64I-NEXT: srli a0, a0, 2
344+
; RV64I-NEXT: and a0, a0, s1
345+
; RV64I-NEXT: add a0, a1, a0
346+
; RV64I-NEXT: srli a1, a0, 4
347+
; RV64I-NEXT: add a0, a0, a1
348+
; RV64I-NEXT: and a0, a0, s2
349+
; RV64I-NEXT: mul a0, a0, s3
350+
; RV64I-NEXT: srli a0, a0, 56
351+
; RV64I-NEXT: bnez a0, .LBB6_1
352+
; RV64I-NEXT: # %bb.2: # %bb7
353+
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
354+
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
355+
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
356+
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
357+
; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
358+
; RV64I-NEXT: addi sp, sp, 48
359+
; RV64I-NEXT: ret
360+
;
361+
; RV64ZBB-LABEL: test7:
362+
; RV64ZBB: # %bb.0: # %bb
363+
; RV64ZBB-NEXT: addi sp, sp, -16
364+
; RV64ZBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
365+
; RV64ZBB-NEXT: sraw a0, a0, a1
366+
; RV64ZBB-NEXT: .LBB6_1: # %bb2
367+
; RV64ZBB-NEXT: # =>This Inner Loop Header: Depth=1
368+
; RV64ZBB-NEXT: call foo@plt
369+
; RV64ZBB-NEXT: cpop a0, a0
370+
; RV64ZBB-NEXT: bnez a0, .LBB6_1
371+
; RV64ZBB-NEXT: # %bb.2: # %bb7
372+
; RV64ZBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
373+
; RV64ZBB-NEXT: addi sp, sp, 16
374+
; RV64ZBB-NEXT: ret
375+
;
376+
; NOREMOVAL-LABEL: test7:
377+
; NOREMOVAL: # %bb.0: # %bb
378+
; NOREMOVAL-NEXT: addi sp, sp, -16
379+
; NOREMOVAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
380+
; NOREMOVAL-NEXT: sraw a0, a0, a1
381+
; NOREMOVAL-NEXT: .LBB6_1: # %bb2
382+
; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
383+
; NOREMOVAL-NEXT: sext.w a0, a0
384+
; NOREMOVAL-NEXT: call foo@plt
385+
; NOREMOVAL-NEXT: cpop a0, a0
386+
; NOREMOVAL-NEXT: bnez a0, .LBB6_1
387+
; NOREMOVAL-NEXT: # %bb.2: # %bb7
388+
; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
389+
; NOREMOVAL-NEXT: addi sp, sp, 16
390+
; NOREMOVAL-NEXT: ret
391+
bb:
392+
%i = ashr i32 %arg, %arg1
393+
br label %bb2
394+
395+
bb2: ; preds = %bb2, %bb
396+
%i3 = phi i32 [ %i, %bb ], [ %i6, %bb2 ]
397+
%i4 = tail call signext i64 @foo(i32 signext %i3)
398+
%i5 = tail call i64 @llvm.ctpop.i64(i64 %i4)
399+
%i6 = trunc i64 %i5 to i32
400+
%i7 = icmp eq i32 %i6, 0
401+
br i1 %i7, label %bb7, label %bb2
402+
403+
bb7: ; preds = %bb2
404+
ret void
405+
}
406+
407+
declare i64 @llvm.ctpop.i64(i64)
408+
318409
define void @test8(i32 signext %arg, i32 signext %arg1) nounwind {
319410
; CHECK-LABEL: test8:
320411
; CHECK: # %bb.0: # %bb
321412
; CHECK-NEXT: addi sp, sp, -16
322413
; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
323414
; CHECK-NEXT: sraw a0, a0, a1
324-
; CHECK-NEXT: .LBB6_1: # %bb2
415+
; CHECK-NEXT: .LBB7_1: # %bb2
325416
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
326417
; CHECK-NEXT: call foo@plt
327418
; CHECK-NEXT: ori a0, a0, -256
328-
; CHECK-NEXT: bnez a0, .LBB6_1
419+
; CHECK-NEXT: bnez a0, .LBB7_1
329420
; CHECK-NEXT: # %bb.2: # %bb7
330421
; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
331422
; CHECK-NEXT: addi sp, sp, 16
@@ -336,12 +427,12 @@ define void @test8(i32 signext %arg, i32 signext %arg1) nounwind {
336427
; NOREMOVAL-NEXT: addi sp, sp, -16
337428
; NOREMOVAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
338429
; NOREMOVAL-NEXT: sraw a0, a0, a1
339-
; NOREMOVAL-NEXT: .LBB6_1: # %bb2
430+
; NOREMOVAL-NEXT: .LBB7_1: # %bb2
340431
; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
341432
; NOREMOVAL-NEXT: sext.w a0, a0
342433
; NOREMOVAL-NEXT: call foo@plt
343434
; NOREMOVAL-NEXT: ori a0, a0, -256
344-
; NOREMOVAL-NEXT: bnez a0, .LBB6_1
435+
; NOREMOVAL-NEXT: bnez a0, .LBB7_1
345436
; NOREMOVAL-NEXT: # %bb.2: # %bb7
346437
; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
347438
; NOREMOVAL-NEXT: addi sp, sp, 16
@@ -372,12 +463,12 @@ define void @test9(i32 signext %arg, i32 signext %arg1) nounwind {
372463
; CHECK-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
373464
; CHECK-NEXT: sraw a0, a0, a1
374465
; CHECK-NEXT: li s0, 254
375-
; CHECK-NEXT: .LBB7_1: # %bb2
466+
; CHECK-NEXT: .LBB8_1: # %bb2
376467
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
377468
; CHECK-NEXT: call bar@plt
378469
; CHECK-NEXT: mv a1, a0
379470
; CHECK-NEXT: slti a0, a0, 255
380-
; CHECK-NEXT: blt s0, a1, .LBB7_1
471+
; CHECK-NEXT: blt s0, a1, .LBB8_1
381472
; CHECK-NEXT: # %bb.2: # %bb7
382473
; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
383474
; CHECK-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
@@ -391,12 +482,12 @@ define void @test9(i32 signext %arg, i32 signext %arg1) nounwind {
391482
; NOREMOVAL-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
392483
; NOREMOVAL-NEXT: sraw a1, a0, a1
393484
; NOREMOVAL-NEXT: li s0, 254
394-
; NOREMOVAL-NEXT: .LBB7_1: # %bb2
485+
; NOREMOVAL-NEXT: .LBB8_1: # %bb2
395486
; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
396487
; NOREMOVAL-NEXT: sext.w a0, a1
397488
; NOREMOVAL-NEXT: call bar@plt
398489
; NOREMOVAL-NEXT: slti a1, a0, 255
399-
; NOREMOVAL-NEXT: blt s0, a0, .LBB7_1
490+
; NOREMOVAL-NEXT: blt s0, a0, .LBB8_1
400491
; NOREMOVAL-NEXT: # %bb.2: # %bb7
401492
; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
402493
; NOREMOVAL-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
@@ -426,12 +517,12 @@ define void @test10(i32 signext %arg, i32 signext %arg1) nounwind {
426517
; CHECK-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
427518
; CHECK-NEXT: sraw a0, a0, a1
428519
; CHECK-NEXT: fmv.w.x fs0, zero
429-
; CHECK-NEXT: .LBB8_1: # %bb2
520+
; CHECK-NEXT: .LBB9_1: # %bb2
430521
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
431522
; CHECK-NEXT: call baz@plt
432523
; CHECK-NEXT: feq.s a1, fa0, fs0
433524
; CHECK-NEXT: fmv.x.w a0, fa0
434-
; CHECK-NEXT: beqz a1, .LBB8_1
525+
; CHECK-NEXT: beqz a1, .LBB9_1
435526
; CHECK-NEXT: # %bb.2: # %bb7
436527
; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
437528
; CHECK-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
@@ -445,13 +536,13 @@ define void @test10(i32 signext %arg, i32 signext %arg1) nounwind {
445536
; NOREMOVAL-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
446537
; NOREMOVAL-NEXT: sraw a0, a0, a1
447538
; NOREMOVAL-NEXT: fmv.w.x fs0, zero
448-
; NOREMOVAL-NEXT: .LBB8_1: # %bb2
539+
; NOREMOVAL-NEXT: .LBB9_1: # %bb2
449540
; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
450541
; NOREMOVAL-NEXT: sext.w a0, a0
451542
; NOREMOVAL-NEXT: call baz@plt
452543
; NOREMOVAL-NEXT: feq.s a1, fa0, fs0
453544
; NOREMOVAL-NEXT: fmv.x.w a0, fa0
454-
; NOREMOVAL-NEXT: beqz a1, .LBB8_1
545+
; NOREMOVAL-NEXT: beqz a1, .LBB9_1
455546
; NOREMOVAL-NEXT: # %bb.2: # %bb7
456547
; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
457548
; NOREMOVAL-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload

0 commit comments

Comments
 (0)