Skip to content

Commit dde2a7f

Browse files
committed
[RISCV] Exploit fact that vscale is always power of two to replace urem sequence
When doing scalable vectorization, the loop vectorizer uses a urem in the computation of the vector trip count. The RHS of that urem is a (possibly shifted) call to @llvm.vscale. vscale is effectively the number of "blocks" in the vector register. (That is, types such as <vscale x 8 x i8> and <vscale x 1 x i8> both fill one 64 bit block, and vscale is essentially how many of those blocks there are in a single vector register at runtime.) We know from the RISC-V V extension specification that VLEN must be a power of two between ELEN and 2^16. Since our block size is 64 bits, there must be a power-of-two number of blocks. (This holds for everything other than VLEN<=32, but that case is already broken.) It is worth noting that the AArch64 SVE specification explicitly allows non-power-of-two sizes for the vector registers and thus can't claim that vscale is a power of two by this logic. Differential Revision: https://reviews.llvm.org/D129609
1 parent db73a52 commit dde2a7f

File tree

6 files changed

+102
-56
lines changed

6 files changed

+102
-56
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,9 @@ class TargetLoweringBase {
546546
return BypassSlowDivWidths;
547547
}
548548

549+
/// Return true only if vscale must be a power of two.
550+
virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
551+
549552
/// Return true if Flow Control is an expensive operation that should be
550553
/// avoided.
551554
bool isJumpExpensive() const { return JumpIsExpensive; }

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3869,6 +3869,12 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
38693869
if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2())
38703870
return true;
38713871

3872+
// vscale(power-of-two) is a power-of-two for some targets
3873+
if (Val.getOpcode() == ISD::VSCALE &&
3874+
getTargetLoweringInfo().isVScaleKnownToBeAPowerOfTwo() &&
3875+
isKnownToBeAPowerOfTwo(Val.getOperand(0)))
3876+
return true;
3877+
38723878
// More could be done here, though the above checks are enough
38733879
// to handle some common cases.
38743880

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12130,6 +12130,17 @@ const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
1213012130
return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
1213112131
}
1213212132

12133+
bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
12134+
// We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
12135+
// of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
12136+
// a power of two as well.
12137+
// FIXME: This doesn't work for zve32, but that's already broken
12138+
// elsewhere for the same reason.
12139+
assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
12140+
assert(RISCV::RVVBitsPerBlock == 64 && "RVVBitsPerBlock changed, audit needed");
12141+
return true;
12142+
}
12143+
1213312144
bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1213412145
EVT VT) const {
1213512146
VT = VT.getScalarType();

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,8 @@ class RISCVTargetLowering : public TargetLowering {
597597
unsigned uid,
598598
MCContext &Ctx) const override;
599599

600+
bool isVScaleKnownToBeAPowerOfTwo() const override;
601+
600602
private:
601603
/// RISCVCCAssignFn - This target-specific function extends the default
602604
/// CCValAssign with additional information used to lower RISC-V calling

llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll

Lines changed: 66 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -266,8 +266,9 @@ define void @sink_splat_mul_scalable(i32* nocapture %a, i32 signext %x) {
266266
; CHECK-NEXT: j .LBB7_5
267267
; CHECK-NEXT: .LBB7_2: # %vector.ph
268268
; CHECK-NEXT: li a6, 0
269-
; CHECK-NEXT: remu a4, a3, a2
270-
; CHECK-NEXT: sub a3, a3, a4
269+
; CHECK-NEXT: addiw a3, a2, -1
270+
; CHECK-NEXT: andi a4, a3, 1024
271+
; CHECK-NEXT: xori a3, a4, 1024
271272
; CHECK-NEXT: slli a5, a5, 1
272273
; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu
273274
; CHECK-NEXT: mv a7, a0
@@ -358,8 +359,9 @@ define void @sink_splat_add_scalable(i32* nocapture %a, i32 signext %x) {
358359
; CHECK-NEXT: j .LBB8_5
359360
; CHECK-NEXT: .LBB8_2: # %vector.ph
360361
; CHECK-NEXT: li a6, 0
361-
; CHECK-NEXT: remu a4, a3, a2
362-
; CHECK-NEXT: sub a3, a3, a4
362+
; CHECK-NEXT: addiw a3, a2, -1
363+
; CHECK-NEXT: andi a4, a3, 1024
364+
; CHECK-NEXT: xori a3, a4, 1024
363365
; CHECK-NEXT: slli a5, a5, 1
364366
; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu
365367
; CHECK-NEXT: mv a7, a0
@@ -450,8 +452,9 @@ define void @sink_splat_sub_scalable(i32* nocapture %a, i32 signext %x) {
450452
; CHECK-NEXT: j .LBB9_5
451453
; CHECK-NEXT: .LBB9_2: # %vector.ph
452454
; CHECK-NEXT: li a6, 0
453-
; CHECK-NEXT: remu a4, a3, a2
454-
; CHECK-NEXT: sub a3, a3, a4
455+
; CHECK-NEXT: addiw a3, a2, -1
456+
; CHECK-NEXT: andi a4, a3, 1024
457+
; CHECK-NEXT: xori a3, a4, 1024
455458
; CHECK-NEXT: slli a5, a5, 1
456459
; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu
457460
; CHECK-NEXT: mv a7, a0
@@ -542,8 +545,9 @@ define void @sink_splat_rsub_scalable(i32* nocapture %a, i32 signext %x) {
542545
; CHECK-NEXT: j .LBB10_5
543546
; CHECK-NEXT: .LBB10_2: # %vector.ph
544547
; CHECK-NEXT: li a6, 0
545-
; CHECK-NEXT: remu a4, a3, a2
546-
; CHECK-NEXT: sub a3, a3, a4
548+
; CHECK-NEXT: addiw a3, a2, -1
549+
; CHECK-NEXT: andi a4, a3, 1024
550+
; CHECK-NEXT: xori a3, a4, 1024
547551
; CHECK-NEXT: slli a5, a5, 1
548552
; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu
549553
; CHECK-NEXT: mv a7, a0
@@ -634,8 +638,9 @@ define void @sink_splat_and_scalable(i32* nocapture %a, i32 signext %x) {
634638
; CHECK-NEXT: j .LBB11_5
635639
; CHECK-NEXT: .LBB11_2: # %vector.ph
636640
; CHECK-NEXT: li a6, 0
637-
; CHECK-NEXT: remu a4, a3, a2
638-
; CHECK-NEXT: sub a3, a3, a4
641+
; CHECK-NEXT: addiw a3, a2, -1
642+
; CHECK-NEXT: andi a4, a3, 1024
643+
; CHECK-NEXT: xori a3, a4, 1024
639644
; CHECK-NEXT: slli a5, a5, 1
640645
; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu
641646
; CHECK-NEXT: mv a7, a0
@@ -726,8 +731,9 @@ define void @sink_splat_or_scalable(i32* nocapture %a, i32 signext %x) {
726731
; CHECK-NEXT: j .LBB12_5
727732
; CHECK-NEXT: .LBB12_2: # %vector.ph
728733
; CHECK-NEXT: li a6, 0
729-
; CHECK-NEXT: remu a4, a3, a2
730-
; CHECK-NEXT: sub a3, a3, a4
734+
; CHECK-NEXT: addiw a3, a2, -1
735+
; CHECK-NEXT: andi a4, a3, 1024
736+
; CHECK-NEXT: xori a3, a4, 1024
731737
; CHECK-NEXT: slli a5, a5, 1
732738
; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu
733739
; CHECK-NEXT: mv a7, a0
@@ -818,8 +824,9 @@ define void @sink_splat_xor_scalable(i32* nocapture %a, i32 signext %x) {
818824
; CHECK-NEXT: j .LBB13_5
819825
; CHECK-NEXT: .LBB13_2: # %vector.ph
820826
; CHECK-NEXT: li a6, 0
821-
; CHECK-NEXT: remu a4, a3, a2
822-
; CHECK-NEXT: sub a3, a3, a4
827+
; CHECK-NEXT: addiw a3, a2, -1
828+
; CHECK-NEXT: andi a4, a3, 1024
829+
; CHECK-NEXT: xori a3, a4, 1024
823830
; CHECK-NEXT: slli a5, a5, 1
824831
; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu
825832
; CHECK-NEXT: mv a7, a0
@@ -1018,8 +1025,9 @@ define void @sink_splat_shl_scalable(i32* nocapture %a, i32 signext %x) {
10181025
; CHECK-NEXT: j .LBB17_5
10191026
; CHECK-NEXT: .LBB17_2: # %vector.ph
10201027
; CHECK-NEXT: li a6, 0
1021-
; CHECK-NEXT: remu a4, a3, a2
1022-
; CHECK-NEXT: sub a3, a3, a4
1028+
; CHECK-NEXT: addiw a3, a2, -1
1029+
; CHECK-NEXT: andi a4, a3, 1024
1030+
; CHECK-NEXT: xori a3, a4, 1024
10231031
; CHECK-NEXT: slli a5, a5, 1
10241032
; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu
10251033
; CHECK-NEXT: mv a7, a0
@@ -1110,8 +1118,9 @@ define void @sink_splat_lshr_scalable(i32* nocapture %a, i32 signext %x) {
11101118
; CHECK-NEXT: j .LBB18_5
11111119
; CHECK-NEXT: .LBB18_2: # %vector.ph
11121120
; CHECK-NEXT: li a6, 0
1113-
; CHECK-NEXT: remu a4, a3, a2
1114-
; CHECK-NEXT: sub a3, a3, a4
1121+
; CHECK-NEXT: addiw a3, a2, -1
1122+
; CHECK-NEXT: andi a4, a3, 1024
1123+
; CHECK-NEXT: xori a3, a4, 1024
11151124
; CHECK-NEXT: slli a5, a5, 1
11161125
; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu
11171126
; CHECK-NEXT: mv a7, a0
@@ -1202,8 +1211,9 @@ define void @sink_splat_ashr_scalable(i32* nocapture %a) {
12021211
; CHECK-NEXT: j .LBB19_5
12031212
; CHECK-NEXT: .LBB19_2: # %vector.ph
12041213
; CHECK-NEXT: li a5, 0
1205-
; CHECK-NEXT: remu a3, a2, a1
1206-
; CHECK-NEXT: sub a2, a2, a3
1214+
; CHECK-NEXT: addiw a2, a1, -1
1215+
; CHECK-NEXT: andi a3, a2, 1024
1216+
; CHECK-NEXT: xori a2, a3, 1024
12071217
; CHECK-NEXT: slli a4, a4, 1
12081218
; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, mu
12091219
; CHECK-NEXT: mv a6, a0
@@ -1510,8 +1520,9 @@ define void @sink_splat_fmul_scalable(float* nocapture %a, float %x) {
15101520
; CHECK-NEXT: j .LBB26_5
15111521
; CHECK-NEXT: .LBB26_2: # %vector.ph
15121522
; CHECK-NEXT: li a5, 0
1513-
; CHECK-NEXT: remu a4, a3, a2
1514-
; CHECK-NEXT: sub a3, a3, a4
1523+
; CHECK-NEXT: addiw a3, a2, -1
1524+
; CHECK-NEXT: andi a4, a3, 1024
1525+
; CHECK-NEXT: xori a3, a4, 1024
15151526
; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, mu
15161527
; CHECK-NEXT: mv a6, a0
15171528
; CHECK-NEXT: .LBB26_3: # %vector.body
@@ -1601,8 +1612,9 @@ define void @sink_splat_fdiv_scalable(float* nocapture %a, float %x) {
16011612
; CHECK-NEXT: j .LBB27_5
16021613
; CHECK-NEXT: .LBB27_2: # %vector.ph
16031614
; CHECK-NEXT: li a5, 0
1604-
; CHECK-NEXT: remu a4, a3, a2
1605-
; CHECK-NEXT: sub a3, a3, a4
1615+
; CHECK-NEXT: addiw a3, a2, -1
1616+
; CHECK-NEXT: andi a4, a3, 1024
1617+
; CHECK-NEXT: xori a3, a4, 1024
16061618
; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, mu
16071619
; CHECK-NEXT: mv a6, a0
16081620
; CHECK-NEXT: .LBB27_3: # %vector.body
@@ -1692,8 +1704,9 @@ define void @sink_splat_frdiv_scalable(float* nocapture %a, float %x) {
16921704
; CHECK-NEXT: j .LBB28_5
16931705
; CHECK-NEXT: .LBB28_2: # %vector.ph
16941706
; CHECK-NEXT: li a5, 0
1695-
; CHECK-NEXT: remu a4, a3, a2
1696-
; CHECK-NEXT: sub a3, a3, a4
1707+
; CHECK-NEXT: addiw a3, a2, -1
1708+
; CHECK-NEXT: andi a4, a3, 1024
1709+
; CHECK-NEXT: xori a3, a4, 1024
16971710
; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, mu
16981711
; CHECK-NEXT: mv a6, a0
16991712
; CHECK-NEXT: .LBB28_3: # %vector.body
@@ -1783,8 +1796,9 @@ define void @sink_splat_fadd_scalable(float* nocapture %a, float %x) {
17831796
; CHECK-NEXT: j .LBB29_5
17841797
; CHECK-NEXT: .LBB29_2: # %vector.ph
17851798
; CHECK-NEXT: li a5, 0
1786-
; CHECK-NEXT: remu a4, a3, a2
1787-
; CHECK-NEXT: sub a3, a3, a4
1799+
; CHECK-NEXT: addiw a3, a2, -1
1800+
; CHECK-NEXT: andi a4, a3, 1024
1801+
; CHECK-NEXT: xori a3, a4, 1024
17881802
; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, mu
17891803
; CHECK-NEXT: mv a6, a0
17901804
; CHECK-NEXT: .LBB29_3: # %vector.body
@@ -1874,8 +1888,9 @@ define void @sink_splat_fsub_scalable(float* nocapture %a, float %x) {
18741888
; CHECK-NEXT: j .LBB30_5
18751889
; CHECK-NEXT: .LBB30_2: # %vector.ph
18761890
; CHECK-NEXT: li a5, 0
1877-
; CHECK-NEXT: remu a4, a3, a2
1878-
; CHECK-NEXT: sub a3, a3, a4
1891+
; CHECK-NEXT: addiw a3, a2, -1
1892+
; CHECK-NEXT: andi a4, a3, 1024
1893+
; CHECK-NEXT: xori a3, a4, 1024
18791894
; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, mu
18801895
; CHECK-NEXT: mv a6, a0
18811896
; CHECK-NEXT: .LBB30_3: # %vector.body
@@ -1965,8 +1980,9 @@ define void @sink_splat_frsub_scalable(float* nocapture %a, float %x) {
19651980
; CHECK-NEXT: j .LBB31_5
19661981
; CHECK-NEXT: .LBB31_2: # %vector.ph
19671982
; CHECK-NEXT: li a5, 0
1968-
; CHECK-NEXT: remu a4, a3, a2
1969-
; CHECK-NEXT: sub a3, a3, a4
1983+
; CHECK-NEXT: addiw a3, a2, -1
1984+
; CHECK-NEXT: andi a4, a3, 1024
1985+
; CHECK-NEXT: xori a3, a4, 1024
19701986
; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, mu
19711987
; CHECK-NEXT: mv a6, a0
19721988
; CHECK-NEXT: .LBB31_3: # %vector.body
@@ -2139,8 +2155,9 @@ define void @sink_splat_fma_scalable(float* noalias nocapture %a, float* noalias
21392155
; CHECK-NEXT: .LBB34_2: # %vector.ph
21402156
; CHECK-NEXT: li a6, 0
21412157
; CHECK-NEXT: li a7, 0
2142-
; CHECK-NEXT: remu a5, a4, a3
2143-
; CHECK-NEXT: sub a4, a4, a5
2158+
; CHECK-NEXT: addiw a4, a3, -1
2159+
; CHECK-NEXT: andi a5, a4, 1024
2160+
; CHECK-NEXT: xori a4, a5, 1024
21442161
; CHECK-NEXT: vsetvli t0, zero, e32, m1, ta, mu
21452162
; CHECK-NEXT: .LBB34_3: # %vector.body
21462163
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -2241,8 +2258,9 @@ define void @sink_splat_fma_commute_scalable(float* noalias nocapture %a, float*
22412258
; CHECK-NEXT: .LBB35_2: # %vector.ph
22422259
; CHECK-NEXT: li a6, 0
22432260
; CHECK-NEXT: li a7, 0
2244-
; CHECK-NEXT: remu a5, a4, a3
2245-
; CHECK-NEXT: sub a4, a4, a5
2261+
; CHECK-NEXT: addiw a4, a3, -1
2262+
; CHECK-NEXT: andi a5, a4, 1024
2263+
; CHECK-NEXT: xori a4, a5, 1024
22462264
; CHECK-NEXT: vsetvli t0, zero, e32, m1, ta, mu
22472265
; CHECK-NEXT: .LBB35_3: # %vector.body
22482266
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -2567,8 +2585,9 @@ define void @sink_splat_udiv_scalable(i32* nocapture %a, i32 signext %x) {
25672585
; CHECK-NEXT: j .LBB42_5
25682586
; CHECK-NEXT: .LBB42_2: # %vector.ph
25692587
; CHECK-NEXT: li a6, 0
2570-
; CHECK-NEXT: remu a4, a3, a2
2571-
; CHECK-NEXT: sub a3, a3, a4
2588+
; CHECK-NEXT: addiw a3, a2, -1
2589+
; CHECK-NEXT: andi a4, a3, 1024
2590+
; CHECK-NEXT: xori a3, a4, 1024
25722591
; CHECK-NEXT: slli a5, a5, 1
25732592
; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu
25742593
; CHECK-NEXT: mv a7, a0
@@ -2659,8 +2678,9 @@ define void @sink_splat_sdiv_scalable(i32* nocapture %a, i32 signext %x) {
26592678
; CHECK-NEXT: j .LBB43_5
26602679
; CHECK-NEXT: .LBB43_2: # %vector.ph
26612680
; CHECK-NEXT: li a6, 0
2662-
; CHECK-NEXT: remu a4, a3, a2
2663-
; CHECK-NEXT: sub a3, a3, a4
2681+
; CHECK-NEXT: addiw a3, a2, -1
2682+
; CHECK-NEXT: andi a4, a3, 1024
2683+
; CHECK-NEXT: xori a3, a4, 1024
26642684
; CHECK-NEXT: slli a5, a5, 1
26652685
; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu
26662686
; CHECK-NEXT: mv a7, a0
@@ -2751,8 +2771,9 @@ define void @sink_splat_urem_scalable(i32* nocapture %a, i32 signext %x) {
27512771
; CHECK-NEXT: j .LBB44_5
27522772
; CHECK-NEXT: .LBB44_2: # %vector.ph
27532773
; CHECK-NEXT: li a6, 0
2754-
; CHECK-NEXT: remu a4, a3, a2
2755-
; CHECK-NEXT: sub a3, a3, a4
2774+
; CHECK-NEXT: addiw a3, a2, -1
2775+
; CHECK-NEXT: andi a4, a3, 1024
2776+
; CHECK-NEXT: xori a3, a4, 1024
27562777
; CHECK-NEXT: slli a5, a5, 1
27572778
; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu
27582779
; CHECK-NEXT: mv a7, a0
@@ -2843,8 +2864,9 @@ define void @sink_splat_srem_scalable(i32* nocapture %a, i32 signext %x) {
28432864
; CHECK-NEXT: j .LBB45_5
28442865
; CHECK-NEXT: .LBB45_2: # %vector.ph
28452866
; CHECK-NEXT: li a6, 0
2846-
; CHECK-NEXT: remu a4, a3, a2
2847-
; CHECK-NEXT: sub a3, a3, a4
2867+
; CHECK-NEXT: addiw a3, a2, -1
2868+
; CHECK-NEXT: andi a4, a3, 1024
2869+
; CHECK-NEXT: xori a3, a4, 1024
28482870
; CHECK-NEXT: slli a5, a5, 1
28492871
; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu
28502872
; CHECK-NEXT: mv a7, a0

llvm/test/CodeGen/RISCV/rvv/vscale-power-of-two.ll

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ define i64 @vscale_lshr(i64 %TC) {
88
; CHECK: # %bb.0:
99
; CHECK-NEXT: csrr a1, vlenb
1010
; CHECK-NEXT: srli a1, a1, 6
11-
; CHECK-NEXT: remu a0, a0, a1
11+
; CHECK-NEXT: addi a1, a1, -1
12+
; CHECK-NEXT: and a0, a0, a1
1213
; CHECK-NEXT: ret
1314
%vscale = call i64 @llvm.vscale.i64()
1415
%shifted = lshr i64 %vscale, 3
@@ -21,7 +22,8 @@ define i64 @vscale(i64 %TC) {
2122
; CHECK: # %bb.0:
2223
; CHECK-NEXT: csrr a1, vlenb
2324
; CHECK-NEXT: srli a1, a1, 3
24-
; CHECK-NEXT: remu a0, a0, a1
25+
; CHECK-NEXT: addi a1, a1, -1
26+
; CHECK-NEXT: and a0, a0, a1
2527
; CHECK-NEXT: ret
2628
%vscale = call i64 @llvm.vscale.i64()
2729
%urem = urem i64 %TC, %vscale
@@ -32,7 +34,8 @@ define i64 @vscale_shl(i64 %TC) {
3234
; CHECK-LABEL: vscale_shl:
3335
; CHECK: # %bb.0:
3436
; CHECK-NEXT: csrr a1, vlenb
35-
; CHECK-NEXT: remu a0, a0, a1
37+
; CHECK-NEXT: addi a1, a1, -1
38+
; CHECK-NEXT: and a0, a0, a1
3639
; CHECK-NEXT: ret
3740
%vscale = call i64 @llvm.vscale.i64()
3841
%shifted = shl i64 %vscale, 3
@@ -45,8 +48,8 @@ define i64 @TC_minus_rem(i64 %TC) {
4548
; CHECK: # %bb.0:
4649
; CHECK-NEXT: csrr a1, vlenb
4750
; CHECK-NEXT: srli a1, a1, 3
48-
; CHECK-NEXT: remu a1, a0, a1
49-
; CHECK-NEXT: sub a0, a0, a1
51+
; CHECK-NEXT: neg a1, a1
52+
; CHECK-NEXT: and a0, a0, a1
5053
; CHECK-NEXT: ret
5154
%vscale = call i64 @llvm.vscale.i64()
5255
%urem = urem i64 %TC, %vscale
@@ -58,8 +61,8 @@ define i64 @TC_minus_rem_shl(i64 %TC) {
5861
; CHECK-LABEL: TC_minus_rem_shl:
5962
; CHECK: # %bb.0:
6063
; CHECK-NEXT: csrr a1, vlenb
61-
; CHECK-NEXT: remu a1, a0, a1
62-
; CHECK-NEXT: sub a0, a0, a1
64+
; CHECK-NEXT: neg a1, a1
65+
; CHECK-NEXT: and a0, a0, a1
6366
; CHECK-NEXT: ret
6467
%vscale = call i64 @llvm.vscale.i64()
6568
%shifted = shl i64 %vscale, 3
@@ -73,9 +76,8 @@ define i64 @con1024_minus_rem() {
7376
; CHECK: # %bb.0:
7477
; CHECK-NEXT: csrr a0, vlenb
7578
; CHECK-NEXT: srli a0, a0, 3
76-
; CHECK-NEXT: li a1, 1024
77-
; CHECK-NEXT: remu a0, a1, a0
78-
; CHECK-NEXT: sub a0, a1, a0
79+
; CHECK-NEXT: negw a0, a0
80+
; CHECK-NEXT: andi a0, a0, 1024
7981
; CHECK-NEXT: ret
8082
%vscale = call i64 @llvm.vscale.i64()
8183
%urem = urem i64 1024, %vscale
@@ -90,10 +92,10 @@ define i64 @con2048_minus_rem() {
9092
; CHECK: # %bb.0:
9193
; CHECK-NEXT: csrr a0, vlenb
9294
; CHECK-NEXT: srli a0, a0, 3
95+
; CHECK-NEXT: neg a0, a0
9396
; CHECK-NEXT: lui a1, 1
9497
; CHECK-NEXT: addiw a1, a1, -2048
95-
; CHECK-NEXT: remu a0, a1, a0
96-
; CHECK-NEXT: sub a0, a1, a0
98+
; CHECK-NEXT: and a0, a0, a1
9799
; CHECK-NEXT: ret
98100
%vscale = call i64 @llvm.vscale.i64()
99101
%urem = urem i64 2048, %vscale

0 commit comments

Comments
 (0)