Skip to content

Commit e24a859

Browse files
topperc authored and tstellar committed
[SelectionDAG][RISCV] Remove code for handling too small shift type from SimplifyDemandedBits.
This code detected that the type returned from getShiftAmountTy was too small to hold the constant shift amount. But it used the full type size instead of the scalar type size, causing it to crash for scalable vectors. This code was necessary when getShiftAmountTy would always return the target preferred shift amount type for scalars even when the type was an illegal type larger than the target supported. For vectors, getShiftAmountTy has always returned the vector type. Fortunately, getShiftAmountTy was fixed a while ago to detect that the target's preferred size for scalars is not large enough for the type. So we can delete this code. Switched to use getShiftAmountConstant to further simplify the code. Fixes PR61561. (cherry picked from commit a37df84)
1 parent 46e68a2 commit e24a859

File tree

2 files changed

+36
-6
lines changed

2 files changed

+36
-6
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1724,12 +1724,9 @@ bool TargetLowering::SimplifyDemandedBits(
17241724
unsigned InnerBits = InnerVT.getScalarSizeInBits();
17251725
if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
17261726
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1727-
EVT ShTy = getShiftAmountTy(InnerVT, DL);
1728-
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
1729-
ShTy = InnerVT;
1730-
SDValue NarrowShl =
1731-
TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
1732-
TLO.DAG.getConstant(ShAmt, dl, ShTy));
1727+
SDValue NarrowShl = TLO.DAG.getNode(
1728+
ISD::SHL, dl, InnerVT, InnerOp,
1729+
TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
17331730
return TLO.CombineTo(
17341731
Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
17351732
}
Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,33 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
3+
4+
define <vscale x 4 x i8> @foo(ptr %p) {
5+
; CHECK-LABEL: foo:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: vl1re16.v v8, (a0)
8+
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
9+
; CHECK-NEXT: vsll.vi v8, v8, 3
10+
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
11+
; CHECK-NEXT: vzext.vf2 v10, v8
12+
; CHECK-NEXT: li a0, 248
13+
; CHECK-NEXT: vand.vx v8, v10, a0
14+
; CHECK-NEXT: lui a0, 4
15+
; CHECK-NEXT: vmv.v.x v10, a0
16+
; CHECK-NEXT: lui a0, 1
17+
; CHECK-NEXT: addiw a0, a0, -361
18+
; CHECK-NEXT: vmacc.vx v10, a0, v8
19+
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
20+
; CHECK-NEXT: vnsrl.wi v8, v10, 15
21+
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
22+
; CHECK-NEXT: vnsrl.wi v8, v8, 0
23+
; CHECK-NEXT: ret
24+
%i13 = load <vscale x 4 x i16>, ptr %p, align 2
25+
%i14 = zext <vscale x 4 x i16> %i13 to <vscale x 4 x i32>
26+
%i15 = shl nuw nsw <vscale x 4 x i32> %i14, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
27+
%i16 = and <vscale x 4 x i32> %i15, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 248, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
28+
%i17 = mul nuw nsw <vscale x 4 x i32> %i16, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3735, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
29+
%i18 = add nuw nsw <vscale x 4 x i32> %i17, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 16384, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
30+
%i21 = lshr <vscale x 4 x i32> %i18, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 15, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
31+
%i22 = trunc <vscale x 4 x i32> %i21 to <vscale x 4 x i8>
32+
ret <vscale x 4 x i8> %i22
33+
}

0 commit comments

Comments
 (0)