Skip to content

Commit eea865b

Browse files
committed
Recommit "[SelectionDAG][RISCV] Add very basic PromoteIntegerResult/Op support for VP_SIGN/ZERO_EXTEND."
I have fixed an existing DAGCombiner bug that caused the previous assertion failure; see 7163539. Original message: We don't have VP_ANY_EXTEND or VP_SIGN_EXTEND_INREG yet, so I've deviated a little from the non-VP lowering. My goal was to fix the crashes that occur on these test cases without this patch. Reviewed By: fakepaper56. Differential Revision: https://reviews.llvm.org/D152854
1 parent f03a16e commit eea865b

File tree

5 files changed

+153
-3
lines changed

5 files changed

+153
-3
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
141141
Res = PromoteIntRes_EXTEND_VECTOR_INREG(N); break;
142142

143143
case ISD::SIGN_EXTEND:
144+
case ISD::VP_SIGN_EXTEND:
144145
case ISD::ZERO_EXTEND:
146+
case ISD::VP_ZERO_EXTEND:
145147
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
146148

147149
case ISD::VP_FP_TO_SINT:
@@ -760,8 +762,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
760762
assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
761763

762764
// If the result and operand types are the same after promotion, simplify
763-
// to an in-register extension.
764-
if (NVT == Res.getValueType()) {
765+
// to an in-register extension. Unless this is a VP_*_EXTEND.
766+
if (NVT == Res.getValueType() && N->getNumOperands() == 1) {
765767
// The high bits are not guaranteed to be anything. Insert an extend.
766768
if (N->getOpcode() == ISD::SIGN_EXTEND)
767769
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
@@ -774,6 +776,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
774776
}
775777

776778
// Otherwise, just extend the original operand all the way to the larger type.
779+
if (N->getNumOperands() != 1) {
780+
assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
781+
assert(N->isVPOpcode() && "Expected VP opcode");
782+
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0),
783+
N->getOperand(1), N->getOperand(2));
784+
}
777785
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
778786
}
779787

@@ -1663,6 +1671,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
16631671
case ISD::VP_SETCC:
16641672
case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
16651673
case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
1674+
case ISD::VP_SIGN_EXTEND: Res = PromoteIntOp_VP_SIGN_EXTEND(N); break;
16661675
case ISD::VP_SINT_TO_FP:
16671676
case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
16681677
case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break;
@@ -1684,6 +1693,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
16841693
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
16851694
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
16861695
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
1696+
case ISD::VP_ZERO_EXTEND: Res = PromoteIntOp_VP_ZERO_EXTEND(N); break;
16871697
case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;
16881698
case ISD::INSERT_SUBVECTOR: Res = PromoteIntOp_INSERT_SUBVECTOR(N); break;
16891699

@@ -2013,6 +2023,23 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
20132023
Op, DAG.getValueType(N->getOperand(0).getValueType()));
20142024
}
20152025

2026+
SDValue DAGTypeLegalizer::PromoteIntOp_VP_SIGN_EXTEND(SDNode *N) {
2027+
SDLoc dl(N);
2028+
EVT VT = N->getValueType(0);
2029+
SDValue Op = GetPromotedInteger(N->getOperand(0));
2030+
// FIXME: There is no VP_ANY_EXTEND yet.
2031+
Op = DAG.getNode(ISD::VP_ZERO_EXTEND, dl, VT, Op, N->getOperand(1),
2032+
N->getOperand(2));
2033+
unsigned Diff =
2034+
VT.getScalarSizeInBits() - N->getOperand(0).getScalarValueSizeInBits();
2035+
SDValue ShAmt = DAG.getShiftAmountConstant(Diff, VT, dl);
2036+
// FIXME: There is no VP_SIGN_EXTEND_INREG so use a pair of shifts.
2037+
SDValue Shl = DAG.getNode(ISD::VP_SHL, dl, VT, Op, ShAmt, N->getOperand(1),
2038+
N->getOperand(2));
2039+
return DAG.getNode(ISD::VP_ASHR, dl, VT, Shl, ShAmt, N->getOperand(1),
2040+
N->getOperand(2));
2041+
}
2042+
20162043
SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
20172044
if (N->getOpcode() == ISD::VP_SINT_TO_FP)
20182045
return SDValue(DAG.UpdateNodeOperands(N,
@@ -2164,6 +2191,19 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
21642191
return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
21652192
}
21662193

2194+
SDValue DAGTypeLegalizer::PromoteIntOp_VP_ZERO_EXTEND(SDNode *N) {
2195+
SDLoc dl(N);
2196+
EVT VT = N->getValueType(0);
2197+
SDValue Op = GetPromotedInteger(N->getOperand(0));
2198+
// FIXME: There is no VP_ANY_EXTEND yet.
2199+
Op = DAG.getNode(ISD::VP_ZERO_EXTEND, dl, VT, Op, N->getOperand(1),
2200+
N->getOperand(2));
2201+
APInt Imm = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
2202+
N->getOperand(0).getScalarValueSizeInBits());
2203+
return DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(Imm, dl, VT),
2204+
N->getOperand(1), N->getOperand(2));
2205+
}
2206+
21672207
SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo) {
21682208
assert(OpNo == 2 && "Don't know how to promote this operand!");
21692209

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,13 +385,15 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
385385
SDValue PromoteIntOp_Shift(SDNode *N);
386386
SDValue PromoteIntOp_FunnelShift(SDNode *N);
387387
SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
388+
SDValue PromoteIntOp_VP_SIGN_EXTEND(SDNode *N);
388389
SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
389390
SDValue PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N);
390391
SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
391392
SDValue PromoteIntOp_TRUNCATE(SDNode *N);
392393
SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
393394
SDValue PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N);
394395
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
396+
SDValue PromoteIntOp_VP_ZERO_EXTEND(SDNode *N);
395397
SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
396398
SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
397399
SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6923,7 +6923,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
69236923
return N1;
69246924
break;
69256925
case ISD::VP_TRUNCATE:
6926-
// Don't create noop vp_truncate.
6926+
case ISD::VP_SIGN_EXTEND:
6927+
case ISD::VP_ZERO_EXTEND:
6928+
// Don't create noop casts.
69276929
if (N1.getValueType() == VT)
69286930
return N1;
69296931
break;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,3 +201,56 @@ define <32 x i64> @vsext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl
201201
%v = call <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl)
202202
ret <32 x i64> %v
203203
}
204+
205+
declare <4 x i16> @llvm.vp.sext.v4i16.v4i7(<4 x i7>, <4 x i1>, i32)
206+
207+
define <4 x i16> @vsext_v4i16_v4i7(<4 x i7> %va, <4 x i1> %m, i32 zeroext %evl) {
208+
; CHECK-LABEL: vsext_v4i16_v4i7:
209+
; CHECK: # %bb.0:
210+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
211+
; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
212+
; CHECK-NEXT: vsll.vi v8, v9, 9, v0.t
213+
; CHECK-NEXT: vsra.vi v8, v8, 9, v0.t
214+
; CHECK-NEXT: ret
215+
%v = call <4 x i16> @llvm.vp.sext.v4i16.v4i7(<4 x i7> %va, <4 x i1> %m, i32 %evl)
216+
ret <4 x i16> %v
217+
}
218+
219+
declare <4 x i8> @llvm.vp.sext.v4i8.v4i7(<4 x i7>, <4 x i1>, i32)
220+
221+
define <4 x i8> @vsext_v4i8_v4i7(<4 x i7> %va, <4 x i1> %m, i32 zeroext %evl) {
222+
; CHECK-LABEL: vsext_v4i8_v4i7:
223+
; CHECK: # %bb.0:
224+
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
225+
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
226+
; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t
227+
; CHECK-NEXT: ret
228+
%v = call <4 x i8> @llvm.vp.sext.v4i8.v4i7(<4 x i7> %va, <4 x i1> %m, i32 %evl)
229+
ret <4 x i8> %v
230+
}
231+
232+
declare <4 x i15> @llvm.vp.sext.v4i15.v4i8(<4 x i8>, <4 x i1>, i32)
233+
234+
define <4 x i15> @vsext_v4i15_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
235+
; CHECK-LABEL: vsext_v4i15_v4i8:
236+
; CHECK: # %bb.0:
237+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
238+
; CHECK-NEXT: vsext.vf2 v9, v8, v0.t
239+
; CHECK-NEXT: vmv1r.v v8, v9
240+
; CHECK-NEXT: ret
241+
%v = call <4 x i15> @llvm.vp.sext.v4i15.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
242+
ret <4 x i15> %v
243+
}
244+
245+
declare <4 x i15> @llvm.vp.sext.v4i15.v4i9(<4 x i9>, <4 x i1>, i32)
246+
247+
define <4 x i15> @vsext_v4i15_v4i9(<4 x i9> %va, <4 x i1> %m, i32 zeroext %evl) {
248+
; CHECK-LABEL: vsext_v4i15_v4i9:
249+
; CHECK: # %bb.0:
250+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
251+
; CHECK-NEXT: vsll.vi v8, v8, 7, v0.t
252+
; CHECK-NEXT: vsra.vi v8, v8, 7, v0.t
253+
; CHECK-NEXT: ret
254+
%v = call <4 x i15> @llvm.vp.sext.v4i15.v4i9(<4 x i9> %va, <4 x i1> %m, i32 %evl)
255+
ret <4 x i15> %v
256+
}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,3 +201,56 @@ define <32 x i64> @vzext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl
201201
%v = call <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl)
202202
ret <32 x i64> %v
203203
}
204+
205+
declare <4 x i16> @llvm.vp.zext.v4i16.v4i7(<4 x i7>, <4 x i1>, i32)
206+
207+
define <4 x i16> @vzext_v4i16_v4i7(<4 x i7> %va, <4 x i1> %m, i32 zeroext %evl) {
208+
; CHECK-LABEL: vzext_v4i16_v4i7:
209+
; CHECK: # %bb.0:
210+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
211+
; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
212+
; CHECK-NEXT: li a0, 127
213+
; CHECK-NEXT: vand.vx v8, v9, a0, v0.t
214+
; CHECK-NEXT: ret
215+
%v = call <4 x i16> @llvm.vp.zext.v4i16.v4i7(<4 x i7> %va, <4 x i1> %m, i32 %evl)
216+
ret <4 x i16> %v
217+
}
218+
219+
declare <4 x i8> @llvm.vp.zext.v4i8.v4i7(<4 x i7>, <4 x i1>, i32)
220+
221+
define <4 x i8> @vzext_v4i8_v4i7(<4 x i7> %va, <4 x i1> %m, i32 zeroext %evl) {
222+
; CHECK-LABEL: vzext_v4i8_v4i7:
223+
; CHECK: # %bb.0:
224+
; CHECK-NEXT: li a1, 127
225+
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
226+
; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
227+
; CHECK-NEXT: ret
228+
%v = call <4 x i8> @llvm.vp.zext.v4i8.v4i7(<4 x i7> %va, <4 x i1> %m, i32 %evl)
229+
ret <4 x i8> %v
230+
}
231+
232+
declare <4 x i15> @llvm.vp.zext.v4i15.v4i8(<4 x i8>, <4 x i1>, i32)
233+
234+
define <4 x i15> @vzext_v4i15_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
235+
; CHECK-LABEL: vzext_v4i15_v4i8:
236+
; CHECK: # %bb.0:
237+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
238+
; CHECK-NEXT: vzext.vf2 v9, v8, v0.t
239+
; CHECK-NEXT: vmv1r.v v8, v9
240+
; CHECK-NEXT: ret
241+
%v = call <4 x i15> @llvm.vp.zext.v4i15.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
242+
ret <4 x i15> %v
243+
}
244+
245+
declare <4 x i15> @llvm.vp.zext.v4i15.v4i9(<4 x i9>, <4 x i1>, i32)
246+
247+
define <4 x i15> @vzext_v4i15_v4i9(<4 x i9> %va, <4 x i1> %m, i32 zeroext %evl) {
248+
; CHECK-LABEL: vzext_v4i15_v4i9:
249+
; CHECK: # %bb.0:
250+
; CHECK-NEXT: li a1, 511
251+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
252+
; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
253+
; CHECK-NEXT: ret
254+
%v = call <4 x i15> @llvm.vp.zext.v4i15.v4i9(<4 x i9> %va, <4 x i1> %m, i32 %evl)
255+
ret <4 x i15> %v
256+
}

0 commit comments

Comments
 (0)