Skip to content

Commit 034e650

Browse files
committed
[AArch64] Remove custom zext/sext legalization code.
Currently, performExtendCombine assumes that the source element bit width * 2 is a valid MVT. This is not the case for i1, and it causes a crash on the v64i1 test cases added in this patch.

It turns out that this code does not appear to be needed: the same patterns are handled by other code, and we end up with the same results even without the custom lowering. I also added additional test cases in a50037a.

Let's just remove the unneeded code.

Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D99437
(cherry-picked from 4822830)
1 parent 82bb63d commit 034e650

File tree

2 files changed

+65
-72
lines changed

2 files changed

+65
-72
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -13883,78 +13883,7 @@ static SDValue performExtendCombine(SDNode *N,
1388313883

1388413884
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
1388513885
}
13886-
13887-
// This is effectively a custom type legalization for AArch64.
13888-
//
13889-
// Type legalization will split an extend of a small, legal, type to a larger
13890-
// illegal type by first splitting the destination type, often creating
13891-
// illegal source types, which then get legalized in isel-confusing ways,
13892-
// leading to really terrible codegen. E.g.,
13893-
// %result = v8i32 sext v8i8 %value
13894-
// becomes
13895-
// %losrc = extract_subreg %value, ...
13896-
// %hisrc = extract_subreg %value, ...
13897-
// %lo = v4i32 sext v4i8 %losrc
13898-
// %hi = v4i32 sext v4i8 %hisrc
13899-
// Things go rapidly downhill from there.
13900-
//
13901-
// For AArch64, the [sz]ext vector instructions can only go up one element
13902-
// size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32
13903-
// takes two instructions.
13904-
//
13905-
// This implies that the most efficient way to do the extend from v8i8
13906-
// to two v4i32 values is to first extend the v8i8 to v8i16, then do
13907-
// the normal splitting to happen for the v8i16->v8i32.
13908-
13909-
// This is pre-legalization to catch some cases where the default
13910-
// type legalization will create ill-tempered code.
13911-
if (!DCI.isBeforeLegalizeOps())
13912-
return SDValue();
13913-
13914-
// We're only interested in cleaning things up for non-legal vector types
13915-
// here. If both the source and destination are legal, things will just
13916-
// work naturally without any fiddling.
13917-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13918-
EVT ResVT = N->getValueType(0);
13919-
if (!ResVT.isVector() || TLI.isTypeLegal(ResVT))
13920-
return SDValue();
13921-
// If the vector type isn't a simple VT, it's beyond the scope of what
13922-
// we're worried about here. Let legalization do its thing and hope for
13923-
// the best.
13924-
SDValue Src = N->getOperand(0);
13925-
EVT SrcVT = Src->getValueType(0);
13926-
if (!ResVT.isSimple() || !SrcVT.isSimple())
13927-
return SDValue();
13928-
13929-
// If the source VT is a 64-bit fixed or scalable vector, we can play games
13930-
// and get the better results we want.
13931-
if (SrcVT.getSizeInBits().getKnownMinSize() != 64)
13932-
return SDValue();
13933-
13934-
unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13935-
ElementCount SrcEC = SrcVT.getVectorElementCount();
13936-
SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), SrcEC);
13937-
SDLoc DL(N);
13938-
Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
13939-
13940-
// Now split the rest of the operation into two halves, each with a 64
13941-
// bit source.
13942-
EVT LoVT, HiVT;
13943-
SDValue Lo, Hi;
13944-
LoVT = HiVT = ResVT.getHalfNumVectorElementsVT(*DAG.getContext());
13945-
13946-
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
13947-
LoVT.getVectorElementCount());
13948-
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
13949-
DAG.getConstant(0, DL, MVT::i64));
13950-
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
13951-
DAG.getConstant(InNVT.getVectorMinNumElements(), DL, MVT::i64));
13952-
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
13953-
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
13954-
13955-
// Now combine the parts back together so we still have a single result
13956-
// like the combiner expects.
13957-
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
13886+
return SDValue();
1395813887
}
1395913888

1396013889
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,

llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,3 +202,67 @@ define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
202202
%r = sext <8 x i8> %v0 to <8 x i64>
203203
ret <8 x i64> %r
204204
}
205+
206+
; Extends of vectors of i1.
207+
208+
define <32 x i8> @zext_v32i1(<32 x i1> %arg) {
209+
; CHECK-LABEL: zext_v32i1:
210+
; CHECK: and.16b v0, v0, v2
211+
; CHECK-NEXT: and.16b v1, v1, v2
212+
; CHECK-NEXT: ret
213+
%res = zext <32 x i1> %arg to <32 x i8>
214+
ret <32 x i8> %res
215+
}
216+
217+
define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
218+
; CHECK-LABEL: sext_v32i1:
219+
; CHECK: shl.16b v0, v0, #7
220+
; CHECK-NEXT: shl.16b v1, v1, #7
221+
; CHECK-NEXT: sshr.16b v0, v0, #7
222+
; CHECK-NEXT: sshr.16b v1, v1, #7
223+
; CHECK-NEXT: ret
224+
;
225+
%res = sext <32 x i1> %arg to <32 x i8>
226+
ret <32 x i8> %res
227+
}
228+
229+
define <64 x i8> @zext_v64i1(<64 x i1> %arg) {
230+
; CHECK-LABEL: zext_v64i1:
231+
; CHECK: and.16b v0, v0, [[V4:v.+]]
232+
; CHECK-NEXT: and.16b v1, v1, [[V4]]
233+
; CHECK-NEXT: and.16b v2, v2, [[V4]]
234+
; CHECK-NEXT: and.16b v3, v3, [[V4]]
235+
; CHECK-NEXT: ret
236+
;
237+
%res = zext <64 x i1> %arg to <64 x i8>
238+
ret <64 x i8> %res
239+
}
240+
241+
define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
242+
; CHECK-LABEL: sext_v64i1:
243+
; CHECK: shl.16b v0, v0, #7
244+
; CHECK-NEXT: shl.16b v3, v3, #7
245+
; CHECK-NEXT: shl.16b v2, v2, #7
246+
; CHECK-NEXT: shl.16b [[V4:v.+]], v1, #7
247+
; CHECK-NEXT: sshr.16b v0, v0, #7
248+
; CHECK-NEXT: sshr.16b v1, v3, #7
249+
; CHECK-NEXT: sshr.16b v2, v2, #7
250+
; CHECK-NEXT: sshr.16b v3, [[V4]], #7
251+
; CHECK-NEXT: ret
252+
;
253+
%res = sext <64 x i1> %arg to <64 x i8>
254+
ret <64 x i8> %res
255+
}
256+
257+
define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
258+
; CHECK-LABEL: sext_v1x64:
259+
; CHECK-NEXT: .cfi_startproc
260+
; CHECK-NEXT: fmov x8, d0
261+
; CHECK-NEXT: asr x1, x8, #63
262+
; CHECK-NEXT: mov.d v0[1], x1
263+
; CHECK-NEXT: fmov x0, d0
264+
; CHECK-NEXT: ret
265+
;
266+
%res = sext <1 x i64> %arg to <1 x i128>
267+
ret <1 x i128> %res
268+
}

0 commit comments

Comments
 (0)