Skip to content

Commit 4822830

Browse files
committed
[AArch64] Remove custom zext/sext legalization code.
Currently, performExtendCombine assumes that twice the source element bit width is a valid MVT. This is not the case for i1, which causes a crash on the v64i1 test cases added in this patch. It turns out that this code does not appear to be needed: the same patterns are handled by other code, and we end up with the same results even without the custom lowering. I also added additional test cases in a50037a. Let's just remove the unneeded code. Reviewed By: dmgreen. Differential Revision: https://reviews.llvm.org/D99437
1 parent 047cbfe commit 4822830

File tree

2 files changed

+65
-72
lines changed

2 files changed

+65
-72
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -13939,78 +13939,7 @@ static SDValue performExtendCombine(SDNode *N,
1393913939

1394013940
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
1394113941
}
13942-
13943-
// This is effectively a custom type legalization for AArch64.
13944-
//
13945-
// Type legalization will split an extend of a small, legal, type to a larger
13946-
// illegal type by first splitting the destination type, often creating
13947-
// illegal source types, which then get legalized in isel-confusing ways,
13948-
// leading to really terrible codegen. E.g.,
13949-
// %result = v8i32 sext v8i8 %value
13950-
// becomes
13951-
// %losrc = extract_subreg %value, ...
13952-
// %hisrc = extract_subreg %value, ...
13953-
// %lo = v4i32 sext v4i8 %losrc
13954-
// %hi = v4i32 sext v4i8 %hisrc
13955-
// Things go rapidly downhill from there.
13956-
//
13957-
// For AArch64, the [sz]ext vector instructions can only go up one element
13958-
// size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32
13959-
// takes two instructions.
13960-
//
13961-
// This implies that the most efficient way to do the extend from v8i8
13962-
// to two v4i32 values is to first extend the v8i8 to v8i16, then do
13963-
// the normal splitting for the v8i16->v8i32.
13964-
13965-
// This is pre-legalization to catch some cases where the default
13966-
// type legalization will create ill-tempered code.
13967-
if (!DCI.isBeforeLegalizeOps())
13968-
return SDValue();
13969-
13970-
// We're only interested in cleaning things up for non-legal vector types
13971-
// here. If both the source and destination are legal, things will just
13972-
// work naturally without any fiddling.
13973-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13974-
EVT ResVT = N->getValueType(0);
13975-
if (!ResVT.isVector() || TLI.isTypeLegal(ResVT))
13976-
return SDValue();
13977-
// If the vector type isn't a simple VT, it's beyond the scope of what
13978-
// we're worried about here. Let legalization do its thing and hope for
13979-
// the best.
13980-
SDValue Src = N->getOperand(0);
13981-
EVT SrcVT = Src->getValueType(0);
13982-
if (!ResVT.isSimple() || !SrcVT.isSimple())
13983-
return SDValue();
13984-
13985-
// If the source VT is a 64-bit fixed or scalable vector, we can play games
13986-
// and get the better results we want.
13987-
if (SrcVT.getSizeInBits().getKnownMinSize() != 64)
13988-
return SDValue();
13989-
13990-
unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13991-
ElementCount SrcEC = SrcVT.getVectorElementCount();
13992-
SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), SrcEC);
13993-
SDLoc DL(N);
13994-
Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
13995-
13996-
// Now split the rest of the operation into two halves, each with a 64
13997-
// bit source.
13998-
EVT LoVT, HiVT;
13999-
SDValue Lo, Hi;
14000-
LoVT = HiVT = ResVT.getHalfNumVectorElementsVT(*DAG.getContext());
14001-
14002-
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
14003-
LoVT.getVectorElementCount());
14004-
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
14005-
DAG.getConstant(0, DL, MVT::i64));
14006-
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
14007-
DAG.getConstant(InNVT.getVectorMinNumElements(), DL, MVT::i64));
14008-
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
14009-
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
14010-
14011-
// Now combine the parts back together so we still have a single result
14012-
// like the combiner expects.
14013-
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
13942+
return SDValue();
1401413943
}
1401513944

1401613945
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,

llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll

Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -202,3 +202,67 @@ define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
202202
%r = sext <8 x i8> %v0 to <8 x i64>
203203
ret <8 x i64> %r
204204
}
205+
206+
; Extends of vectors of i1.
207+
208+
define <32 x i8> @zext_v32i1(<32 x i1> %arg) {
209+
; CHECK-LABEL: zext_v32i1:
210+
; CHECK: and.16b v0, v0, v2
211+
; CHECK-NEXT: and.16b v1, v1, v2
212+
; CHECK-NEXT: ret
213+
%res = zext <32 x i1> %arg to <32 x i8>
214+
ret <32 x i8> %res
215+
}
216+
217+
define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
218+
; CHECK-LABEL: sext_v32i1:
219+
; CHECK: shl.16b v0, v0, #7
220+
; CHECK-NEXT: shl.16b v1, v1, #7
221+
; CHECK-NEXT: sshr.16b v0, v0, #7
222+
; CHECK-NEXT: sshr.16b v1, v1, #7
223+
; CHECK-NEXT: ret
224+
;
225+
%res = sext <32 x i1> %arg to <32 x i8>
226+
ret <32 x i8> %res
227+
}
228+
229+
define <64 x i8> @zext_v64i1(<64 x i1> %arg) {
230+
; CHECK-LABEL: zext_v64i1:
231+
; CHECK: and.16b v0, v0, [[V4:v.+]]
232+
; CHECK-NEXT: and.16b v1, v1, [[V4]]
233+
; CHECK-NEXT: and.16b v2, v2, [[V4]]
234+
; CHECK-NEXT: and.16b v3, v3, [[V4]]
235+
; CHECK-NEXT: ret
236+
;
237+
%res = zext <64 x i1> %arg to <64 x i8>
238+
ret <64 x i8> %res
239+
}
240+
241+
define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
242+
; CHECK-LABEL: sext_v64i1:
243+
; CHECK: shl.16b v0, v0, #7
244+
; CHECK-NEXT: shl.16b v3, v3, #7
245+
; CHECK-NEXT: shl.16b v2, v2, #7
246+
; CHECK-NEXT: shl.16b [[V4:v.+]], v1, #7
247+
; CHECK-NEXT: sshr.16b v0, v0, #7
248+
; CHECK-NEXT: sshr.16b v1, v3, #7
249+
; CHECK-NEXT: sshr.16b v2, v2, #7
250+
; CHECK-NEXT: sshr.16b v3, [[V4]], #7
251+
; CHECK-NEXT: ret
252+
;
253+
%res = sext <64 x i1> %arg to <64 x i8>
254+
ret <64 x i8> %res
255+
}
256+
257+
define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
258+
; CHECK-LABEL: sext_v1x64:
259+
; CHECK-NEXT: .cfi_startproc
260+
; CHECK-NEXT: fmov x8, d0
261+
; CHECK-NEXT: asr x1, x8, #63
262+
; CHECK-NEXT: mov.d v0[1], x1
263+
; CHECK-NEXT: fmov x0, d0
264+
; CHECK-NEXT: ret
265+
;
266+
%res = sext <1 x i64> %arg to <1 x i128>
267+
ret <1 x i128> %res
268+
}

0 commit comments

Comments
 (0)