Skip to content

Commit 3b4d563

Browse files
committed
[AArch64] Limit vector splitting to vectors of size larger than 128bit
The intent of this code is to split larger vectors into smaller shuffles, but it is currently also triggering on some small vector types. Limit it to vectors larger than 128 bits.
1 parent df053d6 commit 3b4d563

File tree

3 files changed

+8
-5
lines changed

3 files changed

+8
-5
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5448,6 +5448,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(
54485448
// If we have a Mask, and the LT is being legalized somehow, split the Mask
54495449
// into smaller vectors and sum the cost of each shuffle.
54505450
if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
5451+
LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
54515452
Tp->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
54525453
Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
54535454

llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -253,11 +253,11 @@ define void @select_uniform_ugt_16xi8(ptr %ptr, i8 %x) {
253253
; CHECK-NEXT: [[S_8:%.*]] = select i1 [[CMP_8]], i8 [[TMP1]], i8 [[X:%.*]]
254254
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[GEP_9]], align 1
255255
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[GEP_12]], align 1
256+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i8> [[TMP0]], <8 x i8> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
257+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
256258
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
257259
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i8> [[TMP0]], <8 x i8> [[TMP4]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
258-
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> poison, i8 [[L_11]], i32 0
259-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison>
260-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> [[TMP11]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 8, i32 9, i32 10, i32 27, i32 poison, i32 poison, i32 poison, i32 poison>
260+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[L_11]], i32 11
261261
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP0]], i64 0)
262262
; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP7]], <4 x i8> [[TMP3]], i64 12)
263263
; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], splat (i8 -1)

llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,10 @@ define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) {
262262

263263
define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) {
264264
; CHECK-LABEL: @splattwice(
265-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
266-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
265+
; CHECK-NEXT: [[AS:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> zeroinitializer
266+
; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> zeroinitializer
267+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[AS]], <4 x half> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
268+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[BS]], <4 x half> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
267269
; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[TMP1]], [[TMP2]]
268270
; CHECK-NEXT: ret <8 x half> [[R]]
269271
;

0 commit comments

Comments
 (0)