Skip to content

Commit e351d60

Browse files
committed
[AArch64] Fix tryToConvertShuffleOfTbl2ToTbl4 with non-buildvector input operands.
It looks like this code is only considering buildvector inputs, expecting the inputs to have at least 16 operands. This adds a check to make sure that is true. Fixes #135950
1 parent d3153ad commit e351d60

File tree

2 files changed

+44
-10
lines changed

2 files changed

+44
-10
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13871,25 +13871,29 @@ static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op,
1387113871
DAG.getTargetConstant(Intrinsic::aarch64_neon_tbl2, dl, MVT::i64);
1387213872

1387313873
EVT VT = Op.getValueType();
13874-
if (Tbl1->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13875-
Tbl1->getOperand(0) != Tbl2ID ||
13876-
Tbl2->getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13877-
Tbl2->getOperand(0) != Tbl2ID)
13874+
if (Tbl1.getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13875+
Tbl1.getOperand(0) != Tbl2ID ||
13876+
Tbl2.getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
13877+
Tbl2.getOperand(0) != Tbl2ID)
1387813878
return SDValue();
1387913879

13880-
if (Tbl1->getValueType(0) != MVT::v16i8 ||
13881-
Tbl2->getValueType(0) != MVT::v16i8)
13880+
if (Tbl1.getValueType() != MVT::v16i8 ||
13881+
Tbl2.getValueType() != MVT::v16i8)
13882+
return SDValue();
13883+
13884+
SDValue Mask1 = Tbl1.getOperand(3);
13885+
SDValue Mask2 = Tbl2.getOperand(3);
13886+
if (Mask1.getOpcode() != ISD::BUILD_VECTOR ||
13887+
Mask2.getOpcode() != ISD::BUILD_VECTOR)
1388213888
return SDValue();
1388313889

13884-
SDValue Mask1 = Tbl1->getOperand(3);
13885-
SDValue Mask2 = Tbl2->getOperand(3);
1388613890
SmallVector<SDValue, 16> TBLMaskParts(16, SDValue());
1388713891
for (unsigned I = 0; I < 16; I++) {
1388813892
if (ShuffleMask[I] < 16)
13889-
TBLMaskParts[I] = Mask1->getOperand(ShuffleMask[I]);
13893+
TBLMaskParts[I] = Mask1.getOperand(ShuffleMask[I]);
1389013894
else {
1389113895
auto *C =
13892-
dyn_cast<ConstantSDNode>(Mask2->getOperand(ShuffleMask[I] - 16));
13896+
dyn_cast<ConstantSDNode>(Mask2.getOperand(ShuffleMask[I] - 16));
1389313897
if (!C)
1389413898
return SDValue();
1389513899
TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, dl, MVT::i32);

llvm/test/CodeGen/AArch64/arm64-tbl.ll

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1254,6 +1254,36 @@ define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %
12541254
ret <16 x i8> %tmp3
12551255
}
12561256

1257+
; Regression test for issue #135950: the first tbl2 call takes its index vector
; from the function argument %M rather than from a constant, while the second
; uses zeroinitializer. The shufflevector then interleaves the low eight lanes
; of the two tbl2 results.
define <16 x i8> @pr135950(<16 x i8> %A, <16 x i8> %B, <16 x i8> %M) {
1258+
; CHECK-SD-LABEL: pr135950:
1259+
; CHECK-SD: // %bb.0:
1260+
; CHECK-SD-NEXT: mov.16b v3, v1
1261+
; CHECK-SD-NEXT: movi.2d v1, #0000000000000000
1262+
; CHECK-SD-NEXT: mov.16b v4, v0
1263+
; CHECK-SD-NEXT: mov.16b v5, v3
1264+
; CHECK-SD-NEXT: tbl.16b v1, { v3, v4 }, v1
1265+
; CHECK-SD-NEXT: tbl.16b v0, { v4, v5 }, v2
1266+
; CHECK-SD-NEXT: zip1.16b v0, v0, v1
1267+
; CHECK-SD-NEXT: ret
1268+
;
1269+
; CHECK-GI-LABEL: pr135950:
1270+
; CHECK-GI: // %bb.0:
1271+
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
1272+
; CHECK-GI-NEXT: mov.16b v3, v2
1273+
; CHECK-GI-NEXT: movi.2d v4, #0000000000000000
1274+
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
1275+
; CHECK-GI-NEXT: tbl.16b v3, { v0, v1 }, v3
1276+
; CHECK-GI-NEXT: mov.16b v2, v0
1277+
; CHECK-GI-NEXT: tbl.16b v0, { v1, v2 }, v4
1278+
; CHECK-GI-NEXT: zip1.16b v0, v3, v0
1279+
; CHECK-GI-NEXT: ret
1280+
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %M)
1281+
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %B, <16 x i8> %A, <16 x i8> zeroinitializer)
1282+
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1283+
ret <16 x i8> %s
1284+
}
1285+
1286+
12571287
declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
12581288
declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
12591289
declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone

0 commit comments

Comments
 (0)