Skip to content

Commit 70eac25

Browse files
authored
[VectorCombine] Add fp cast handling for shuffletoidentity (llvm#120641)
This fixes some regressions from recent changes to vector combine in llvm#120216. It allows shuffleToIdentity to look through fp casts in the same way as other casts, and makes sure that mismatched vector types in splats and casts do not block the transform, as only the lanes should matter.
1 parent b87a5fb commit 70eac25

File tree

2 files changed

+22
-63
lines changed

2 files changed

+22
-63
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2259,7 +2259,9 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
22592259
all_of(drop_begin(Item), [Item](InstLane &IL) {
22602260
Value *FrontV = Item.front().first->get();
22612261
Use *U = IL.first;
2262-
return !U || U->get() == FrontV;
2262+
return !U || (isa<Constant>(U->get()) &&
2263+
cast<Constant>(U->get())->getSplatValue() ==
2264+
cast<Constant>(FrontV)->getSplatValue());
22632265
})) {
22642266
SplatLeafs.insert(FrontU);
22652267
continue;
@@ -2289,7 +2291,8 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
22892291
if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
22902292
return false;
22912293
if (auto *CI = dyn_cast<CastInst>(V))
2292-
if (CI->getSrcTy() != cast<CastInst>(FrontV)->getSrcTy())
2294+
if (CI->getSrcTy()->getScalarType() !=
2295+
cast<CastInst>(FrontV)->getSrcTy()->getScalarType())
22932296
return false;
22942297
if (auto *SI = dyn_cast<SelectInst>(V))
22952298
if (!isa<VectorType>(SI->getOperand(0)->getType()) ||
@@ -2314,7 +2317,8 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
23142317
Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
23152318
Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
23162319
continue;
2317-
} else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontU)) {
2320+
} else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
2321+
FPToUIInst, SIToFPInst, UIToFPInst>(FrontU)) {
23182322
Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
23192323
continue;
23202324
} else if (auto *BitCast = dyn_cast<BitCastInst>(FrontU)) {

llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll

Lines changed: 15 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,17 +1110,9 @@ define <8 x i8> @operandbundles_second(<8 x i8> %a) {
11101110

11111111
define <8 x i32> @fptoi(<8 x float> %a, <8 x float> %b) {
11121112
; CHECK-LABEL: @fptoi(
1113-
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1114-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
1115-
; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1116-
; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
1117-
; CHECK-NEXT: [[ABT:%.*]] = fptosi <4 x float> [[AT]] to <4 x i32>
1118-
; CHECK-NEXT: [[ABB:%.*]] = fptosi <4 x float> [[AB]] to <4 x i32>
1119-
; CHECK-NEXT: [[BBT:%.*]] = fptoui <4 x float> [[BT]] to <4 x i32>
1120-
; CHECK-NEXT: [[BBB:%.*]] = fptoui <4 x float> [[BB]] to <4 x i32>
1121-
; CHECK-NEXT: [[MT:%.*]] = mul <4 x i32> [[ABT]], [[BBT]]
1122-
; CHECK-NEXT: [[MB:%.*]] = mul <4 x i32> [[ABB]], [[BBB]]
1123-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[MT]], <4 x i32> [[MB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1113+
; CHECK-NEXT: [[TMP1:%.*]] = fptosi <8 x float> [[A:%.*]] to <8 x i32>
1114+
; CHECK-NEXT: [[TMP2:%.*]] = fptoui <8 x float> [[B:%.*]] to <8 x i32>
1115+
; CHECK-NEXT: [[R:%.*]] = mul <8 x i32> [[TMP1]], [[TMP2]]
11241116
; CHECK-NEXT: ret <8 x i32> [[R]]
11251117
;
11261118
%ab = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -1139,17 +1131,9 @@ define <8 x i32> @fptoi(<8 x float> %a, <8 x float> %b) {
11391131

11401132
define <8 x half> @itofp(<8 x i16> %a, <8 x i16> %b) {
11411133
; CHECK-LABEL: @itofp(
1142-
; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1143-
; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
1144-
; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1145-
; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
1146-
; CHECK-NEXT: [[ABT:%.*]] = sitofp <4 x i16> [[AT]] to <4 x half>
1147-
; CHECK-NEXT: [[ABB:%.*]] = sitofp <4 x i16> [[AB]] to <4 x half>
1148-
; CHECK-NEXT: [[BBT:%.*]] = uitofp <4 x i16> [[BT]] to <4 x half>
1149-
; CHECK-NEXT: [[BBB:%.*]] = uitofp <4 x i16> [[BB]] to <4 x half>
1150-
; CHECK-NEXT: [[MT:%.*]] = fmul <4 x half> [[ABT]], [[BBT]]
1151-
; CHECK-NEXT: [[MB:%.*]] = fmul <4 x half> [[ABB]], [[BBB]]
1152-
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[MT]], <4 x half> [[MB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1134+
; CHECK-NEXT: [[TMP1:%.*]] = sitofp <8 x i16> [[A:%.*]] to <8 x half>
1135+
; CHECK-NEXT: [[TMP2:%.*]] = uitofp <8 x i16> [[B:%.*]] to <8 x half>
1136+
; CHECK-NEXT: [[R:%.*]] = fmul <8 x half> [[TMP1]], [[TMP2]]
11531137
; CHECK-NEXT: ret <8 x half> [[R]]
11541138
;
11551139
%ab = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -1168,37 +1152,13 @@ define <8 x half> @itofp(<8 x i16> %a, <8 x i16> %b) {
11681152

11691153
define <16 x i32> @const_types(<16 x i32> %wide.vec, <16 x i32> %wide.vec116) {
11701154
; CHECK-LABEL: @const_types(
1171-
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC:%.*]], <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
1172-
; CHECK-NEXT: [[STRIDED_VEC113:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
1173-
; CHECK-NEXT: [[STRIDED_VEC114:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
1174-
; CHECK-NEXT: [[STRIDED_VEC115:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
1175-
; CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i32> [[STRIDED_VEC]] to <4 x i64>
1176-
; CHECK-NEXT: [[STRIDED_VEC117:%.*]] = shufflevector <16 x i32> [[WIDE_VEC116:%.*]], <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
1177-
; CHECK-NEXT: [[STRIDED_VEC118:%.*]] = shufflevector <16 x i32> [[WIDE_VEC116]], <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
1178-
; CHECK-NEXT: [[STRIDED_VEC119:%.*]] = shufflevector <16 x i32> [[WIDE_VEC116]], <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
1179-
; CHECK-NEXT: [[STRIDED_VEC120:%.*]] = shufflevector <16 x i32> [[WIDE_VEC116]], <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
1180-
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[STRIDED_VEC117]] to <4 x i64>
1181-
; CHECK-NEXT: [[TMP3:%.*]] = mul nsw <4 x i64> [[TMP2]], [[TMP1]]
1182-
; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i32> [[STRIDED_VEC113]] to <4 x i64>
1183-
; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i32> [[STRIDED_VEC118]] to <4 x i64>
1184-
; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i64> [[TMP5]], [[TMP4]]
1185-
; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i32> [[STRIDED_VEC114]] to <4 x i64>
1186-
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i32> [[STRIDED_VEC119]] to <4 x i64>
1187-
; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i64> [[TMP8]], [[TMP7]]
1188-
; CHECK-NEXT: [[TMP10:%.*]] = sext <4 x i32> [[STRIDED_VEC115]] to <4 x i64>
1189-
; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i32> [[STRIDED_VEC120]] to <4 x i64>
1190-
; CHECK-NEXT: [[TMP12:%.*]] = mul nsw <4 x i64> [[TMP11]], [[TMP10]]
1191-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1192-
; CHECK-NEXT: [[TMP14:%.*]] = lshr <8 x i64> [[TMP13]], splat (i64 32)
1193-
; CHECK-NEXT: [[TMP15:%.*]] = trunc nuw <8 x i64> [[TMP14]] to <8 x i32>
1194-
; CHECK-NEXT: [[TMP16:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP15]], <8 x i32> splat (i32 1073741823))
1195-
; CHECK-NEXT: [[TMP17:%.*]] = shl <8 x i32> [[TMP16]], splat (i32 1)
1196-
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1197-
; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i64> [[TMP18]], splat (i64 32)
1198-
; CHECK-NEXT: [[TMP20:%.*]] = trunc nuw <8 x i64> [[TMP19]] to <8 x i32>
1199-
; CHECK-NEXT: [[TMP21:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP20]], <8 x i32> splat (i32 1073741823))
1200-
; CHECK-NEXT: [[TMP22:%.*]] = shl <8 x i32> [[TMP21]], splat (i32 1)
1201-
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP17]], <8 x i32> [[TMP22]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
1155+
; CHECK-NEXT: [[TMP1:%.*]] = sext <16 x i32> [[WIDE_VEC116:%.*]] to <16 x i64>
1156+
; CHECK-NEXT: [[TMP2:%.*]] = sext <16 x i32> [[WIDE_VEC:%.*]] to <16 x i64>
1157+
; CHECK-NEXT: [[TMP3:%.*]] = mul nsw <16 x i64> [[TMP1]], [[TMP2]]
1158+
; CHECK-NEXT: [[TMP4:%.*]] = lshr <16 x i64> [[TMP3]], splat (i64 32)
1159+
; CHECK-NEXT: [[TMP5:%.*]] = trunc nuw <16 x i64> [[TMP4]] to <16 x i32>
1160+
; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.smin.v16i32(<16 x i32> [[TMP5]], <16 x i32> splat (i32 1073741823))
1161+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shl <16 x i32> [[TMP6]], splat (i32 1)
12021162
; CHECK-NEXT: ret <16 x i32> [[INTERLEAVED_VEC]]
12031163
;
12041164
%strided.vec = shufflevector <16 x i32> %wide.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
@@ -1245,13 +1205,8 @@ define <16 x i32> @const_types(<16 x i32> %wide.vec, <16 x i32> %wide.vec116) {
12451205

12461206
define <32 x half> @cast_types(<32 x i16> %wide.vec) {
12471207
; CHECK-LABEL: @cast_types(
1248-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[WIDE_VEC:%.*]], <32 x i16> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
1249-
; CHECK-NEXT: [[TMP5:%.*]] = sitofp <16 x i16> [[TMP1]] to <16 x half>
1250-
; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <16 x half> [[TMP5]], splat (half 0xH0200)
1251-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i16> [[WIDE_VEC]], <32 x i16> poison, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
1252-
; CHECK-NEXT: [[TMP7:%.*]] = sitofp <16 x i16> [[TMP4]] to <16 x half>
1253-
; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <16 x half> [[TMP7]], splat (half 0xH0200)
1254-
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x half> [[TMP6]], <16 x half> [[TMP8]], <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
1208+
; CHECK-NEXT: [[TMP1:%.*]] = sitofp <32 x i16> [[WIDE_VEC:%.*]] to <32 x half>
1209+
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fmul fast <32 x half> [[TMP1]], splat (half 0xH0200)
12551210
; CHECK-NEXT: ret <32 x half> [[INTERLEAVED_VEC]]
12561211
;
12571212
%strided.vec = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>

0 commit comments

Comments
 (0)