Skip to content

Commit f749901

Browse files
committed
[InstCombine] Avoid moving ops that do restrict undef across shuffles.
I think we have to be a bit more careful when it comes to moving ops across shuffles, if the op does restrict undef. For example, without this patch, we would move 'and %v, <0, 0, -1, -1>' over a 'shufflevector %a, undef, <undef, undef, 1, 2>'. As a result, the first 2 lanes of the result are undef after the combine, but they really should be 0, unless I am missing something. For ops that do fold to undef on undef operands, the current behavior should be fine. I've add conservative check OpDoesRestrictUndef, maybe there's a better existing utility? Reviewers: spatel, RKSimon, lebedev.ri Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D70093
1 parent c5b56ca commit f749901

File tree

2 files changed

+35
-14
lines changed

2 files changed

+35
-14
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1543,11 +1543,13 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
15431543
// If this is a widening shuffle, we must be able to extend with undef
15441544
// elements. If the original binop does not produce an undef in the high
15451545
// lanes, then this transform is not safe.
1546+
// Similarly for undef lanes due to the shuffle mask, we can only
1547+
// transform binops that preserve undef.
15461548
// TODO: We could shuffle those non-undef constant values into the
15471549
// result by using a constant vector (rather than an undef vector)
15481550
// as operand 1 of the new binop, but that might be too aggressive
15491551
// for target-independent shuffle creation.
1550-
if (I >= SrcVecNumElts) {
1552+
if (I >= SrcVecNumElts || ShMask[I] < 0) {
15511553
Constant *MaybeUndef =
15521554
ConstOp1 ? ConstantExpr::get(Opcode, UndefScalar, CElt)
15531555
: ConstantExpr::get(Opcode, CElt, UndefScalar);

llvm/test/Transforms/InstCombine/vec_shuffle.ll

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,10 +1004,13 @@ define <2 x i32> @and_splat_constant(<2 x i32> %x) {
10041004
ret <2 x i32> %r
10051005
}
10061006

1007+
; AND does not fold to undef for undef operands, we cannot move it
1008+
; across a shuffle with undef masks.
10071009
define <4 x i16> @and_constant_mask_undef(<4 x i16> %add) {
10081010
; CHECK-LABEL: @and_constant_mask_undef(
10091011
; CHECK-NEXT: entry:
1010-
; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
1012+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
1013+
; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], <i16 0, i16 0, i16 -1, i16 -1>
10111014
; CHECK-NEXT: ret <4 x i16> [[AND]]
10121015
;
10131016
entry:
@@ -1016,10 +1019,13 @@ entry:
10161019
ret <4 x i16> %and
10171020
}
10181021

1022+
; AND does not fold to undef for undef operands, we cannot move it
1023+
; across a shuffle with undef masks.
10191024
define <4 x i16> @and_constant_mask_undef_2(<4 x i16> %add) {
10201025
; CHECK-LABEL: @and_constant_mask_undef_2(
10211026
; CHECK-NEXT: entry:
1022-
; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 undef>
1027+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 undef>
1028+
; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], <i16 -1, i16 -1, i16 -1, i16 0>
10231029
; CHECK-NEXT: ret <4 x i16> [[AND]]
10241030
;
10251031
entry:
@@ -1028,22 +1034,26 @@ entry:
10281034
ret <4 x i16> %and
10291035
}
10301036

1037+
; FIXME: We should be able to move the AND across the shuffle, as -1 (AND identity value) is used for undef lanes.
10311038
define <4 x i16> @and_constant_mask_undef_3(<4 x i16> %add) {
10321039
; CHECK-LABEL: @and_constant_mask_undef_3(
10331040
; CHECK-NEXT: entry:
1034-
; CHECK-NEXT: ret <4 x i16> <i16 0, i16 0, i16 0, i16 undef>
1041+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 undef>
1042+
; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], <i16 0, i16 0, i16 0, i16 -1>
1043+
; CHECK-NEXT: ret <4 x i16> [[AND]]
10351044
;
10361045
entry:
10371046
%shuffle = shufflevector <4 x i16> %add, <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 undef>
10381047
%and = and <4 x i16> %shuffle, <i16 0, i16 0, i16 0, i16 -1>
10391048
ret <4 x i16> %and
10401049
}
10411050

1051+
; FIXME: We should be able to move the AND across the shuffle, as -1 (AND identity value) is used for undef lanes.
10421052
define <4 x i16> @and_constant_mask_undef_4(<4 x i16> %add) {
10431053
; CHECK-LABEL: @and_constant_mask_undef_4(
10441054
; CHECK-NEXT: entry:
1045-
; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i16> [[ADD:%.*]], <i16 9, i16 20, i16 undef, i16 undef>
1046-
; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 undef>
1055+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 undef>
1056+
; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], <i16 9, i16 20, i16 20, i16 -1>
10471057
; CHECK-NEXT: ret <4 x i16> [[AND]]
10481058
;
10491059
entry:
@@ -1052,7 +1062,6 @@ entry:
10521062
ret <4 x i16> %and
10531063
}
10541064

1055-
10561065
define <4 x i16> @and_constant_mask_not_undef(<4 x i16> %add) {
10571066
; CHECK-LABEL: @and_constant_mask_not_undef(
10581067
; CHECK-NEXT: entry:
@@ -1066,10 +1075,13 @@ entry:
10661075
ret <4 x i16> %and
10671076
}
10681077

1078+
; OR does not fold to undef for undef operands, we cannot move it
1079+
; across a shuffle with undef masks.
10691080
define <4 x i16> @or_constant_mask_undef(<4 x i16> %in) {
10701081
; CHECK-LABEL: @or_constant_mask_undef(
10711082
; CHECK-NEXT: entry:
1072-
; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
1083+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
1084+
; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], <i16 -1, i16 -1, i16 0, i16 0>
10731085
; CHECK-NEXT: ret <4 x i16> [[OR]]
10741086
;
10751087
entry:
@@ -1078,10 +1090,13 @@ entry:
10781090
ret <4 x i16> %or
10791091
}
10801092

1093+
; OR does not fold to undef for undef operands, we cannot move it
1094+
; across a shuffle with undef masks.
10811095
define <4 x i16> @or_constant_mask_undef_2(<4 x i16> %in) {
10821096
; CHECK-LABEL: @or_constant_mask_undef_2(
10831097
; CHECK-NEXT: entry:
1084-
; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
1098+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
1099+
; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], <i16 -1, i16 0, i16 0, i16 -1>
10851100
; CHECK-NEXT: ret <4 x i16> [[OR]]
10861101
;
10871102
entry:
@@ -1090,22 +1105,26 @@ entry:
10901105
ret <4 x i16> %or
10911106
}
10921107

1108+
; FIXME: We should be able to move the OR across the shuffle, as 0 (OR identity value) is used for undef lanes.
10931109
define <4 x i16> @or_constant_mask_undef_3(<4 x i16> %in) {
10941110
; CHECK-LABEL: @or_constant_mask_undef_3(
10951111
; CHECK-NEXT: entry:
1096-
; CHECK-NEXT: ret <4 x i16> <i16 undef, i16 -1, i16 -1, i16 undef>
1112+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
1113+
; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], <i16 0, i16 -1, i16 -1, i16 0>
1114+
; CHECK-NEXT: ret <4 x i16> [[OR]]
10971115
;
10981116
entry:
10991117
%shuffle = shufflevector <4 x i16> %in, <4 x i16> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
11001118
%or = or <4 x i16> %shuffle, <i16 0, i16 -1, i16 -1, i16 0>
11011119
ret <4 x i16> %or
11021120
}
11031121

1122+
; FIXME: We should be able to move the OR across the shuffle, as 0 (OR identity value) is used for undef lanes.
11041123
define <4 x i16> @or_constant_mask_undef_4(<4 x i16> %in) {
11051124
; CHECK-LABEL: @or_constant_mask_undef_4(
11061125
; CHECK-NEXT: entry:
1107-
; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i16> [[IN:%.*]], <i16 undef, i16 99, i16 undef, i16 undef>
1108-
; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
1126+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
1127+
; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], <i16 0, i16 99, i16 99, i16 0>
11091128
; CHECK-NEXT: ret <4 x i16> [[OR]]
11101129
;
11111130
entry:
@@ -1130,8 +1149,8 @@ entry:
11301149
define <4 x i16> @shl_constant_mask_undef(<4 x i16> %in) {
11311150
; CHECK-LABEL: @shl_constant_mask_undef(
11321151
; CHECK-NEXT: entry:
1133-
; CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i16> [[IN:%.*]], <i16 10, i16 0, i16 0, i16 0>
1134-
; CHECK-NEXT: [[SHL:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 0, i32 undef, i32 1, i32 1>
1152+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 0, i32 undef, i32 1, i32 1>
1153+
; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i16> [[SHUFFLE]], <i16 10, i16 3, i16 0, i16 0>
11351154
; CHECK-NEXT: ret <4 x i16> [[SHL]]
11361155
;
11371156
entry:

0 commit comments

Comments
 (0)