Skip to content

Commit 1ad74c5

Browse files
committed
[AArch64][SDAG] Detect non-zeroes in truncating buildvectors in fshl lowering
A BUILD_VECTOR can implicitly truncate its operands when the operand types are not legal. For example, a v8i16 constant BUILD_VECTOR might be represented as v8i16 BUILD_VECTOR(i32 1, i32 2, ...). Unfortunately, this means the constants are not accepted by matchUnaryPredicateImpl, which in this case prevents the funnel-shift lowering from detecting that all the operands are non-zero. Add an AllowTrunc flag so that such constants can be matched.
1 parent 27a2d3d commit 1ad74c5

File tree

6 files changed

+26
-39
lines changed

6 files changed

+26
-39
lines changed

llvm/include/llvm/CodeGen/SelectionDAGNodes.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3261,13 +3261,16 @@ namespace ISD {
32613261
template <typename ConstNodeType>
32623262
bool matchUnaryPredicateImpl(SDValue Op,
32633263
std::function<bool(ConstNodeType *)> Match,
3264-
bool AllowUndefs = false);
3264+
bool AllowUndefs = false,
3265+
bool AllowTrunc = false);
32653266

32663267
/// Hook for matching ConstantSDNode predicate
32673268
inline bool matchUnaryPredicate(SDValue Op,
32683269
std::function<bool(ConstantSDNode *)> Match,
3269-
bool AllowUndefs = false) {
3270-
return matchUnaryPredicateImpl<ConstantSDNode>(Op, Match, AllowUndefs);
3270+
bool AllowUndefs = false,
3271+
bool AllowTrunc = false) {
3272+
return matchUnaryPredicateImpl<ConstantSDNode>(Op, Match, AllowUndefs,
3273+
AllowTrunc);
32713274
}
32723275

32733276
/// Hook for matching ConstantFPSDNode predicate

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ bool ISD::isFreezeUndef(const SDNode *N) {
363363
template <typename ConstNodeType>
364364
bool ISD::matchUnaryPredicateImpl(SDValue Op,
365365
std::function<bool(ConstNodeType *)> Match,
366-
bool AllowUndefs) {
366+
bool AllowUndefs, bool AllowTrunc) {
367367
// FIXME: Add support for scalar UNDEF cases?
368368
if (auto *C = dyn_cast<ConstNodeType>(Op))
369369
return Match(C);
@@ -382,16 +382,16 @@ bool ISD::matchUnaryPredicateImpl(SDValue Op,
382382
}
383383

384384
auto *Cst = dyn_cast<ConstNodeType>(Op.getOperand(i));
385-
if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
385+
if (!Cst || (!AllowTrunc && Cst->getValueType(0) != SVT) || !Match(Cst))
386386
return false;
387387
}
388388
return true;
389389
}
390390
// Build used template types.
391391
template bool ISD::matchUnaryPredicateImpl<ConstantSDNode>(
392-
SDValue, std::function<bool(ConstantSDNode *)>, bool);
392+
SDValue, std::function<bool(ConstantSDNode *)>, bool, bool);
393393
template bool ISD::matchUnaryPredicateImpl<ConstantFPSDNode>(
394-
SDValue, std::function<bool(ConstantFPSDNode *)>, bool);
394+
SDValue, std::function<bool(ConstantFPSDNode *)>, bool, bool);
395395

396396
bool ISD::matchBinaryPredicate(
397397
SDValue LHS, SDValue RHS,

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7970,7 +7970,7 @@ static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
79707970
return ISD::matchUnaryPredicate(
79717971
Z,
79727972
[=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7973-
true);
7973+
true, true);
79747974
}
79757975

79767976
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {

llvm/test/CodeGen/AArch64/fsh.ll

Lines changed: 13 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3909,9 +3909,8 @@ entry:
39093909
define <8 x i8> @fshl_v8i8_c(<8 x i8> %a, <8 x i8> %b) {
39103910
; CHECK-SD-LABEL: fshl_v8i8_c:
39113911
; CHECK-SD: // %bb.0: // %entry
3912-
; CHECK-SD-NEXT: ushr v1.8b, v1.8b, #1
39133912
; CHECK-SD-NEXT: shl v0.8b, v0.8b, #3
3914-
; CHECK-SD-NEXT: usra v0.8b, v1.8b, #4
3913+
; CHECK-SD-NEXT: usra v0.8b, v1.8b, #5
39153914
; CHECK-SD-NEXT: ret
39163915
;
39173916
; CHECK-GI-LABEL: fshl_v8i8_c:
@@ -3928,8 +3927,7 @@ entry:
39283927
define <8 x i8> @fshr_v8i8_c(<8 x i8> %a, <8 x i8> %b) {
39293928
; CHECK-SD-LABEL: fshr_v8i8_c:
39303929
; CHECK-SD: // %bb.0: // %entry
3931-
; CHECK-SD-NEXT: add v0.8b, v0.8b, v0.8b
3932-
; CHECK-SD-NEXT: shl v0.8b, v0.8b, #4
3930+
; CHECK-SD-NEXT: shl v0.8b, v0.8b, #5
39333931
; CHECK-SD-NEXT: usra v0.8b, v1.8b, #3
39343932
; CHECK-SD-NEXT: ret
39353933
;
@@ -3947,9 +3945,8 @@ entry:
39473945
define <16 x i8> @fshl_v16i8_c(<16 x i8> %a, <16 x i8> %b) {
39483946
; CHECK-SD-LABEL: fshl_v16i8_c:
39493947
; CHECK-SD: // %bb.0: // %entry
3950-
; CHECK-SD-NEXT: ushr v1.16b, v1.16b, #1
39513948
; CHECK-SD-NEXT: shl v0.16b, v0.16b, #3
3952-
; CHECK-SD-NEXT: usra v0.16b, v1.16b, #4
3949+
; CHECK-SD-NEXT: usra v0.16b, v1.16b, #5
39533950
; CHECK-SD-NEXT: ret
39543951
;
39553952
; CHECK-GI-LABEL: fshl_v16i8_c:
@@ -3966,8 +3963,7 @@ entry:
39663963
define <16 x i8> @fshr_v16i8_c(<16 x i8> %a, <16 x i8> %b) {
39673964
; CHECK-SD-LABEL: fshr_v16i8_c:
39683965
; CHECK-SD: // %bb.0: // %entry
3969-
; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b
3970-
; CHECK-SD-NEXT: shl v0.16b, v0.16b, #4
3966+
; CHECK-SD-NEXT: shl v0.16b, v0.16b, #5
39713967
; CHECK-SD-NEXT: usra v0.16b, v1.16b, #3
39723968
; CHECK-SD-NEXT: ret
39733969
;
@@ -3985,9 +3981,8 @@ entry:
39853981
define <4 x i16> @fshl_v4i16_c(<4 x i16> %a, <4 x i16> %b) {
39863982
; CHECK-SD-LABEL: fshl_v4i16_c:
39873983
; CHECK-SD: // %bb.0: // %entry
3988-
; CHECK-SD-NEXT: ushr v1.4h, v1.4h, #1
39893984
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #3
3990-
; CHECK-SD-NEXT: usra v0.4h, v1.4h, #12
3985+
; CHECK-SD-NEXT: usra v0.4h, v1.4h, #13
39913986
; CHECK-SD-NEXT: ret
39923987
;
39933988
; CHECK-GI-LABEL: fshl_v4i16_c:
@@ -4004,8 +3999,7 @@ entry:
40043999
define <4 x i16> @fshr_v4i16_c(<4 x i16> %a, <4 x i16> %b) {
40054000
; CHECK-SD-LABEL: fshr_v4i16_c:
40064001
; CHECK-SD: // %bb.0: // %entry
4007-
; CHECK-SD-NEXT: add v0.4h, v0.4h, v0.4h
4008-
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #12
4002+
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #13
40094003
; CHECK-SD-NEXT: usra v0.4h, v1.4h, #3
40104004
; CHECK-SD-NEXT: ret
40114005
;
@@ -4024,7 +4018,6 @@ define <7 x i16> @fshl_v7i16_c(<7 x i16> %a, <7 x i16> %b) {
40244018
; CHECK-SD-LABEL: fshl_v7i16_c:
40254019
; CHECK-SD: // %bb.0: // %entry
40264020
; CHECK-SD-NEXT: adrp x8, .LCPI124_0
4027-
; CHECK-SD-NEXT: ushr v1.8h, v1.8h, #1
40284021
; CHECK-SD-NEXT: adrp x9, .LCPI124_1
40294022
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI124_0]
40304023
; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI124_1]
@@ -4066,7 +4059,6 @@ define <7 x i16> @fshr_v7i16_c(<7 x i16> %a, <7 x i16> %b) {
40664059
; CHECK-SD: // %bb.0: // %entry
40674060
; CHECK-SD-NEXT: adrp x8, .LCPI125_0
40684061
; CHECK-SD-NEXT: adrp x9, .LCPI125_1
4069-
; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h
40704062
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI125_0]
40714063
; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI125_1]
40724064
; CHECK-SD-NEXT: ushl v1.8h, v1.8h, v2.8h
@@ -4105,9 +4097,8 @@ entry:
41054097
define <8 x i16> @fshl_v8i16_c(<8 x i16> %a, <8 x i16> %b) {
41064098
; CHECK-SD-LABEL: fshl_v8i16_c:
41074099
; CHECK-SD: // %bb.0: // %entry
4108-
; CHECK-SD-NEXT: ushr v1.8h, v1.8h, #1
41094100
; CHECK-SD-NEXT: shl v0.8h, v0.8h, #3
4110-
; CHECK-SD-NEXT: usra v0.8h, v1.8h, #12
4101+
; CHECK-SD-NEXT: usra v0.8h, v1.8h, #13
41114102
; CHECK-SD-NEXT: ret
41124103
;
41134104
; CHECK-GI-LABEL: fshl_v8i16_c:
@@ -4124,8 +4115,7 @@ entry:
41244115
define <8 x i16> @fshr_v8i16_c(<8 x i16> %a, <8 x i16> %b) {
41254116
; CHECK-SD-LABEL: fshr_v8i16_c:
41264117
; CHECK-SD: // %bb.0: // %entry
4127-
; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h
4128-
; CHECK-SD-NEXT: shl v0.8h, v0.8h, #12
4118+
; CHECK-SD-NEXT: shl v0.8h, v0.8h, #13
41294119
; CHECK-SD-NEXT: usra v0.8h, v1.8h, #3
41304120
; CHECK-SD-NEXT: ret
41314121
;
@@ -4143,12 +4133,10 @@ entry:
41434133
define <16 x i16> @fshl_v16i16_c(<16 x i16> %a, <16 x i16> %b) {
41444134
; CHECK-SD-LABEL: fshl_v16i16_c:
41454135
; CHECK-SD: // %bb.0: // %entry
4146-
; CHECK-SD-NEXT: ushr v2.8h, v2.8h, #1
4147-
; CHECK-SD-NEXT: shl v0.8h, v0.8h, #3
4148-
; CHECK-SD-NEXT: ushr v3.8h, v3.8h, #1
41494136
; CHECK-SD-NEXT: shl v1.8h, v1.8h, #3
4150-
; CHECK-SD-NEXT: usra v0.8h, v2.8h, #12
4151-
; CHECK-SD-NEXT: usra v1.8h, v3.8h, #12
4137+
; CHECK-SD-NEXT: shl v0.8h, v0.8h, #3
4138+
; CHECK-SD-NEXT: usra v1.8h, v3.8h, #13
4139+
; CHECK-SD-NEXT: usra v0.8h, v2.8h, #13
41524140
; CHECK-SD-NEXT: ret
41534141
;
41544142
; CHECK-GI-LABEL: fshl_v16i16_c:
@@ -4168,10 +4156,8 @@ entry:
41684156
define <16 x i16> @fshr_v16i16_c(<16 x i16> %a, <16 x i16> %b) {
41694157
; CHECK-SD-LABEL: fshr_v16i16_c:
41704158
; CHECK-SD: // %bb.0: // %entry
4171-
; CHECK-SD-NEXT: add v1.8h, v1.8h, v1.8h
4172-
; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h
4173-
; CHECK-SD-NEXT: shl v1.8h, v1.8h, #12
4174-
; CHECK-SD-NEXT: shl v0.8h, v0.8h, #12
4159+
; CHECK-SD-NEXT: shl v1.8h, v1.8h, #13
4160+
; CHECK-SD-NEXT: shl v0.8h, v0.8h, #13
41754161
; CHECK-SD-NEXT: usra v1.8h, v3.8h, #3
41764162
; CHECK-SD-NEXT: usra v0.8h, v2.8h, #3
41774163
; CHECK-SD-NEXT: ret

llvm/test/CodeGen/AArch64/smul_fix.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,7 @@ define <4 x i16> @widemul(<4 x i16> %x, <4 x i16> %y) nounwind {
144144
; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
145145
; CHECK-NEXT: shrn v1.4h, v0.4s, #16
146146
; CHECK-NEXT: xtn v2.4h, v0.4s
147-
; CHECK-NEXT: add v1.4h, v1.4h, v1.4h
148-
; CHECK-NEXT: shl v0.4h, v1.4h, #13
147+
; CHECK-NEXT: shl v0.4h, v1.4h, #14
149148
; CHECK-NEXT: usra v0.4h, v2.4h, #2
150149
; CHECK-NEXT: ret
151150
%tmp = call <4 x i16> @llvm.smul.fix.v4i16(<4 x i16> %x, <4 x i16> %y, i32 2)

llvm/test/CodeGen/AArch64/umul_fix.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,7 @@ define <4 x i16> @widemul(<4 x i16> %x, <4 x i16> %y) nounwind {
152152
; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
153153
; CHECK-NEXT: shrn v1.4h, v0.4s, #16
154154
; CHECK-NEXT: xtn v2.4h, v0.4s
155-
; CHECK-NEXT: add v1.4h, v1.4h, v1.4h
156-
; CHECK-NEXT: shl v0.4h, v1.4h, #11
155+
; CHECK-NEXT: shl v0.4h, v1.4h, #12
157156
; CHECK-NEXT: usra v0.4h, v2.4h, #4
158157
; CHECK-NEXT: ret
159158
%tmp = call <4 x i16> @llvm.umul.fix.v4i16(<4 x i16> %x, <4 x i16> %y, i32 4)

0 commit comments

Comments
 (0)