Skip to content

Commit cae0d67

Browse files
authored
[AArch64][SDAG] Detect non-zeroes in truncating buildvectors in fshl lowering (#123597)
A BUILD_VECTOR can implicitly shrink the bits of its operands if the operand types are not legal. For example, a v8i16 constant BUILD_VECTOR might be represented as v8i16 BUILD_VECTOR(i32 1, i32 2, ...). Unfortunately, this means that the constants are not accepted by matchUnaryPredicateImpl, which in this case prevents the funnel shift lowering from detecting that all the operands are non-zero. Add a flag to help it match.
1 parent 2dc17fd commit cae0d67

File tree

6 files changed

+27
-39
lines changed

6 files changed

+27
-39
lines changed

llvm/include/llvm/CodeGen/SelectionDAGNodes.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3261,13 +3261,16 @@ namespace ISD {
32613261
template <typename ConstNodeType>
32623262
bool matchUnaryPredicateImpl(SDValue Op,
32633263
std::function<bool(ConstNodeType *)> Match,
3264-
bool AllowUndefs = false);
3264+
bool AllowUndefs = false,
3265+
bool AllowTruncation = false);
32653266

32663267
/// Hook for matching ConstantSDNode predicate
32673268
inline bool matchUnaryPredicate(SDValue Op,
32683269
std::function<bool(ConstantSDNode *)> Match,
3269-
bool AllowUndefs = false) {
3270-
return matchUnaryPredicateImpl<ConstantSDNode>(Op, Match, AllowUndefs);
3270+
bool AllowUndefs = false,
3271+
bool AllowTruncation = false) {
3272+
return matchUnaryPredicateImpl<ConstantSDNode>(Op, Match, AllowUndefs,
3273+
AllowTruncation);
32713274
}
32723275

32733276
/// Hook for matching ConstantFPSDNode predicate

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ bool ISD::isFreezeUndef(const SDNode *N) {
363363
template <typename ConstNodeType>
364364
bool ISD::matchUnaryPredicateImpl(SDValue Op,
365365
std::function<bool(ConstNodeType *)> Match,
366-
bool AllowUndefs) {
366+
bool AllowUndefs, bool AllowTruncation) {
367367
// FIXME: Add support for scalar UNDEF cases?
368368
if (auto *C = dyn_cast<ConstNodeType>(Op))
369369
return Match(C);
@@ -382,16 +382,17 @@ bool ISD::matchUnaryPredicateImpl(SDValue Op,
382382
}
383383

384384
auto *Cst = dyn_cast<ConstNodeType>(Op.getOperand(i));
385-
if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
385+
if (!Cst || (!AllowTruncation && Cst->getValueType(0) != SVT) ||
386+
!Match(Cst))
386387
return false;
387388
}
388389
return true;
389390
}
390391
// Build used template types.
391392
template bool ISD::matchUnaryPredicateImpl<ConstantSDNode>(
392-
SDValue, std::function<bool(ConstantSDNode *)>, bool);
393+
SDValue, std::function<bool(ConstantSDNode *)>, bool, bool);
393394
template bool ISD::matchUnaryPredicateImpl<ConstantFPSDNode>(
394-
SDValue, std::function<bool(ConstantFPSDNode *)>, bool);
395+
SDValue, std::function<bool(ConstantFPSDNode *)>, bool, bool);
395396

396397
bool ISD::matchBinaryPredicate(
397398
SDValue LHS, SDValue RHS,

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7971,7 +7971,7 @@ static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
79717971
return ISD::matchUnaryPredicate(
79727972
Z,
79737973
[=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7974-
true);
7974+
/*AllowUndef=*/true, /*AllowTruncation=*/true);
79757975
}
79767976

79777977
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {

llvm/test/CodeGen/AArch64/fsh.ll

Lines changed: 13 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3909,9 +3909,8 @@ entry:
39093909
define <8 x i8> @fshl_v8i8_c(<8 x i8> %a, <8 x i8> %b) {
39103910
; CHECK-SD-LABEL: fshl_v8i8_c:
39113911
; CHECK-SD: // %bb.0: // %entry
3912-
; CHECK-SD-NEXT: ushr v1.8b, v1.8b, #1
39133912
; CHECK-SD-NEXT: shl v0.8b, v0.8b, #3
3914-
; CHECK-SD-NEXT: usra v0.8b, v1.8b, #4
3913+
; CHECK-SD-NEXT: usra v0.8b, v1.8b, #5
39153914
; CHECK-SD-NEXT: ret
39163915
;
39173916
; CHECK-GI-LABEL: fshl_v8i8_c:
@@ -3927,8 +3926,7 @@ entry:
39273926
define <8 x i8> @fshr_v8i8_c(<8 x i8> %a, <8 x i8> %b) {
39283927
; CHECK-SD-LABEL: fshr_v8i8_c:
39293928
; CHECK-SD: // %bb.0: // %entry
3930-
; CHECK-SD-NEXT: add v0.8b, v0.8b, v0.8b
3931-
; CHECK-SD-NEXT: shl v0.8b, v0.8b, #4
3929+
; CHECK-SD-NEXT: shl v0.8b, v0.8b, #5
39323930
; CHECK-SD-NEXT: usra v0.8b, v1.8b, #3
39333931
; CHECK-SD-NEXT: ret
39343932
;
@@ -3945,9 +3943,8 @@ entry:
39453943
define <16 x i8> @fshl_v16i8_c(<16 x i8> %a, <16 x i8> %b) {
39463944
; CHECK-SD-LABEL: fshl_v16i8_c:
39473945
; CHECK-SD: // %bb.0: // %entry
3948-
; CHECK-SD-NEXT: ushr v1.16b, v1.16b, #1
39493946
; CHECK-SD-NEXT: shl v0.16b, v0.16b, #3
3950-
; CHECK-SD-NEXT: usra v0.16b, v1.16b, #4
3947+
; CHECK-SD-NEXT: usra v0.16b, v1.16b, #5
39513948
; CHECK-SD-NEXT: ret
39523949
;
39533950
; CHECK-GI-LABEL: fshl_v16i8_c:
@@ -3963,8 +3960,7 @@ entry:
39633960
define <16 x i8> @fshr_v16i8_c(<16 x i8> %a, <16 x i8> %b) {
39643961
; CHECK-SD-LABEL: fshr_v16i8_c:
39653962
; CHECK-SD: // %bb.0: // %entry
3966-
; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b
3967-
; CHECK-SD-NEXT: shl v0.16b, v0.16b, #4
3963+
; CHECK-SD-NEXT: shl v0.16b, v0.16b, #5
39683964
; CHECK-SD-NEXT: usra v0.16b, v1.16b, #3
39693965
; CHECK-SD-NEXT: ret
39703966
;
@@ -3981,9 +3977,8 @@ entry:
39813977
define <4 x i16> @fshl_v4i16_c(<4 x i16> %a, <4 x i16> %b) {
39823978
; CHECK-SD-LABEL: fshl_v4i16_c:
39833979
; CHECK-SD: // %bb.0: // %entry
3984-
; CHECK-SD-NEXT: ushr v1.4h, v1.4h, #1
39853980
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #3
3986-
; CHECK-SD-NEXT: usra v0.4h, v1.4h, #12
3981+
; CHECK-SD-NEXT: usra v0.4h, v1.4h, #13
39873982
; CHECK-SD-NEXT: ret
39883983
;
39893984
; CHECK-GI-LABEL: fshl_v4i16_c:
@@ -3999,8 +3994,7 @@ entry:
39993994
define <4 x i16> @fshr_v4i16_c(<4 x i16> %a, <4 x i16> %b) {
40003995
; CHECK-SD-LABEL: fshr_v4i16_c:
40013996
; CHECK-SD: // %bb.0: // %entry
4002-
; CHECK-SD-NEXT: add v0.4h, v0.4h, v0.4h
4003-
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #12
3997+
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #13
40043998
; CHECK-SD-NEXT: usra v0.4h, v1.4h, #3
40053999
; CHECK-SD-NEXT: ret
40064000
;
@@ -4018,7 +4012,6 @@ define <7 x i16> @fshl_v7i16_c(<7 x i16> %a, <7 x i16> %b) {
40184012
; CHECK-SD-LABEL: fshl_v7i16_c:
40194013
; CHECK-SD: // %bb.0: // %entry
40204014
; CHECK-SD-NEXT: adrp x8, .LCPI124_0
4021-
; CHECK-SD-NEXT: ushr v1.8h, v1.8h, #1
40224015
; CHECK-SD-NEXT: adrp x9, .LCPI124_1
40234016
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI124_0]
40244017
; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI124_1]
@@ -4060,7 +4053,6 @@ define <7 x i16> @fshr_v7i16_c(<7 x i16> %a, <7 x i16> %b) {
40604053
; CHECK-SD: // %bb.0: // %entry
40614054
; CHECK-SD-NEXT: adrp x8, .LCPI125_0
40624055
; CHECK-SD-NEXT: adrp x9, .LCPI125_1
4063-
; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h
40644056
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI125_0]
40654057
; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI125_1]
40664058
; CHECK-SD-NEXT: ushl v1.8h, v1.8h, v2.8h
@@ -4099,9 +4091,8 @@ entry:
40994091
define <8 x i16> @fshl_v8i16_c(<8 x i16> %a, <8 x i16> %b) {
41004092
; CHECK-SD-LABEL: fshl_v8i16_c:
41014093
; CHECK-SD: // %bb.0: // %entry
4102-
; CHECK-SD-NEXT: ushr v1.8h, v1.8h, #1
41034094
; CHECK-SD-NEXT: shl v0.8h, v0.8h, #3
4104-
; CHECK-SD-NEXT: usra v0.8h, v1.8h, #12
4095+
; CHECK-SD-NEXT: usra v0.8h, v1.8h, #13
41054096
; CHECK-SD-NEXT: ret
41064097
;
41074098
; CHECK-GI-LABEL: fshl_v8i16_c:
@@ -4117,8 +4108,7 @@ entry:
41174108
define <8 x i16> @fshr_v8i16_c(<8 x i16> %a, <8 x i16> %b) {
41184109
; CHECK-SD-LABEL: fshr_v8i16_c:
41194110
; CHECK-SD: // %bb.0: // %entry
4120-
; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h
4121-
; CHECK-SD-NEXT: shl v0.8h, v0.8h, #12
4111+
; CHECK-SD-NEXT: shl v0.8h, v0.8h, #13
41224112
; CHECK-SD-NEXT: usra v0.8h, v1.8h, #3
41234113
; CHECK-SD-NEXT: ret
41244114
;
@@ -4135,12 +4125,10 @@ entry:
41354125
define <16 x i16> @fshl_v16i16_c(<16 x i16> %a, <16 x i16> %b) {
41364126
; CHECK-SD-LABEL: fshl_v16i16_c:
41374127
; CHECK-SD: // %bb.0: // %entry
4138-
; CHECK-SD-NEXT: ushr v2.8h, v2.8h, #1
4139-
; CHECK-SD-NEXT: shl v0.8h, v0.8h, #3
4140-
; CHECK-SD-NEXT: ushr v3.8h, v3.8h, #1
41414128
; CHECK-SD-NEXT: shl v1.8h, v1.8h, #3
4142-
; CHECK-SD-NEXT: usra v0.8h, v2.8h, #12
4143-
; CHECK-SD-NEXT: usra v1.8h, v3.8h, #12
4129+
; CHECK-SD-NEXT: shl v0.8h, v0.8h, #3
4130+
; CHECK-SD-NEXT: usra v1.8h, v3.8h, #13
4131+
; CHECK-SD-NEXT: usra v0.8h, v2.8h, #13
41444132
; CHECK-SD-NEXT: ret
41454133
;
41464134
; CHECK-GI-LABEL: fshl_v16i16_c:
@@ -4158,10 +4146,8 @@ entry:
41584146
define <16 x i16> @fshr_v16i16_c(<16 x i16> %a, <16 x i16> %b) {
41594147
; CHECK-SD-LABEL: fshr_v16i16_c:
41604148
; CHECK-SD: // %bb.0: // %entry
4161-
; CHECK-SD-NEXT: add v1.8h, v1.8h, v1.8h
4162-
; CHECK-SD-NEXT: add v0.8h, v0.8h, v0.8h
4163-
; CHECK-SD-NEXT: shl v1.8h, v1.8h, #12
4164-
; CHECK-SD-NEXT: shl v0.8h, v0.8h, #12
4149+
; CHECK-SD-NEXT: shl v1.8h, v1.8h, #13
4150+
; CHECK-SD-NEXT: shl v0.8h, v0.8h, #13
41654151
; CHECK-SD-NEXT: usra v1.8h, v3.8h, #3
41664152
; CHECK-SD-NEXT: usra v0.8h, v2.8h, #3
41674153
; CHECK-SD-NEXT: ret

llvm/test/CodeGen/AArch64/smul_fix.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,7 @@ define <4 x i16> @widemul(<4 x i16> %x, <4 x i16> %y) nounwind {
144144
; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
145145
; CHECK-NEXT: shrn v1.4h, v0.4s, #16
146146
; CHECK-NEXT: xtn v2.4h, v0.4s
147-
; CHECK-NEXT: add v1.4h, v1.4h, v1.4h
148-
; CHECK-NEXT: shl v0.4h, v1.4h, #13
147+
; CHECK-NEXT: shl v0.4h, v1.4h, #14
149148
; CHECK-NEXT: usra v0.4h, v2.4h, #2
150149
; CHECK-NEXT: ret
151150
%tmp = call <4 x i16> @llvm.smul.fix.v4i16(<4 x i16> %x, <4 x i16> %y, i32 2)

llvm/test/CodeGen/AArch64/umul_fix.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,7 @@ define <4 x i16> @widemul(<4 x i16> %x, <4 x i16> %y) nounwind {
152152
; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
153153
; CHECK-NEXT: shrn v1.4h, v0.4s, #16
154154
; CHECK-NEXT: xtn v2.4h, v0.4s
155-
; CHECK-NEXT: add v1.4h, v1.4h, v1.4h
156-
; CHECK-NEXT: shl v0.4h, v1.4h, #11
155+
; CHECK-NEXT: shl v0.4h, v1.4h, #12
157156
; CHECK-NEXT: usra v0.4h, v2.4h, #4
158157
; CHECK-NEXT: ret
159158
%tmp = call <4 x i16> @llvm.umul.fix.v4i16(<4 x i16> %x, <4 x i16> %y, i32 4)

0 commit comments

Comments
 (0)