Skip to content

Commit 33fe899

Browse files
committed
[DAG][AArch64] Limit preferIncOfAddToSubOfNot until after legalization if the node has wrap flags
If the add node has wrap flags then they will be destroyed by converting to sub/not. The flags can be useful in converting to rhadd, for example, but they may only be needed late, if the node types first need to be legalized. This delays the preferIncOfAddToSubOfNot fold until after legalize DAG if the node has wrap flags, to allow more folding. Differential Revision: https://reviews.llvm.org/D148809
1 parent 7f00ecd commit 33fe899

File tree

3 files changed

+53
-37
lines changed

3 files changed

+53
-37
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2825,7 +2825,10 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
28252825
// And if the target does not like this form then turn into:
28262826
// sub y, (xor x, -1)
28272827
if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2828-
N0.hasOneUse()) {
2828+
N0.hasOneUse() &&
2829+
// Limit this to after legalization if the add has wrap flags
2830+
(Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2831+
!N->getFlags().hasNoSignedWrap()))) {
28292832
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
28302833
DAG.getAllOnesConstant(DL, VT));
28312834
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
@@ -3041,7 +3044,10 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
30413044
// And if the target does not like this form then turn into:
30423045
// sub y, (xor x, -1)
30433046
if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3044-
N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1))) {
3047+
N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3048+
// Limit this to after legalization if the add has wrap flags
3049+
(Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3050+
!N0->getFlags().hasNoSignedWrap()))) {
30453051
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
30463052
DAG.getAllOnesConstant(DL, VT));
30473053
return DAG.getNode(ISD::SUB, DL, VT, N1, Not);

llvm/test/CodeGen/AArch64/arm64-vhadd.ll

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -968,10 +968,10 @@ define <4 x i16> @rhadd8_sext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
968968
; CHECK: // %bb.0:
969969
; CHECK-NEXT: shl.4h v0, v0, #8
970970
; CHECK-NEXT: shl.4h v1, v1, #8
971+
; CHECK-NEXT: movi.4h v2, #1
971972
; CHECK-NEXT: sshr.4h v0, v0, #8
972-
; CHECK-NEXT: sshr.4h v1, v1, #8
973-
; CHECK-NEXT: mvn.8b v0, v0
974-
; CHECK-NEXT: sub.4h v0, v1, v0
973+
; CHECK-NEXT: ssra.4h v0, v1, #8
974+
; CHECK-NEXT: add.4h v0, v0, v2
975975
; CHECK-NEXT: ushr.4h v0, v0, #1
976976
; CHECK-NEXT: ret
977977
%zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
@@ -1004,9 +1004,7 @@ define <2 x i16> @rhadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
10041004
; CHECK-NEXT: shl.2s v1, v1, #24
10051005
; CHECK-NEXT: sshr.2s v0, v0, #24
10061006
; CHECK-NEXT: sshr.2s v1, v1, #24
1007-
; CHECK-NEXT: mvn.8b v0, v0
1008-
; CHECK-NEXT: sub.2s v0, v1, v0
1009-
; CHECK-NEXT: sshr.2s v0, v0, #1
1007+
; CHECK-NEXT: srhadd.2s v0, v0, v1
10101008
; CHECK-NEXT: ret
10111009
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
10121010
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
@@ -1022,9 +1020,7 @@ define <2 x i16> @rhadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) {
10221020
; CHECK-NEXT: movi d2, #0x0000ff000000ff
10231021
; CHECK-NEXT: and.8b v0, v0, v2
10241022
; CHECK-NEXT: and.8b v1, v1, v2
1025-
; CHECK-NEXT: mvn.8b v0, v0
1026-
; CHECK-NEXT: sub.2s v0, v1, v0
1027-
; CHECK-NEXT: ushr.2s v0, v0, #1
1023+
; CHECK-NEXT: urhadd.2s v0, v0, v1
10281024
; CHECK-NEXT: ret
10291025
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
10301026
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
@@ -1039,12 +1035,12 @@ define <2 x i16> @rhadd8x2_sext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
10391035
; CHECK: // %bb.0:
10401036
; CHECK-NEXT: shl.2s v0, v0, #24
10411037
; CHECK-NEXT: shl.2s v1, v1, #24
1042-
; CHECK-NEXT: movi d2, #0x00ffff0000ffff
1038+
; CHECK-NEXT: movi.2s v2, #1
10431039
; CHECK-NEXT: sshr.2s v0, v0, #24
1044-
; CHECK-NEXT: sshr.2s v1, v1, #24
1045-
; CHECK-NEXT: mvn.8b v0, v0
1046-
; CHECK-NEXT: sub.2s v0, v1, v0
1047-
; CHECK-NEXT: and.8b v0, v0, v2
1040+
; CHECK-NEXT: ssra.2s v0, v1, #24
1041+
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
1042+
; CHECK-NEXT: add.2s v0, v0, v2
1043+
; CHECK-NEXT: and.8b v0, v0, v1
10481044
; CHECK-NEXT: ushr.2s v0, v0, #1
10491045
; CHECK-NEXT: ret
10501046
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
@@ -1061,9 +1057,7 @@ define <2 x i16> @rhadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
10611057
; CHECK-NEXT: movi d2, #0x0000ff000000ff
10621058
; CHECK-NEXT: and.8b v0, v0, v2
10631059
; CHECK-NEXT: and.8b v1, v1, v2
1064-
; CHECK-NEXT: mvn.8b v0, v0
1065-
; CHECK-NEXT: sub.2s v0, v1, v0
1066-
; CHECK-NEXT: ushr.2s v0, v0, #1
1060+
; CHECK-NEXT: urhadd.2s v0, v0, v1
10671061
; CHECK-NEXT: ret
10681062
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
10691063
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>

llvm/test/CodeGen/AArch64/sve-hadd.ll

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -916,15 +916,23 @@ entry:
916916
}
917917

918918
define <vscale x 2 x i16> @rhaddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
919-
; CHECK-LABEL: rhaddu_v2i16:
920-
; CHECK: // %bb.0: // %entry
921-
; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
922-
; CHECK-NEXT: and z0.d, z0.d, #0xffff
923-
; CHECK-NEXT: and z1.d, z1.d, #0xffff
924-
; CHECK-NEXT: eor z0.d, z0.d, z2.d
925-
; CHECK-NEXT: sub z0.d, z1.d, z0.d
926-
; CHECK-NEXT: lsr z0.d, z0.d, #1
927-
; CHECK-NEXT: ret
919+
; SVE-LABEL: rhaddu_v2i16:
920+
; SVE: // %bb.0: // %entry
921+
; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
922+
; SVE-NEXT: and z0.d, z0.d, #0xffff
923+
; SVE-NEXT: and z1.d, z1.d, #0xffff
924+
; SVE-NEXT: eor z0.d, z0.d, z2.d
925+
; SVE-NEXT: sub z0.d, z1.d, z0.d
926+
; SVE-NEXT: lsr z0.d, z0.d, #1
927+
; SVE-NEXT: ret
928+
;
929+
; SVE2-LABEL: rhaddu_v2i16:
930+
; SVE2: // %bb.0: // %entry
931+
; SVE2-NEXT: ptrue p0.d
932+
; SVE2-NEXT: and z0.d, z0.d, #0xffff
933+
; SVE2-NEXT: and z1.d, z1.d, #0xffff
934+
; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
935+
; SVE2-NEXT: ret
928936
entry:
929937
%s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
930938
%s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
@@ -1127,15 +1135,23 @@ entry:
11271135
}
11281136

11291137
define <vscale x 4 x i8> @rhaddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
1130-
; CHECK-LABEL: rhaddu_v4i8:
1131-
; CHECK: // %bb.0: // %entry
1132-
; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
1133-
; CHECK-NEXT: and z0.s, z0.s, #0xff
1134-
; CHECK-NEXT: and z1.s, z1.s, #0xff
1135-
; CHECK-NEXT: eor z0.d, z0.d, z2.d
1136-
; CHECK-NEXT: sub z0.s, z1.s, z0.s
1137-
; CHECK-NEXT: lsr z0.s, z0.s, #1
1138-
; CHECK-NEXT: ret
1138+
; SVE-LABEL: rhaddu_v4i8:
1139+
; SVE: // %bb.0: // %entry
1140+
; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
1141+
; SVE-NEXT: and z0.s, z0.s, #0xff
1142+
; SVE-NEXT: and z1.s, z1.s, #0xff
1143+
; SVE-NEXT: eor z0.d, z0.d, z2.d
1144+
; SVE-NEXT: sub z0.s, z1.s, z0.s
1145+
; SVE-NEXT: lsr z0.s, z0.s, #1
1146+
; SVE-NEXT: ret
1147+
;
1148+
; SVE2-LABEL: rhaddu_v4i8:
1149+
; SVE2: // %bb.0: // %entry
1150+
; SVE2-NEXT: ptrue p0.s
1151+
; SVE2-NEXT: and z0.s, z0.s, #0xff
1152+
; SVE2-NEXT: and z1.s, z1.s, #0xff
1153+
; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
1154+
; SVE2-NEXT: ret
11391155
entry:
11401156
%s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
11411157
%s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>

0 commit comments

Comments
 (0)