[DAG] Matched Fixedwidth Pattern for ISD::AVGCEILU #85031

Sh0g0-1758 · 2024-03-13T07:15:07Z

Fixes: #84753

llvmbot · 2024-03-13T07:15:38Z

@llvm/pr-subscribers-backend-aarch64

Author: Shourya Goel (Sh0g0-1758)

Changes

Fixes: #84753

Full diff: https://github.com/llvm/llvm-project/pull/85031.diff

2 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+37)
(added) llvm/test/CodeGen/AArch64/sub_combine.ll (+34)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 735cec8ecc0627..413112ef896328 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2529,6 +2529,39 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
 }
 
+// Attempt to form avgceilu(A, B) from sub(or(A, B), lshr(xor(A, B), 1))
+static SDValue combineFixedwidthToAVGCEILU(SDNode *N, SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::SUB and "SUB node is required here");
+  SDValue Or = N->getOperand(0);
+  SDValue Lshr = N->getOperand(1);
+  if (Or.getOpcode() != ISD::OR or Lshr.getOpcode() != ISD::SRL)
+    return SDValue();
+  SDValue Xor = Lshr.getOperand(0);
+  if (Xor.getOpcode() != ISD::XOR)
+    return SDValue();
+  SDValue Or1 = Or.getOperand(0);
+  SDValue Or2 = Or.getOperand(1);
+  SDValue Xor1 = Xor.getOperand(0);
+  SDValue Xor2 = Xor.getOperand(1);
+  if (Xor1 != Or1 or Xor2 != Or2)
+    return SDValue();
+  // Is the right shift using an immediate value of 1?
+  ConstantSDNode *N1C = isConstOrConstSplat(Lshr.getOperand(1));
+  if (!N1C or N1C->getAPIntValue() != 1)
+    return SDValue();
+  EVT VT = Or1.getValueType();
+  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+  if (VT.isVector())
+    VT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
+  else
+    VT = NVT;
+  SDLoc DL(N);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isOperationLegalOrCustom(ISD::AVGCEILU, VT))
+    return SDValue();
+  return DAG.getNode(ISD::AVGCEILU, DL, VT, Or1, Or2);
+}
+
 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
 /// a shift and add with a different constant.
 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
@@ -3859,6 +3892,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (SDValue V = foldAddSubOfSignBit(N, DAG))
     return V;
 
+  // Try to match AVGCEILU fixedwidth pattern
+  if (SDValue V = combineFixedwidthToAVGCEILU(N, DAG))
+    return V;
+
   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
     return V;
 
diff --git a/llvm/test/CodeGen/AArch64/sub_combine.ll b/llvm/test/CodeGen/AArch64/sub_combine.ll
new file mode 100644
index 00000000000000..f9df436a3db2a2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sub_combine.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -debugify-and-strip-all-safe -enable-machine-outliner=never -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
+
+define i4 @sub_fixedwidth_i4(i4 %a0, i4 %a1)  {
+; CHECK-LABEL: sub_fixedwidth_i4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor w8, w0, w1
+; CHECK-NEXT:    orr w9, w0, w1
+; CHECK-NEXT:    and w8, w8, #0xe
+; CHECK-NEXT:    sub w0, w9, w8, lsr #1
+; CHECK-NEXT:    ret
+  %or = or i4 %a0, %a1
+  %xor = xor i4 %a0, %a1
+  %srl = lshr i4 %xor, 1
+  %res = sub i4 %or, %srl
+  ret i4 %res
+}
+
+define <4 x i32> @sub_fixedwidth_v4i32(<4 x i32> %a0, <4 x i32> %a1)  {
+; CHECK-LABEL: sub_fixedwidth_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor v2.16b, v0.16b, v1.16b
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ushr v1.4s, v2.4s, #1
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %or = or <4 x i32> %a0, %a1
+  %xor = xor <4 x i32> %a0, %a1
+  %srl = lshr <4 x i32> %xor, <i32 1,i32 1,i32 1,i32 1>
+  %res = sub <4 x i32> %or, %srl
+  ret <4 x i32> %res
+}
+
+

llvmbot · 2024-03-13T07:15:38Z

@llvm/pr-subscribers-llvm-selectiondag

Author: Shourya Goel (Sh0g0-1758)

Changes

Fixes: #84753

Full diff: https://github.com/llvm/llvm-project/pull/85031.diff

2 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+37)
(added) llvm/test/CodeGen/AArch64/sub_combine.ll (+34)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 735cec8ecc0627..413112ef896328 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2529,6 +2529,39 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
 }
 
+// Attempt to form avgceilu(A, B) from sub(or(A, B), lshr(xor(A, B), 1))
+static SDValue combineFixedwidthToAVGCEILU(SDNode *N, SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::SUB and "SUB node is required here");
+  SDValue Or = N->getOperand(0);
+  SDValue Lshr = N->getOperand(1);
+  if (Or.getOpcode() != ISD::OR or Lshr.getOpcode() != ISD::SRL)
+    return SDValue();
+  SDValue Xor = Lshr.getOperand(0);
+  if (Xor.getOpcode() != ISD::XOR)
+    return SDValue();
+  SDValue Or1 = Or.getOperand(0);
+  SDValue Or2 = Or.getOperand(1);
+  SDValue Xor1 = Xor.getOperand(0);
+  SDValue Xor2 = Xor.getOperand(1);
+  if (Xor1 != Or1 or Xor2 != Or2)
+    return SDValue();
+  // Is the right shift using an immediate value of 1?
+  ConstantSDNode *N1C = isConstOrConstSplat(Lshr.getOperand(1));
+  if (!N1C or N1C->getAPIntValue() != 1)
+    return SDValue();
+  EVT VT = Or1.getValueType();
+  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+  if (VT.isVector())
+    VT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
+  else
+    VT = NVT;
+  SDLoc DL(N);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isOperationLegalOrCustom(ISD::AVGCEILU, VT))
+    return SDValue();
+  return DAG.getNode(ISD::AVGCEILU, DL, VT, Or1, Or2);
+}
+
 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
 /// a shift and add with a different constant.
 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
@@ -3859,6 +3892,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (SDValue V = foldAddSubOfSignBit(N, DAG))
     return V;
 
+  // Try to match AVGCEILU fixedwidth pattern
+  if (SDValue V = combineFixedwidthToAVGCEILU(N, DAG))
+    return V;
+
   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
     return V;
 
diff --git a/llvm/test/CodeGen/AArch64/sub_combine.ll b/llvm/test/CodeGen/AArch64/sub_combine.ll
new file mode 100644
index 00000000000000..f9df436a3db2a2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sub_combine.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -debugify-and-strip-all-safe -enable-machine-outliner=never -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
+
+define i4 @sub_fixedwidth_i4(i4 %a0, i4 %a1)  {
+; CHECK-LABEL: sub_fixedwidth_i4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor w8, w0, w1
+; CHECK-NEXT:    orr w9, w0, w1
+; CHECK-NEXT:    and w8, w8, #0xe
+; CHECK-NEXT:    sub w0, w9, w8, lsr #1
+; CHECK-NEXT:    ret
+  %or = or i4 %a0, %a1
+  %xor = xor i4 %a0, %a1
+  %srl = lshr i4 %xor, 1
+  %res = sub i4 %or, %srl
+  ret i4 %res
+}
+
+define <4 x i32> @sub_fixedwidth_v4i32(<4 x i32> %a0, <4 x i32> %a1)  {
+; CHECK-LABEL: sub_fixedwidth_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor v2.16b, v0.16b, v1.16b
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ushr v1.4s, v2.4s, #1
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %or = or <4 x i32> %a0, %a1
+  %xor = xor <4 x i32> %a0, %a1
+  %srl = lshr <4 x i32> %xor, <i32 1,i32 1,i32 1,i32 1>
+  %res = sub <4 x i32> %or, %srl
+  ret <4 x i32> %res
+}
+
+

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

jayfoad · 2024-03-13T10:49:18Z

If you're making small fixes, please do it by pushing an extra commit to your branch, instead of amending the commit and force-pushing it. This makes it easier for reviewers to see what has changed each time. When someone clicks the "squash and merge" button they will all be squashed into a single commit on the "main" branch.

(Personally I think force-push is OK sometimes, e.g. if you need to manually rebase on some upstream changes that cause conflicts in your patch. But it should not be used all the time.)

Sh0g0-1758 · 2024-03-13T10:51:19Z

Right, noted.

Sh0g0-1758 · 2024-03-14T16:53:23Z

Requesting review from @RKSimon, @davemgreen.

Sh0g0-1758 · 2024-03-16T12:07:27Z

Updated the code with sd_match pattern matching.

davemgreen · 2024-03-17T17:17:28Z

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+  SDValue N0 = N->getOperand(0);
+  EVT VT = N0.getValueType();
+  SDLoc DL(N);
+  if (TLI.isOperationLegal(ISD::AVGFLOORU, VT)) {


AVGFLOORU -> AVGCEILU

This shares quite a lot with #84903 and could be combined into the same function once that is submitted.

> AVGFLOORU -> AVGCEILU

Updated.

> This shares quite a lot with #84903 and could be combined into the same function once that is submitted.

Sure.

RKSimon

LGTM - cheers

Fixes: llvm#84753

llvmbot added the backend:AArch64 and llvm:SelectionDAG ("SelectionDAGISel as well") labels on Mar 13, 2024

jayfoad reviewed Mar 13, 2024

View reviewed changes

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Show resolved Hide resolved

Sh0g0-1758 requested a review from jayfoad March 13, 2024 10:21

Sh0g0-1758 force-pushed the 84753 branch from 4491c95 to b240434 Compare March 13, 2024 10:41

Matched Fixed width Pattern for ISD::AVGCEILU

070c44d

Sh0g0-1758 force-pushed the 84753 branch from b240434 to 070c44d Compare March 13, 2024 10:42

Sh0g0-1758 added 2 commits March 13, 2024 16:24

Replaced `and` with `&&`, and similar changes.

37cda7f

Changed VT and moved tests

e59331c

Sh0g0-1758 added 5 commits March 16, 2024 17:29

Merge branch 'llvm:main' into 84753

a4de503

Updated Pattern matching with sd_pattern

dcd3385

Ran fmt

46e18ef

Removed namespace

47a7815

Ran fmt

22d786b

nit

4036561

davemgreen reviewed Mar 17, 2024

View reviewed changes

Sh0g0-1758 added 2 commits March 18, 2024 01:58

Update DAGCombiner.cpp

abc15f8

Merge branch 'llvm:main' into 84753

8a8234d

RKSimon approved these changes Mar 19, 2024

View reviewed changes

RKSimon merged commit 92764c9 into llvm:main Mar 19, 2024

chencha3 pushed a commit to chencha3/llvm-project that referenced this pull request Mar 23, 2024

[DAG] Matched Fixedwidth Pattern for ISD::AVGCEILU (llvm#85031)

5b936ac

Fixes: llvm#84753

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[DAG] Matched Fixedwidth Pattern for ISD::AVGCEILU #85031

[DAG] Matched Fixedwidth Pattern for ISD::AVGCEILU #85031

Uh oh!

Sh0g0-1758 commented Mar 13, 2024

Uh oh!

llvmbot commented Mar 13, 2024

Uh oh!

llvmbot commented Mar 13, 2024

Uh oh!

Uh oh!

jayfoad commented Mar 13, 2024

Uh oh!

Sh0g0-1758 commented Mar 13, 2024

Uh oh!

Sh0g0-1758 commented Mar 14, 2024

Uh oh!

Sh0g0-1758 commented Mar 16, 2024

Uh oh!

davemgreen Mar 17, 2024

Uh oh!

Sh0g0-1758 Mar 17, 2024

Uh oh!

RKSimon left a comment

Uh oh!

Uh oh!

[DAG] Matched Fixedwidth Pattern for ISD::AVGCEILU #85031

[DAG] Matched Fixedwidth Pattern for ISD::AVGCEILU #85031

Uh oh!

Conversation

Sh0g0-1758 commented Mar 13, 2024

Uh oh!

llvmbot commented Mar 13, 2024

Uh oh!

llvmbot commented Mar 13, 2024

Uh oh!

Uh oh!

jayfoad commented Mar 13, 2024

Uh oh!

Sh0g0-1758 commented Mar 13, 2024

Uh oh!

Sh0g0-1758 commented Mar 14, 2024

Uh oh!

Sh0g0-1758 commented Mar 16, 2024

Uh oh!

davemgreen Mar 17, 2024

Choose a reason for hiding this comment

Uh oh!

Sh0g0-1758 Mar 17, 2024

Choose a reason for hiding this comment

Uh oh!

RKSimon left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!