-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DAG] Matched Fixedwidth Pattern for ISD::AVGCEILU #85031
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 — Author: Shourya Goel (Sh0g0-1758). Changes: Fixes #84753. Full diff: https://github.com/llvm/llvm-project/pull/85031.diff (2 files affected):
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 735cec8ecc0627..413112ef896328 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2529,6 +2529,39 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}
+// Attempt to form avgceilu(A, B) from sub(or(A, B), lshr(xor(A, B), 1)).
+//
+// This relies on the identity
+//   (A | B) - ((A ^ B) >> 1) == ceil((A + B) / 2)
+// for unsigned A and B, which computes the rounded-up average without
+// needing a wider intermediate type.
+//
+// N must be an ISD::SUB node. Returns the replacement AVGCEILU node, or an
+// empty SDValue when the pattern does not match or AVGCEILU is neither legal
+// nor custom-lowered for the operand type.
+static SDValue combineFixedwidthToAVGCEILU(SDNode *N, SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::SUB && "SUB node is required here");
+  SDValue Or = N->getOperand(0);
+  SDValue Lshr = N->getOperand(1);
+  if (Or.getOpcode() != ISD::OR || Lshr.getOpcode() != ISD::SRL)
+    return SDValue();
+  SDValue Xor = Lshr.getOperand(0);
+  if (Xor.getOpcode() != ISD::XOR)
+    return SDValue();
+  SDValue A = Or.getOperand(0);
+  SDValue B = Or.getOperand(1);
+  SDValue XorLHS = Xor.getOperand(0);
+  SDValue XorRHS = Xor.getOperand(1);
+  // OR and XOR are both commutative, so accept the XOR operands in either
+  // order relative to the OR operands.
+  bool SameOrder = XorLHS == A && XorRHS == B;
+  bool SwappedOrder = XorLHS == B && XorRHS == A;
+  if (!SameOrder && !SwappedOrder)
+    return SDValue();
+  // Is the right shift using an immediate value (or splat) of 1?
+  ConstantSDNode *ShAmt = isConstOrConstSplat(Lshr.getOperand(1));
+  if (!ShAmt || ShAmt->getAPIntValue() != 1)
+    return SDValue();
+  // AVGCEILU produces the same type as its operands. Use the operand type
+  // directly: rebuilding it from VT.getSizeInBits() would use the total
+  // vector width as the element width (e.g. <4 x i32> -> <4 x i128>), which
+  // is never legal and would not match the operand types.
+  EVT VT = A.getValueType();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isOperationLegalOrCustom(ISD::AVGCEILU, VT))
+    return SDValue();
+  return DAG.getNode(ISD::AVGCEILU, SDLoc(N), VT, A, B);
+}
+
/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
/// a shift and add with a different constant.
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
@@ -3859,6 +3892,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
+ // Try to match AVGCEILU fixedwidth pattern
+ if (SDValue V = combineFixedwidthToAVGCEILU(N, DAG))
+ return V;
+
if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
return V;
diff --git a/llvm/test/CodeGen/AArch64/sub_combine.ll b/llvm/test/CodeGen/AArch64/sub_combine.ll
new file mode 100644
index 00000000000000..f9df436a3db2a2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sub_combine.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -debugify-and-strip-all-safe -enable-machine-outliner=never -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
+
+define i4 @sub_fixedwidth_i4(i4 %a0, i4 %a1) {
+; CHECK-LABEL: sub_fixedwidth_i4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: eor w8, w0, w1
+; CHECK-NEXT: orr w9, w0, w1
+; CHECK-NEXT: and w8, w8, #0xe
+; CHECK-NEXT: sub w0, w9, w8, lsr #1
+; CHECK-NEXT: ret
+ %or = or i4 %a0, %a1
+ %xor = xor i4 %a0, %a1
+ %srl = lshr i4 %xor, 1
+ %res = sub i4 %or, %srl
+ ret i4 %res
+}
+
+define <4 x i32> @sub_fixedwidth_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: sub_fixedwidth_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: eor v2.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ushr v1.4s, v2.4s, #1
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %or = or <4 x i32> %a0, %a1
+ %xor = xor <4 x i32> %a0, %a1
+ %srl = lshr <4 x i32> %xor, <i32 1,i32 1,i32 1,i32 1>
+ %res = sub <4 x i32> %or, %srl
+ ret <4 x i32> %res
+}
+
+
|
@llvm/pr-subscribers-llvm-selectiondag — Author: Shourya Goel (Sh0g0-1758). Changes: Fixes #84753. Full diff: https://github.com/llvm/llvm-project/pull/85031.diff (2 files affected):
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 735cec8ecc0627..413112ef896328 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2529,6 +2529,39 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}
+// Attempt to form avgceilu(A, B) from sub(or(A, B), lshr(xor(A, B), 1)).
+//
+// This relies on the identity
+//   (A | B) - ((A ^ B) >> 1) == ceil((A + B) / 2)
+// for unsigned A and B, which computes the rounded-up average without
+// needing a wider intermediate type.
+//
+// N must be an ISD::SUB node. Returns the replacement AVGCEILU node, or an
+// empty SDValue when the pattern does not match or AVGCEILU is neither legal
+// nor custom-lowered for the operand type.
+static SDValue combineFixedwidthToAVGCEILU(SDNode *N, SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::SUB && "SUB node is required here");
+  SDValue Or = N->getOperand(0);
+  SDValue Lshr = N->getOperand(1);
+  if (Or.getOpcode() != ISD::OR || Lshr.getOpcode() != ISD::SRL)
+    return SDValue();
+  SDValue Xor = Lshr.getOperand(0);
+  if (Xor.getOpcode() != ISD::XOR)
+    return SDValue();
+  SDValue A = Or.getOperand(0);
+  SDValue B = Or.getOperand(1);
+  SDValue XorLHS = Xor.getOperand(0);
+  SDValue XorRHS = Xor.getOperand(1);
+  // OR and XOR are both commutative, so accept the XOR operands in either
+  // order relative to the OR operands.
+  bool SameOrder = XorLHS == A && XorRHS == B;
+  bool SwappedOrder = XorLHS == B && XorRHS == A;
+  if (!SameOrder && !SwappedOrder)
+    return SDValue();
+  // Is the right shift using an immediate value (or splat) of 1?
+  ConstantSDNode *ShAmt = isConstOrConstSplat(Lshr.getOperand(1));
+  if (!ShAmt || ShAmt->getAPIntValue() != 1)
+    return SDValue();
+  // AVGCEILU produces the same type as its operands. Use the operand type
+  // directly: rebuilding it from VT.getSizeInBits() would use the total
+  // vector width as the element width (e.g. <4 x i32> -> <4 x i128>), which
+  // is never legal and would not match the operand types.
+  EVT VT = A.getValueType();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isOperationLegalOrCustom(ISD::AVGCEILU, VT))
+    return SDValue();
+  return DAG.getNode(ISD::AVGCEILU, SDLoc(N), VT, A, B);
+}
+
/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
/// a shift and add with a different constant.
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
@@ -3859,6 +3892,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
+ // Try to match AVGCEILU fixedwidth pattern
+ if (SDValue V = combineFixedwidthToAVGCEILU(N, DAG))
+ return V;
+
if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
return V;
diff --git a/llvm/test/CodeGen/AArch64/sub_combine.ll b/llvm/test/CodeGen/AArch64/sub_combine.ll
new file mode 100644
index 00000000000000..f9df436a3db2a2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sub_combine.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -debugify-and-strip-all-safe -enable-machine-outliner=never -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
+
+define i4 @sub_fixedwidth_i4(i4 %a0, i4 %a1) {
+; CHECK-LABEL: sub_fixedwidth_i4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: eor w8, w0, w1
+; CHECK-NEXT: orr w9, w0, w1
+; CHECK-NEXT: and w8, w8, #0xe
+; CHECK-NEXT: sub w0, w9, w8, lsr #1
+; CHECK-NEXT: ret
+ %or = or i4 %a0, %a1
+ %xor = xor i4 %a0, %a1
+ %srl = lshr i4 %xor, 1
+ %res = sub i4 %or, %srl
+ ret i4 %res
+}
+
+define <4 x i32> @sub_fixedwidth_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: sub_fixedwidth_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: eor v2.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ushr v1.4s, v2.4s, #1
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %or = or <4 x i32> %a0, %a1
+ %xor = xor <4 x i32> %a0, %a1
+ %srl = lshr <4 x i32> %xor, <i32 1,i32 1,i32 1,i32 1>
+ %res = sub <4 x i32> %or, %srl
+ ret <4 x i32> %res
+}
+
+
|
If you're making small fixes, please do it by pushing an extra commit to your branch, instead of amending the commit and force-pushing it. This makes it easier for reviewers to see what has changed each time. When someone clicks the "squash and merge" button they will all be squashed into a single commit on the "main" branch. (Personally I think force-push is OK sometimes, e.g. if you need to manually rebase on some upstream changes that cause conflicts in your patch. But it should not be used all the time.) |
Right, noted. |
Requesting review from @RKSimon, @davemgreen. |
Updated the code with sd_match pattern matching. |
SDValue N0 = N->getOperand(0); | ||
EVT VT = N0.getValueType(); | ||
SDLoc DL(N); | ||
if (TLI.isOperationLegal(ISD::AVGFLOORU, VT)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
AVGFLOORU -> AVGCEILU
This shares quite a lot with #84903 and could be combined into the same function once that is submitted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
AVGFLOORU -> AVGCEILU
Updated.
This shares quite a lot with #84903 and could be combined into the same function once that is submitted
Sure.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
Fixes: #84753