Skip to content

Commit 777a58e

Browse files
committed
Support {S,U}REMEqFold before legalization
This allows these optimisations to apply to e.g. `urem i16` directly, before `urem` is promoted to i32 on architectures where i16 operations are not intrinsically legal (such as AArch64). Legalization can then happen more directly, and the generated code gets a chance to avoid wasting time computing results in types wider than necessary. This seems to be mostly an improvement in terms of results, at least as far as x86_64 and aarch64 are concerned, with a few regressions here and there. It also helps prevent regressions in changes like {D87976}. Reviewed By: lebedev.ri Differential Revision: https://reviews.llvm.org/D88785
1 parent c88ee1a commit 777a58e

35 files changed

+4479
-6470
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5453,7 +5453,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
54535453
EVT ShSVT = ShVT.getScalarType();
54545454

54555455
// If MUL is unavailable, we cannot proceed in any case.
5456-
if (!isOperationLegalOrCustom(ISD::MUL, VT))
5456+
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
54575457
return SDValue();
54585458

54595459
bool ComparingWithAllZeros = true;
@@ -5583,7 +5583,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
55835583
}
55845584

55855585
if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
5586-
if (!isOperationLegalOrCustom(ISD::SUB, VT))
5586+
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
55875587
return SDValue(); // FIXME: Could/should use `ISD::ADD`?
55885588
assert(CompTargetNode.getValueType() == N.getValueType() &&
55895589
"Expecting that the types on LHS and RHS of comparisons match.");
@@ -5598,7 +5598,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
55985598
// divisors as a performance improvement, since rotating by 0 is a no-op.
55995599
if (HadEvenDivisor) {
56005600
// We need ROTR to do this.
5601-
if (!isOperationLegalOrCustom(ISD::ROTR, VT))
5601+
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
56025602
return SDValue();
56035603
SDNodeFlags Flags;
56045604
Flags.setExact(true);
@@ -5628,6 +5628,8 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
56285628
DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
56295629
Created.push_back(TautologicalInvertedChannels.getNode());
56305630

5631+
// NOTE: we avoid letting illegal types through even if we're before legalize
5632+
// ops – legalization has a hard time producing good code for this.
56315633
if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
56325634
// If we have a vector select, let's replace the comparison results in the
56335635
// affected lanes with the correct tautological result.
@@ -5638,6 +5640,8 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
56385640
}
56395641

56405642
// Else, we can just invert the comparison result in the appropriate lanes.
5643+
//
5644+
// NOTE: see the note above VSELECT above.
56415645
if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
56425646
return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
56435647
TautologicalInvertedChannels);
@@ -5692,8 +5696,9 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
56925696
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
56935697
EVT ShSVT = ShVT.getScalarType();
56945698

5695-
// If MUL is unavailable, we cannot proceed in any case.
5696-
if (!isOperationLegalOrCustom(ISD::MUL, VT))
5699+
// If we are after ops legalization, and MUL is unavailable, we can not
5700+
// proceed.
5701+
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
56975702
return SDValue();
56985703

56995704
// TODO: Could support comparing with non-zero too.
@@ -5848,7 +5853,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
58485853

58495854
if (NeedToApplyOffset) {
58505855
// We need ADD to do this.
5851-
if (!isOperationLegalOrCustom(ISD::ADD, VT))
5856+
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
58525857
return SDValue();
58535858

58545859
// (add (mul N, P), A)
@@ -5860,7 +5865,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
58605865
// divisors as a performance improvement, since rotating by 0 is a no-op.
58615866
if (HadEvenDivisor) {
58625867
// We need ROTR to do this.
5863-
if (!isOperationLegalOrCustom(ISD::ROTR, VT))
5868+
if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
58645869
return SDValue();
58655870
SDNodeFlags Flags;
58665871
Flags.setExact(true);
@@ -5883,6 +5888,9 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
58835888
// we must fix-up results for said lanes.
58845889
assert(VT.isVector() && "Can/should only get here for vectors.");
58855890

5891+
// NOTE: we avoid letting illegal types through even if we're before legalize
5892+
// ops – legalization has a hard time producing good code for the code that
5893+
// follows.
58865894
if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
58875895
!isOperationLegalOrCustom(ISD::AND, VT) ||
58885896
!isOperationLegalOrCustom(Cond, VT) ||

llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
define i1 @test_srem_odd(i29 %X) nounwind {
55
; CHECK-LABEL: test_srem_odd:
66
; CHECK: // %bb.0:
7-
; CHECK-NEXT: mov w9, #33099
8-
; CHECK-NEXT: mov w10, #64874
9-
; CHECK-NEXT: sbfx w8, w0, #0, #29
10-
; CHECK-NEXT: movk w9, #48986, lsl #16
11-
; CHECK-NEXT: movk w10, #330, lsl #16
12-
; CHECK-NEXT: madd w8, w8, w9, w10
13-
; CHECK-NEXT: mov w9, #64213
14-
; CHECK-NEXT: movk w9, #661, lsl #16
7+
; CHECK-NEXT: mov w8, #33099
8+
; CHECK-NEXT: mov w9, #24493
9+
; CHECK-NEXT: movk w8, #8026, lsl #16
10+
; CHECK-NEXT: movk w9, #41, lsl #16
11+
; CHECK-NEXT: madd w8, w0, w8, w9
12+
; CHECK-NEXT: mov w9, #48987
13+
; CHECK-NEXT: and w8, w8, #0x1fffffff
14+
; CHECK-NEXT: movk w9, #82, lsl #16
1515
; CHECK-NEXT: cmp w8, w9
1616
; CHECK-NEXT: cset w0, lo
1717
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)