Skip to content

Commit 2b71269

Browse files
authored
[SelectionDAG][X86] Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1] (llvm#136565)
Closes llvm#51823.
1 parent ec3a905 commit 2b71269

File tree

3 files changed

+150
-158
lines changed

3 files changed

+150
-158
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3867,6 +3867,58 @@ static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
38673867
return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
38683868
}
38693869

3870+
// Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
3871+
static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG,
3872+
const SDLoc &DL) {
3873+
assert(N->getOpcode() == ISD::SUB && "Node must be a SUB");
3874+
SDValue Sub0 = N->getOperand(0);
3875+
SDValue Sub1 = N->getOperand(1);
3876+
3877+
auto CheckAndFoldMulCase = [&](SDValue DivRem, SDValue MaybeY) -> SDValue {
3878+
if ((DivRem.getOpcode() == ISD::SDIVREM ||
3879+
DivRem.getOpcode() == ISD::UDIVREM) &&
3880+
DivRem.getResNo() == 0 && DivRem.getOperand(0) == Sub0 &&
3881+
DivRem.getOperand(1) == MaybeY) {
3882+
return SDValue(DivRem.getNode(), 1);
3883+
}
3884+
return SDValue();
3885+
};
3886+
3887+
if (Sub1.getOpcode() == ISD::MUL) {
3888+
// (sub x, (mul divrem(x,y)[0], y))
3889+
SDValue Mul0 = Sub1.getOperand(0);
3890+
SDValue Mul1 = Sub1.getOperand(1);
3891+
3892+
if (SDValue Res = CheckAndFoldMulCase(Mul0, Mul1))
3893+
return Res;
3894+
3895+
if (SDValue Res = CheckAndFoldMulCase(Mul1, Mul0))
3896+
return Res;
3897+
3898+
} else if (Sub1.getOpcode() == ISD::SHL) {
3899+
// Handle (sub x, (shl divrem(x,y)[0], C)) where y = 1 << C
3900+
SDValue Shl0 = Sub1.getOperand(0);
3901+
SDValue Shl1 = Sub1.getOperand(1);
3902+
// Check if Shl0 is divrem(x, Y)[0]
3903+
if ((Shl0.getOpcode() == ISD::SDIVREM ||
3904+
Shl0.getOpcode() == ISD::UDIVREM) &&
3905+
Shl0.getResNo() == 0 && Shl0.getOperand(0) == Sub0) {
3906+
3907+
SDValue Divisor = Shl0.getOperand(1);
3908+
3909+
ConstantSDNode *DivC = isConstOrConstSplat(Divisor);
3910+
ConstantSDNode *ShC = isConstOrConstSplat(Shl1);
3911+
if (!DivC || !ShC)
3912+
return SDValue();
3913+
3914+
if (DivC->getAPIntValue().isPowerOf2() &&
3915+
DivC->getAPIntValue().logBase2() == ShC->getAPIntValue())
3916+
return SDValue(Shl0.getNode(), 1);
3917+
}
3918+
}
3919+
return SDValue();
3920+
}
3921+
38703922
// Since it may not be valid to emit a fold to zero for vector initializers
38713923
// check if we can before folding.
38723924
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
@@ -4094,6 +4146,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
40944146
if (SDValue V = foldSubToUSubSat(VT, N, DL))
40954147
return V;
40964148

4149+
if (SDValue V = foldRemainderIdiom(N, DAG, DL))
4150+
return V;
4151+
40974152
// (A - B) - 1 -> add (xor B, -1), A
40984153
if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
40994154
return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));

llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll

Lines changed: 62 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -11,25 +11,20 @@
1111
define i8 @scalar_i8(i8 %x, i8 %y, ptr %divdst) nounwind {
1212
; X86-LABEL: scalar_i8:
1313
; X86: # %bb.0:
14+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
15+
; X86-NEXT: idivb {{[0-9]+}}(%esp)
1416
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
15-
; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
16-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
17-
; X86-NEXT: movsbl %cl, %eax
18-
; X86-NEXT: idivb %ch
17+
; X86-NEXT: movsbl %ah, %ecx
1918
; X86-NEXT: movb %al, (%edx)
20-
; X86-NEXT: mulb %ch
21-
; X86-NEXT: subb %al, %cl
2219
; X86-NEXT: movl %ecx, %eax
2320
; X86-NEXT: retl
2421
;
2522
; X64-LABEL: scalar_i8:
2623
; X64: # %bb.0:
27-
; X64-NEXT: movsbl %dil, %ecx
28-
; X64-NEXT: movl %ecx, %eax
24+
; X64-NEXT: movsbl %dil, %eax
2925
; X64-NEXT: idivb %sil
26+
; X64-NEXT: movsbl %ah, %ecx
3027
; X64-NEXT: movb %al, (%rdx)
31-
; X64-NEXT: mulb %sil
32-
; X64-NEXT: subb %al, %cl
3328
; X64-NEXT: movl %ecx, %eax
3429
; X64-NEXT: retq
3530
%div = sdiv i8 %x, %y
@@ -42,34 +37,23 @@ define i8 @scalar_i8(i8 %x, i8 %y, ptr %divdst) nounwind {
4237
define i16 @scalar_i16(i16 %x, i16 %y, ptr %divdst) nounwind {
4338
; X86-LABEL: scalar_i16:
4439
; X86: # %bb.0:
45-
; X86-NEXT: pushl %edi
46-
; X86-NEXT: pushl %esi
47-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
48-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
49-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
50-
; X86-NEXT: movl %ecx, %eax
40+
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
5141
; X86-NEXT: cwtd
52-
; X86-NEXT: idivw %si
53-
; X86-NEXT: # kill: def $ax killed $ax def $eax
54-
; X86-NEXT: movw %ax, (%edi)
55-
; X86-NEXT: imull %eax, %esi
56-
; X86-NEXT: subl %esi, %ecx
57-
; X86-NEXT: movl %ecx, %eax
58-
; X86-NEXT: popl %esi
59-
; X86-NEXT: popl %edi
42+
; X86-NEXT: idivw {{[0-9]+}}(%esp)
43+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
44+
; X86-NEXT: movw %ax, (%ecx)
45+
; X86-NEXT: movl %edx, %eax
6046
; X86-NEXT: retl
6147
;
6248
; X64-LABEL: scalar_i16:
6349
; X64: # %bb.0:
6450
; X64-NEXT: movq %rdx, %rcx
6551
; X64-NEXT: movl %edi, %eax
52+
; X64-NEXT: # kill: def $ax killed $ax killed $eax
6653
; X64-NEXT: cwtd
6754
; X64-NEXT: idivw %si
68-
; X64-NEXT: # kill: def $ax killed $ax def $eax
6955
; X64-NEXT: movw %ax, (%rcx)
70-
; X64-NEXT: imull %eax, %esi
71-
; X64-NEXT: subl %esi, %edi
72-
; X64-NEXT: movl %edi, %eax
56+
; X64-NEXT: movl %edx, %eax
7357
; X64-NEXT: retq
7458
%div = sdiv i16 %x, %y
7559
store i16 %div, ptr %divdst, align 4
@@ -81,20 +65,12 @@ define i16 @scalar_i16(i16 %x, i16 %y, ptr %divdst) nounwind {
8165
define i32 @scalar_i32(i32 %x, i32 %y, ptr %divdst) nounwind {
8266
; X86-LABEL: scalar_i32:
8367
; X86: # %bb.0:
84-
; X86-NEXT: pushl %edi
85-
; X86-NEXT: pushl %esi
86-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
87-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
88-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
89-
; X86-NEXT: movl %ecx, %eax
68+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
9069
; X86-NEXT: cltd
91-
; X86-NEXT: idivl %edi
92-
; X86-NEXT: movl %eax, (%esi)
93-
; X86-NEXT: imull %edi, %eax
94-
; X86-NEXT: subl %eax, %ecx
95-
; X86-NEXT: movl %ecx, %eax
96-
; X86-NEXT: popl %esi
97-
; X86-NEXT: popl %edi
70+
; X86-NEXT: idivl {{[0-9]+}}(%esp)
71+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
72+
; X86-NEXT: movl %eax, (%ecx)
73+
; X86-NEXT: movl %edx, %eax
9874
; X86-NEXT: retl
9975
;
10076
; X64-LABEL: scalar_i32:
@@ -104,9 +80,7 @@ define i32 @scalar_i32(i32 %x, i32 %y, ptr %divdst) nounwind {
10480
; X64-NEXT: cltd
10581
; X64-NEXT: idivl %esi
10682
; X64-NEXT: movl %eax, (%rcx)
107-
; X64-NEXT: imull %esi, %eax
108-
; X64-NEXT: subl %eax, %edi
109-
; X64-NEXT: movl %edi, %eax
83+
; X64-NEXT: movl %edx, %eax
11084
; X64-NEXT: retq
11185
%div = sdiv i32 %x, %y
11286
store i32 %div, ptr %divdst, align 4
@@ -158,9 +132,7 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind {
158132
; X64-NEXT: cqto
159133
; X64-NEXT: idivq %rsi
160134
; X64-NEXT: movq %rax, (%rcx)
161-
; X64-NEXT: imulq %rsi, %rax
162-
; X64-NEXT: subq %rax, %rdi
163-
; X64-NEXT: movq %rdi, %rax
135+
; X64-NEXT: movq %rdx, %rax
164136
; X64-NEXT: retq
165137
%div = sdiv i64 %x, %y
166138
store i64 %div, ptr %divdst, align 4
@@ -1194,39 +1166,53 @@ define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y, ptr %divdst) nounw
11941166
ret <2 x i64> %t2
11951167
}
11961168

1169+
define i32 @scalar_i32_const_pow2_divisor(i32 %0, ptr %1) minsize nounwind {
1170+
; X86-LABEL: scalar_i32_const_pow2_divisor:
1171+
; X86: # %bb.0:
1172+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1173+
; X86-NEXT: movl $256, %ecx # imm = 0x100
1174+
; X86-NEXT: cltd
1175+
; X86-NEXT: idivl %ecx
1176+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1177+
; X86-NEXT: movl %edx, (%ecx)
1178+
; X86-NEXT: retl
1179+
;
1180+
; X64-LABEL: scalar_i32_const_pow2_divisor:
1181+
; X64: # %bb.0:
1182+
; X64-NEXT: movl %edi, %eax
1183+
; X64-NEXT: movl $256, %ecx # imm = 0x100
1184+
; X64-NEXT: cltd
1185+
; X64-NEXT: idivl %ecx
1186+
; X64-NEXT: movl %edx, (%rsi)
1187+
; X64-NEXT: retq
1188+
%3 = srem i32 %0, 256
1189+
store i32 %3, ptr %1, align 4
1190+
%4 = sdiv i32 %0, 256
1191+
ret i32 %4
1192+
}
1193+
11971194
; Special tests.
11981195

11991196
define i32 @scalar_i32_commutative(i32 %x, ptr %ysrc, ptr %divdst) nounwind {
12001197
; X86-LABEL: scalar_i32_commutative:
12011198
; X86: # %bb.0:
1202-
; X86-NEXT: pushl %edi
1203-
; X86-NEXT: pushl %esi
1204-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1205-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
12061199
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1207-
; X86-NEXT: movl (%eax), %edi
1208-
; X86-NEXT: movl %ecx, %eax
1200+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
12091201
; X86-NEXT: cltd
1210-
; X86-NEXT: idivl %edi
1211-
; X86-NEXT: movl %eax, (%esi)
1212-
; X86-NEXT: imull %eax, %edi
1213-
; X86-NEXT: subl %edi, %ecx
1214-
; X86-NEXT: movl %ecx, %eax
1215-
; X86-NEXT: popl %esi
1216-
; X86-NEXT: popl %edi
1202+
; X86-NEXT: idivl (%ecx)
1203+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1204+
; X86-NEXT: movl %eax, (%ecx)
1205+
; X86-NEXT: movl %edx, %eax
12171206
; X86-NEXT: retl
12181207
;
12191208
; X64-LABEL: scalar_i32_commutative:
12201209
; X64: # %bb.0:
12211210
; X64-NEXT: movq %rdx, %rcx
1222-
; X64-NEXT: movl (%rsi), %esi
12231211
; X64-NEXT: movl %edi, %eax
12241212
; X64-NEXT: cltd
1225-
; X64-NEXT: idivl %esi
1213+
; X64-NEXT: idivl (%rsi)
12261214
; X64-NEXT: movl %eax, (%rcx)
1227-
; X64-NEXT: imull %eax, %esi
1228-
; X64-NEXT: subl %esi, %edi
1229-
; X64-NEXT: movl %edi, %eax
1215+
; X64-NEXT: movl %edx, %eax
12301216
; X64-NEXT: retq
12311217
%y = load i32, ptr %ysrc, align 4
12321218
%div = sdiv i32 %x, %y
@@ -1240,24 +1226,20 @@ define i32 @scalar_i32_commutative(i32 %x, ptr %ysrc, ptr %divdst) nounwind {
12401226
define i32 @extrause(i32 %x, i32 %y, ptr %divdst, ptr %t1dst) nounwind {
12411227
; X86-LABEL: extrause:
12421228
; X86: # %bb.0:
1243-
; X86-NEXT: pushl %ebx
12441229
; X86-NEXT: pushl %edi
12451230
; X86-NEXT: pushl %esi
1246-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1247-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1231+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
12481232
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1249-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
1250-
; X86-NEXT: movl %ecx, %eax
12511233
; X86-NEXT: cltd
1252-
; X86-NEXT: idivl %ebx
1234+
; X86-NEXT: idivl %ecx
1235+
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1236+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
12531237
; X86-NEXT: movl %eax, (%edi)
1254-
; X86-NEXT: imull %ebx, %eax
1238+
; X86-NEXT: imull %ecx, %eax
12551239
; X86-NEXT: movl %eax, (%esi)
1256-
; X86-NEXT: subl %eax, %ecx
1257-
; X86-NEXT: movl %ecx, %eax
1240+
; X86-NEXT: movl %edx, %eax
12581241
; X86-NEXT: popl %esi
12591242
; X86-NEXT: popl %edi
1260-
; X86-NEXT: popl %ebx
12611243
; X86-NEXT: retl
12621244
;
12631245
; X64-LABEL: extrause:
@@ -1269,8 +1251,7 @@ define i32 @extrause(i32 %x, i32 %y, ptr %divdst, ptr %t1dst) nounwind {
12691251
; X64-NEXT: movl %eax, (%r8)
12701252
; X64-NEXT: imull %esi, %eax
12711253
; X64-NEXT: movl %eax, (%rcx)
1272-
; X64-NEXT: subl %eax, %edi
1273-
; X64-NEXT: movl %edi, %eax
1254+
; X64-NEXT: movl %edx, %eax
12741255
; X64-NEXT: retq
12751256
%div = sdiv i32 %x, %y
12761257
store i32 %div, ptr %divdst, align 4
@@ -1296,14 +1277,14 @@ define i32 @multiple_bb(i32 %x, i32 %y, ptr %divdst, i1 zeroext %store_srem, ptr
12961277
; X86-NEXT: idivl %esi
12971278
; X86-NEXT: movl %eax, (%edi)
12981279
; X86-NEXT: testb %bl, %bl
1299-
; X86-NEXT: je .LBB11_2
1280+
; X86-NEXT: je .LBB12_2
13001281
; X86-NEXT: # %bb.1: # %do_srem
13011282
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
13021283
; X86-NEXT: movl %eax, %edi
13031284
; X86-NEXT: imull %esi, %edi
13041285
; X86-NEXT: subl %edi, %ecx
13051286
; X86-NEXT: movl %ecx, (%edx)
1306-
; X86-NEXT: .LBB11_2: # %end
1287+
; X86-NEXT: .LBB12_2: # %end
13071288
; X86-NEXT: popl %esi
13081289
; X86-NEXT: popl %edi
13091290
; X86-NEXT: popl %ebx
@@ -1317,13 +1298,13 @@ define i32 @multiple_bb(i32 %x, i32 %y, ptr %divdst, i1 zeroext %store_srem, ptr
13171298
; X64-NEXT: idivl %esi
13181299
; X64-NEXT: movl %eax, (%r9)
13191300
; X64-NEXT: testl %ecx, %ecx
1320-
; X64-NEXT: je .LBB11_2
1301+
; X64-NEXT: je .LBB12_2
13211302
; X64-NEXT: # %bb.1: # %do_srem
13221303
; X64-NEXT: movl %eax, %ecx
13231304
; X64-NEXT: imull %esi, %ecx
13241305
; X64-NEXT: subl %ecx, %edi
13251306
; X64-NEXT: movl %edi, (%r8)
1326-
; X64-NEXT: .LBB11_2: # %end
1307+
; X64-NEXT: .LBB12_2: # %end
13271308
; X64-NEXT: retq
13281309
%div = sdiv i32 %x, %y
13291310
store i32 %div, ptr %divdst, align 4

0 commit comments

Comments
 (0)