Skip to content

Commit 1143e6f

Browse files
committed
[AArch64] Improve the codegen for sdiv 2
If X's size is BitWidth, then X sdiv 2 can be expressed as X += X >> (BitWidth - 1) (logical shift), followed by X >> 1 (arithmetic shift). Fixes #97884
1 parent ad01635 commit 1143e6f

File tree

4 files changed

+81
-154
lines changed

4 files changed

+81
-154
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6182,18 +6182,31 @@ SDValue TargetLowering::buildSDIVPow2WithCMov(
61826182
SDLoc DL(N);
61836183
SDValue N0 = N->getOperand(0);
61846184
SDValue Zero = DAG.getConstant(0, DL, VT);
6185-
APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6186-
SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6185+
SDValue CMov;
61876186

6188-
// If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6189-
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6190-
SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6191-
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6192-
SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6187+
if (Lg2 == 1) {
6188+
// If Divisor is 2, add the sign bit (N0 logically shifted right by BitWidth - 1) to N0 before shifting right.
6189+
unsigned BitWidth = VT.getSizeInBits();
6190+
SDValue SignVal = DAG.getNode(ISD::SRL, DL, VT, N0,
6191+
DAG.getConstant(BitWidth - 1, DL, VT));
6192+
CMov = DAG.getNode(ISD::ADD, DL, VT, N0, SignVal);
6193+
6194+
Created.push_back(SignVal.getNode());
6195+
Created.push_back(CMov.getNode());
6196+
} else {
6197+
APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6198+
SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
61936199

6194-
Created.push_back(Cmp.getNode());
6195-
Created.push_back(Add.getNode());
6196-
Created.push_back(CMov.getNode());
6200+
// If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6201+
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6202+
SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6203+
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6204+
CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6205+
6206+
Created.push_back(Cmp.getNode());
6207+
Created.push_back(Add.getNode());
6208+
Created.push_back(CMov.getNode());
6209+
}
61976210

61986211
// Divide by pow2.
61996212
SDValue SRA =

llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,9 +202,8 @@ define <4 x i32> @test_bit_sink_operand(<4 x i32> %src, <4 x i32> %dst, <4 x i32
202202
; CHECK-SD: // %bb.0: // %entry
203203
; CHECK-SD-NEXT: sub sp, sp, #32
204204
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
205-
; CHECK-SD-NEXT: cmp w0, #0
205+
; CHECK-SD-NEXT: add w8, w0, w0, lsr #31
206206
; CHECK-SD-NEXT: mov w9, wzr
207-
; CHECK-SD-NEXT: cinc w8, w0, lt
208207
; CHECK-SD-NEXT: asr w8, w8, #1
209208
; CHECK-SD-NEXT: .LBB11_1: // %do.body
210209
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1

llvm/test/CodeGen/AArch64/sdivpow2.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,7 @@ define i64 @test7(i64 %x) {
9090
define i64 @test8(i64 %x) {
9191
; ISEL-LABEL: test8:
9292
; ISEL: // %bb.0:
93-
; ISEL-NEXT: cmp x0, #0
94-
; ISEL-NEXT: cinc x8, x0, lt
93+
; ISEL-NEXT: add x8, x0, x0, lsr #63
9594
; ISEL-NEXT: asr x0, x8, #1
9695
; ISEL-NEXT: ret
9796
;
@@ -110,10 +109,8 @@ define i32 @sdiv_int(i32 %begin, i32 %first) #0 {
110109
; ISEL-LABEL: sdiv_int:
111110
; ISEL: // %bb.0:
112111
; ISEL-NEXT: sub w8, w0, w1
113-
; ISEL-NEXT: add w9, w8, #1
114-
; ISEL-NEXT: add w10, w8, #2
115-
; ISEL-NEXT: cmp w9, #0
116-
; ISEL-NEXT: csinc w8, w10, w8, lt
112+
; ISEL-NEXT: add w8, w8, #1
113+
; ISEL-NEXT: add w8, w8, w8, lsr #31
117114
; ISEL-NEXT: sub w0, w0, w8, asr #1
118115
; ISEL-NEXT: ret
119116
;

llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll

Lines changed: 54 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -5,177 +5,95 @@
55
; RUN: | FileCheck -check-prefixes=CHECK,SFB %s
66

77
define signext i32 @sdiv2_32(i32 signext %0) {
8-
; NOSFB-LABEL: sdiv2_32:
9-
; NOSFB: # %bb.0:
10-
; NOSFB-NEXT: srliw a1, a0, 31
11-
; NOSFB-NEXT: add a0, a0, a1
12-
; NOSFB-NEXT: sraiw a0, a0, 1
13-
; NOSFB-NEXT: ret
14-
;
15-
; SFB-LABEL: sdiv2_32:
16-
; SFB: # %bb.0:
17-
; SFB-NEXT: bgez a0, .LBB0_2
18-
; SFB-NEXT: # %bb.1:
19-
; SFB-NEXT: addi a0, a0, 1
20-
; SFB-NEXT: .LBB0_2:
21-
; SFB-NEXT: sraiw a0, a0, 1
22-
; SFB-NEXT: ret
8+
; CHECK-LABEL: sdiv2_32:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: srliw a1, a0, 31
11+
; CHECK-NEXT: add a0, a0, a1
12+
; CHECK-NEXT: sraiw a0, a0, 1
13+
; CHECK-NEXT: ret
2314
%res = sdiv i32 %0, 2
2415
ret i32 %res
2516
}
2617

2718
define signext i32 @sdivneg2_32(i32 signext %0) {
28-
; NOSFB-LABEL: sdivneg2_32:
29-
; NOSFB: # %bb.0:
30-
; NOSFB-NEXT: srliw a1, a0, 31
31-
; NOSFB-NEXT: add a0, a0, a1
32-
; NOSFB-NEXT: sraiw a0, a0, 1
33-
; NOSFB-NEXT: neg a0, a0
34-
; NOSFB-NEXT: ret
35-
;
36-
; SFB-LABEL: sdivneg2_32:
37-
; SFB: # %bb.0:
38-
; SFB-NEXT: bgez a0, .LBB1_2
39-
; SFB-NEXT: # %bb.1:
40-
; SFB-NEXT: addi a0, a0, 1
41-
; SFB-NEXT: .LBB1_2:
42-
; SFB-NEXT: sraiw a0, a0, 1
43-
; SFB-NEXT: neg a0, a0
44-
; SFB-NEXT: ret
19+
; CHECK-LABEL: sdivneg2_32:
20+
; CHECK: # %bb.0:
21+
; CHECK-NEXT: srliw a1, a0, 31
22+
; CHECK-NEXT: add a0, a0, a1
23+
; CHECK-NEXT: sraiw a0, a0, 1
24+
; CHECK-NEXT: neg a0, a0
25+
; CHECK-NEXT: ret
4526
%res = sdiv i32 %0, -2
4627
ret i32 %res
4728
}
4829

4930
define i64 @sdiv2_64(i64 %0) {
50-
; NOSFB-LABEL: sdiv2_64:
51-
; NOSFB: # %bb.0:
52-
; NOSFB-NEXT: srli a1, a0, 63
53-
; NOSFB-NEXT: add a0, a0, a1
54-
; NOSFB-NEXT: srai a0, a0, 1
55-
; NOSFB-NEXT: ret
56-
;
57-
; SFB-LABEL: sdiv2_64:
58-
; SFB: # %bb.0:
59-
; SFB-NEXT: bgez a0, .LBB2_2
60-
; SFB-NEXT: # %bb.1:
61-
; SFB-NEXT: addi a0, a0, 1
62-
; SFB-NEXT: .LBB2_2:
63-
; SFB-NEXT: srai a0, a0, 1
64-
; SFB-NEXT: ret
31+
; CHECK-LABEL: sdiv2_64:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: srli a1, a0, 63
34+
; CHECK-NEXT: add a0, a0, a1
35+
; CHECK-NEXT: srai a0, a0, 1
36+
; CHECK-NEXT: ret
6537
%res = sdiv i64 %0, 2
6638
ret i64 %res
6739
}
6840

6941
define i64 @sdivneg2_64(i64 %0) {
70-
; NOSFB-LABEL: sdivneg2_64:
71-
; NOSFB: # %bb.0:
72-
; NOSFB-NEXT: srli a1, a0, 63
73-
; NOSFB-NEXT: add a0, a0, a1
74-
; NOSFB-NEXT: srai a0, a0, 1
75-
; NOSFB-NEXT: neg a0, a0
76-
; NOSFB-NEXT: ret
77-
;
78-
; SFB-LABEL: sdivneg2_64:
79-
; SFB: # %bb.0:
80-
; SFB-NEXT: bgez a0, .LBB3_2
81-
; SFB-NEXT: # %bb.1:
82-
; SFB-NEXT: addi a0, a0, 1
83-
; SFB-NEXT: .LBB3_2:
84-
; SFB-NEXT: srai a0, a0, 1
85-
; SFB-NEXT: neg a0, a0
86-
; SFB-NEXT: ret
42+
; CHECK-LABEL: sdivneg2_64:
43+
; CHECK: # %bb.0:
44+
; CHECK-NEXT: srli a1, a0, 63
45+
; CHECK-NEXT: add a0, a0, a1
46+
; CHECK-NEXT: srai a0, a0, 1
47+
; CHECK-NEXT: neg a0, a0
48+
; CHECK-NEXT: ret
8749
%res = sdiv i64 %0, -2
8850
ret i64 %res
8951
}
9052

9153
define signext i32 @srem2_32(i32 signext %0) {
92-
; NOSFB-LABEL: srem2_32:
93-
; NOSFB: # %bb.0:
94-
; NOSFB-NEXT: srliw a1, a0, 31
95-
; NOSFB-NEXT: add a1, a1, a0
96-
; NOSFB-NEXT: andi a1, a1, -2
97-
; NOSFB-NEXT: subw a0, a0, a1
98-
; NOSFB-NEXT: ret
99-
;
100-
; SFB-LABEL: srem2_32:
101-
; SFB: # %bb.0:
102-
; SFB-NEXT: mv a1, a0
103-
; SFB-NEXT: bgez a0, .LBB4_2
104-
; SFB-NEXT: # %bb.1:
105-
; SFB-NEXT: addi a1, a0, 1
106-
; SFB-NEXT: .LBB4_2:
107-
; SFB-NEXT: andi a1, a1, -2
108-
; SFB-NEXT: subw a0, a0, a1
109-
; SFB-NEXT: ret
54+
; CHECK-LABEL: srem2_32:
55+
; CHECK: # %bb.0:
56+
; CHECK-NEXT: srliw a1, a0, 31
57+
; CHECK-NEXT: add a1, a1, a0
58+
; CHECK-NEXT: andi a1, a1, -2
59+
; CHECK-NEXT: subw a0, a0, a1
60+
; CHECK-NEXT: ret
11061
%res = srem i32 %0, 2
11162
ret i32 %res
11263
}
11364

11465
define signext i32 @sremneg2_32(i32 signext %0) {
115-
; NOSFB-LABEL: sremneg2_32:
116-
; NOSFB: # %bb.0:
117-
; NOSFB-NEXT: srliw a1, a0, 31
118-
; NOSFB-NEXT: add a1, a1, a0
119-
; NOSFB-NEXT: andi a1, a1, -2
120-
; NOSFB-NEXT: subw a0, a0, a1
121-
; NOSFB-NEXT: ret
122-
;
123-
; SFB-LABEL: sremneg2_32:
124-
; SFB: # %bb.0:
125-
; SFB-NEXT: mv a1, a0
126-
; SFB-NEXT: bgez a0, .LBB5_2
127-
; SFB-NEXT: # %bb.1:
128-
; SFB-NEXT: addi a1, a0, 1
129-
; SFB-NEXT: .LBB5_2:
130-
; SFB-NEXT: andi a1, a1, -2
131-
; SFB-NEXT: subw a0, a0, a1
132-
; SFB-NEXT: ret
66+
; CHECK-LABEL: sremneg2_32:
67+
; CHECK: # %bb.0:
68+
; CHECK-NEXT: srliw a1, a0, 31
69+
; CHECK-NEXT: add a1, a1, a0
70+
; CHECK-NEXT: andi a1, a1, -2
71+
; CHECK-NEXT: subw a0, a0, a1
72+
; CHECK-NEXT: ret
13373
%res = srem i32 %0, -2
13474
ret i32 %res
13575
}
13676

13777
define i64 @srem2_64(i64 %0) {
138-
; NOSFB-LABEL: srem2_64:
139-
; NOSFB: # %bb.0:
140-
; NOSFB-NEXT: srli a1, a0, 63
141-
; NOSFB-NEXT: add a1, a1, a0
142-
; NOSFB-NEXT: andi a1, a1, -2
143-
; NOSFB-NEXT: sub a0, a0, a1
144-
; NOSFB-NEXT: ret
145-
;
146-
; SFB-LABEL: srem2_64:
147-
; SFB: # %bb.0:
148-
; SFB-NEXT: mv a1, a0
149-
; SFB-NEXT: bgez a0, .LBB6_2
150-
; SFB-NEXT: # %bb.1:
151-
; SFB-NEXT: addi a1, a0, 1
152-
; SFB-NEXT: .LBB6_2:
153-
; SFB-NEXT: andi a1, a1, -2
154-
; SFB-NEXT: sub a0, a0, a1
155-
; SFB-NEXT: ret
78+
; CHECK-LABEL: srem2_64:
79+
; CHECK: # %bb.0:
80+
; CHECK-NEXT: srli a1, a0, 63
81+
; CHECK-NEXT: add a1, a1, a0
82+
; CHECK-NEXT: andi a1, a1, -2
83+
; CHECK-NEXT: sub a0, a0, a1
84+
; CHECK-NEXT: ret
15685
%res = srem i64 %0, 2
15786
ret i64 %res
15887
}
15988

16089
define i64 @sremneg2_64(i64 %0) {
161-
; NOSFB-LABEL: sremneg2_64:
162-
; NOSFB: # %bb.0:
163-
; NOSFB-NEXT: srli a1, a0, 63
164-
; NOSFB-NEXT: add a1, a1, a0
165-
; NOSFB-NEXT: andi a1, a1, -2
166-
; NOSFB-NEXT: sub a0, a0, a1
167-
; NOSFB-NEXT: ret
168-
;
169-
; SFB-LABEL: sremneg2_64:
170-
; SFB: # %bb.0:
171-
; SFB-NEXT: mv a1, a0
172-
; SFB-NEXT: bgez a0, .LBB7_2
173-
; SFB-NEXT: # %bb.1:
174-
; SFB-NEXT: addi a1, a0, 1
175-
; SFB-NEXT: .LBB7_2:
176-
; SFB-NEXT: andi a1, a1, -2
177-
; SFB-NEXT: sub a0, a0, a1
178-
; SFB-NEXT: ret
90+
; CHECK-LABEL: sremneg2_64:
91+
; CHECK: # %bb.0:
92+
; CHECK-NEXT: srli a1, a0, 63
93+
; CHECK-NEXT: add a1, a1, a0
94+
; CHECK-NEXT: andi a1, a1, -2
95+
; CHECK-NEXT: sub a0, a0, a1
96+
; CHECK-NEXT: ret
17997
%res = srem i64 %0, -2
18098
ret i64 %res
18199
}

0 commit comments

Comments
 (0)