Skip to content

Commit d5d6b6a

Browse files
RKSimon authored and AlexisPerry committed
[DAG] expandAVG - attempt to extend to a wider integer type for the add/shift to avoid overflow handling (llvm#95788)
1 parent 0dfcb0c commit d5d6b6a

File tree

6 files changed

+184
-229
lines changed

6 files changed

+184
-229
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9243,7 +9243,10 @@ SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
92439243
unsigned Opc = N->getOpcode();
92449244
bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
92459245
bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
9246+
unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9247+
unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
92469248
unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9249+
unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
92479250
assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
92489251
Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
92499252
"Unknown AVG node");
@@ -9262,12 +9265,28 @@ SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
92629265
DAG.getShiftAmountConstant(1, VT, dl));
92639266
}
92649267

9268+
// For scalars, see if we can efficiently extend/truncate to use add+shift.
9269+
if (VT.isScalarInteger()) {
9270+
unsigned BW = VT.getScalarSizeInBits();
9271+
EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9272+
if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
9273+
LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
9274+
RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
9275+
SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
9276+
if (!IsFloor)
9277+
Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9278+
DAG.getConstant(1, dl, ExtVT));
9279+
// Just use SRL as we will be truncating away the extended sign bits.
9280+
Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9281+
DAG.getShiftAmountConstant(1, ExtVT, dl));
9282+
return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9283+
}
9284+
}
9285+
92659286
// avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
92669287
// avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
92679288
// avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
92689289
// avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
9269-
unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9270-
unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
92719290
LHS = DAG.getFreeze(LHS);
92729291
RHS = DAG.getFreeze(RHS);
92739292
SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);

llvm/test/CodeGen/X86/avg.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1973,11 +1973,11 @@ define void @not_avg_v16i8_wide_constants(ptr %a, ptr %b) nounwind {
19731973
define <1 x i8> @avg_v1i8(<1 x i8> %x, <1 x i8> %y) {
19741974
; CHECK-LABEL: avg_v1i8:
19751975
; CHECK: # %bb.0:
1976-
; CHECK-NEXT: movl %edi, %eax
1977-
; CHECK-NEXT: orb %sil, %al
1978-
; CHECK-NEXT: xorb %sil, %dil
1979-
; CHECK-NEXT: shrb %dil
1980-
; CHECK-NEXT: subb %dil, %al
1976+
; CHECK-NEXT: movzbl %sil, %eax
1977+
; CHECK-NEXT: movzbl %dil, %ecx
1978+
; CHECK-NEXT: leal 1(%rcx,%rax), %eax
1979+
; CHECK-NEXT: shrl %eax
1980+
; CHECK-NEXT: # kill: def $al killed $al killed $eax
19811981
; CHECK-NEXT: retq
19821982
%a = zext <1 x i8> %x to <1 x i16>
19831983
%b = zext <1 x i8> %y to <1 x i16>

llvm/test/CodeGen/X86/avgceils-scalar.ll

Lines changed: 44 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -11,22 +11,20 @@
1111
define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
1212
; X86-LABEL: test_fixed_i8:
1313
; X86: # %bb.0:
14-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
15-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
16-
; X86-NEXT: movl %edx, %eax
17-
; X86-NEXT: orb %cl, %al
18-
; X86-NEXT: xorb %cl, %dl
19-
; X86-NEXT: sarb %dl
20-
; X86-NEXT: subb %dl, %al
14+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
15+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
16+
; X86-NEXT: leal 1(%ecx,%eax), %eax
17+
; X86-NEXT: shrl %eax
18+
; X86-NEXT: # kill: def $al killed $al killed $eax
2119
; X86-NEXT: retl
2220
;
2321
; X64-LABEL: test_fixed_i8:
2422
; X64: # %bb.0:
25-
; X64-NEXT: movl %edi, %eax
26-
; X64-NEXT: orb %sil, %al
27-
; X64-NEXT: xorb %sil, %dil
28-
; X64-NEXT: sarb %dil
29-
; X64-NEXT: subb %dil, %al
23+
; X64-NEXT: movsbl %sil, %eax
24+
; X64-NEXT: movsbl %dil, %ecx
25+
; X64-NEXT: leal 1(%rcx,%rax), %eax
26+
; X64-NEXT: shrl %eax
27+
; X64-NEXT: # kill: def $al killed $al killed $eax
3028
; X64-NEXT: retq
3129
%or = or i8 %a0, %a1
3230
%xor = xor i8 %a0, %a1
@@ -38,22 +36,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
3836
define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
3937
; X86-LABEL: test_ext_i8:
4038
; X86: # %bb.0:
41-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
42-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
43-
; X86-NEXT: movl %edx, %eax
44-
; X86-NEXT: orb %cl, %al
45-
; X86-NEXT: xorb %cl, %dl
46-
; X86-NEXT: sarb %dl
47-
; X86-NEXT: subb %dl, %al
39+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
40+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
41+
; X86-NEXT: leal 1(%ecx,%eax), %eax
42+
; X86-NEXT: shrl %eax
43+
; X86-NEXT: # kill: def $al killed $al killed $eax
4844
; X86-NEXT: retl
4945
;
5046
; X64-LABEL: test_ext_i8:
5147
; X64: # %bb.0:
52-
; X64-NEXT: movl %edi, %eax
53-
; X64-NEXT: orb %sil, %al
54-
; X64-NEXT: xorb %sil, %dil
55-
; X64-NEXT: sarb %dil
56-
; X64-NEXT: subb %dil, %al
48+
; X64-NEXT: movsbl %sil, %eax
49+
; X64-NEXT: movsbl %dil, %ecx
50+
; X64-NEXT: leal 1(%rcx,%rax), %eax
51+
; X64-NEXT: shrl %eax
52+
; X64-NEXT: # kill: def $al killed $al killed $eax
5753
; X64-NEXT: retq
5854
%x0 = sext i8 %a0 to i16
5955
%x1 = sext i8 %a1 to i16
@@ -67,25 +63,19 @@ define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
6763
define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
6864
; X86-LABEL: test_fixed_i16:
6965
; X86: # %bb.0:
70-
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
71-
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
72-
; X86-NEXT: movl %edx, %eax
73-
; X86-NEXT: orl %ecx, %eax
74-
; X86-NEXT: xorl %ecx, %edx
75-
; X86-NEXT: movswl %dx, %ecx
76-
; X86-NEXT: sarl %ecx
77-
; X86-NEXT: subl %ecx, %eax
66+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
67+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
68+
; X86-NEXT: leal 1(%ecx,%eax), %eax
69+
; X86-NEXT: shrl %eax
7870
; X86-NEXT: # kill: def $ax killed $ax killed $eax
7971
; X86-NEXT: retl
8072
;
8173
; X64-LABEL: test_fixed_i16:
8274
; X64: # %bb.0:
83-
; X64-NEXT: movl %edi, %eax
84-
; X64-NEXT: orl %esi, %eax
85-
; X64-NEXT: xorl %esi, %edi
75+
; X64-NEXT: movswl %si, %eax
8676
; X64-NEXT: movswl %di, %ecx
87-
; X64-NEXT: sarl %ecx
88-
; X64-NEXT: subl %ecx, %eax
77+
; X64-NEXT: leal 1(%rcx,%rax), %eax
78+
; X64-NEXT: shrl %eax
8979
; X64-NEXT: # kill: def $ax killed $ax killed $eax
9080
; X64-NEXT: retq
9181
%or = or i16 %a0, %a1
@@ -98,25 +88,19 @@ define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
9888
define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind {
9989
; X86-LABEL: test_ext_i16:
10090
; X86: # %bb.0:
101-
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
102-
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
103-
; X86-NEXT: movl %edx, %eax
104-
; X86-NEXT: orl %ecx, %eax
105-
; X86-NEXT: xorl %ecx, %edx
106-
; X86-NEXT: movswl %dx, %ecx
107-
; X86-NEXT: sarl %ecx
108-
; X86-NEXT: subl %ecx, %eax
91+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
92+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
93+
; X86-NEXT: leal 1(%ecx,%eax), %eax
94+
; X86-NEXT: shrl %eax
10995
; X86-NEXT: # kill: def $ax killed $ax killed $eax
11096
; X86-NEXT: retl
11197
;
11298
; X64-LABEL: test_ext_i16:
11399
; X64: # %bb.0:
114-
; X64-NEXT: movl %edi, %eax
115-
; X64-NEXT: orl %esi, %eax
116-
; X64-NEXT: xorl %esi, %edi
100+
; X64-NEXT: movswl %si, %eax
117101
; X64-NEXT: movswl %di, %ecx
118-
; X64-NEXT: sarl %ecx
119-
; X64-NEXT: subl %ecx, %eax
102+
; X64-NEXT: leal 1(%rcx,%rax), %eax
103+
; X64-NEXT: shrl %eax
120104
; X64-NEXT: # kill: def $ax killed $ax killed $eax
121105
; X64-NEXT: retq
122106
%x0 = sext i16 %a0 to i32
@@ -142,11 +126,11 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind {
142126
;
143127
; X64-LABEL: test_fixed_i32:
144128
; X64: # %bb.0:
145-
; X64-NEXT: movl %edi, %eax
146-
; X64-NEXT: orl %esi, %eax
147-
; X64-NEXT: xorl %esi, %edi
148-
; X64-NEXT: sarl %edi
149-
; X64-NEXT: subl %edi, %eax
129+
; X64-NEXT: movslq %esi, %rax
130+
; X64-NEXT: movslq %edi, %rcx
131+
; X64-NEXT: leaq 1(%rcx,%rax), %rax
132+
; X64-NEXT: shrq %rax
133+
; X64-NEXT: # kill: def $eax killed $eax killed $rax
150134
; X64-NEXT: retq
151135
%or = or i32 %a0, %a1
152136
%xor = xor i32 %a1, %a0
@@ -169,11 +153,11 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
169153
;
170154
; X64-LABEL: test_ext_i32:
171155
; X64: # %bb.0:
172-
; X64-NEXT: movl %edi, %eax
173-
; X64-NEXT: orl %esi, %eax
174-
; X64-NEXT: xorl %esi, %edi
175-
; X64-NEXT: sarl %edi
176-
; X64-NEXT: subl %edi, %eax
156+
; X64-NEXT: movslq %esi, %rax
157+
; X64-NEXT: movslq %edi, %rcx
158+
; X64-NEXT: leaq 1(%rcx,%rax), %rax
159+
; X64-NEXT: shrq %rax
160+
; X64-NEXT: # kill: def $eax killed $eax killed $rax
177161
; X64-NEXT: retq
178162
%x0 = sext i32 %a0 to i64
179163
%x1 = sext i32 %a1 to i64

llvm/test/CodeGen/X86/avgceilu-scalar.ll

Lines changed: 40 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -11,22 +11,20 @@
1111
define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
1212
; X86-LABEL: test_fixed_i8:
1313
; X86: # %bb.0:
14+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
1415
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
15-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
16-
; X86-NEXT: movl %edx, %eax
17-
; X86-NEXT: orb %cl, %al
18-
; X86-NEXT: xorb %cl, %dl
19-
; X86-NEXT: shrb %dl
20-
; X86-NEXT: subb %dl, %al
16+
; X86-NEXT: leal 1(%ecx,%eax), %eax
17+
; X86-NEXT: shrl %eax
18+
; X86-NEXT: # kill: def $al killed $al killed $eax
2119
; X86-NEXT: retl
2220
;
2321
; X64-LABEL: test_fixed_i8:
2422
; X64: # %bb.0:
25-
; X64-NEXT: movl %edi, %eax
26-
; X64-NEXT: orb %sil, %al
27-
; X64-NEXT: xorb %sil, %dil
28-
; X64-NEXT: shrb %dil
29-
; X64-NEXT: subb %dil, %al
23+
; X64-NEXT: movzbl %sil, %eax
24+
; X64-NEXT: movzbl %dil, %ecx
25+
; X64-NEXT: leal 1(%rcx,%rax), %eax
26+
; X64-NEXT: shrl %eax
27+
; X64-NEXT: # kill: def $al killed $al killed $eax
3028
; X64-NEXT: retq
3129
%or = or i8 %a0, %a1
3230
%xor = xor i8 %a0, %a1
@@ -38,22 +36,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
3836
define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
3937
; X86-LABEL: test_ext_i8:
4038
; X86: # %bb.0:
39+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
4140
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
42-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
43-
; X86-NEXT: movl %edx, %eax
44-
; X86-NEXT: orb %cl, %al
45-
; X86-NEXT: xorb %cl, %dl
46-
; X86-NEXT: shrb %dl
47-
; X86-NEXT: subb %dl, %al
41+
; X86-NEXT: leal 1(%ecx,%eax), %eax
42+
; X86-NEXT: shrl %eax
43+
; X86-NEXT: # kill: def $al killed $al killed $eax
4844
; X86-NEXT: retl
4945
;
5046
; X64-LABEL: test_ext_i8:
5147
; X64: # %bb.0:
52-
; X64-NEXT: movl %edi, %eax
53-
; X64-NEXT: orb %sil, %al
54-
; X64-NEXT: xorb %sil, %dil
55-
; X64-NEXT: shrb %dil
56-
; X64-NEXT: subb %dil, %al
48+
; X64-NEXT: movzbl %sil, %eax
49+
; X64-NEXT: movzbl %dil, %ecx
50+
; X64-NEXT: leal 1(%rcx,%rax), %eax
51+
; X64-NEXT: shrl %eax
52+
; X64-NEXT: # kill: def $al killed $al killed $eax
5753
; X64-NEXT: retq
5854
%x0 = zext i8 %a0 to i16
5955
%x1 = zext i8 %a1 to i16
@@ -67,25 +63,19 @@ define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
6763
define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
6864
; X86-LABEL: test_fixed_i16:
6965
; X86: # %bb.0:
66+
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
7067
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
71-
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
72-
; X86-NEXT: movl %edx, %eax
73-
; X86-NEXT: orl %ecx, %eax
74-
; X86-NEXT: xorl %ecx, %edx
75-
; X86-NEXT: movzwl %dx, %ecx
76-
; X86-NEXT: shrl %ecx
77-
; X86-NEXT: subl %ecx, %eax
68+
; X86-NEXT: leal 1(%ecx,%eax), %eax
69+
; X86-NEXT: shrl %eax
7870
; X86-NEXT: # kill: def $ax killed $ax killed $eax
7971
; X86-NEXT: retl
8072
;
8173
; X64-LABEL: test_fixed_i16:
8274
; X64: # %bb.0:
83-
; X64-NEXT: movl %edi, %eax
84-
; X64-NEXT: orl %esi, %eax
85-
; X64-NEXT: xorl %esi, %edi
75+
; X64-NEXT: movzwl %si, %eax
8676
; X64-NEXT: movzwl %di, %ecx
87-
; X64-NEXT: shrl %ecx
88-
; X64-NEXT: subl %ecx, %eax
77+
; X64-NEXT: leal 1(%rcx,%rax), %eax
78+
; X64-NEXT: shrl %eax
8979
; X64-NEXT: # kill: def $ax killed $ax killed $eax
9080
; X64-NEXT: retq
9181
%or = or i16 %a0, %a1
@@ -98,25 +88,19 @@ define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
9888
define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind {
9989
; X86-LABEL: test_ext_i16:
10090
; X86: # %bb.0:
91+
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
10192
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
102-
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
103-
; X86-NEXT: movl %edx, %eax
104-
; X86-NEXT: orl %ecx, %eax
105-
; X86-NEXT: xorl %ecx, %edx
106-
; X86-NEXT: movzwl %dx, %ecx
107-
; X86-NEXT: shrl %ecx
108-
; X86-NEXT: subl %ecx, %eax
93+
; X86-NEXT: leal 1(%ecx,%eax), %eax
94+
; X86-NEXT: shrl %eax
10995
; X86-NEXT: # kill: def $ax killed $ax killed $eax
11096
; X86-NEXT: retl
11197
;
11298
; X64-LABEL: test_ext_i16:
11399
; X64: # %bb.0:
114-
; X64-NEXT: movl %edi, %eax
115-
; X64-NEXT: orl %esi, %eax
116-
; X64-NEXT: xorl %esi, %edi
100+
; X64-NEXT: movzwl %si, %eax
117101
; X64-NEXT: movzwl %di, %ecx
118-
; X64-NEXT: shrl %ecx
119-
; X64-NEXT: subl %ecx, %eax
102+
; X64-NEXT: leal 1(%rcx,%rax), %eax
103+
; X64-NEXT: shrl %eax
120104
; X64-NEXT: # kill: def $ax killed $ax killed $eax
121105
; X64-NEXT: retq
122106
%x0 = zext i16 %a0 to i32
@@ -142,11 +126,11 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind {
142126
;
143127
; X64-LABEL: test_fixed_i32:
144128
; X64: # %bb.0:
145-
; X64-NEXT: movl %edi, %eax
146-
; X64-NEXT: orl %esi, %eax
147-
; X64-NEXT: xorl %esi, %edi
148-
; X64-NEXT: shrl %edi
149-
; X64-NEXT: subl %edi, %eax
129+
; X64-NEXT: movl %esi, %eax
130+
; X64-NEXT: movl %edi, %ecx
131+
; X64-NEXT: leaq 1(%rcx,%rax), %rax
132+
; X64-NEXT: shrq %rax
133+
; X64-NEXT: # kill: def $eax killed $eax killed $rax
150134
; X64-NEXT: retq
151135
%or = or i32 %a0, %a1
152136
%xor = xor i32 %a1, %a0
@@ -169,11 +153,11 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
169153
;
170154
; X64-LABEL: test_ext_i32:
171155
; X64: # %bb.0:
172-
; X64-NEXT: movl %edi, %eax
173-
; X64-NEXT: orl %esi, %eax
174-
; X64-NEXT: xorl %esi, %edi
175-
; X64-NEXT: shrl %edi
176-
; X64-NEXT: subl %edi, %eax
156+
; X64-NEXT: movl %esi, %eax
157+
; X64-NEXT: movl %edi, %ecx
158+
; X64-NEXT: leaq 1(%rcx,%rax), %rax
159+
; X64-NEXT: shrq %rax
160+
; X64-NEXT: # kill: def $eax killed $eax killed $rax
177161
; X64-NEXT: retq
178162
%x0 = zext i32 %a0 to i64
179163
%x1 = zext i32 %a1 to i64

0 commit comments

Comments
 (0)