Skip to content

[DAG] expandAVG - attempt to extend to a wider integer type for the add/shift to avoid overflow handling #95788

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9243,7 +9243,10 @@ SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
unsigned Opc = N->getOpcode();
bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
"Unknown AVG node");
Expand All @@ -9262,12 +9265,28 @@ SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
DAG.getShiftAmountConstant(1, VT, dl));
}

// For scalars, see if we can efficiently extend/truncate to use add+shift.
if (VT.isScalarInteger()) {
unsigned BW = VT.getScalarSizeInBits();
EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
if (!IsFloor)
Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
DAG.getConstant(1, dl, ExtVT));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mostly unrelated, but this got me thinking: it might be nice to create a helper for getting an associative operation w/ more than 2 operands that would try to find existing DAG nodes for all the possible pairs.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raise that as an Issue? I don't really want this PR to get pulled into future work like that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did a preliminary search and didn't see that many cases. If I revisit it, it will be independent of this PR.

// Just use SRL as we will be truncating away the extended sign bits.
Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
DAG.getShiftAmountConstant(1, ExtVT, dl));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
}
}

// avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
// avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
// avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
// avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
LHS = DAG.getFreeze(LHS);
RHS = DAG.getFreeze(RHS);
SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/X86/avg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1973,11 +1973,11 @@ define void @not_avg_v16i8_wide_constants(ptr %a, ptr %b) nounwind {
define <1 x i8> @avg_v1i8(<1 x i8> %x, <1 x i8> %y) {
; CHECK-LABEL: avg_v1i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: orb %sil, %al
; CHECK-NEXT: xorb %sil, %dil
; CHECK-NEXT: shrb %dil
; CHECK-NEXT: subb %dil, %al
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: movzbl %dil, %ecx
; CHECK-NEXT: leal 1(%rcx,%rax), %eax
; CHECK-NEXT: shrl %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%a = zext <1 x i8> %x to <1 x i16>
%b = zext <1 x i8> %y to <1 x i16>
Expand Down
104 changes: 44 additions & 60 deletions llvm/test/CodeGen/X86/avgceils-scalar.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,20 @@
define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
; X86-LABEL: test_fixed_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: orb %cl, %al
; X86-NEXT: xorb %cl, %dl
; X86-NEXT: sarb %dl
; X86-NEXT: subb %dl, %al
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: leal 1(%ecx,%eax), %eax
; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_fixed_i8:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orb %sil, %al
; X64-NEXT: xorb %sil, %dil
; X64-NEXT: sarb %dil
; X64-NEXT: subb %dil, %al
; X64-NEXT: movsbl %sil, %eax
; X64-NEXT: movsbl %dil, %ecx
; X64-NEXT: leal 1(%rcx,%rax), %eax
; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%or = or i8 %a0, %a1
%xor = xor i8 %a0, %a1
Expand All @@ -38,22 +36,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
; X86-LABEL: test_ext_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: orb %cl, %al
; X86-NEXT: xorb %cl, %dl
; X86-NEXT: sarb %dl
; X86-NEXT: subb %dl, %al
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: leal 1(%ecx,%eax), %eax
; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_ext_i8:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orb %sil, %al
; X64-NEXT: xorb %sil, %dil
; X64-NEXT: sarb %dil
; X64-NEXT: subb %dil, %al
; X64-NEXT: movsbl %sil, %eax
; X64-NEXT: movsbl %dil, %ecx
; X64-NEXT: leal 1(%rcx,%rax), %eax
; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%x0 = sext i8 %a0 to i16
%x1 = sext i8 %a1 to i16
Expand All @@ -67,25 +63,19 @@ define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
; X86-LABEL: test_fixed_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: xorl %ecx, %edx
; X86-NEXT: movswl %dx, %ecx
; X86-NEXT: sarl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: leal 1(%ecx,%eax), %eax
; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_fixed_i16:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl %esi, %eax
; X64-NEXT: xorl %esi, %edi
; X64-NEXT: movswl %si, %eax
; X64-NEXT: movswl %di, %ecx
; X64-NEXT: sarl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: leal 1(%rcx,%rax), %eax
; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%or = or i16 %a0, %a1
Expand All @@ -98,25 +88,19 @@ define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind {
; X86-LABEL: test_ext_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: xorl %ecx, %edx
; X86-NEXT: movswl %dx, %ecx
; X86-NEXT: sarl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: leal 1(%ecx,%eax), %eax
; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_ext_i16:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl %esi, %eax
; X64-NEXT: xorl %esi, %edi
; X64-NEXT: movswl %si, %eax
; X64-NEXT: movswl %di, %ecx
; X64-NEXT: sarl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: leal 1(%rcx,%rax), %eax
; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%x0 = sext i16 %a0 to i32
Expand All @@ -142,11 +126,11 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_fixed_i32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl %esi, %eax
; X64-NEXT: xorl %esi, %edi
; X64-NEXT: sarl %edi
; X64-NEXT: subl %edi, %eax
; X64-NEXT: movslq %esi, %rax
; X64-NEXT: movslq %edi, %rcx
; X64-NEXT: leaq 1(%rcx,%rax), %rax
; X64-NEXT: shrq %rax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%or = or i32 %a0, %a1
%xor = xor i32 %a1, %a0
Expand All @@ -169,11 +153,11 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_ext_i32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl %esi, %eax
; X64-NEXT: xorl %esi, %edi
; X64-NEXT: sarl %edi
; X64-NEXT: subl %edi, %eax
; X64-NEXT: movslq %esi, %rax
; X64-NEXT: movslq %edi, %rcx
; X64-NEXT: leaq 1(%rcx,%rax), %rax
; X64-NEXT: shrq %rax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%x0 = sext i32 %a0 to i64
%x1 = sext i32 %a1 to i64
Expand Down
96 changes: 40 additions & 56 deletions llvm/test/CodeGen/X86/avgceilu-scalar.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,20 @@
define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
; X86-LABEL: test_fixed_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: orb %cl, %al
; X86-NEXT: xorb %cl, %dl
; X86-NEXT: shrb %dl
; X86-NEXT: subb %dl, %al
; X86-NEXT: leal 1(%ecx,%eax), %eax
; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_fixed_i8:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orb %sil, %al
; X64-NEXT: xorb %sil, %dil
; X64-NEXT: shrb %dil
; X64-NEXT: subb %dil, %al
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: leal 1(%rcx,%rax), %eax
; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%or = or i8 %a0, %a1
%xor = xor i8 %a0, %a1
Expand All @@ -38,22 +36,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
; X86-LABEL: test_ext_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: orb %cl, %al
; X86-NEXT: xorb %cl, %dl
; X86-NEXT: shrb %dl
; X86-NEXT: subb %dl, %al
; X86-NEXT: leal 1(%ecx,%eax), %eax
; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_ext_i8:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orb %sil, %al
; X64-NEXT: xorb %sil, %dil
; X64-NEXT: shrb %dil
; X64-NEXT: subb %dil, %al
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movzbl %dil, %ecx
; X64-NEXT: leal 1(%rcx,%rax), %eax
; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%x0 = zext i8 %a0 to i16
%x1 = zext i8 %a1 to i16
Expand All @@ -67,25 +63,19 @@ define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
; X86-LABEL: test_fixed_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: xorl %ecx, %edx
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: shrl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: leal 1(%ecx,%eax), %eax
; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_fixed_i16:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl %esi, %eax
; X64-NEXT: xorl %esi, %edi
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: movzwl %di, %ecx
; X64-NEXT: shrl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: leal 1(%rcx,%rax), %eax
; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%or = or i16 %a0, %a1
Expand All @@ -98,25 +88,19 @@ define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind {
; X86-LABEL: test_ext_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: xorl %ecx, %edx
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: shrl %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: leal 1(%ecx,%eax), %eax
; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_ext_i16:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl %esi, %eax
; X64-NEXT: xorl %esi, %edi
; X64-NEXT: movzwl %si, %eax
; X64-NEXT: movzwl %di, %ecx
; X64-NEXT: shrl %ecx
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: leal 1(%rcx,%rax), %eax
; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%x0 = zext i16 %a0 to i32
Expand All @@ -142,11 +126,11 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_fixed_i32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl %esi, %eax
; X64-NEXT: xorl %esi, %edi
; X64-NEXT: shrl %edi
; X64-NEXT: subl %edi, %eax
; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: leaq 1(%rcx,%rax), %rax
; X64-NEXT: shrq %rax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%or = or i32 %a0, %a1
%xor = xor i32 %a1, %a0
Expand All @@ -169,11 +153,11 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_ext_i32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl %esi, %eax
; X64-NEXT: xorl %esi, %edi
; X64-NEXT: shrl %edi
; X64-NEXT: subl %edi, %eax
; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: leaq 1(%rcx,%rax), %rax
; X64-NEXT: shrq %rax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%x0 = zext i32 %a0 to i64
%x1 = zext i32 %a1 to i64
Expand Down
Loading
Loading