-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DAG] expandAVG - attempt to extend to a wider integer type for the add/shift to avoid overflow handling #95788
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes

Patch is 24.82 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95788.diff 6 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ad957aaa8f141..63d304a62fc21 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9243,7 +9243,10 @@ SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
unsigned Opc = N->getOpcode();
bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
+ unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
+ unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
+ unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
"Unknown AVG node");
@@ -9262,12 +9265,28 @@ SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
DAG.getShiftAmountConstant(1, VT, dl));
}
+ // For scalars, see if we can efficiently extend/truncate to use add+shift.
+ if (VT.isScalarInteger()) {
+ unsigned BW = VT.getScalarSizeInBits();
+ EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
+ if (isTypeLegal(ExtVT) &&
+ ((!IsSigned && isZExtFree(VT, ExtVT)) || isTruncateFree(ExtVT, VT))) {
+ LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
+ RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
+ SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
+ if (!IsFloor)
+ Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
+ DAG.getConstant(1, dl, ExtVT));
+ Avg = DAG.getNode(ShiftOpc, dl, ExtVT, Avg,
+ DAG.getShiftAmountConstant(1, ExtVT, dl));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
+ }
+ }
+
// avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
// avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
// avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
// avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
- unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
- unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
LHS = DAG.getFreeze(LHS);
RHS = DAG.getFreeze(RHS);
SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll
index b0fd4c848c5f8..0de308a9e0738 100644
--- a/llvm/test/CodeGen/X86/avg.ll
+++ b/llvm/test/CodeGen/X86/avg.ll
@@ -1973,11 +1973,11 @@ define void @not_avg_v16i8_wide_constants(ptr %a, ptr %b) nounwind {
define <1 x i8> @avg_v1i8(<1 x i8> %x, <1 x i8> %y) {
; CHECK-LABEL: avg_v1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: orb %sil, %al
-; CHECK-NEXT: xorb %sil, %dil
-; CHECK-NEXT: shrb %dil
-; CHECK-NEXT: subb %dil, %al
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: movzbl %dil, %ecx
+; CHECK-NEXT: leal 1(%rcx,%rax), %eax
+; CHECK-NEXT: shrl %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%a = zext <1 x i8> %x to <1 x i16>
%b = zext <1 x i8> %y to <1 x i16>
diff --git a/llvm/test/CodeGen/X86/avgceils-scalar.ll b/llvm/test/CodeGen/X86/avgceils-scalar.ll
index 86de35d36f076..91121bd4ad935 100644
--- a/llvm/test/CodeGen/X86/avgceils-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgceils-scalar.ll
@@ -11,22 +11,20 @@
define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
; X86-LABEL: test_fixed_i8:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: orb %cl, %al
-; X86-NEXT: xorb %cl, %dl
-; X86-NEXT: sarb %dl
-; X86-NEXT: subb %dl, %al
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal 1(%ecx,%eax), %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_fixed_i8:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: orb %sil, %al
-; X64-NEXT: xorb %sil, %dil
-; X64-NEXT: sarb %dil
-; X64-NEXT: subb %dil, %al
+; X64-NEXT: movsbl %sil, %eax
+; X64-NEXT: movsbl %dil, %ecx
+; X64-NEXT: leal 1(%rcx,%rax), %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%or = or i8 %a0, %a1
%xor = xor i8 %a0, %a1
@@ -38,22 +36,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
; X86-LABEL: test_ext_i8:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: orb %cl, %al
-; X86-NEXT: xorb %cl, %dl
-; X86-NEXT: sarb %dl
-; X86-NEXT: subb %dl, %al
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal 1(%ecx,%eax), %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_ext_i8:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: orb %sil, %al
-; X64-NEXT: xorb %sil, %dil
-; X64-NEXT: sarb %dil
-; X64-NEXT: subb %dil, %al
+; X64-NEXT: movsbl %sil, %eax
+; X64-NEXT: movsbl %dil, %ecx
+; X64-NEXT: leal 1(%rcx,%rax), %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%x0 = sext i8 %a0 to i16
%x1 = sext i8 %a1 to i16
@@ -67,25 +63,19 @@ define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
; X86-LABEL: test_fixed_i16:
; X86: # %bb.0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: xorl %ecx, %edx
-; X86-NEXT: movswl %dx, %ecx
-; X86-NEXT: sarl %ecx
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal 1(%ecx,%eax), %eax
+; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_fixed_i16:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: orl %esi, %eax
-; X64-NEXT: xorl %esi, %edi
+; X64-NEXT: movswl %si, %eax
; X64-NEXT: movswl %di, %ecx
-; X64-NEXT: sarl %ecx
-; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: leal 1(%rcx,%rax), %eax
+; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%or = or i16 %a0, %a1
@@ -98,25 +88,19 @@ define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind {
; X86-LABEL: test_ext_i16:
; X86: # %bb.0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: xorl %ecx, %edx
-; X86-NEXT: movswl %dx, %ecx
-; X86-NEXT: sarl %ecx
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal 1(%ecx,%eax), %eax
+; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_ext_i16:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: orl %esi, %eax
-; X64-NEXT: xorl %esi, %edi
+; X64-NEXT: movswl %si, %eax
; X64-NEXT: movswl %di, %ecx
-; X64-NEXT: sarl %ecx
-; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: leal 1(%rcx,%rax), %eax
+; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%x0 = sext i16 %a0 to i32
@@ -142,11 +126,11 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_fixed_i32:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: orl %esi, %eax
-; X64-NEXT: xorl %esi, %edi
-; X64-NEXT: sarl %edi
-; X64-NEXT: subl %edi, %eax
+; X64-NEXT: movslq %esi, %rax
+; X64-NEXT: movslq %edi, %rcx
+; X64-NEXT: leaq 1(%rcx,%rax), %rax
+; X64-NEXT: shrq %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%or = or i32 %a0, %a1
%xor = xor i32 %a1, %a0
@@ -169,11 +153,11 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_ext_i32:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: orl %esi, %eax
-; X64-NEXT: xorl %esi, %edi
-; X64-NEXT: sarl %edi
-; X64-NEXT: subl %edi, %eax
+; X64-NEXT: movslq %esi, %rax
+; X64-NEXT: movslq %edi, %rcx
+; X64-NEXT: leaq 1(%rcx,%rax), %rax
+; X64-NEXT: shrq %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%x0 = sext i32 %a0 to i64
%x1 = sext i32 %a1 to i64
diff --git a/llvm/test/CodeGen/X86/avgceilu-scalar.ll b/llvm/test/CodeGen/X86/avgceilu-scalar.ll
index 014c984528141..4ab4851eccd2c 100644
--- a/llvm/test/CodeGen/X86/avgceilu-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgceilu-scalar.ll
@@ -11,22 +11,20 @@
define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
; X86-LABEL: test_fixed_i8:
; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: orb %cl, %al
-; X86-NEXT: xorb %cl, %dl
-; X86-NEXT: shrb %dl
-; X86-NEXT: subb %dl, %al
+; X86-NEXT: leal 1(%ecx,%eax), %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_fixed_i8:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: orb %sil, %al
-; X64-NEXT: xorb %sil, %dil
-; X64-NEXT: shrb %dil
-; X64-NEXT: subb %dil, %al
+; X64-NEXT: movzbl %sil, %eax
+; X64-NEXT: movzbl %dil, %ecx
+; X64-NEXT: leal 1(%rcx,%rax), %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%or = or i8 %a0, %a1
%xor = xor i8 %a0, %a1
@@ -38,22 +36,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
; X86-LABEL: test_ext_i8:
; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: orb %cl, %al
-; X86-NEXT: xorb %cl, %dl
-; X86-NEXT: shrb %dl
-; X86-NEXT: subb %dl, %al
+; X86-NEXT: leal 1(%ecx,%eax), %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_ext_i8:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: orb %sil, %al
-; X64-NEXT: xorb %sil, %dil
-; X64-NEXT: shrb %dil
-; X64-NEXT: subb %dil, %al
+; X64-NEXT: movzbl %sil, %eax
+; X64-NEXT: movzbl %dil, %ecx
+; X64-NEXT: leal 1(%rcx,%rax), %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%x0 = zext i8 %a0 to i16
%x1 = zext i8 %a1 to i16
@@ -67,25 +63,19 @@ define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
; X86-LABEL: test_fixed_i16:
; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: xorl %ecx, %edx
-; X86-NEXT: movzwl %dx, %ecx
-; X86-NEXT: shrl %ecx
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: leal 1(%ecx,%eax), %eax
+; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_fixed_i16:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: orl %esi, %eax
-; X64-NEXT: xorl %esi, %edi
+; X64-NEXT: movzwl %si, %eax
; X64-NEXT: movzwl %di, %ecx
-; X64-NEXT: shrl %ecx
-; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: leal 1(%rcx,%rax), %eax
+; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%or = or i16 %a0, %a1
@@ -98,25 +88,19 @@ define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind {
; X86-LABEL: test_ext_i16:
; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: xorl %ecx, %edx
-; X86-NEXT: movzwl %dx, %ecx
-; X86-NEXT: shrl %ecx
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: leal 1(%ecx,%eax), %eax
+; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_ext_i16:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: orl %esi, %eax
-; X64-NEXT: xorl %esi, %edi
+; X64-NEXT: movzwl %si, %eax
; X64-NEXT: movzwl %di, %ecx
-; X64-NEXT: shrl %ecx
-; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: leal 1(%rcx,%rax), %eax
+; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%x0 = zext i16 %a0 to i32
@@ -142,11 +126,11 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_fixed_i32:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: orl %esi, %eax
-; X64-NEXT: xorl %esi, %edi
-; X64-NEXT: shrl %edi
-; X64-NEXT: subl %edi, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: leaq 1(%rcx,%rax), %rax
+; X64-NEXT: shrq %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%or = or i32 %a0, %a1
%xor = xor i32 %a1, %a0
@@ -169,11 +153,11 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_ext_i32:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: orl %esi, %eax
-; X64-NEXT: xorl %esi, %edi
-; X64-NEXT: shrl %edi
-; X64-NEXT: subl %edi, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: leaq 1(%rcx,%rax), %rax
+; X64-NEXT: shrq %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%x0 = zext i32 %a0 to i64
%x1 = zext i32 %a1 to i64
diff --git a/llvm/test/CodeGen/X86/avgfloors-scalar.ll b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
index 4c591d4079040..deb79d54be7c4 100644
--- a/llvm/test/CodeGen/X86/avgfloors-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
@@ -11,22 +11,20 @@
define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
; X86-LABEL: test_fixed_i8:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: andb %cl, %dl
-; X86-NEXT: xorb %cl, %al
-; X86-NEXT: sarb %al
-; X86-NEXT: addb %dl, %al
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_fixed_i8:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andb %sil, %al
-; X64-NEXT: xorb %sil, %dil
-; X64-NEXT: sarb %dil
-; X64-NEXT: addb %dil, %al
+; X64-NEXT: movsbl %sil, %ecx
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%and = and i8 %a0, %a1
%xor = xor i8 %a0, %a1
@@ -38,22 +36,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
; X86-LABEL: test_ext_i8:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: andb %cl, %dl
-; X86-NEXT: xorb %cl, %al
-; X86-NEXT: sarb %al
-; X86-NEXT: addb %dl, %al
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_ext_i8:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andb %sil, %al
-; X64-NEXT: xorb %sil, %dil
-; X64-NEXT: sarb %dil
-; X64-NEXT: addb %dil, %al
+; X64-NEXT: movsbl %sil, %ecx
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%x0 = sext i8 %a0 to i16
%x1 = sext i8 %a1 to i16
@@ -66,25 +62,19 @@ define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
; X86-LABEL: test_fixed_i16:
; X86: # %bb.0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: andl %eax, %edx
-; X86-NEXT: xorl %eax, %ecx
-; X86-NEXT: movswl %cx, %eax
-; X86-NEXT: sarl %eax
-; X86-NEXT: addl %edx, %eax
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_fixed_i16:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: andl %esi, %ecx
-; X64-NEXT: xorl %esi, %edi
+; X64-NEXT: movswl %si, %ecx
; X64-NEXT: movswl %di, %eax
-; X64-NEXT: sarl %eax
; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%and = and i16 %a0, %a1
@@ -97,25 +87,19 @@ define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind {
; X86-LABEL: test_ext_i16:
; X86: # %bb.0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: andl %eax, %edx
-; X86-NEXT: xorl %eax, %ecx
-; X86-NEXT: movswl %cx, %eax
-; X86-NEXT: sarl %eax
-; X86-NEXT: addl %edx, %eax
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_ext_i16:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: andl %esi, %ecx
-; X64-NEXT: xorl %esi, %edi
+; X64-NEXT: movswl %si, %ecx
; X64-NEXT: movswl %di, %eax
-; X64-NEXT: sarl %eax
; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%x0 = sext i16 %a0 to i32
@@ -140,11 +124,11 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_fixed_i32:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl %esi, %eax
-; X64-NEXT: xorl %esi, %edi
-; X64-NEXT: sarl %edi
-; X64-NEXT: addl %edi, %eax
+; X64-NEXT: movslq %esi, %rcx
+; X64-NEXT: movslq %edi, %rax
+; X64-NEXT: addq %rcx, %rax
+; X64-NEXT: shrq %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%and = and i32 %a0, %a1
%xor = xor i32 %a1, %a0
@@ -167,11 +151,11 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_ext_i32:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl %esi, %eax
-; X64-NEXT: xorl %esi, %edi
-; X64-NEXT: sarl %edi
-; X64-NEXT: addl %edi, %eax
+; X64-NEXT: movslq %esi, %rcx
+; X64-NEXT: movslq %edi, %rax
+; X64-NEXT: addq %rcx, %rax
+; X64-NEXT: shrq %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%x0 = sext i32 %a0 to i64
%x1 = sext i32 %a1 to i64
diff --git a/llvm/test/CodeGen/X86/avgflooru-scalar.ll b/llvm/test/CodeGen/X86/avgflooru-scalar.ll
index 592e5e15b936a..d21c9d65ea9c8 100644
--- a/llvm/test/CodeGen/X86/avgflooru-scalar.ll
+++ b/llvm/test/CodeGen/X86/a...
[truncated]
|
unsigned BW = VT.getScalarSizeInBits(); | ||
EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW); | ||
if (isTypeLegal(ExtVT) && | ||
((!IsSigned && isZExtFree(VT, ExtVT)) || isTruncateFree(ExtVT, VT))) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess there is no `isSExtFree` that you can use? You just assume the sext is free if the truncate is free?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TBH the `isZExtFree` check is unnecessary as well, since `isTruncateFree` works fine for the x86 cases
280393e
to
9f124b8
Compare
9f124b8
to
604b7d3
Compare
@@ -9262,12 +9265,28 @@ SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const { | |||
DAG.getShiftAmountConstant(1, VT, dl)); | |||
} | |||
|
|||
// For scalars, see if we can efficiently extend/truncate to use add+shift. | |||
// We can always use SRL as we will be truncating away the extended sign bits. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Think this line of the comment belongs where the SRL node is created.
SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS); | ||
if (!IsFloor) | ||
Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg, | ||
DAG.getConstant(1, dl, ExtVT)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Mostly unrelated, but this got me thinking, it might be nice to create a helper for getting an associated operation w/ more than 2 helpers that would try to find existing DAG nodes of all the possible pairs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Raise that as an Issue? I don't really want this PR to get pulled into future work like that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did a preliminary search and didn't see so many cases. If I revisit it will be independent of this PR.
…dd/shift to avoid overflow handling
604b7d3
to
626cb4f
Compare
LGTM. Wait a day or so before pushing please. |
…dd/shift to avoid overflow handling (llvm#95788)
…dd/shift to avoid overflow handling (llvm#95788)
No description provided.