
[DAG] expandAVG - attempt to extend to a wider integer type for the add/shift to avoid overflow handling #95788


Merged: 2 commits, Jun 26, 2024

Conversation

@RKSimon (Collaborator) commented Jun 17, 2024

No description provided.

@llvmbot llvmbot added backend:X86 llvm:SelectionDAG SelectionDAGISel as well labels Jun 17, 2024
@llvmbot (Member) commented Jun 17, 2024

@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Patch is 24.82 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95788.diff

6 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+21-2)
  • (modified) llvm/test/CodeGen/X86/avg.ll (+5-5)
  • (modified) llvm/test/CodeGen/X86/avgceils-scalar.ll (+44-60)
  • (modified) llvm/test/CodeGen/X86/avgceilu-scalar.ll (+40-56)
  • (modified) llvm/test/CodeGen/X86/avgfloors-scalar.ll (+42-58)
  • (modified) llvm/test/CodeGen/X86/avgflooru-scalar.ll (+32-48)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ad957aaa8f141..63d304a62fc21 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9243,7 +9243,10 @@ SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
   unsigned Opc = N->getOpcode();
   bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
   bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
+  unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
+  unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
   unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
+  unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
   assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
           Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
          "Unknown AVG node");
@@ -9262,12 +9265,28 @@ SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
                        DAG.getShiftAmountConstant(1, VT, dl));
   }
 
+  // For scalars, see if we can efficiently extend/truncate to use add+shift.
+  if (VT.isScalarInteger()) {
+    unsigned BW = VT.getScalarSizeInBits();
+    EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
+    if (isTypeLegal(ExtVT) &&
+        ((!IsSigned && isZExtFree(VT, ExtVT)) || isTruncateFree(ExtVT, VT))) {
+      LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
+      RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
+      SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
+      if (!IsFloor)
+        Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
+                          DAG.getConstant(1, dl, ExtVT));
+      Avg = DAG.getNode(ShiftOpc, dl, ExtVT, Avg,
+                        DAG.getShiftAmountConstant(1, ExtVT, dl));
+      return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
+    }
+  }
+
   // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
   // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
   // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
   // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
-  unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
-  unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
   LHS = DAG.getFreeze(LHS);
   RHS = DAG.getFreeze(RHS);
   SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll
index b0fd4c848c5f8..0de308a9e0738 100644
--- a/llvm/test/CodeGen/X86/avg.ll
+++ b/llvm/test/CodeGen/X86/avg.ll
@@ -1973,11 +1973,11 @@ define void @not_avg_v16i8_wide_constants(ptr %a, ptr %b) nounwind {
 define <1 x i8> @avg_v1i8(<1 x i8> %x, <1 x i8> %y) {
 ; CHECK-LABEL: avg_v1i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    orb %sil, %al
-; CHECK-NEXT:    xorb %sil, %dil
-; CHECK-NEXT:    shrb %dil
-; CHECK-NEXT:    subb %dil, %al
+; CHECK-NEXT:    movzbl %sil, %eax
+; CHECK-NEXT:    movzbl %dil, %ecx
+; CHECK-NEXT:    leal 1(%rcx,%rax), %eax
+; CHECK-NEXT:    shrl %eax
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %a = zext <1 x i8> %x to <1 x i16>
   %b = zext <1 x i8> %y to <1 x i16>
diff --git a/llvm/test/CodeGen/X86/avgceils-scalar.ll b/llvm/test/CodeGen/X86/avgceils-scalar.ll
index 86de35d36f076..91121bd4ad935 100644
--- a/llvm/test/CodeGen/X86/avgceils-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgceils-scalar.ll
@@ -11,22 +11,20 @@
 define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-LABEL: test_fixed_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    orb %cl, %al
-; X86-NEXT:    xorb %cl, %dl
-; X86-NEXT:    sarb %dl
-; X86-NEXT:    subb %dl, %al
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal 1(%ecx,%eax), %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_fixed_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    orb %sil, %al
-; X64-NEXT:    xorb %sil, %dil
-; X64-NEXT:    sarb %dil
-; X64-NEXT:    subb %dil, %al
+; X64-NEXT:    movsbl %sil, %eax
+; X64-NEXT:    movsbl %dil, %ecx
+; X64-NEXT:    leal 1(%rcx,%rax), %eax
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %or = or i8 %a0, %a1
   %xor = xor i8 %a0, %a1
@@ -38,22 +36,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
 define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-LABEL: test_ext_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    orb %cl, %al
-; X86-NEXT:    xorb %cl, %dl
-; X86-NEXT:    sarb %dl
-; X86-NEXT:    subb %dl, %al
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal 1(%ecx,%eax), %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_ext_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    orb %sil, %al
-; X64-NEXT:    xorb %sil, %dil
-; X64-NEXT:    sarb %dil
-; X64-NEXT:    subb %dil, %al
+; X64-NEXT:    movsbl %sil, %eax
+; X64-NEXT:    movsbl %dil, %ecx
+; X64-NEXT:    leal 1(%rcx,%rax), %eax
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %x0 = sext i8 %a0 to i16
   %x1 = sext i8 %a1 to i16
@@ -67,25 +63,19 @@ define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
 define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
 ; X86-LABEL: test_fixed_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    xorl %ecx, %edx
-; X86-NEXT:    movswl %dx, %ecx
-; X86-NEXT:    sarl %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal 1(%ecx,%eax), %eax
+; X86-NEXT:    shrl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_fixed_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    orl %esi, %eax
-; X64-NEXT:    xorl %esi, %edi
+; X64-NEXT:    movswl %si, %eax
 ; X64-NEXT:    movswl %di, %ecx
-; X64-NEXT:    sarl %ecx
-; X64-NEXT:    subl %ecx, %eax
+; X64-NEXT:    leal 1(%rcx,%rax), %eax
+; X64-NEXT:    shrl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %or = or i16 %a0, %a1
@@ -98,25 +88,19 @@ define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
 define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind {
 ; X86-LABEL: test_ext_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    xorl %ecx, %edx
-; X86-NEXT:    movswl %dx, %ecx
-; X86-NEXT:    sarl %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal 1(%ecx,%eax), %eax
+; X86-NEXT:    shrl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_ext_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    orl %esi, %eax
-; X64-NEXT:    xorl %esi, %edi
+; X64-NEXT:    movswl %si, %eax
 ; X64-NEXT:    movswl %di, %ecx
-; X64-NEXT:    sarl %ecx
-; X64-NEXT:    subl %ecx, %eax
+; X64-NEXT:    leal 1(%rcx,%rax), %eax
+; X64-NEXT:    shrl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %x0 = sext i16 %a0 to i32
@@ -142,11 +126,11 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind {
 ;
 ; X64-LABEL: test_fixed_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    orl %esi, %eax
-; X64-NEXT:    xorl %esi, %edi
-; X64-NEXT:    sarl %edi
-; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    movslq %esi, %rax
+; X64-NEXT:    movslq %edi, %rcx
+; X64-NEXT:    leaq 1(%rcx,%rax), %rax
+; X64-NEXT:    shrq %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %or = or i32 %a0, %a1
   %xor = xor i32 %a1, %a0
@@ -169,11 +153,11 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
 ;
 ; X64-LABEL: test_ext_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    orl %esi, %eax
-; X64-NEXT:    xorl %esi, %edi
-; X64-NEXT:    sarl %edi
-; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    movslq %esi, %rax
+; X64-NEXT:    movslq %edi, %rcx
+; X64-NEXT:    leaq 1(%rcx,%rax), %rax
+; X64-NEXT:    shrq %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %x0 = sext i32 %a0 to i64
   %x1 = sext i32 %a1 to i64
diff --git a/llvm/test/CodeGen/X86/avgceilu-scalar.ll b/llvm/test/CodeGen/X86/avgceilu-scalar.ll
index 014c984528141..4ab4851eccd2c 100644
--- a/llvm/test/CodeGen/X86/avgceilu-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgceilu-scalar.ll
@@ -11,22 +11,20 @@
 define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-LABEL: test_fixed_i8:
 ; X86:       # %bb.0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    orb %cl, %al
-; X86-NEXT:    xorb %cl, %dl
-; X86-NEXT:    shrb %dl
-; X86-NEXT:    subb %dl, %al
+; X86-NEXT:    leal 1(%ecx,%eax), %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_fixed_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    orb %sil, %al
-; X64-NEXT:    xorb %sil, %dil
-; X64-NEXT:    shrb %dil
-; X64-NEXT:    subb %dil, %al
+; X64-NEXT:    movzbl %sil, %eax
+; X64-NEXT:    movzbl %dil, %ecx
+; X64-NEXT:    leal 1(%rcx,%rax), %eax
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %or = or i8 %a0, %a1
   %xor = xor i8 %a0, %a1
@@ -38,22 +36,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
 define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-LABEL: test_ext_i8:
 ; X86:       # %bb.0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    orb %cl, %al
-; X86-NEXT:    xorb %cl, %dl
-; X86-NEXT:    shrb %dl
-; X86-NEXT:    subb %dl, %al
+; X86-NEXT:    leal 1(%ecx,%eax), %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_ext_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    orb %sil, %al
-; X64-NEXT:    xorb %sil, %dil
-; X64-NEXT:    shrb %dil
-; X64-NEXT:    subb %dil, %al
+; X64-NEXT:    movzbl %sil, %eax
+; X64-NEXT:    movzbl %dil, %ecx
+; X64-NEXT:    leal 1(%rcx,%rax), %eax
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %x0 = zext i8 %a0 to i16
   %x1 = zext i8 %a1 to i16
@@ -67,25 +63,19 @@ define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
 define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
 ; X86-LABEL: test_fixed_i16:
 ; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    xorl %ecx, %edx
-; X86-NEXT:    movzwl %dx, %ecx
-; X86-NEXT:    shrl %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    leal 1(%ecx,%eax), %eax
+; X86-NEXT:    shrl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_fixed_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    orl %esi, %eax
-; X64-NEXT:    xorl %esi, %edi
+; X64-NEXT:    movzwl %si, %eax
 ; X64-NEXT:    movzwl %di, %ecx
-; X64-NEXT:    shrl %ecx
-; X64-NEXT:    subl %ecx, %eax
+; X64-NEXT:    leal 1(%rcx,%rax), %eax
+; X64-NEXT:    shrl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %or = or i16 %a0, %a1
@@ -98,25 +88,19 @@ define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
 define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind {
 ; X86-LABEL: test_ext_i16:
 ; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    xorl %ecx, %edx
-; X86-NEXT:    movzwl %dx, %ecx
-; X86-NEXT:    shrl %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    leal 1(%ecx,%eax), %eax
+; X86-NEXT:    shrl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_ext_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    orl %esi, %eax
-; X64-NEXT:    xorl %esi, %edi
+; X64-NEXT:    movzwl %si, %eax
 ; X64-NEXT:    movzwl %di, %ecx
-; X64-NEXT:    shrl %ecx
-; X64-NEXT:    subl %ecx, %eax
+; X64-NEXT:    leal 1(%rcx,%rax), %eax
+; X64-NEXT:    shrl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %x0 = zext i16 %a0 to i32
@@ -142,11 +126,11 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind {
 ;
 ; X64-LABEL: test_fixed_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    orl %esi, %eax
-; X64-NEXT:    xorl %esi, %edi
-; X64-NEXT:    shrl %edi
-; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    leaq 1(%rcx,%rax), %rax
+; X64-NEXT:    shrq %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %or = or i32 %a0, %a1
   %xor = xor i32 %a1, %a0
@@ -169,11 +153,11 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
 ;
 ; X64-LABEL: test_ext_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    orl %esi, %eax
-; X64-NEXT:    xorl %esi, %edi
-; X64-NEXT:    shrl %edi
-; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    leaq 1(%rcx,%rax), %rax
+; X64-NEXT:    shrq %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %x0 = zext i32 %a0 to i64
   %x1 = zext i32 %a1 to i64
diff --git a/llvm/test/CodeGen/X86/avgfloors-scalar.ll b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
index 4c591d4079040..deb79d54be7c4 100644
--- a/llvm/test/CodeGen/X86/avgfloors-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
@@ -11,22 +11,20 @@
 define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-LABEL: test_fixed_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    andb %cl, %dl
-; X86-NEXT:    xorb %cl, %al
-; X86-NEXT:    sarb %al
-; X86-NEXT:    addb %dl, %al
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_fixed_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andb %sil, %al
-; X64-NEXT:    xorb %sil, %dil
-; X64-NEXT:    sarb %dil
-; X64-NEXT:    addb %dil, %al
+; X64-NEXT:    movsbl %sil, %ecx
+; X64-NEXT:    movsbl %dil, %eax
+; X64-NEXT:    addl %ecx, %eax
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %and = and i8 %a0, %a1
   %xor = xor i8 %a0, %a1
@@ -38,22 +36,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
 define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-LABEL: test_ext_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    andb %cl, %dl
-; X86-NEXT:    xorb %cl, %al
-; X86-NEXT:    sarb %al
-; X86-NEXT:    addb %dl, %al
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_ext_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andb %sil, %al
-; X64-NEXT:    xorb %sil, %dil
-; X64-NEXT:    sarb %dil
-; X64-NEXT:    addb %dil, %al
+; X64-NEXT:    movsbl %sil, %ecx
+; X64-NEXT:    movsbl %dil, %eax
+; X64-NEXT:    addl %ecx, %eax
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %x0 = sext i8 %a0 to i16
   %x1 = sext i8 %a1 to i16
@@ -66,25 +62,19 @@ define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
 define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
 ; X86-LABEL: test_fixed_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    andl %eax, %edx
-; X86-NEXT:    xorl %eax, %ecx
-; X86-NEXT:    movswl %cx, %eax
-; X86-NEXT:    sarl %eax
-; X86-NEXT:    addl %edx, %eax
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    shrl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_fixed_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    andl %esi, %ecx
-; X64-NEXT:    xorl %esi, %edi
+; X64-NEXT:    movswl %si, %ecx
 ; X64-NEXT:    movswl %di, %eax
-; X64-NEXT:    sarl %eax
 ; X64-NEXT:    addl %ecx, %eax
+; X64-NEXT:    shrl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %and = and i16 %a0, %a1
@@ -97,25 +87,19 @@ define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
 define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind {
 ; X86-LABEL: test_ext_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    andl %eax, %edx
-; X86-NEXT:    xorl %eax, %ecx
-; X86-NEXT:    movswl %cx, %eax
-; X86-NEXT:    sarl %eax
-; X86-NEXT:    addl %edx, %eax
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    shrl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_ext_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    andl %esi, %ecx
-; X64-NEXT:    xorl %esi, %edi
+; X64-NEXT:    movswl %si, %ecx
 ; X64-NEXT:    movswl %di, %eax
-; X64-NEXT:    sarl %eax
 ; X64-NEXT:    addl %ecx, %eax
+; X64-NEXT:    shrl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %x0 = sext i16 %a0 to i32
@@ -140,11 +124,11 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind {
 ;
 ; X64-LABEL: test_fixed_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl %esi, %eax
-; X64-NEXT:    xorl %esi, %edi
-; X64-NEXT:    sarl %edi
-; X64-NEXT:    addl %edi, %eax
+; X64-NEXT:    movslq %esi, %rcx
+; X64-NEXT:    movslq %edi, %rax
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    shrq %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %and = and i32 %a0, %a1
   %xor = xor i32 %a1, %a0
@@ -167,11 +151,11 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
 ;
 ; X64-LABEL: test_ext_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    andl %esi, %eax
-; X64-NEXT:    xorl %esi, %edi
-; X64-NEXT:    sarl %edi
-; X64-NEXT:    addl %edi, %eax
+; X64-NEXT:    movslq %esi, %rcx
+; X64-NEXT:    movslq %edi, %rax
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    shrq %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %x0 = sext i32 %a0 to i64
   %x1 = sext i32 %a1 to i64
diff --git a/llvm/test/CodeGen/X86/avgflooru-scalar.ll b/llvm/test/CodeGen/X86/avgflooru-scalar.ll
index 592e5e15b936a..d21c9d65ea9c8 100644
--- a/llvm/test/CodeGen/X86/avgflooru-scalar.ll
+++ b/llvm/test/CodeGen/X86/a...
[truncated]

unsigned BW = VT.getScalarSizeInBits();
EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
if (isTypeLegal(ExtVT) &&
((!IsSigned && isZExtFree(VT, ExtVT)) || isTruncateFree(ExtVT, VT))) {
Contributor:

I guess there is no isSExtFree that you can use? You just assume the sext is free if the truncate is free?

Collaborator (Author):

TBH the isZExtFree is useless as well, as isTruncateFree works fine for the x86 cases

@RKSimon RKSimon force-pushed the expand-avg-extend branch from 280393e to 9f124b8 Compare June 17, 2024 17:04
@RKSimon RKSimon requested review from phoebewang and goldsteinn June 18, 2024 16:50
dtcxzyw added a commit to dtcxzyw/llvm-codegen-benchmark that referenced this pull request Jun 22, 2024
@RKSimon RKSimon force-pushed the expand-avg-extend branch from 9f124b8 to 604b7d3 Compare June 24, 2024 10:43
@@ -9262,12 +9265,28 @@ SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
DAG.getShiftAmountConstant(1, VT, dl));
}

// For scalars, see if we can efficiently extend/truncate to use add+shift.
// We can always use SRL as we will be truncating away the extended sign bits.
Contributor:

Nit: I think this line of the comment belongs where the SRL node is created.

SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
if (!IsFloor)
Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
DAG.getConstant(1, dl, ExtVT));
Contributor:

Mostly unrelated, but this got me thinking: it might be nice to create a helper for getting an associated operation with more than 2 operands that would try to find existing DAG nodes of all the possible pairs.

Collaborator (Author):

Raise that as an Issue? I don't really want this PR to get pulled into future work like that.

Contributor:

I did a preliminary search and didn't see many cases. If I revisit this, it will be independent of this PR.

@RKSimon RKSimon force-pushed the expand-avg-extend branch from 604b7d3 to 626cb4f Compare June 24, 2024 11:42
@goldsteinn (Contributor):

LGTM. Wait a day or so before pushing please.

@RKSimon RKSimon merged commit 92715cf into llvm:main Jun 26, 2024
7 checks passed
@RKSimon RKSimon deleted the expand-avg-extend branch June 26, 2024 12:33
lravenclaw pushed a commit to lravenclaw/llvm-project that referenced this pull request Jul 3, 2024
AlexisPerry pushed a commit to llvm-project-tlp/llvm-project that referenced this pull request Jul 9, 2024