[CGP] adjust target constraints for forming uaddo

rotateright · rotateright · commit 84ceae6048cb · 2019-02-03T17:53:09.000Z
There are 2 changes visible here:
1. There's no reason to limit this transform based on the number
   of condition registers. That change allows PPC to produce
   slightly better (dot-instructions should be generally good)
   code.
   Note: someone who cares about PPC codegen might want to
   look closer at that output because it seems like we could
   still improve this.

2. We (probably?) should not bother trying to form uaddo (or
   other overflow ops) when there's no target support for such
   an op. This goes beyond checking whether the op is expanded
   because both PPC and AArch64 show better codegen for standard
   types regardless of whether the op is legal/custom.

llvm-svn: 353001
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1149,20 +1149,22 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
 
 /// Try to combine the compare into a call to the llvm.uadd.with.overflow
 /// intrinsic. Return true if any changes were made.
-static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI) {
-  // TODO: Why is this transform limited by this condition?
-  if (TLI.hasMultipleConditionRegisters())
-    return false;
-
+static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
+                                      const DataLayout &DL) {
   Value *A, *B;
   Instruction *AddI;
   if (!match(Cmp,
              m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
     return false;
 
+  // Allow the transform as long as we have an integer type that is not
+  // obviously illegal and unsupported.
   Type *Ty = AddI->getType();
   if (!isa<IntegerType>(Ty))
     return false;
+  EVT CodegenVT = TLI.getValueType(DL, Ty);
+  if (!CodegenVT.isSimple() && TLI.isOperationExpand(ISD::UADDO, CodegenVT))
+    return false;
 
   // We don't want to move around uses of condition values this late, so we we
   // check if it is legal to create the call to the intrinsic in the basic
@@ -1263,11 +1265,12 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
   return MadeChange;
 }
 
-static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
+static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
+                                  const DataLayout &DL) {
   if (sinkCmpExpression(Cmp, TLI))
     return true;
 
-  if (combineToUAddWithOverflow(Cmp, TLI))
+  if (combineToUAddWithOverflow(Cmp, TLI, DL))
     return true;
 
   return false;
@@ -6714,7 +6717,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
   }
 
   if (CmpInst *CI = dyn_cast<CmpInst>(I))
-    if (TLI && optimizeCmpExpression(CI, *TLI))
+    if (TLI && optimizeCmpExpression(CI, *TLI, *DL))
       return true;
 
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll
@@ -24,12 +24,11 @@ define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
 define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi 5, 3, 42
 ; CHECK-NEXT:    rlwinm 3, 3, 0, 24, 31
+; CHECK-NEXT:    addi 3, 3, 42
+; CHECK-NEXT:    andi. 4, 3, 256
 ; CHECK-NEXT:    li 4, -1
-; CHECK-NEXT:    clrlwi 6, 5, 24
-; CHECK-NEXT:    cmplw 3, 6
-; CHECK-NEXT:    isel 3, 4, 5, 1
+; CHECK-NEXT:    isel 3, 3, 4, 2
 ; CHECK-NEXT:    blr
   %a = add i8 %x, 42
   %c = icmp ugt i8 %x, %a
@@ -70,12 +69,11 @@ define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
 define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi 5, 3, 42
 ; CHECK-NEXT:    rlwinm 3, 3, 0, 16, 31
+; CHECK-NEXT:    addi 3, 3, 42
+; CHECK-NEXT:    andis. 4, 3, 1
 ; CHECK-NEXT:    li 4, -1
-; CHECK-NEXT:    clrlwi 6, 5, 16
-; CHECK-NEXT:    cmplw 3, 6
-; CHECK-NEXT:    isel 3, 4, 5, 1
+; CHECK-NEXT:    isel 3, 3, 4, 2
 ; CHECK-NEXT:    blr
   %a = add i16 %x, 42
   %c = icmp ugt i16 %x, %a
@@ -117,8 +115,8 @@ define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi 5, 3, 42
 ; CHECK-NEXT:    li 4, -1
-; CHECK-NEXT:    cmplw 0, 3, 5
-; CHECK-NEXT:    isel 3, 4, 5, 1
+; CHECK-NEXT:    cmplw 0, 5, 3
+; CHECK-NEXT:    isel 3, 4, 5, 0
 ; CHECK-NEXT:    blr
   %a = add i32 %x, 42
   %c = icmp ugt i32 %x, %a
@@ -160,8 +158,8 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi 5, 3, 42
 ; CHECK-NEXT:    li 4, -1
-; CHECK-NEXT:    cmpld 3, 5
-; CHECK-NEXT:    isel 3, 4, 5, 1
+; CHECK-NEXT:    cmpld 5, 3
+; CHECK-NEXT:    isel 3, 4, 5, 0
 ; CHECK-NEXT:    blr
   %a = add i64 %x, 42
   %c = icmp ugt i64 %x, %a
@@ -204,12 +202,12 @@ define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
 define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    add 4, 3, 4
+; CHECK-NEXT:    rlwinm 4, 4, 0, 24, 31
 ; CHECK-NEXT:    rlwinm 3, 3, 0, 24, 31
-; CHECK-NEXT:    li 5, -1
-; CHECK-NEXT:    clrlwi 6, 4, 24
-; CHECK-NEXT:    cmplw 3, 6
-; CHECK-NEXT:    isel 3, 5, 4, 1
+; CHECK-NEXT:    add 3, 3, 4
+; CHECK-NEXT:    andi. 4, 3, 256
+; CHECK-NEXT:    li 4, -1
+; CHECK-NEXT:    isel 3, 3, 4, 2
 ; CHECK-NEXT:    blr
   %a = add i8 %x, %y
   %c = icmp ugt i8 %x, %a
@@ -255,12 +253,12 @@ define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
 define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    add 4, 3, 4
+; CHECK-NEXT:    rlwinm 4, 4, 0, 16, 31
 ; CHECK-NEXT:    rlwinm 3, 3, 0, 16, 31
-; CHECK-NEXT:    li 5, -1
-; CHECK-NEXT:    clrlwi 6, 4, 16
-; CHECK-NEXT:    cmplw 3, 6
-; CHECK-NEXT:    isel 3, 5, 4, 1
+; CHECK-NEXT:    add 3, 3, 4
+; CHECK-NEXT:    andis. 4, 3, 1
+; CHECK-NEXT:    li 4, -1
+; CHECK-NEXT:    isel 3, 3, 4, 2
 ; CHECK-NEXT:    blr
   %a = add i16 %x, %y
   %c = icmp ugt i16 %x, %a
@@ -306,8 +304,8 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    add 4, 3, 4
 ; CHECK-NEXT:    li 5, -1
-; CHECK-NEXT:    cmplw 0, 3, 4
-; CHECK-NEXT:    isel 3, 5, 4, 1
+; CHECK-NEXT:    cmplw 0, 4, 3
+; CHECK-NEXT:    isel 3, 5, 4, 0
 ; CHECK-NEXT:    blr
   %a = add i32 %x, %y
   %c = icmp ugt i32 %x, %a
@@ -351,8 +349,8 @@ define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    add 4, 3, 4
 ; CHECK-NEXT:    li 5, -1
-; CHECK-NEXT:    cmpld 3, 4
-; CHECK-NEXT:    isel 3, 5, 4, 1
+; CHECK-NEXT:    cmpld 4, 3
+; CHECK-NEXT:    isel 3, 5, 4, 0
 ; CHECK-NEXT:    blr
   %a = add i64 %x, %y
   %c = icmp ugt i64 %x, %a
diff --git a/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll b/llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll
@@ -252,15 +252,14 @@ define void @test_18446744073709551615(i64*, i64*) {
 define i1 @illegal_type(i17 %x, i17* %p) {
 ; CHECK-LABEL: illegal_type:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $131071, %edi # imm = 0x1FFFF
 ; CHECK-NEXT:    addl $29, %edi
-; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    andl $131071, %ecx # imm = 0x1FFFF
-; CHECK-NEXT:    cmpl %edi, %ecx
-; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    movw %di, (%rsi)
-; CHECK-NEXT:    shrl $16, %ecx
-; CHECK-NEXT:    movb %cl, 2(%rsi)
+; CHECK-NEXT:    andl $131071, %edi # imm = 0x1FFFF
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    shrl $16, %eax
+; CHECK-NEXT:    movb %al, 2(%rsi)
+; CHECK-NEXT:    cmpl $29, %edi
+; CHECK-NEXT:    setb %al
 ; CHECK-NEXT:    retq
   %a = add i17 %x, 29
   store i17 %a, i17* %p
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
@@ -163,11 +163,10 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, i16* %p) {
 
 define i1 @uaddo_i42_increment_illegal_type(i42 %x, i42* %p) {
 ; CHECK-LABEL: @uaddo_i42_increment_illegal_type(
-; CHECK-NEXT:    [[UADD_OVERFLOW:%.*]] = call { i42, i1 } @llvm.uadd.with.overflow.i42(i42 [[X:%.*]], i42 1)
-; CHECK-NEXT:    [[UADD:%.*]] = extractvalue { i42, i1 } [[UADD_OVERFLOW]], 0
-; CHECK-NEXT:    [[OVERFLOW:%.*]] = extractvalue { i42, i1 } [[UADD_OVERFLOW]], 1
-; CHECK-NEXT:    store i42 [[UADD]], i42* [[P:%.*]]
-; CHECK-NEXT:    ret i1 [[OVERFLOW]]
+; CHECK-NEXT:    [[A:%.*]] = add i42 [[X:%.*]], 1
+; CHECK-NEXT:    [[OV:%.*]] = icmp eq i42 [[A]], 0
+; CHECK-NEXT:    store i42 [[A]], i42* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV]]
 ;
   %a = add i42 %x, 1
   %ov = icmp eq i42 %a, 0