Skip to content

Commit 84ceae6

Browse files
committed
[CGP] adjust target constraints for forming uaddo
There are 2 changes visible here:

1. There's no reason to limit this transform based on the number of condition registers. That change allows PPC to produce slightly better code (dot-instructions should generally be good). Note: someone who cares about PPC codegen might want to look closer at that output because it seems like we could still improve this.

2. We (probably?) should not bother trying to form uaddo (or other overflow ops) when there's no target support for such an op. This goes beyond checking whether the op is expanded because both PPC and AArch64 show better codegen for standard types regardless of whether the op is legal/custom.

llvm-svn: 353001
1 parent e2469b1 commit 84ceae6

File tree

4 files changed

+45
-46
lines changed

4 files changed

+45
-46
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,20 +1149,22 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
11491149

11501150
/// Try to combine the compare into a call to the llvm.uadd.with.overflow
11511151
/// intrinsic. Return true if any changes were made.
1152-
static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI) {
1153-
// TODO: Why is this transform limited by this condition?
1154-
if (TLI.hasMultipleConditionRegisters())
1155-
return false;
1156-
1152+
static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
1153+
const DataLayout &DL) {
11571154
Value *A, *B;
11581155
Instruction *AddI;
11591156
if (!match(Cmp,
11601157
m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
11611158
return false;
11621159

1160+
// Allow the transform as long as we have an integer type that is not
1161+
// obviously illegal and unsupported.
11631162
Type *Ty = AddI->getType();
11641163
if (!isa<IntegerType>(Ty))
11651164
return false;
1165+
EVT CodegenVT = TLI.getValueType(DL, Ty);
1166+
if (!CodegenVT.isSimple() && TLI.isOperationExpand(ISD::UADDO, CodegenVT))
1167+
return false;
11661168

11671169
// We don't want to move around uses of condition values this late, so we
11681170
// check if it is legal to create the call to the intrinsic in the basic
@@ -1263,11 +1265,12 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
12631265
return MadeChange;
12641266
}
12651267

1266-
static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1268+
static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
1269+
const DataLayout &DL) {
12671270
if (sinkCmpExpression(Cmp, TLI))
12681271
return true;
12691272

1270-
if (combineToUAddWithOverflow(Cmp, TLI))
1273+
if (combineToUAddWithOverflow(Cmp, TLI, DL))
12711274
return true;
12721275

12731276
return false;
@@ -6714,7 +6717,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
67146717
}
67156718

67166719
if (CmpInst *CI = dyn_cast<CmpInst>(I))
6717-
if (TLI && optimizeCmpExpression(CI, *TLI))
6720+
if (TLI && optimizeCmpExpression(CI, *TLI, *DL))
67186721
return true;
67196722

67206723
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {

llvm/test/CodeGen/PowerPC/sat-add.ll

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,11 @@ define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
2424
define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
2525
; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
2626
; CHECK: # %bb.0:
27-
; CHECK-NEXT: addi 5, 3, 42
2827
; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31
28+
; CHECK-NEXT: addi 3, 3, 42
29+
; CHECK-NEXT: andi. 4, 3, 256
2930
; CHECK-NEXT: li 4, -1
30-
; CHECK-NEXT: clrlwi 6, 5, 24
31-
; CHECK-NEXT: cmplw 3, 6
32-
; CHECK-NEXT: isel 3, 4, 5, 1
31+
; CHECK-NEXT: isel 3, 3, 4, 2
3332
; CHECK-NEXT: blr
3433
%a = add i8 %x, 42
3534
%c = icmp ugt i8 %x, %a
@@ -70,12 +69,11 @@ define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
7069
define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
7170
; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
7271
; CHECK: # %bb.0:
73-
; CHECK-NEXT: addi 5, 3, 42
7472
; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31
73+
; CHECK-NEXT: addi 3, 3, 42
74+
; CHECK-NEXT: andis. 4, 3, 1
7575
; CHECK-NEXT: li 4, -1
76-
; CHECK-NEXT: clrlwi 6, 5, 16
77-
; CHECK-NEXT: cmplw 3, 6
78-
; CHECK-NEXT: isel 3, 4, 5, 1
76+
; CHECK-NEXT: isel 3, 3, 4, 2
7977
; CHECK-NEXT: blr
8078
%a = add i16 %x, 42
8179
%c = icmp ugt i16 %x, %a
@@ -117,8 +115,8 @@ define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
117115
; CHECK: # %bb.0:
118116
; CHECK-NEXT: addi 5, 3, 42
119117
; CHECK-NEXT: li 4, -1
120-
; CHECK-NEXT: cmplw 0, 3, 5
121-
; CHECK-NEXT: isel 3, 4, 5, 1
118+
; CHECK-NEXT: cmplw 0, 5, 3
119+
; CHECK-NEXT: isel 3, 4, 5, 0
122120
; CHECK-NEXT: blr
123121
%a = add i32 %x, 42
124122
%c = icmp ugt i32 %x, %a
@@ -160,8 +158,8 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
160158
; CHECK: # %bb.0:
161159
; CHECK-NEXT: addi 5, 3, 42
162160
; CHECK-NEXT: li 4, -1
163-
; CHECK-NEXT: cmpld 3, 5
164-
; CHECK-NEXT: isel 3, 4, 5, 1
161+
; CHECK-NEXT: cmpld 5, 3
162+
; CHECK-NEXT: isel 3, 4, 5, 0
165163
; CHECK-NEXT: blr
166164
%a = add i64 %x, 42
167165
%c = icmp ugt i64 %x, %a
@@ -204,12 +202,12 @@ define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
204202
define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
205203
; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
206204
; CHECK: # %bb.0:
207-
; CHECK-NEXT: add 4, 3, 4
205+
; CHECK-NEXT: rlwinm 4, 4, 0, 24, 31
208206
; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31
209-
; CHECK-NEXT: li 5, -1
210-
; CHECK-NEXT: clrlwi 6, 4, 24
211-
; CHECK-NEXT: cmplw 3, 6
212-
; CHECK-NEXT: isel 3, 5, 4, 1
207+
; CHECK-NEXT: add 3, 3, 4
208+
; CHECK-NEXT: andi. 4, 3, 256
209+
; CHECK-NEXT: li 4, -1
210+
; CHECK-NEXT: isel 3, 3, 4, 2
213211
; CHECK-NEXT: blr
214212
%a = add i8 %x, %y
215213
%c = icmp ugt i8 %x, %a
@@ -255,12 +253,12 @@ define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
255253
define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
256254
; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
257255
; CHECK: # %bb.0:
258-
; CHECK-NEXT: add 4, 3, 4
256+
; CHECK-NEXT: rlwinm 4, 4, 0, 16, 31
259257
; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31
260-
; CHECK-NEXT: li 5, -1
261-
; CHECK-NEXT: clrlwi 6, 4, 16
262-
; CHECK-NEXT: cmplw 3, 6
263-
; CHECK-NEXT: isel 3, 5, 4, 1
258+
; CHECK-NEXT: add 3, 3, 4
259+
; CHECK-NEXT: andis. 4, 3, 1
260+
; CHECK-NEXT: li 4, -1
261+
; CHECK-NEXT: isel 3, 3, 4, 2
264262
; CHECK-NEXT: blr
265263
%a = add i16 %x, %y
266264
%c = icmp ugt i16 %x, %a
@@ -306,8 +304,8 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
306304
; CHECK: # %bb.0:
307305
; CHECK-NEXT: add 4, 3, 4
308306
; CHECK-NEXT: li 5, -1
309-
; CHECK-NEXT: cmplw 0, 3, 4
310-
; CHECK-NEXT: isel 3, 5, 4, 1
307+
; CHECK-NEXT: cmplw 0, 4, 3
308+
; CHECK-NEXT: isel 3, 5, 4, 0
311309
; CHECK-NEXT: blr
312310
%a = add i32 %x, %y
313311
%c = icmp ugt i32 %x, %a
@@ -351,8 +349,8 @@ define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
351349
; CHECK: # %bb.0:
352350
; CHECK-NEXT: add 4, 3, 4
353351
; CHECK-NEXT: li 5, -1
354-
; CHECK-NEXT: cmpld 3, 4
355-
; CHECK-NEXT: isel 3, 5, 4, 1
352+
; CHECK-NEXT: cmpld 4, 3
353+
; CHECK-NEXT: isel 3, 5, 4, 0
356354
; CHECK-NEXT: blr
357355
%a = add i64 %x, %y
358356
%c = icmp ugt i64 %x, %a

llvm/test/CodeGen/X86/codegen-prepare-uaddo.ll

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -252,15 +252,14 @@ define void @test_18446744073709551615(i64*, i64*) {
252252
define i1 @illegal_type(i17 %x, i17* %p) {
253253
; CHECK-LABEL: illegal_type:
254254
; CHECK: # %bb.0:
255-
; CHECK-NEXT: andl $131071, %edi # imm = 0x1FFFF
256255
; CHECK-NEXT: addl $29, %edi
257-
; CHECK-NEXT: movl %edi, %ecx
258-
; CHECK-NEXT: andl $131071, %ecx # imm = 0x1FFFF
259-
; CHECK-NEXT: cmpl %edi, %ecx
260-
; CHECK-NEXT: setne %al
261256
; CHECK-NEXT: movw %di, (%rsi)
262-
; CHECK-NEXT: shrl $16, %ecx
263-
; CHECK-NEXT: movb %cl, 2(%rsi)
257+
; CHECK-NEXT: andl $131071, %edi # imm = 0x1FFFF
258+
; CHECK-NEXT: movl %edi, %eax
259+
; CHECK-NEXT: shrl $16, %eax
260+
; CHECK-NEXT: movb %al, 2(%rsi)
261+
; CHECK-NEXT: cmpl $29, %edi
262+
; CHECK-NEXT: setb %al
264263
; CHECK-NEXT: retq
265264
%a = add i17 %x, 29
266265
store i17 %a, i17* %p

llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -163,11 +163,10 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, i16* %p) {
163163

164164
define i1 @uaddo_i42_increment_illegal_type(i42 %x, i42* %p) {
165165
; CHECK-LABEL: @uaddo_i42_increment_illegal_type(
166-
; CHECK-NEXT: [[UADD_OVERFLOW:%.*]] = call { i42, i1 } @llvm.uadd.with.overflow.i42(i42 [[X:%.*]], i42 1)
167-
; CHECK-NEXT: [[UADD:%.*]] = extractvalue { i42, i1 } [[UADD_OVERFLOW]], 0
168-
; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i42, i1 } [[UADD_OVERFLOW]], 1
169-
; CHECK-NEXT: store i42 [[UADD]], i42* [[P:%.*]]
170-
; CHECK-NEXT: ret i1 [[OVERFLOW]]
166+
; CHECK-NEXT: [[A:%.*]] = add i42 [[X:%.*]], 1
167+
; CHECK-NEXT: [[OV:%.*]] = icmp eq i42 [[A]], 0
168+
; CHECK-NEXT: store i42 [[A]], i42* [[P:%.*]]
169+
; CHECK-NEXT: ret i1 [[OV]]
171170
;
172171
%a = add i42 %x, 1
173172
%ov = icmp eq i42 %a, 0

0 commit comments

Comments
 (0)