Skip to content

Commit 837552f

Browse files
committed
[PatternMatch] add special-case uaddo matching for increment-by-one (2nd try)
This is the most important uaddo problem mentioned in PR31754 (https://bugs.llvm.org/show_bug.cgi?id=31754), but it was already overcome in x86 codegen with D57637. That patch also corrects the inc vs. add regressions seen with the previous attempt at this change. Still, we want to make this matcher complete, so that we can potentially canonicalize the pattern even when it is an 'add 1' operation. Pattern matching, however, shouldn't assume that the IR has been canonicalized, so we match all 4 commuted variants of the increment-by-one form of uaddo: (a + 1) == 0, (1 + a) == 0, 0 == (a + 1), and 0 == (1 + a). There's also a test with an unusual, non-legal type (i42) to show that the existing CodeGenPrepare (CGP) transform based on this matcher is not limited by target legality checks. I'm not sure whether the Hexagon diff means that test is no longer testing what it was intended to test, but that should be solvable in a follow-up. Differential Revision: https://reviews.llvm.org/D57516 llvm-svn: 352998
1 parent 18b73a6 commit 837552f

File tree

5 files changed

+54
-35
lines changed

5 files changed

+54
-35
lines changed

llvm/include/llvm/IR/PatternMatch.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,6 +1463,20 @@ struct UAddWithOverflow_match {
14631463
if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
14641464
return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
14651465

1466+
// Match special-case for increment-by-1.
1467+
if (Pred == ICmpInst::ICMP_EQ) {
1468+
// (a + 1) == 0
1469+
// (1 + a) == 0
1470+
if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) &&
1471+
(m_One().match(AddLHS) || m_One().match(AddRHS)))
1472+
return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
1473+
// 0 == (a + 1)
1474+
// 0 == (1 + a)
1475+
if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) &&
1476+
(m_One().match(AddLHS) || m_One().match(AddRHS)))
1477+
return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
1478+
}
1479+
14661480
return false;
14671481
}
14681482
};

llvm/test/CodeGen/Hexagon/swp-epilog-phi5.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99

1010
; CHECK: loop0
1111
; CHECK: [[REG0:r([0-9]+)]] += mpyi
12-
; CHECK-NOT: r{{[0-9]+}} += add([[REG0]],#8)
13-
; CHECK: endloop1
12+
; CHECK: [[REG2:r([0-9]+)]] = add([[REG1:r([0-9]+)]],add([[REG0]],#8
13+
; CHECK: endloop0
1414

1515
%s.0 = type { %s.1*, %s.4*, %s.7*, i8*, i8, i32, %s.8*, i32, i32, i32, i8, i8, i32, i32, double, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %s.9*], [4 x %s.10*], [4 x %s.10*], i32, %s.23*, i8, i8, [16 x i8], [16 x i8], [16 x i8], i32, i8, i8, i8, i8, i16, i16, i8, i8, i8, %s.11*, i32, i32, i32, i32, i8*, i32, [4 x %s.23*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %s.12*, %s.13*, %s.14*, %s.15*, %s.16*, %s.17*, %s.18*, %s.19*, %s.20*, %s.21*, %s.22* }
1616
%s.1 = type { void (%s.2*)*, void (%s.2*, i32)*, void (%s.2*)*, void (%s.2*, i8*)*, void (%s.2*)*, i32, %s.3, i32, i32, i8**, i32, i8**, i32, i32 }

llvm/test/CodeGen/X86/copy-eflags.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,13 @@ define i32 @test2(i32* %ptr) nounwind {
102102
; X32-NEXT: calll external
103103
; X32-NEXT: addl $4, %esp
104104
; X32-NEXT: testb %bl, %bl
105-
; X32-NEXT: je .LBB1_1
106-
; X32-NEXT: # %bb.2: # %else
107-
; X32-NEXT: xorl %eax, %eax
105+
; X32-NEXT: jne .LBB1_2
106+
; X32-NEXT: # %bb.1: # %then
107+
; X32-NEXT: movl $64, %eax
108108
; X32-NEXT: popl %ebx
109109
; X32-NEXT: retl
110-
; X32-NEXT: .LBB1_1: # %then
111-
; X32-NEXT: movl $64, %eax
110+
; X32-NEXT: .LBB1_2: # %else
111+
; X32-NEXT: xorl %eax, %eax
112112
; X32-NEXT: popl %ebx
113113
; X32-NEXT: retl
114114
;
@@ -120,13 +120,13 @@ define i32 @test2(i32* %ptr) nounwind {
120120
; X64-NEXT: movl $42, %edi
121121
; X64-NEXT: callq external
122122
; X64-NEXT: testb %bl, %bl
123-
; X64-NEXT: je .LBB1_1
124-
; X64-NEXT: # %bb.2: # %else
125-
; X64-NEXT: xorl %eax, %eax
123+
; X64-NEXT: jne .LBB1_2
124+
; X64-NEXT: # %bb.1: # %then
125+
; X64-NEXT: movl $64, %eax
126126
; X64-NEXT: popq %rbx
127127
; X64-NEXT: retq
128-
; X64-NEXT: .LBB1_1: # %then
129-
; X64-NEXT: movl $64, %eax
128+
; X64-NEXT: .LBB1_2: # %else
129+
; X64-NEXT: xorl %eax, %eax
130130
; X64-NEXT: popq %rbx
131131
; X64-NEXT: retq
132132
entry:

llvm/test/CodeGen/X86/rd-mod-wr-eflags.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -177,11 +177,11 @@ define void @example_inc(%struct.obj2* %o) nounwind uwtable ssp {
177177
; CHECK-NEXT: jne .LBB4_4
178178
; CHECK-NEXT: # %bb.3: # %if.end2
179179
; CHECK-NEXT: incb 14(%rdi)
180-
; CHECK-NEXT: je .LBB4_5
180+
; CHECK-NEXT: jne .LBB4_4
181+
; CHECK-NEXT: # %bb.5: # %if.end4
182+
; CHECK-NEXT: jmp other # TAILCALL
181183
; CHECK-NEXT: .LBB4_4: # %return
182184
; CHECK-NEXT: retq
183-
; CHECK-NEXT: .LBB4_5: # %if.end4
184-
; CHECK-NEXT: jmp other # TAILCALL
185185
entry:
186186
%s64 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 0
187187
%0 = load i64, i64* %s64, align 8

llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,11 @@ exit:
105105

106106
define i1 @uaddo_i64_increment(i64 %x, i64* %p) {
107107
; CHECK-LABEL: @uaddo_i64_increment(
108-
; CHECK-NEXT: [[A:%.*]] = add i64 [[X:%.*]], 1
109-
; CHECK-NEXT: [[OV:%.*]] = icmp eq i64 [[A]], 0
110-
; CHECK-NEXT: store i64 [[A]], i64* [[P:%.*]]
111-
; CHECK-NEXT: ret i1 [[OV]]
108+
; CHECK-NEXT: [[UADD_OVERFLOW:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 1)
109+
; CHECK-NEXT: [[UADD:%.*]] = extractvalue { i64, i1 } [[UADD_OVERFLOW]], 0
110+
; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i64, i1 } [[UADD_OVERFLOW]], 1
111+
; CHECK-NEXT: store i64 [[UADD]], i64* [[P:%.*]]
112+
; CHECK-NEXT: ret i1 [[OVERFLOW]]
112113
;
113114
%a = add i64 %x, 1
114115
%ov = icmp eq i64 %a, 0
@@ -118,10 +119,11 @@ define i1 @uaddo_i64_increment(i64 %x, i64* %p) {
118119

119120
define i1 @uaddo_i8_increment_noncanonical_1(i8 %x, i8* %p) {
120121
; CHECK-LABEL: @uaddo_i8_increment_noncanonical_1(
121-
; CHECK-NEXT: [[A:%.*]] = add i8 1, [[X:%.*]]
122-
; CHECK-NEXT: [[OV:%.*]] = icmp eq i8 [[A]], 0
123-
; CHECK-NEXT: store i8 [[A]], i8* [[P:%.*]]
124-
; CHECK-NEXT: ret i1 [[OV]]
122+
; CHECK-NEXT: [[UADD_OVERFLOW:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 1, i8 [[X:%.*]])
123+
; CHECK-NEXT: [[UADD:%.*]] = extractvalue { i8, i1 } [[UADD_OVERFLOW]], 0
124+
; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[UADD_OVERFLOW]], 1
125+
; CHECK-NEXT: store i8 [[UADD]], i8* [[P:%.*]]
126+
; CHECK-NEXT: ret i1 [[OVERFLOW]]
125127
;
126128
%a = add i8 1, %x ; commute
127129
%ov = icmp eq i8 %a, 0
@@ -131,10 +133,11 @@ define i1 @uaddo_i8_increment_noncanonical_1(i8 %x, i8* %p) {
131133

132134
define i1 @uaddo_i32_increment_noncanonical_2(i32 %x, i32* %p) {
133135
; CHECK-LABEL: @uaddo_i32_increment_noncanonical_2(
134-
; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 1
135-
; CHECK-NEXT: [[OV:%.*]] = icmp eq i32 0, [[A]]
136-
; CHECK-NEXT: store i32 [[A]], i32* [[P:%.*]]
137-
; CHECK-NEXT: ret i1 [[OV]]
136+
; CHECK-NEXT: [[UADD_OVERFLOW:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 1)
137+
; CHECK-NEXT: [[UADD:%.*]] = extractvalue { i32, i1 } [[UADD_OVERFLOW]], 0
138+
; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i32, i1 } [[UADD_OVERFLOW]], 1
139+
; CHECK-NEXT: store i32 [[UADD]], i32* [[P:%.*]]
140+
; CHECK-NEXT: ret i1 [[OVERFLOW]]
138141
;
139142
%a = add i32 %x, 1
140143
%ov = icmp eq i32 0, %a ; commute
@@ -144,10 +147,11 @@ define i1 @uaddo_i32_increment_noncanonical_2(i32 %x, i32* %p) {
144147

145148
define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, i16* %p) {
146149
; CHECK-LABEL: @uaddo_i16_increment_noncanonical_3(
147-
; CHECK-NEXT: [[A:%.*]] = add i16 1, [[X:%.*]]
148-
; CHECK-NEXT: [[OV:%.*]] = icmp eq i16 0, [[A]]
149-
; CHECK-NEXT: store i16 [[A]], i16* [[P:%.*]]
150-
; CHECK-NEXT: ret i1 [[OV]]
150+
; CHECK-NEXT: [[UADD_OVERFLOW:%.*]] = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 1, i16 [[X:%.*]])
151+
; CHECK-NEXT: [[UADD:%.*]] = extractvalue { i16, i1 } [[UADD_OVERFLOW]], 0
152+
; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i16, i1 } [[UADD_OVERFLOW]], 1
153+
; CHECK-NEXT: store i16 [[UADD]], i16* [[P:%.*]]
154+
; CHECK-NEXT: ret i1 [[OVERFLOW]]
151155
;
152156
%a = add i16 1, %x ; commute
153157
%ov = icmp eq i16 0, %a ; commute
@@ -159,10 +163,11 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, i16* %p) {
159163

160164
define i1 @uaddo_i42_increment_illegal_type(i42 %x, i42* %p) {
161165
; CHECK-LABEL: @uaddo_i42_increment_illegal_type(
162-
; CHECK-NEXT: [[A:%.*]] = add i42 [[X:%.*]], 1
163-
; CHECK-NEXT: [[OV:%.*]] = icmp eq i42 [[A]], 0
164-
; CHECK-NEXT: store i42 [[A]], i42* [[P:%.*]]
165-
; CHECK-NEXT: ret i1 [[OV]]
166+
; CHECK-NEXT: [[UADD_OVERFLOW:%.*]] = call { i42, i1 } @llvm.uadd.with.overflow.i42(i42 [[X:%.*]], i42 1)
167+
; CHECK-NEXT: [[UADD:%.*]] = extractvalue { i42, i1 } [[UADD_OVERFLOW]], 0
168+
; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i42, i1 } [[UADD_OVERFLOW]], 1
169+
; CHECK-NEXT: store i42 [[UADD]], i42* [[P:%.*]]
170+
; CHECK-NEXT: ret i1 [[OVERFLOW]]
166171
;
167172
%a = add i42 %x, 1
168173
%ov = icmp eq i42 %a, 0

0 commit comments

Comments
 (0)