Skip to content

Commit 49f7513

Browse files
committed
[DivRemPairs] Freeze operands if they can be undef values
Summary: DivRemPairs is unsound with respect to undef values. ``` // bb1: // %rem = srem %x, %y // bb2: // %div = sdiv %x, %y // --> // bb1: // %div = sdiv %x, %y // %mul = mul %div, %y // %rem = sub %x, %mul ``` If X can be undef, X should be frozen first. For example, let's assume that Y = 1 & X = undef: ``` %div = sdiv undef, 1 // %div = undef %rem = srem undef, 1 // %rem = 0 => %div = sdiv undef, 1 // %div = undef %mul = mul %div, 1 // %mul = undef %rem = sub %x, %mul // %rem = undef - undef = undef ``` http://volta.cs.utah.edu:8080/z/m7Xrx5 Same for Y. If X = 1 and Y = (undef | 1), %rem in src is either 1 or 0, but %rem in tgt can be one of many integer values. This resolves https://bugs.llvm.org/show_bug.cgi?id=42619 . This miscompilation disappears if undef value is removed, but it may take a while. DivRemPair happens pretty late during the optimization pipeline, so this optimization seemed as a good candidate to fix without major regression using freeze than other broken optimizations. Reviewers: spatel, lebedev.ri, george.burgess.iv Reviewed By: spatel Subscribers: wuzish, regehr, nlopes, nemanjai, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D76483
1 parent 0019c2f commit 49f7513

File tree

4 files changed

+84
-36
lines changed

4 files changed

+84
-36
lines changed

llvm/lib/Transforms/Scalar/DivRemPairs.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "llvm/ADT/Statistic.h"
1818
#include "llvm/Analysis/GlobalsModRef.h"
1919
#include "llvm/Analysis/TargetTransformInfo.h"
20+
#include "llvm/Analysis/ValueTracking.h"
2021
#include "llvm/IR/Dominators.h"
2122
#include "llvm/IR/Function.h"
2223
#include "llvm/IR/PatternMatch.h"
@@ -303,6 +304,29 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
303304
Mul->insertAfter(RemInst);
304305
Sub->insertAfter(Mul);
305306

307+
// If X can be undef, X should be frozen first.
308+
// For example, let's assume that Y = 1 & X = undef:
309+
// %div = sdiv undef, 1 // %div = undef
310+
// %rem = srem undef, 1 // %rem = 0
311+
// =>
312+
// %div = sdiv undef, 1 // %div = undef
313+
// %mul = mul %div, 1 // %mul = undef
314+
// %rem = sub %x, %mul // %rem = undef - undef = undef
315+
// If X is not frozen, %rem becomes undef after transformation.
316+
// TODO: We need a undef-specific checking function in ValueTracking
317+
if (!isGuaranteedNotToBeUndefOrPoison(X, DivInst, &DT)) {
318+
auto *FrX = new FreezeInst(X, X->getName() + ".frozen", DivInst);
319+
DivInst->setOperand(0, FrX);
320+
Sub->setOperand(0, FrX);
321+
}
322+
// Same for Y. If X = 1 and Y = (undef | 1), %rem in src is either 1 or 0,
323+
// but %rem in tgt can be one of many integer values.
324+
if (!isGuaranteedNotToBeUndefOrPoison(Y, DivInst, &DT)) {
325+
auto *FrY = new FreezeInst(Y, Y->getName() + ".frozen", DivInst);
326+
DivInst->setOperand(1, FrY);
327+
Mul->setOperand(1, FrY);
328+
}
329+
306330
// Now kill the explicit remainder. We have replaced it with:
307331
// (sub X, (mul (div X, Y), Y)
308332
Sub->setName(RemInst->getName() + ".decomposed");

llvm/test/Transforms/DivRemPairs/PowerPC/div-expanded-rem-pair.ll

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,14 +100,18 @@ end:
100100
define i32 @srem_of_srem_unexpanded(i32 %X, i32 %Y, i32 %Z) {
101101
; CHECK-LABEL: @srem_of_srem_unexpanded(
102102
; CHECK-NEXT: [[T0:%.*]] = mul nsw i32 [[Z:%.*]], [[Y:%.*]]
103-
; CHECK-NEXT: [[T1:%.*]] = sdiv i32 [[X:%.*]], [[T0]]
103+
; CHECK-NEXT: [[X_FROZEN:%.*]] = freeze i32 [[X:%.*]]
104+
; CHECK-NEXT: [[T0_FROZEN:%.*]] = freeze i32 [[T0]]
105+
; CHECK-NEXT: [[T1:%.*]] = sdiv i32 [[X_FROZEN]], [[T0_FROZEN]]
104106
; CHECK-NEXT: [[T2:%.*]] = mul nsw i32 [[T0]], [[T1]]
105-
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[T1]], [[T0]]
106-
; CHECK-NEXT: [[T3_DECOMPOSED:%.*]] = sub i32 [[X]], [[TMP1]]
107-
; CHECK-NEXT: [[T4:%.*]] = sdiv i32 [[T3_DECOMPOSED]], [[Y]]
107+
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[T1]], [[T0_FROZEN]]
108+
; CHECK-NEXT: [[T3_DECOMPOSED:%.*]] = sub i32 [[X_FROZEN]], [[TMP1]]
109+
; CHECK-NEXT: [[T3_DECOMPOSED_FROZEN:%.*]] = freeze i32 [[T3_DECOMPOSED]]
110+
; CHECK-NEXT: [[Y_FROZEN:%.*]] = freeze i32 [[Y]]
111+
; CHECK-NEXT: [[T4:%.*]] = sdiv i32 [[T3_DECOMPOSED_FROZEN]], [[Y_FROZEN]]
108112
; CHECK-NEXT: [[T5:%.*]] = mul nsw i32 [[T4]], [[Y]]
109-
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[T4]], [[Y]]
110-
; CHECK-NEXT: [[T6_DECOMPOSED:%.*]] = sub i32 [[T3_DECOMPOSED]], [[TMP2]]
113+
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[T4]], [[Y_FROZEN]]
114+
; CHECK-NEXT: [[T6_DECOMPOSED:%.*]] = sub i32 [[T3_DECOMPOSED_FROZEN]], [[TMP2]]
111115
; CHECK-NEXT: ret i32 [[T6_DECOMPOSED]]
112116
;
113117
%t0 = mul nsw i32 %Z, %Y

llvm/test/Transforms/DivRemPairs/PowerPC/div-rem-pairs.ll

Lines changed: 45 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@ declare void @foo(i32, i32)
55

66
define void @decompose_illegal_srem_same_block(i32 %a, i32 %b) {
77
; CHECK-LABEL: @decompose_illegal_srem_same_block(
8-
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]]
9-
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[DIV]], [[B]]
10-
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i32 [[A]], [[TMP1]]
8+
; CHECK-NEXT: [[A_FROZEN:%.*]] = freeze i32 [[A:%.*]]
9+
; CHECK-NEXT: [[B_FROZEN:%.*]] = freeze i32 [[B:%.*]]
10+
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[A_FROZEN]], [[B_FROZEN]]
11+
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[DIV]], [[B_FROZEN]]
12+
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i32 [[A_FROZEN]], [[TMP1]]
1113
; CHECK-NEXT: call void @foo(i32 [[REM_DECOMPOSED]], i32 [[DIV]])
1214
; CHECK-NEXT: ret void
1315
;
@@ -19,9 +21,11 @@ define void @decompose_illegal_srem_same_block(i32 %a, i32 %b) {
1921

2022
define void @decompose_illegal_urem_same_block(i32 %a, i32 %b) {
2123
; CHECK-LABEL: @decompose_illegal_urem_same_block(
22-
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[A:%.*]], [[B:%.*]]
23-
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[DIV]], [[B]]
24-
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i32 [[A]], [[TMP1]]
24+
; CHECK-NEXT: [[A_FROZEN:%.*]] = freeze i32 [[A:%.*]]
25+
; CHECK-NEXT: [[B_FROZEN:%.*]] = freeze i32 [[B:%.*]]
26+
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[A_FROZEN]], [[B_FROZEN]]
27+
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[DIV]], [[B_FROZEN]]
28+
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i32 [[A_FROZEN]], [[TMP1]]
2529
; CHECK-NEXT: call void @foo(i32 [[REM_DECOMPOSED]], i32 [[DIV]])
2630
; CHECK-NEXT: ret void
2731
;
@@ -37,9 +41,11 @@ define void @decompose_illegal_urem_same_block(i32 %a, i32 %b) {
3741
define i32 @hoist_sdiv(i32 %a, i32 %b) {
3842
; CHECK-LABEL: @hoist_sdiv(
3943
; CHECK-NEXT: entry:
40-
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]]
41-
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[DIV]], [[B]]
42-
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i32 [[A]], [[TMP0]]
44+
; CHECK-NEXT: [[A_FROZEN:%.*]] = freeze i32 [[A:%.*]]
45+
; CHECK-NEXT: [[B_FROZEN:%.*]] = freeze i32 [[B:%.*]]
46+
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[A_FROZEN]], [[B_FROZEN]]
47+
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[DIV]], [[B_FROZEN]]
48+
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i32 [[A_FROZEN]], [[TMP0]]
4349
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM_DECOMPOSED]], 42
4450
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
4551
; CHECK: if:
@@ -67,9 +73,11 @@ end:
6773
define i64 @hoist_udiv(i64 %a, i64 %b) {
6874
; CHECK-LABEL: @hoist_udiv(
6975
; CHECK-NEXT: entry:
70-
; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[A:%.*]], [[B:%.*]]
71-
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[DIV]], [[B]]
72-
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i64 [[A]], [[TMP0]]
76+
; CHECK-NEXT: [[A_FROZEN:%.*]] = freeze i64 [[A:%.*]]
77+
; CHECK-NEXT: [[B_FROZEN:%.*]] = freeze i64 [[B:%.*]]
78+
; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[A_FROZEN]], [[B_FROZEN]]
79+
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[DIV]], [[B_FROZEN]]
80+
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i64 [[A_FROZEN]], [[TMP0]]
7381
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[REM_DECOMPOSED]], 42
7482
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
7583
; CHECK: if:
@@ -97,12 +105,14 @@ end:
97105
define i16 @hoist_srem(i16 %a, i16 %b) {
98106
; CHECK-LABEL: @hoist_srem(
99107
; CHECK-NEXT: entry:
100-
; CHECK-NEXT: [[DIV:%.*]] = sdiv i16 [[A:%.*]], [[B:%.*]]
108+
; CHECK-NEXT: [[A_FROZEN:%.*]] = freeze i16 [[A:%.*]]
109+
; CHECK-NEXT: [[B_FROZEN:%.*]] = freeze i16 [[B:%.*]]
110+
; CHECK-NEXT: [[DIV:%.*]] = sdiv i16 [[A_FROZEN]], [[B_FROZEN]]
101111
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[DIV]], 42
102112
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
103113
; CHECK: if:
104-
; CHECK-NEXT: [[TMP0:%.*]] = mul i16 [[DIV]], [[B]]
105-
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i16 [[A]], [[TMP0]]
114+
; CHECK-NEXT: [[TMP0:%.*]] = mul i16 [[DIV]], [[B_FROZEN]]
115+
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i16 [[A_FROZEN]], [[TMP0]]
106116
; CHECK-NEXT: br label [[END]]
107117
; CHECK: end:
108118
; CHECK-NEXT: [[RET:%.*]] = phi i16 [ [[REM_DECOMPOSED]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
@@ -127,12 +137,14 @@ end:
127137
define i8 @hoist_urem(i8 %a, i8 %b) {
128138
; CHECK-LABEL: @hoist_urem(
129139
; CHECK-NEXT: entry:
130-
; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[A:%.*]], [[B:%.*]]
140+
; CHECK-NEXT: [[A_FROZEN:%.*]] = freeze i8 [[A:%.*]]
141+
; CHECK-NEXT: [[B_FROZEN:%.*]] = freeze i8 [[B:%.*]]
142+
; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[A_FROZEN]], [[B_FROZEN]]
131143
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[DIV]], 42
132144
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
133145
; CHECK: if:
134-
; CHECK-NEXT: [[TMP0:%.*]] = mul i8 [[DIV]], [[B]]
135-
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i8 [[A]], [[TMP0]]
146+
; CHECK-NEXT: [[TMP0:%.*]] = mul i8 [[DIV]], [[B_FROZEN]]
147+
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i8 [[A_FROZEN]], [[TMP0]]
136148
; CHECK-NEXT: br label [[END]]
137149
; CHECK: end:
138150
; CHECK-NEXT: [[RET:%.*]] = phi i8 [ [[REM_DECOMPOSED]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
@@ -157,14 +169,18 @@ end:
157169
define i32 @srem_of_srem_unexpanded(i32 %X, i32 %Y, i32 %Z) {
158170
; CHECK-LABEL: @srem_of_srem_unexpanded(
159171
; CHECK-NEXT: [[T0:%.*]] = mul nsw i32 [[Z:%.*]], [[Y:%.*]]
160-
; CHECK-NEXT: [[T1:%.*]] = sdiv i32 [[X:%.*]], [[T0]]
172+
; CHECK-NEXT: [[X_FROZEN:%.*]] = freeze i32 [[X:%.*]]
173+
; CHECK-NEXT: [[T0_FROZEN:%.*]] = freeze i32 [[T0]]
174+
; CHECK-NEXT: [[T1:%.*]] = sdiv i32 [[X_FROZEN]], [[T0_FROZEN]]
161175
; CHECK-NEXT: [[T2:%.*]] = mul nsw i32 [[T0]], [[T1]]
162-
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[T1]], [[T0]]
163-
; CHECK-NEXT: [[T3_DECOMPOSED:%.*]] = sub i32 [[X]], [[TMP1]]
164-
; CHECK-NEXT: [[T4:%.*]] = sdiv i32 [[T3_DECOMPOSED]], [[Y]]
176+
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[T1]], [[T0_FROZEN]]
177+
; CHECK-NEXT: [[T3_DECOMPOSED:%.*]] = sub i32 [[X_FROZEN]], [[TMP1]]
178+
; CHECK-NEXT: [[T3_DECOMPOSED_FROZEN:%.*]] = freeze i32 [[T3_DECOMPOSED]]
179+
; CHECK-NEXT: [[Y_FROZEN:%.*]] = freeze i32 [[Y]]
180+
; CHECK-NEXT: [[T4:%.*]] = sdiv i32 [[T3_DECOMPOSED_FROZEN]], [[Y_FROZEN]]
165181
; CHECK-NEXT: [[T5:%.*]] = mul nsw i32 [[T4]], [[Y]]
166-
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[T4]], [[Y]]
167-
; CHECK-NEXT: [[T6_DECOMPOSED:%.*]] = sub i32 [[T3_DECOMPOSED]], [[TMP2]]
182+
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[T4]], [[Y_FROZEN]]
183+
; CHECK-NEXT: [[T6_DECOMPOSED:%.*]] = sub i32 [[T3_DECOMPOSED_FROZEN]], [[TMP2]]
168184
; CHECK-NEXT: ret i32 [[T6_DECOMPOSED]]
169185
;
170186
%t0 = mul nsw i32 %Z, %Y
@@ -289,12 +305,14 @@ end:
289305
define i128 @dont_hoist_urem(i128 %a, i128 %b) {
290306
; CHECK-LABEL: @dont_hoist_urem(
291307
; CHECK-NEXT: entry:
292-
; CHECK-NEXT: [[DIV:%.*]] = udiv i128 [[A:%.*]], [[B:%.*]]
308+
; CHECK-NEXT: [[A_FROZEN:%.*]] = freeze i128 [[A:%.*]]
309+
; CHECK-NEXT: [[B_FROZEN:%.*]] = freeze i128 [[B:%.*]]
310+
; CHECK-NEXT: [[DIV:%.*]] = udiv i128 [[A_FROZEN]], [[B_FROZEN]]
293311
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[DIV]], 42
294312
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
295313
; CHECK: if:
296-
; CHECK-NEXT: [[TMP0:%.*]] = mul i128 [[DIV]], [[B]]
297-
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i128 [[A]], [[TMP0]]
314+
; CHECK-NEXT: [[TMP0:%.*]] = mul i128 [[DIV]], [[B_FROZEN]]
315+
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i128 [[A_FROZEN]], [[TMP0]]
298316
; CHECK-NEXT: br label [[END]]
299317
; CHECK: end:
300318
; CHECK-NEXT: [[RET:%.*]] = phi i128 [ [[REM_DECOMPOSED]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]

llvm/test/Transforms/DivRemPairs/X86/div-rem-pairs.ll

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -281,12 +281,14 @@ end:
281281
define i128 @dont_hoist_urem(i128 %a, i128 %b) {
282282
; CHECK-LABEL: @dont_hoist_urem(
283283
; CHECK-NEXT: entry:
284-
; CHECK-NEXT: [[DIV:%.*]] = udiv i128 [[A:%.*]], [[B:%.*]]
284+
; CHECK-NEXT: [[A_FROZEN:%.*]] = freeze i128 [[A:%.*]]
285+
; CHECK-NEXT: [[B_FROZEN:%.*]] = freeze i128 [[B:%.*]]
286+
; CHECK-NEXT: [[DIV:%.*]] = udiv i128 [[A_FROZEN]], [[B_FROZEN]]
285287
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[DIV]], 42
286288
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
287289
; CHECK: if:
288-
; CHECK-NEXT: [[TMP0:%.*]] = mul i128 [[DIV]], [[B]]
289-
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i128 [[A]], [[TMP0]]
290+
; CHECK-NEXT: [[TMP0:%.*]] = mul i128 [[DIV]], [[B_FROZEN]]
291+
; CHECK-NEXT: [[REM_DECOMPOSED:%.*]] = sub i128 [[A_FROZEN]], [[TMP0]]
290292
; CHECK-NEXT: br label [[END]]
291293
; CHECK: end:
292294
; CHECK-NEXT: [[RET:%.*]] = phi i128 [ [[REM_DECOMPOSED]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]

0 commit comments

Comments
 (0)