Skip to content

Commit 5869edc

Browse files
committed
[SCEV] Fold zext(C+A)<nsw> -> (sext(C) + zext(A))<nsw> if possible.
Simplify zext(C+A)<nsw> -> (sext(C) + zext(A))<nsw> if both of the following hold: (1) zext (C + A)<nsw> >=s 0, and (2) A >=s C. For now this is limited to cases where the first operand is a constant, so the SExt can be folded to a new constant. This can be relaxed in the future. Alive2 proof of the general pattern and the test changes in zext-nuw.ll (times out in the online instance but verifies locally): https://alive2.llvm.org/ce/z/_BtyGy
1 parent 1340ecf commit 5869edc

File tree

5 files changed

+29
-19
lines changed

5 files changed

+29
-19
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1793,6 +1793,18 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
17931793
return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1);
17941794
}
17951795

1796+
const SCEVConstant *C;
1797+
const SCEV *A;
1798+
// zext (C + A)<nsw> -> (sext(C) + zext(A))<nsw> if zext (C + A)<nsw> >=s 0
1799+
// and A >=s C.
1800+
if (SA->hasNoSignedWrap() && isKnownNonNegative(SA) &&
1801+
match(SA, m_scev_Add(m_SCEVConstant(C), m_SCEV(A))) &&
1802+
isKnownPredicate(CmpInst::ICMP_SGE, A, C)) {
1803+
SmallVector<const SCEV *, 4> Ops = {getSignExtendExpr(C, Ty, Depth + 1),
1804+
getZeroExtendExpr(A, Ty, Depth + 1)};
1805+
return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1);
1806+
}
1807+
17961808
// zext(C + x + y + ...) --> (zext(D) + zext((C - D) + x + y + ...))
17971809
// if D + (C - D + x + y + ...) could be proven to not unsigned wrap
17981810
// where D maximizes the number of trailing zeros of (C - D + x + y + ...)

llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1231,7 +1231,7 @@ define void @optimized_range_check_unsigned3(ptr %pred, i1 %c) {
12311231
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
12321232
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,3) S: [0,3) Exits: (-1 + %N)<nsw> LoopDispositions: { %loop: Computable }
12331233
; CHECK-NEXT: %gep = getelementptr inbounds i16, ptr %pred, i32 %iv
1234-
; CHECK-NEXT: --> {%pred,+,2}<nuw><%loop> U: full-set S: full-set Exits: ((2 * (zext i32 (-1 + %N)<nsw> to i64))<nuw><nsw> + %pred) LoopDispositions: { %loop: Computable }
1234+
; CHECK-NEXT: --> {%pred,+,2}<nuw><%loop> U: full-set S: full-set Exits: (-2 + (2 * (zext i32 %N to i64))<nuw><nsw> + %pred) LoopDispositions: { %loop: Computable }
12351235
; CHECK-NEXT: %iv.next = add nuw nsw i32 %iv, 1
12361236
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,4) S: [1,4) Exits: %N LoopDispositions: { %loop: Computable }
12371237
; CHECK-NEXT: Determining loop execution counts for: @optimized_range_check_unsigned3

llvm/test/Transforms/IndVarSimplify/zext-nuw.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,9 @@ define void @_Z3fn1v() {
1515
; CHECK-NEXT: [[J_SROA_0_0_COPYLOAD:%.*]] = load i8, ptr [[X5]], align 1
1616
; CHECK-NEXT: br label [[DOTPREHEADER4_LR_PH:%.*]]
1717
; CHECK: .preheader4.lr.ph:
18-
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[X4]], -1
19-
; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
20-
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
2118
; CHECK-NEXT: [[TMP4:%.*]] = sext i8 [[J_SROA_0_0_COPYLOAD]] to i64
22-
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP3]], [[TMP4]]
19+
; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[X4]] to i64
20+
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP2]]
2321
; CHECK-NEXT: br label [[DOTPREHEADER4:%.*]]
2422
; CHECK: .preheader4:
2523
; CHECK-NEXT: [[K_09:%.*]] = phi ptr [ undef, [[DOTPREHEADER4_LR_PH]] ], [ [[X25:%.*]], [[X22:%.*]] ]

llvm/test/Transforms/LoopIdiom/X86/memset-size-compute.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@ define void @test(ptr %ptr) {
1515
; CHECK: for.body.preheader:
1616
; CHECK-NEXT: [[LIM_0:%.*]] = phi i32 [ 65, [[ENTRY:%.*]] ], [ 1, [[DEAD:%.*]] ]
1717
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 8
18-
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[LIM_0]], i32 2)
19-
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[UMAX]], -1
20-
; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TMP0]] to i64
18+
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[LIM_0]] to i64
19+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 2)
2120
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
22-
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SCEVGEP]], i8 0, i64 [[TMP2]], i1 false)
21+
; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], -8
22+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[SCEVGEP]], i8 0, i64 [[TMP3]], i1 false)
2323
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
2424
; CHECK: for.body:
2525
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 1, [[FOR_BODY_PREHEADER]] ]

llvm/test/Transforms/LoopVectorize/reduction.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,13 +1199,13 @@ define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) {
11991199
; CHECK-SAME: i16 [[N:%.*]], ptr [[A:%.*]]) {
12001200
; CHECK-NEXT: entry:
12011201
; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 2)
1202-
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i16 [[SMAX]], -1
1203-
; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i16 [[TMP0]] to i32
1202+
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i16 [[SMAX]] to i32
1203+
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[TMP0]], -1
12041204
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i16 [[N]], 5
12051205
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
12061206
; CHECK: vector.ph:
1207-
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP1]], 32764
1208-
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nuw nsw i32 [[N_VEC]] to i16
1207+
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP1]], -4
1208+
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nsw i32 [[N_VEC]] to i16
12091209
; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i16 [[DOTCAST]], 1
12101210
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
12111211
; CHECK: vector.body:
@@ -1222,7 +1222,7 @@ define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) {
12221222
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
12231223
; CHECK: middle.block:
12241224
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
1225-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]]
1225+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
12261226
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
12271227
; CHECK: scalar.ph:
12281228
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
@@ -1277,13 +1277,13 @@ define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) {
12771277
; CHECK-SAME: i16 [[N:%.*]], ptr [[A:%.*]]) {
12781278
; CHECK-NEXT: entry:
12791279
; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 2)
1280-
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i16 [[SMAX]], -1
1281-
; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i16 [[TMP0]] to i32
1280+
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i16 [[SMAX]] to i32
1281+
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[TMP0]], -1
12821282
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i16 [[N]], 5
12831283
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
12841284
; CHECK: vector.ph:
1285-
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP1]], 32764
1286-
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nuw nsw i32 [[N_VEC]] to i16
1285+
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[TMP1]], -4
1286+
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc nsw i32 [[N_VEC]] to i16
12871287
; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i16 [[DOTCAST]], 1
12881288
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
12891289
; CHECK: vector.body:
@@ -1300,7 +1300,7 @@ define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) {
13001300
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
13011301
; CHECK: middle.block:
13021302
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
1303-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]]
1303+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
13041304
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
13051305
; CHECK: scalar.ph:
13061306
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]

0 commit comments

Comments
 (0)