Skip to content

Commit e0cc556

Browse files
authored
[IndVars] Teach widenLoopCompare to use sext if narrow IV is non-negative and other operand is already sext. (#142703)
This prevents us from ending up with (zext (sext X)). The zext will require an instruction on targets where zext isn't free, such as RISC-V.
1 parent f72dd4e commit e0cc556

File tree

2 files changed

+76
-4
lines changed

2 files changed

+76
-4
lines changed

llvm/lib/Transforms/Utils/SimplifyIndVar.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,13 +1605,13 @@ bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) {
16051605
//
16061606
// - The signedness of the IV extension and comparison match
16071607
//
1608-
// - The narrow IV is always positive (and thus its sign extension is equal
1609-
// to its zero extension). For instance, let's say we're zero extending
1610-
// %narrow for the following use
1608+
// - The narrow IV is always non-negative (and thus its sign extension is
1609+
// equal to its zero extension). For instance, let's say we're zero
1610+
// extending %narrow for the following use
16111611
//
16121612
// icmp slt i32 %narrow, %val ... (A)
16131613
//
1614-
// and %narrow is always positive. Then
1614+
// and %narrow is always non-negative. Then
16151615
//
16161616
// (A) == icmp slt i32 sext(%narrow), sext(%val)
16171617
// == icmp slt i32 zext(%narrow), sext(%val)
@@ -1630,6 +1630,12 @@ bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) {
16301630

16311631
// Widen the other operand of the compare, if necessary.
16321632
if (CastWidth < IVWidth) {
1633+
// If the narrow IV is always non-negative and the other operand is sext,
1634+
// widen using sext so we can combine them. This works for all non-signed
1635+
// comparison predicates.
1636+
if (DU.NeverNegative && isa<SExtInst>(Op) && !Cmp->isSigned())
1637+
CmpPreferredSign = true;
1638+
16331639
Value *ExtOp = createExtendInst(Op, WideType, CmpPreferredSign, Cmp);
16341640
DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
16351641
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes=indvars -S | FileCheck %s
3+
4+
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
5+
target triple = "riscv64"
6+
7+
define void @foo(ptr %x, i32 %n) {
8+
; CHECK-LABEL: define void @foo(
9+
; CHECK-SAME: ptr [[X:%.*]], i32 [[N:%.*]]) {
10+
; CHECK-NEXT: [[ENTRY:.*:]]
11+
; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[N]], 0
12+
; CHECK-NEXT: br i1 [[CMP10]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]]
13+
; CHECK: [[FOR_BODY_PREHEADER]]:
14+
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
15+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
16+
; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
17+
; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
18+
; CHECK: [[FOR_COND_CLEANUP]]:
19+
; CHECK-NEXT: ret void
20+
; CHECK: [[FOR_BODY]]:
21+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
22+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i16, ptr [[X]], i64 [[INDVARS_IV]]
23+
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
24+
; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
25+
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[CONV]] to i64
26+
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[INDVARS_IV]], [[TMP1]]
27+
; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[FOR_INC]]
28+
; CHECK: [[IF_THEN]]:
29+
; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX]], align 2
30+
; CHECK-NEXT: br label %[[FOR_INC]]
31+
; CHECK: [[FOR_INC]]:
32+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
33+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
34+
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]]
35+
;
36+
entry:
37+
%cmp10 = icmp sgt i32 %n, 0
38+
br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup
39+
40+
for.body.preheader: ; preds = %entry
41+
br label %for.body
42+
43+
for.cond.cleanup.loopexit: ; preds = %for.inc
44+
br label %for.cond.cleanup
45+
46+
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
47+
ret void
48+
49+
for.body: ; preds = %for.body.preheader, %for.inc
50+
%i.011 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
51+
%idxprom = zext nneg i32 %i.011 to i64
52+
%arrayidx = getelementptr inbounds nuw i16, ptr %x, i64 %idxprom
53+
%0 = load i16, ptr %arrayidx, align 2
54+
%conv = sext i16 %0 to i32
55+
%cmp1 = icmp eq i32 %i.011, %conv
56+
br i1 %cmp1, label %if.then, label %for.inc
57+
58+
if.then: ; preds = %for.body
59+
store i16 0, ptr %arrayidx, align 2
60+
br label %for.inc
61+
62+
for.inc: ; preds = %for.body, %if.then
63+
%inc = add nuw nsw i32 %i.011, 1
64+
%cmp = icmp slt i32 %inc, %n
65+
br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
66+
}

0 commit comments

Comments
 (0)