-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64][LoopIdiom] Disable LoopIdiomTransform when NoImplicitFloat is present #87677
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][LoopIdiom] Disable LoopIdiomTransform when NoImplicitFloat is present #87677
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-transforms Author: Min-Yih Hsu (mshockwave) ChangesThis behavior is aligned with both LoopVectorizer and SLPVectorizer. Full diff: https://github.com/llvm/llvm-project/pull/87677.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
index 6dfb2b9df7135d..a9bd8d877fb2ee 100644
--- a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
@@ -190,17 +190,23 @@ AArch64LoopIdiomTransformPass::run(Loop &L, LoopAnalysisManager &AM,
bool AArch64LoopIdiomTransform::run(Loop *L) {
CurLoop = L;
- if (DisableAll || L->getHeader()->getParent()->hasOptSize())
+ Function &F = *L->getHeader()->getParent();
+ if (DisableAll || F.hasOptSize())
return false;
+ if (F.hasFnAttribute(Attribute::NoImplicitFloat)) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << " is disabled on " << F.getName()
+ << " due to its NoImplicitFloat attribute");
+ return false;
+ }
+
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
if (!L->getLoopPreheader())
return false;
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
- << CurLoop->getHeader()->getParent()->getName()
- << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F[" << F.getName() << "] Loop %"
+ << CurLoop->getHeader()->getName() << "\n");
return recognizeByteCompare();
}
diff --git a/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll b/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll
index daa64f2e2ea7c0..27ab11446b5719 100644
--- a/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll
+++ b/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll
@@ -2090,3 +2090,99 @@ while.end:
ret i32 %res
}
+; The optimization should be disabled when noimplicitfloat is present.
+define i32 @no_implicit_float(ptr %a, ptr %b, i32 %len, i32 %extra, i32 %n) noimplicitfloat {
+; CHECK-LABEL: define i32 @no_implicit_float(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[EXTRA:%.*]], i32 [[N:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK: while.cond:
+; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK: while.body:
+; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; CHECK: while.end:
+; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[EXTRA_PHI:%.*]] = phi i32 [ [[EXTRA]], [[WHILE_BODY]] ], [ [[EXTRA]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+; LOOP-DEL-LABEL: define i32 @no_implicit_float(
+; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[EXTRA:%.*]], i32 [[N:%.*]]) #[[ATTR2:[0-9]+]] {
+; LOOP-DEL-NEXT: entry:
+; LOOP-DEL-NEXT: br label [[WHILE_COND:%.*]]
+; LOOP-DEL: while.cond:
+; LOOP-DEL-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; LOOP-DEL-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; LOOP-DEL-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; LOOP-DEL-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; LOOP-DEL: while.body:
+; LOOP-DEL-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; LOOP-DEL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; LOOP-DEL-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; LOOP-DEL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; LOOP-DEL-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; LOOP-DEL-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; LOOP-DEL-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; LOOP-DEL: while.end:
+; LOOP-DEL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; LOOP-DEL-NEXT: [[EXTRA_PHI:%.*]] = phi i32 [ [[EXTRA]], [[WHILE_BODY]] ], [ [[EXTRA]], [[WHILE_COND]] ]
+; LOOP-DEL-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]]
+; LOOP-DEL-NEXT: ret i32 [[RES]]
+;
+; NO-TRANSFORM-LABEL: define i32 @no_implicit_float(
+; NO-TRANSFORM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[EXTRA:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; NO-TRANSFORM-NEXT: entry:
+; NO-TRANSFORM-NEXT: br label [[WHILE_COND:%.*]]
+; NO-TRANSFORM: while.cond:
+; NO-TRANSFORM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
+; NO-TRANSFORM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
+; NO-TRANSFORM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; NO-TRANSFORM: while.body:
+; NO-TRANSFORM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
+; NO-TRANSFORM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-TRANSFORM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
+; NO-TRANSFORM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; NO-TRANSFORM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
+; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
+; NO-TRANSFORM: while.end:
+; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NEXT: [[EXTRA_PHI:%.*]] = phi i32 [ [[EXTRA]], [[WHILE_BODY]] ], [ [[EXTRA]], [[WHILE_COND]] ]
+; NO-TRANSFORM-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]]
+; NO-TRANSFORM-NEXT: ret i32 [[RES]]
+;
+entry:
+ br label %while.cond
+
+while.cond:
+ %len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ]
+ %inc = add i32 %len.addr, 1
+ %cmp.not = icmp eq i32 %inc, %n
+ br i1 %cmp.not, label %while.end, label %while.body
+
+while.body:
+ %idxprom = zext i32 %inc to i64
+ %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom
+ %0 = load i8, ptr %arrayidx
+ %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
+ %1 = load i8, ptr %arrayidx2
+ %cmp.not2 = icmp eq i8 %0, %1
+ br i1 %cmp.not2, label %while.cond, label %while.end
+
+while.end:
+ %inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ]
+ %extra.phi = phi i32 [ %extra, %while.body ], [ %extra, %while.cond ]
+ %res = add i32 %inc.lcssa, %extra.phi
+ ret i32 %res
+}
+
|
LGTM, but I'd like @david-arm to agree. implicitfloat attribute documentation from LangRef.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM! Agree with @topperc.
This behavior is aligned with both LoopVectorizer and SLPVectorizer.