Skip to content

Commit 43ae4b6

Browse files
committed
[InstCombine] use loop info when running the pass after loop vectorization
This is the follow-up to D144199 and a suggestion from D144045. We make the use of loop info explicit via an InstCombine pass parameter rather than semi-arbitrary via caching. The only InstCombine transform that currently uses LoopInfo is a GEP fold in visitGEPOfGEP(), so the change shows up as a failure in the dedicated test for that fold as well as in several LoopVectorizer tests that run extra passes. I don't see any pass manager regression tests that actually check for pass options, but this is intended to be NFC for the pass pipeline behavior - we only try to use loop info where it would have been used before via caching. Differential Revision: https://reviews.llvm.org/D144274
1 parent 772aa05 commit 43ae4b6

File tree

8 files changed

+35
-24
lines changed

8 files changed

+35
-24
lines changed

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1117,7 +1117,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
11171117
FPM.addPass(LoopLoadEliminationPass());
11181118
}
11191119
// Cleanup after the loop optimization passes.
1120-
FPM.addPass(InstCombinePass());
1120+
FPM.addPass(InstCombinePass(InstCombineOptions().setUseLoopInfo(true)));
11211121

11221122
if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
11231123
ExtraVectorPassManager ExtraPasses;
@@ -1129,7 +1129,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
11291129
// dead (or speculatable) control flows or more combining opportunities.
11301130
ExtraPasses.addPass(EarlyCSEPass());
11311131
ExtraPasses.addPass(CorrelatedValuePropagationPass());
1132-
ExtraPasses.addPass(InstCombinePass());
1132+
ExtraPasses.addPass(
1133+
InstCombinePass(InstCombineOptions().setUseLoopInfo(true)));
11331134
LoopPassManager LPM;
11341135
LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
11351136
/*AllowSpeculation=*/true));
@@ -1203,7 +1204,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
12031204
// or SimplifyCFG passes scheduled after us, that would cleanup
12041205
// the CFG mess this may have created if allowed to modify CFG, so forbid that.
12051206
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1206-
FPM.addPass(InstCombinePass());
1207+
FPM.addPass(InstCombinePass(InstCombineOptions().setUseLoopInfo(true)));
12071208
FPM.addPass(
12081209
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
12091210
FPM.addPass(createFunctionToLoopPassAdaptor(
@@ -1217,7 +1218,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
12171218
FPM.addPass(AlignmentFromAssumptionsPass());
12181219

12191220
if (IsFullLTO)
1220-
FPM.addPass(InstCombinePass());
1221+
FPM.addPass(InstCombinePass(InstCombineOptions().setUseLoopInfo(true)));
12211222
}
12221223

12231224
ModulePassManager

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4651,10 +4651,9 @@ PreservedAnalyses InstCombinePass::run(Function &F,
46514651
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
46524652
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
46534653

4654-
// TODO: Only use LoopInfo when the option is set. This requires that the
4655-
// callers in the pass pipeline explicitly set the option.
4656-
auto *LI = AM.getCachedResult<LoopAnalysis>(F);
4657-
if (!LI && Options.UseLoopInfo)
4654+
// Only use LoopInfo when the option is set by callers.
4655+
LoopInfo *LI = nullptr;
4656+
if (Options.UseLoopInfo)
46584657
LI = &AM.getResult<LoopAnalysis>(F);
46594658

46604659
auto *AA = &AM.getResult<AAManager>(F);

llvm/test/Transforms/InstCombine/constant-fold-gep.ll

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt < %s -passes='require<loops>,instcombine' -S | FileCheck %s
2+
; RUN: opt < %s -passes='instcombine' -S | FileCheck %s --check-prefixes=CHECK,NOLOOPINFO
3+
; RUN: opt < %s -passes='instcombine<use-loop-info>' -S | FileCheck %s --check-prefixes=CHECK,LOOPINFO
4+
35
target datalayout = "E-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
46

57
; Constant folding should fix notionally out-of-bounds indices
@@ -158,13 +160,22 @@ define ptr @gep_plus_addr_sub_self(i64 %addr) {
158160
}
159161

160162
define ptr @gep_plus_addr_sub_self_in_loop() {
161-
; CHECK-LABEL: @gep_plus_addr_sub_self_in_loop(
162-
; CHECK-NEXT: br label [[LOOP:%.*]]
163-
; CHECK: loop:
164-
; CHECK-NEXT: [[ADDR:%.*]] = call i64 @get.i64()
165-
; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr getelementptr (i8, ptr @g, i64 sub (i64 0, i64 ptrtoint (ptr @g to i64))), i64 [[ADDR]]
166-
; CHECK-NEXT: call void @use.ptr(ptr [[P2]])
167-
; CHECK-NEXT: br label [[LOOP]]
163+
; NOLOOPINFO-LABEL: @gep_plus_addr_sub_self_in_loop(
164+
; NOLOOPINFO-NEXT: br label [[LOOP:%.*]]
165+
; NOLOOPINFO: loop:
166+
; NOLOOPINFO-NEXT: [[ADDR:%.*]] = call i64 @get.i64()
167+
; NOLOOPINFO-NEXT: [[P1:%.*]] = getelementptr i8, ptr @g, i64 [[ADDR]]
168+
; NOLOOPINFO-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P1]], i64 sub (i64 0, i64 ptrtoint (ptr @g to i64))
169+
; NOLOOPINFO-NEXT: call void @use.ptr(ptr [[P2]])
170+
; NOLOOPINFO-NEXT: br label [[LOOP]]
171+
;
172+
; LOOPINFO-LABEL: @gep_plus_addr_sub_self_in_loop(
173+
; LOOPINFO-NEXT: br label [[LOOP:%.*]]
174+
; LOOPINFO: loop:
175+
; LOOPINFO-NEXT: [[ADDR:%.*]] = call i64 @get.i64()
176+
; LOOPINFO-NEXT: [[P2:%.*]] = getelementptr i8, ptr getelementptr (i8, ptr @g, i64 sub (i64 0, i64 ptrtoint (ptr @g to i64))), i64 [[ADDR]]
177+
; LOOPINFO-NEXT: call void @use.ptr(ptr [[P2]])
178+
; LOOPINFO-NEXT: br label [[LOOP]]
168179
;
169180
%p.int = ptrtoint ptr @g to i64
170181
%p.int.neg = sub i64 0, %p.int

llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -passes=loop-vectorize,instcombine,simplifycfg -simplifycfg-require-and-preserve-domtree=1 -tail-predication=enabled < %s -S -o - | FileCheck %s
2+
; RUN: opt -passes='loop-vectorize,instcombine<use-loop-info>,simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -tail-predication=enabled < %s -S -o - | FileCheck %s
33

44
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
55
target triple = "thumbv8.1m.main-arm-none-eabi"

llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -mcpu=skx -S -passes=loop-vectorize,instcombine,simplifycfg -simplifycfg-require-and-preserve-domtree=1 -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses < %s | FileCheck %s -check-prefix=DISABLED_MASKED_STRIDED
3-
; RUN: opt -mcpu=skx -S -passes=loop-vectorize,instcombine,simplifycfg -simplifycfg-require-and-preserve-domtree=1 -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses < %s | FileCheck %s -check-prefix=ENABLED_MASKED_STRIDED
2+
; RUN: opt -mcpu=skx -S -passes='loop-vectorize,instcombine<use-loop-info>,simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses < %s | FileCheck %s -check-prefix=DISABLED_MASKED_STRIDED
3+
; RUN: opt -mcpu=skx -S -passes='loop-vectorize,instcombine<use-loop-info>,simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses < %s | FileCheck %s -check-prefix=ENABLED_MASKED_STRIDED
44

55
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
66
target triple = "i386-unknown-linux-gnu"

llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -mcpu=skx -S -passes='loop-vectorize,instcombine,simplifycfg,loop-mssa(licm)' -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s -check-prefix=DISABLED_MASKED_STRIDED
3-
; RUN: opt -mcpu=skx -S -passes='loop-vectorize,instcombine,simplifycfg,loop-mssa(licm)' -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s -check-prefix=ENABLED_MASKED_STRIDED
2+
; RUN: opt -mcpu=skx -S -passes='loop-vectorize,instcombine<use-loop-info>,simplifycfg,loop-mssa(licm)' -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s -check-prefix=DISABLED_MASKED_STRIDED
3+
; RUN: opt -mcpu=skx -S -passes='loop-vectorize,instcombine<use-loop-info>,simplifycfg,loop-mssa(licm)' -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s -check-prefix=ENABLED_MASKED_STRIDED
44

55
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
66
target triple = "x86_64-unknown-linux-gnu"

llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; REQUIRES: asserts
2-
; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s
3-
; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s --check-prefix=INTER
2+
; RUN: opt -opaque-pointers=0 < %s -passes='loop-vectorize,instcombine<use-loop-info>' -force-vector-width=4 -force-vector-interleave=1 -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s
3+
; RUN: opt -opaque-pointers=0 < %s -passes='loop-vectorize,instcombine<use-loop-info>' -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s --check-prefix=INTER
44

55
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
66

llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -opaque-pointers=0 -S -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s
2+
; RUN: opt -opaque-pointers=0 -S -passes='loop-vectorize,instcombine<use-loop-info>' -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s
33

44
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
55

0 commit comments

Comments
 (0)