Skip to content

Commit a0e9a8d

Browse files
authored
[flang][NFC] speedup BoxedProcedure for derived types with many components (#86144)
This patch speeds up the compilation time of the example in #76478 (comment) from 2 minutes with my builds to about 2 seconds. MLIR timers showed more than 98% of the time was spend in BoxedProcedure trying to figure out if a type needs to be converted. This is because walking the fir.type members is very expansive for types containing many components and/or components with many sub-components. Increase the caching time of visited types from "the type being visited" to "the whole pass". Use DenseMap since it is not ok anymore to assume this container will only have a few elements.
1 parent f0eb908 commit a0e9a8d

File tree

2 files changed

+30
-9
lines changed

2 files changed

+30
-9
lines changed

flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -69,17 +69,32 @@ class BoxprocTypeRewriter : public mlir::TypeConverter {
6969
return false;
7070
}
7171
if (auto recTy = ty.dyn_cast<RecordType>()) {
72-
if (llvm::is_contained(visitedTypes, recTy))
73-
return false;
72+
auto visited = visitedTypes.find(ty);
73+
if (visited != visitedTypes.end())
74+
return visited->second;
75+
[[maybe_unused]] auto newIt = visitedTypes.try_emplace(ty, false);
76+
assert(newIt.second && "expected ty to not be in the map");
77+
bool wasAlreadyVisitingRecordType = needConversionIsVisitingRecordType;
78+
needConversionIsVisitingRecordType = true;
7479
bool result = false;
75-
visitedTypes.push_back(recTy);
7680
for (auto t : recTy.getTypeList()) {
7781
if (needsConversion(t.second)) {
7882
result = true;
7983
break;
8084
}
8185
}
82-
visitedTypes.pop_back();
86+
// Only keep the result cached if the fir.type visited was a "top-level
87+
// type". Nested types with a recursive reference to the "top-level type"
88+
// may incorrectly have been resolved as not needed conversions because it
89+
// had not been determined yet if the "top-level type" needed conversion.
90+
// This is not an issue to determine the "top-level type" need of
91+
// conversion, but the result should not be kept and later used in other
92+
// contexts.
93+
needConversionIsVisitingRecordType = wasAlreadyVisitingRecordType;
94+
if (needConversionIsVisitingRecordType)
95+
visitedTypes.erase(ty);
96+
else
97+
visitedTypes.find(ty)->second = result;
8398
return result;
8499
}
85100
if (auto boxTy = ty.dyn_cast<BaseBoxType>())
@@ -140,9 +155,7 @@ class BoxprocTypeRewriter : public mlir::TypeConverter {
140155
if (rec.isFinalized())
141156
return rec;
142157
auto it = convertedTypes.try_emplace(ty, rec);
143-
if (!it.second) {
144-
llvm::errs() << "failed\n" << ty << "\n";
145-
}
158+
assert(it.second && "expected ty to not be in the map");
146159
std::vector<RecordType::TypePair> ps = ty.getLenParamList();
147160
std::vector<RecordType::TypePair> cs;
148161
for (auto t : ty.getTypeList()) {
@@ -171,11 +184,12 @@ class BoxprocTypeRewriter : public mlir::TypeConverter {
171184
void setLocation(mlir::Location location) { loc = location; }
172185

173186
private:
174-
llvm::SmallVector<mlir::Type> visitedTypes;
175-
// Map to deal with recursive derived types (avoid infinite loops).
187+
// Maps to deal with recursive derived types (avoid infinite loops).
176188
// Caching is also beneficial for apps with big types (dozens of
177189
// components and or parent types), so the lifetime of the cache
178190
// is the whole pass.
191+
llvm::DenseMap<mlir::Type, bool> visitedTypes;
192+
bool needConversionIsVisitingRecordType = false;
179193
llvm::DenseMap<mlir::Type, mlir::Type> convertedTypes;
180194
mlir::Location loc;
181195
};

flang/test/Fir/boxproc-2.fir

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,10 @@ func.func @very_nested_type(%arg0: !fir.type<t0{t01:!fir.type<t01{t02:!fir.type<
108108
}
109109
// CHECK-LABEL: func.func @very_nested_type(
110110
// CHECK-SAME: !fir.type<t0UnboxProc{t01:!fir.type<t01UnboxProc{t02:!fir.type<t02UnboxProc{t3:!fir.type<t3UnboxProc{t4:!fir.type<t4UnboxProc{t5:!fir.type<t5UnboxProc{t6:!fir.type<t6UnboxProc{t7:!fir.type<t7UnboxProc{t8:!fir.type<t8UnboxProc{t9:!fir.type<t9UnboxProc{t10:!fir.type<t10UnboxProc{t11:!fir.type<t11UnboxProc{t12:!fir.type<t12UnboxProc{t13:!fir.type<t13UnboxProc{t14:!fir.type<t14UnboxProc{t15:!fir.type<t15UnboxProc{t16:!fir.type<t16UnboxProc{t17:!fir.type<t17UnboxProc{t18:!fir.type<t18UnboxProc{t19:!fir.type<t19UnboxProc{b:() -> ()}>}>}>}>}>}>}>}>}>}>}>}>}>}>}>}>}>}>}>}>)
111+
112+
func.func @test_indirect_type_recursion() {
113+
%0 = fir.zero_bits !fir.ptr<!fir.type<t001{x:!fir.ptr<!fir.type<t002{x:!fir.ptr<!fir.type<t001>>}>>,p:!fir.boxproc<() -> ()>}>>
114+
return
115+
}
116+
// CHECK-LABEL: func.func @test_indirect_type_recursion(
117+
// CHECK: fir.zero_bits !fir.ptr<!fir.type<t001UnboxProc{x:!fir.ptr<!fir.type<t002UnboxProc{x:!fir.ptr<!fir.type<t001UnboxProc>>}>>,p:() -> ()}>>

0 commit comments

Comments
 (0)