Skip to content

Commit 3c246ef

Browse files
zdukaPetr Maj
andauthored
True fixpoint algorithm in RS4GC (#75826)
Fixes a problem where the explicit marking of various instructions as conflicts did not propagate to their users. An example of this: ``` %getelementptr = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 888, i64 908> %shufflevector = shufflevector <2 x ptr addrspace(1)> %getelementptr, <2 x ptr addrspace(1)> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %shufflevector1 = shufflevector <2 x ptr addrspace(1)> %getelementptr, <2 x ptr addrspace(1)> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %select = select i1 false, <4 x ptr addrspace(1)> %shufflevector1, <4 x ptr addrspace(1)> %shufflevector ``` Here the vector shuffles will get single base (gep) during the fixpoint and therefore the select will get a known base (gep). We later mark the shuffles as conflicts, but this does not change the base of select. This gets caught by an assert where the select's type will differ from its (wrong) base later on. The solution in the MR is to move the explicit conflict marking into the fixpoint phase. --------- Co-authored-by: Petr Maj <[email protected]>
1 parent bfef161 commit 3c246ef

File tree

2 files changed

+79
-49
lines changed

2 files changed

+79
-49
lines changed

llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp

Lines changed: 48 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -967,6 +967,44 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
967967
return BDVState(BaseValue, BDVState::Base, BaseValue);
968968
};
969969

970+
// Even though we have identified a concrete base (or a conflict) for all live
971+
// pointers at this point, there are cases where the base is of an
972+
// incompatible type compared to the original instruction. We conservatively
973+
// mark those as conflicts to ensure that corresponding BDVs will be generated
974+
// in the next steps.
975+
976+
// this is a rather explicit check for all cases where we should mark the
977+
// state as a conflict to force the latter stages of the algorithm to emit
978+
// the BDVs.
979+
// TODO: in many cases the instructions emited for the conflicting states
980+
// will be identical to the I itself (if the I's operate on their BDVs
981+
// themselves). We should exploit this, but can't do it here since it would
982+
// break the invariant about the BDVs not being known to be a base.
983+
// TODO: the code also does not handle constants at all - the algorithm relies
984+
// on all constants having the same BDV and therefore constant-only insns
985+
// will never be in conflict, but this check is ignored here. If the
986+
// constant conflicts will be to BDVs themselves, they will be identical
987+
// instructions and will get optimized away (as in the above TODO)
988+
auto MarkConflict = [&](Instruction *I, Value *BaseValue) {
989+
// II and EE mixes vector & scalar so is always a conflict
990+
if (isa<InsertElementInst>(I) || isa<ExtractElementInst>(I))
991+
return true;
992+
// Shuffle vector is always a conflict as it creates new vector from
993+
// existing ones.
994+
if (isa<ShuffleVectorInst>(I))
995+
return true;
996+
// Any instructions where the computed base type differs from the
997+
// instruction type. An example is where an extract instruction is used by a
998+
// select. Here the select's BDV is a vector (because of extract's BDV),
999+
// while the select itself is a scalar type. Note that the IE and EE
1000+
// instruction check is not fully subsumed by the vector<->scalar check at
1001+
// the end, this is due to the BDV algorithm being ignorant of BDV types at
1002+
// this junction.
1003+
if (!areBothVectorOrScalar(BaseValue, I))
1004+
return true;
1005+
return false;
1006+
};
1007+
9701008
bool Progress = true;
9711009
while (Progress) {
9721010
#ifndef NDEBUG
@@ -993,6 +1031,14 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
9931031
NewState.meet(OpState);
9941032
});
9951033

1034+
// if the instruction has known base, but should in fact be marked as
1035+
// conflict because of incompatible in/out types, we mark it as such
1036+
// ensuring that it will propagate through the fixpoint iteration
1037+
auto I = cast<Instruction>(BDV);
1038+
auto BV = NewState.getBaseValue();
1039+
if (BV && MarkConflict(I, BV))
1040+
NewState = BDVState(I, BDVState::Conflict);
1041+
9961042
BDVState OldState = Pair.second;
9971043
if (OldState != NewState) {
9981044
Progress = true;
@@ -1012,44 +1058,8 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
10121058
}
10131059
#endif
10141060

1015-
// Even though we have identified a concrete base (or a conflict) for all live
1016-
// pointers at this point, there are cases where the base is of an
1017-
// incompatible type compared to the original instruction. We conservatively
1018-
// mark those as conflicts to ensure that corresponding BDVs will be generated
1019-
// in the next steps.
1020-
1021-
// this is a rather explicit check for all cases where we should mark the
1022-
// state as a conflict to force the latter stages of the algorithm to emit
1023-
// the BDVs.
1024-
// TODO: in many cases the instructions emited for the conflicting states
1025-
// will be identical to the I itself (if the I's operate on their BDVs
1026-
// themselves). We should expoit this, but can't do it here since it would
1027-
// break the invariant about the BDVs not being known to be a base.
1028-
// TODO: the code also does not handle constants at all - the algorithm relies
1029-
// on all constants having the same BDV and therefore constant-only insns
1030-
// will never be in conflict, but this check is ignored here. If the
1031-
// constant conflicts will be to BDVs themselves, they will be identical
1032-
// instructions and will get optimized away (as in the above TODO)
1033-
auto MarkConflict = [&](Instruction *I, Value *BaseValue) {
1034-
// II and EE mixes vector & scalar so is always a conflict
1035-
if (isa<InsertElementInst>(I) || isa<ExtractElementInst>(I))
1036-
return true;
1037-
// Shuffle vector is always a conflict as it creates new vector from
1038-
// existing ones.
1039-
if (isa<ShuffleVectorInst>(I))
1040-
return true;
1041-
// Any instructions where the computed base type differs from the
1042-
// instruction type. An example is where an extract instruction is used by a
1043-
// select. Here the select's BDV is a vector (because of extract's BDV),
1044-
// while the select itself is a scalar type. Note that the IE and EE
1045-
// instruction check is not fully subsumed by the vector<->scalar check at
1046-
// the end, this is due to the BDV algorithm being ignorant of BDV types at
1047-
// this junction.
1048-
if (!areBothVectorOrScalar(BaseValue, I))
1049-
return true;
1050-
return false;
1051-
};
1052-
1061+
// since we do the conflict marking as part of the fixpoint iteration this
1062+
// loop only asserts that invariants are met
10531063
for (auto Pair : States) {
10541064
Instruction *I = cast<Instruction>(Pair.first);
10551065
BDVState State = Pair.second;
@@ -1061,14 +1071,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
10611071
(!isKnownBase(I, KnownBases) || !areBothVectorOrScalar(I, BaseValue)) &&
10621072
"why did it get added?");
10631073
assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
1064-
1065-
// since we only mark vec-scalar insns as conflicts in the pass, our work is
1066-
// done if the instruction already conflicts
1067-
if (State.isConflict())
1068-
continue;
1069-
1070-
if (MarkConflict(I, BaseValue))
1071-
States[I] = BDVState(I, BDVState::Conflict);
10721074
}
10731075

10741076
#ifndef NDEBUG

llvm/test/Transforms/RewriteStatepointsForGC/base-pointers.ll

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@ define ptr addrspace(1) @test1(i32 %caller, ptr addrspace(1) %a, ptr addrspace(1
3939
; CHECK-NEXT: br i1 [[C:%.*]], label [[LEFT:%.*]], label [[RIGHT:%.*]]
4040
; CHECK: left:
4141
; CHECK-NEXT: switch i32 [[UNKNOWN:%.*]], label [[RIGHT]] [
42-
; CHECK-NEXT: i32 0, label [[MERGE:%.*]]
43-
; CHECK-NEXT: i32 1, label [[MERGE]]
44-
; CHECK-NEXT: i32 5, label [[MERGE]]
42+
; CHECK-NEXT: i32 0, label [[MERGE:%.*]]
43+
; CHECK-NEXT: i32 1, label [[MERGE]]
44+
; CHECK-NEXT: i32 5, label [[MERGE]]
4545
; CHECK-NEXT: ]
4646
; CHECK: right:
4747
; CHECK-NEXT: br label [[MERGE]]
@@ -221,4 +221,32 @@ next:
221221
ret ptr addrspace(1) %bdv
222222
}
223223

224+
declare i32 @snork()
225+
226+
define void @test7() gc "statepoint-example" {
227+
; CHECK-LABEL: @test7(
228+
; CHECK-NEXT: bb:
229+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 888, i64 908>
230+
; CHECK-NEXT: [[SHUFFLEVECTOR_BASE:%.*]] = shufflevector <2 x ptr addrspace(1)> zeroinitializer, <2 x ptr addrspace(1)> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>, !is_base_value !0
231+
; CHECK-NEXT: [[SHUFFLEVECTOR:%.*]] = shufflevector <2 x ptr addrspace(1)> [[GETELEMENTPTR]], <2 x ptr addrspace(1)> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
232+
; CHECK-NEXT: [[SHUFFLEVECTOR1_BASE:%.*]] = shufflevector <2 x ptr addrspace(1)> zeroinitializer, <2 x ptr addrspace(1)> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>, !is_base_value !0
233+
; CHECK-NEXT: [[SHUFFLEVECTOR1:%.*]] = shufflevector <2 x ptr addrspace(1)> [[GETELEMENTPTR]], <2 x ptr addrspace(1)> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
234+
; CHECK-NEXT: [[SELECT_BASE:%.*]] = select i1 false, <4 x ptr addrspace(1)> [[SHUFFLEVECTOR1_BASE]], <4 x ptr addrspace(1)> [[SHUFFLEVECTOR_BASE]], !is_base_value !0
235+
; CHECK-NEXT: [[SELECT:%.*]] = select i1 false, <4 x ptr addrspace(1)> [[SHUFFLEVECTOR1]], <4 x ptr addrspace(1)> [[SHUFFLEVECTOR]]
236+
; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(i32 ()) @snork, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(<4 x ptr addrspace(1)> [[SELECT]], <4 x ptr addrspace(1)> [[SELECT_BASE]]) ]
237+
; CHECK-NEXT: [[SELECT_RELOCATED:%.*]] = call coldcc <4 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v4p1(token [[STATEPOINT_TOKEN]], i32 1, i32 0)
238+
; CHECK-NEXT: [[SELECT_BASE_RELOCATED:%.*]] = call coldcc <4 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v4p1(token [[STATEPOINT_TOKEN]], i32 1, i32 1)
239+
; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(1)> [[SELECT_RELOCATED]], i32 0
240+
; CHECK-NEXT: ret void
241+
;
242+
bb:
243+
%getelementptr = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 888, i64 908>
244+
%shufflevector = shufflevector <2 x ptr addrspace(1)> %getelementptr, <2 x ptr addrspace(1)> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
245+
%shufflevector1 = shufflevector <2 x ptr addrspace(1)> %getelementptr, <2 x ptr addrspace(1)> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
246+
%select = select i1 false, <4 x ptr addrspace(1)> %shufflevector1, <4 x ptr addrspace(1)> %shufflevector
247+
%call = call i32 @snork()
248+
%extractelement = extractelement <4 x ptr addrspace(1)> %select, i32 0
249+
ret void
250+
}
251+
224252
declare void @foo()

0 commit comments

Comments
 (0)