Skip to content

Commit a135822

Browse files
zdukaPetr Maj
andauthored
Improvements to RS4GC BDV Algorithm (#69795)
Previously, after the algorithm fixpointed, the state was manually patched by emitting BDVs for EE instructions earlier, while marking some (but not all) vector and vector<->scalar instructions as conflict. This causes issues as not all instructions that required BDVs had them emitted and due to after-fixpoint patching, the extra BDVs did not propagate to their users. This change fixes both by rewriting the logic for BDV insertion & patching. Instead of inserting the BDV for EE earlier, it merely marks every EE instruction as a conflict. The two phase insertion algorithm (first insert empty instructions and patch the BDVState, then actually connect the BDV instructions to their input bases) then ensures correct propagation to all its users. Furthermore the shufflevector instruction as well as all instances of IE instruction are conservatively marked as conflicts as well, fixing the second problem. This change does not fix the handling of constant values and vectors in the BDV. --------- Co-authored-by: Petr Maj <[email protected]>
1 parent 1f54ef7 commit a135822

File tree

5 files changed

+148
-26
lines changed

5 files changed

+148
-26
lines changed

llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp

Lines changed: 44 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -993,7 +993,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
993993
NewState.meet(OpState);
994994
});
995995

996-
BDVState OldState = States[BDV];
996+
BDVState OldState = Pair.second;
997997
if (OldState != NewState) {
998998
Progress = true;
999999
States[BDV] = NewState;
@@ -1012,8 +1012,44 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
10121012
}
10131013
#endif
10141014

1015-
// Handle all instructions that have a vector BDV, but the instruction itself
1016-
// is of scalar type.
1015+
// Even though we have identified a concrete base (or a conflict) for all live
1016+
// pointers at this point, there are cases where the base is of an
1017+
// incompatible type compared to the original instruction. We conservatively
1018+
// mark those as conflicts to ensure that corresponding BDVs will be generated
1019+
// in the next steps.
1020+
1021+
// this is a rather explicit check for all cases where we should mark the
1022+
// state as a conflict to force the latter stages of the algorithm to emit
1023+
// the BDVs.
1024+
// TODO: in many cases the instructions emited for the conflicting states
1025+
// will be identical to the I itself (if the I's operate on their BDVs
1026+
// themselves). We should expoit this, but can't do it here since it would
1027+
// break the invariant about the BDVs not being known to be a base.
1028+
// TODO: the code also does not handle constants at all - the algorithm relies
1029+
// on all constants having the same BDV and therefore constant-only insns
1030+
// will never be in conflict, but this check is ignored here. If the
1031+
// constant conflicts will be to BDVs themselves, they will be identical
1032+
// instructions and will get optimized away (as in the above TODO)
1033+
auto MarkConflict = [&](Instruction *I, Value *BaseValue) {
1034+
// II and EE mixes vector & scalar so is always a conflict
1035+
if (isa<InsertElementInst>(I) || isa<ExtractElementInst>(I))
1036+
return true;
1037+
// Shuffle vector is always a conflict as it creates new vector from
1038+
// existing ones.
1039+
if (isa<ShuffleVectorInst>(I))
1040+
return true;
1041+
// Any instructions where the computed base type differs from the
1042+
// instruction type. An example is where an extract instruction is used by a
1043+
// select. Here the select's BDV is a vector (because of extract's BDV),
1044+
// while the select itself is a scalar type. Note that the IE and EE
1045+
// instruction check is not fully subsumed by the vector<->scalar check at
1046+
// the end, this is due to the BDV algorithm being ignorant of BDV types at
1047+
// this junction.
1048+
if (!areBothVectorOrScalar(BaseValue, I))
1049+
return true;
1050+
return false;
1051+
};
1052+
10171053
for (auto Pair : States) {
10181054
Instruction *I = cast<Instruction>(Pair.first);
10191055
BDVState State = Pair.second;
@@ -1026,30 +1062,13 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
10261062
"why did it get added?");
10271063
assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
10281064

1029-
if (!State.isBase() || !isa<VectorType>(BaseValue->getType()))
1065+
// since we only mark vec-scalar insns as conflicts in the pass, our work is
1066+
// done if the instruction already conflicts
1067+
if (State.isConflict())
10301068
continue;
1031-
// extractelement instructions are a bit special in that we may need to
1032-
// insert an extract even when we know an exact base for the instruction.
1033-
// The problem is that we need to convert from a vector base to a scalar
1034-
// base for the particular indice we're interested in.
1035-
if (isa<ExtractElementInst>(I)) {
1036-
auto *EE = cast<ExtractElementInst>(I);
1037-
// TODO: In many cases, the new instruction is just EE itself. We should
1038-
// exploit this, but can't do it here since it would break the invariant
1039-
// about the BDV not being known to be a base.
1040-
auto *BaseInst = ExtractElementInst::Create(
1041-
State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE);
1042-
BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
1043-
States[I] = BDVState(I, BDVState::Base, BaseInst);
1044-
setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases);
1045-
} else if (!isa<VectorType>(I->getType())) {
1046-
// We need to handle cases that have a vector base but the instruction is
1047-
// a scalar type (these could be phis or selects or any instruction that
1048-
// are of scalar type, but the base can be a vector type). We
1049-
// conservatively set this as conflict. Setting the base value for these
1050-
// conflicts is handled in the next loop which traverses States.
1069+
1070+
if (MarkConflict(I, BaseValue))
10511071
States[I] = BDVState(I, BDVState::Conflict);
1052-
}
10531072
}
10541073

10551074
#ifndef NDEBUG

llvm/test/Transforms/RewriteStatepointsForGC/base-vector.ll

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,3 +352,36 @@ entry:
352352
call void @use_vec(<4 x ptr addrspace(1)> %vec)
353353
ret void
354354
}
355+
356+
define i32 @test13() gc "statepoint-example" {
357+
; CHECK-LABEL: define i32 @test13() gc "statepoint-example" {
358+
; CHECK-NEXT: bb:
359+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 948, i64 896>
360+
; CHECK-NEXT: [[SHUFFLEVECTOR_BASE:%.*]] = shufflevector <2 x ptr addrspace(1)> zeroinitializer, <2 x ptr addrspace(1)> zeroinitializer, <4 x i32> <i32 0, i32 3, i32 1, i32 2>, !is_base_value !0
361+
; CHECK-NEXT: [[SHUFFLEVECTOR:%.*]] = shufflevector <2 x ptr addrspace(1)> zeroinitializer, <2 x ptr addrspace(1)> [[GETELEMENTPTR]], <4 x i32> <i32 0, i32 3, i32 1, i32 2>
362+
; CHECK-NEXT: br label [[BB1:%.*]]
363+
; CHECK: bb1:
364+
; CHECK-NEXT: [[PHI_BASE:%.*]] = phi <4 x ptr addrspace(1)> [ [[SHUFFLEVECTOR_BASE]], [[BB:%.*]] ], [ zeroinitializer, [[BB1]] ], !is_base_value !0
365+
; CHECK-NEXT: [[PHI:%.*]] = phi <4 x ptr addrspace(1)> [ [[SHUFFLEVECTOR]], [[BB]] ], [ zeroinitializer, [[BB1]] ]
366+
; CHECK-NEXT: [[EXTRACTELEMENT_BASE:%.*]] = extractelement <4 x ptr addrspace(1)> [[PHI_BASE]], i32 0, !is_base_value !0
367+
; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(1)> [[PHI]], i32 0
368+
; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(i32 ()) @spam, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(ptr addrspace(1) [[EXTRACTELEMENT]], ptr addrspace(1) [[EXTRACTELEMENT_BASE]]) ]
369+
; CHECK-NEXT: [[EXTRACTELEMENT_RELOCATED:%.*]] = call coldcc ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token [[STATEPOINT_TOKEN]], i32 1, i32 0)
370+
; CHECK-NEXT: [[EXTRACTELEMENT_BASE_RELOCATED:%.*]] = call coldcc ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token [[STATEPOINT_TOKEN]], i32 1, i32 1)
371+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[EXTRACTELEMENT_RELOCATED]], align 4
372+
; CHECK-NEXT: br label [[BB1]]
373+
;
374+
bb:
375+
%getelementptr = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 948, i64 896>
376+
%shufflevector = shufflevector <2 x ptr addrspace(1)> zeroinitializer, <2 x ptr addrspace(1)> %getelementptr, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
377+
br label %bb1
378+
379+
bb1: ; preds = %bb1, %bb
380+
%phi = phi <4 x ptr addrspace(1)> [ %shufflevector, %bb ], [ zeroinitializer, %bb1 ]
381+
%extractelement = extractelement <4 x ptr addrspace(1)> %phi, i32 0
382+
%call = call i32 @spam()
383+
%load = load i32, ptr addrspace(1) %extractelement, align 4
384+
br label %bb1
385+
}
386+
387+
declare i32 @spam() gc "statepoint-example"

llvm/test/Transforms/RewriteStatepointsForGC/constants.ll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,3 +262,16 @@ entry:
262262
; CHECK-DAG: call {{.*}}gc.relocate{{.*}}(%val.base, %val)
263263
ret <2 x ptr addrspace(1)> %val
264264
}
265+
266+
define ptr addrspace(1) @test14() gc "statepoint-example" {
267+
; CHECK-LABEL: @test14
268+
; FIXME: this is a extract element of constant vector and as such should not be relocated, but our current code relocates it since the extractelement will conservatively be marked as conflict during the BDV insertion algorithm. In many cases the extra instruction will be identical to the one already present and will be removed by later phases.
269+
entry:
270+
%val = extractelement <2 x ptr addrspace(1)> <ptr addrspace(1) inttoptr (i64 5 to ptr addrspace(1)),
271+
ptr addrspace(1) inttoptr (i64 15 to ptr addrspace(1))>, i32 0
272+
; CHECK: gc.statepoint
273+
call void @foo() [ "deopt"() ]
274+
; CHECK-DAG: call {{.*}}gc.relocate{{.*}}(%val.base, %val.base)
275+
; CHECK-DAG: call {{.*}}gc.relocate{{.*}}(%val.base, %val)
276+
ret ptr addrspace(1) %val
277+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2+
; RUN: opt < %s -passes=rewrite-statepoints-for-gc -S 2>&1 | FileCheck %s
3+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
4+
target triple = "x86_64-unknown-linux-gnu"
5+
6+
define void @barney() gc "statepoint-example" {
7+
; CHECK-LABEL: define void @barney() gc "statepoint-example" {
8+
; CHECK-NEXT: bb:
9+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
10+
; CHECK-NEXT: [[EXTRACTELEMENT_BASE:%.*]] = extractelement <2 x ptr addrspace(1)> zeroinitializer, i32 0, !is_base_value !0
11+
; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <2 x ptr addrspace(1)> [[GETELEMENTPTR]], i32 0
12+
; CHECK-NEXT: [[INSERTELEMENT_BASE:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[EXTRACTELEMENT_BASE]], i32 0, !is_base_value !0
13+
; CHECK-NEXT: [[INSERTELEMENT:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[EXTRACTELEMENT]], i32 0
14+
; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(i8 ()) @foo, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(<2 x ptr addrspace(1)> [[INSERTELEMENT]], <2 x ptr addrspace(1)> [[INSERTELEMENT_BASE]]) ]
15+
; CHECK-NEXT: [[INSERTELEMENT_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 0)
16+
; CHECK-NEXT: [[INSERTELEMENT_BASE_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 1)
17+
; CHECK-NEXT: [[EXTRACTELEMENT1:%.*]] = extractelement <2 x ptr addrspace(1)> [[INSERTELEMENT_RELOCATED]], i32 0
18+
; CHECK-NEXT: ret void
19+
;
20+
bb:
21+
%getelementptr = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
22+
%extractelement = extractelement <2 x ptr addrspace(1)> %getelementptr, i32 0
23+
%insertelement = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) %extractelement, i32 0
24+
%call = call i8 @foo()
25+
%extractelement1 = extractelement <2 x ptr addrspace(1)> %insertelement, i32 0
26+
ret void
27+
}
28+
29+
30+
; same as above, but ensures that transitive uses of insertelement emit proper code as well
31+
define void @bart() gc "statepoint-example" {
32+
; CHECK-LABEL: define void @bart() gc "statepoint-example" {
33+
; CHECK-NEXT: always_continue:
34+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
35+
; CHECK-NEXT: [[BASE_EE:%.*]] = extractelement <2 x ptr addrspace(1)> zeroinitializer, i32 0, !is_base_value !0
36+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x ptr addrspace(1)> [[TMP0]], i32 0
37+
; CHECK-NEXT: [[BASE_IE:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[BASE_EE]], i32 0, !is_base_value !0
38+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[TMP1]], i32 0
39+
; CHECK-NEXT: [[OTHER_BASE:%.*]] = insertelement <2 x ptr addrspace(1)> [[BASE_IE]], ptr addrspace(1) [[BASE_EE]], i32 0, !is_base_value !0
40+
; CHECK-NEXT: [[OTHER:%.*]] = insertelement <2 x ptr addrspace(1)> [[TMP2]], ptr addrspace(1) [[TMP1]], i32 0
41+
; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(i8 ()) @foo, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(<2 x ptr addrspace(1)> [[OTHER]], <2 x ptr addrspace(1)> [[OTHER_BASE]]) ]
42+
; CHECK-NEXT: [[OTHER_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 0)
43+
; CHECK-NEXT: [[OTHER_BASE_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 1)
44+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x ptr addrspace(1)> [[OTHER_RELOCATED]], i32 0
45+
; CHECK-NEXT: ret void
46+
;
47+
always_continue:
48+
%0 = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
49+
%1 = extractelement <2 x ptr addrspace(1)> %0, i32 0
50+
%2 = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) %1, i32 0
51+
%other = insertelement <2 x ptr addrspace(1)> %2, ptr addrspace(1) %1, i32 0
52+
%3 = call i8 @foo()
53+
%4 = extractelement <2 x ptr addrspace(1)> %other, i32 0
54+
ret void
55+
}
56+
57+
declare i8 @foo()

llvm/test/Transforms/RewriteStatepointsForGC/vector-bitcast.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ define i32 @test() gc "statepoint-example" {
1414
entry:
1515
; CHECK-LABEL: entry
1616
; CHECK: %bc = bitcast
17-
; CHECK: %[[p1:[A-Za-z0-9_]+]] = extractelement
17+
; CHECK: %[[p1:[A-Za-z0-9_.]+]] = extractelement
1818
; CHECK: %[[p2:[A-Za-z0-9_]+]] = extractelement
1919
; CHECK: llvm.experimental.gc.statepoint
2020
; CHECK: %[[p2]].relocated = {{.+}} @llvm.experimental.gc.relocate

0 commit comments

Comments
 (0)