[RS4GC] Handle uses of extractelement for conversion from vector to scalar base

annamthomas · annamthomas · commit 59029b9eef23 · 2020-03-06T16:28:49.000-05:00
As mentioned in the comments, extractelement is special since we actually want a scalar base for the element we extracted from the vector (i.e. not a vector base). This same logic should apply to uses of the extractelement, such as phis and selects, which have the same BDV as the extractelement. However, for these uses we conservatively mark the BDV state as conflict, since the new base instruction created for the extractelement does not always dominate these uses. The added testcase showcases the problem where the BDV identification chokes on the incorrect cast from vector to scalar for the phi use of extractelement. Tests-Run: make check, internal fuzzer testing Reviewers: reames, skatkov, dantrushin Reviewed-By: dantrushin Differential Revision: https://reviews.llvm.org/D75704
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -926,8 +926,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
   }
 #endif
 
-  // Insert Phis for all conflicts
-  // TODO: adjust naming patterns to avoid this order of iteration dependency
+  // Handle extractelement instructions and their uses.
   for (auto Pair : States) {
     Instruction *I = cast<Instruction>(Pair.first);
     BDVState State = Pair.second;
@@ -938,17 +937,40 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
     // insert an extract even when we know an exact base for the instruction.
     // The problem is that we need to convert from a vector base to a scalar
     // base for the particular indice we're interested in.
-    if (State.isBase() && isa<ExtractElementInst>(I) &&
-        isa<VectorType>(State.getBaseValue()->getType())) {
-      auto *EE = cast<ExtractElementInst>(I);
-      // TODO: In many cases, the new instruction is just EE itself.  We should
-      // exploit this, but can't do it here since it would break the invariant
-      // about the BDV not being known to be a base.
-      auto *BaseInst = ExtractElementInst::Create(
-          State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE);
-      BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
-      States[I] = BDVState(BDVState::Base, BaseInst);
+    if (!State.isBase() || !isa<ExtractElementInst>(I) ||
+        !isa<VectorType>(State.getBaseValue()->getType()))
+      continue;
+    auto *EE = cast<ExtractElementInst>(I);
+    // TODO: In many cases, the new instruction is just EE itself.  We should
+    // exploit this, but can't do it here since it would break the invariant
+    // about the BDV not being known to be a base.
+    auto *BaseInst = ExtractElementInst::Create(
+        State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE);
+    BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
+    States[I] = BDVState(BDVState::Base, BaseInst);
+
+    // We need to handle uses of the extractelement that have the same vector
+    // base as well but the use is a scalar type. Since we cannot reuse the
+    // same BaseInst above (may not satisfy property that base pointer should
+    // always dominate derived pointer), we conservatively set this as conflict.
+    // Setting the base value for these conflicts is handled in the next loop
+    // which traverses States.
+    for (User *U : I->users()) {
+      auto *UseI = dyn_cast<Instruction>(U);
+      if (!UseI || !States.count(UseI))
+        continue;
+      if (!isa<VectorType>(UseI->getType()) && States[UseI] == State)
+        States[UseI] = BDVState(BDVState::Conflict);
     }
+  }
+
+  // Insert Phis for all conflicts
+  // TODO: adjust naming patterns to avoid this order of iteration dependency
+  for (auto Pair : States) {
+    Instruction *I = cast<Instruction>(Pair.first);
+    BDVState State = Pair.second;
+    assert(!isKnownBaseResult(I) && "why did it get added?");
+    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
 
     // Since we're joining a vector and scalar base, they can never be the
     // same.  As a result, we should always see insert element having reached
diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll b/llvm/test/Transforms/RewriteStatepointsForGC/scalar-base-vector.ll
@@ -141,3 +141,56 @@ entry:
   %ptr = extractelement <2 x i32 addrspace(1)*> %vec, i32 0
   ret i32 addrspace(1)* %ptr
 }
+
+define void @test6() gc "statepoint-example" {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[HEADER:%.*]]
+; CHECK:       header:
+; CHECK-NEXT:    [[TMP_BASE:%.*]] = phi i8 addrspace(1)* [ [[TMP6_BASE:%.*]], [[LATCH:%.*]] ], [ null, [[BB:%.*]] ], !is_base_value !0
+; CHECK-NEXT:    [[TMP:%.*]] = phi i8 addrspace(1)* [ [[TMP6:%.*]], [[LATCH]] ], [ undef, [[BB]] ]
+; CHECK-NEXT:    br label [[BB10:%.*]]
+; CHECK:       bb10:
+; CHECK-NEXT:    [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @spam, i32 0, i32 0, i32 0, i32 1, i8 addrspace(1)* [[TMP]], i8 addrspace(1)* [[TMP]], i8 addrspace(1)* [[TMP_BASE]])
+; CHECK-NEXT:    [[TMP_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN]], i32 9, i32 8)
+; CHECK-NEXT:    [[TMP_BASE_RELOCATED:%.*]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[STATEPOINT_TOKEN]], i32 9, i32 9)
+; CHECK-NEXT:    br label [[BB25:%.*]]
+; CHECK:       bb25:
+; CHECK-NEXT:    [[STATEPOINT_TOKEN1:%.*]] = call token (i64, i32, <2 x i8 addrspace(1)*> ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_v2p1i8f(i64 2882400000, i32 0, <2 x i8 addrspace(1)*> ()* @baz, i32 0, i32 0, i32 0, i32 0)
+; CHECK-NEXT:    [[TMP262:%.*]] = call <2 x i8 addrspace(1)*> @llvm.experimental.gc.result.v2p1i8(token [[STATEPOINT_TOKEN1]])
+; CHECK-NEXT:    [[BASE_EE:%.*]] = extractelement <2 x i8 addrspace(1)*> [[TMP262]], i32 0, !is_base_value !0
+; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <2 x i8 addrspace(1)*> [[TMP262]], i32 0
+; CHECK-NEXT:    br i1 undef, label [[BB7:%.*]], label [[LATCH]]
+; CHECK:       bb7:
+; CHECK-NEXT:    br label [[LATCH]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[TMP6_BASE]] = phi i8 addrspace(1)* [ [[BASE_EE]], [[BB25]] ], [ [[BASE_EE]], [[BB7]] ], !is_base_value !0
+; CHECK-NEXT:    [[TMP6]] = phi i8 addrspace(1)* [ [[TMP27]], [[BB25]] ], [ [[TMP27]], [[BB7]] ]
+; CHECK-NEXT:    br label [[HEADER]]
+;
+bb:
+  br label %header
+
+header:                                              ; preds = %latch, %bb
+  %tmp = phi i8 addrspace(1)* [ %tmp6, %latch ], [ undef, %bb ]
+  br label %bb10
+
+bb10:                                             ; preds = %header
+  call void @spam() [ "deopt"(i8 addrspace(1)* %tmp) ]
+  br label %bb25
+
+bb25:                                             ; preds = %bb10
+  %tmp26 = call <2 x i8 addrspace(1)*> @baz()
+  %tmp27 = extractelement <2 x i8 addrspace(1)*> %tmp26, i32 0
+  br i1 undef, label %bb7, label %latch
+
+bb7:                                              ; preds = %bb25
+  br label %latch
+
+latch:                                              ; preds = %bb25, %bb7
+  %tmp6 = phi i8 addrspace(1)* [ %tmp27, %bb25 ], [ %tmp27, %bb7 ]
+  br label %header
+}
+
+declare void @spam()
+declare <2 x i8 addrspace(1)*> @baz()