swiftlang · kubamracek · Sep 21, 2021 · Sep 13, 2021 · aschwaighofer · Sep 20, 2021
@@ -355,6 +355,8 @@ class IRGenOptions {
 
   unsigned VirtualFunctionElimination : 1;
 
+  unsigned WitnessMethodElimination : 1;
+
   /// The number of threads for multi-threaded code generation.
   unsigned NumThreads = 0;
 
@@ -414,6 +416,7 @@ class IRGenOptions {
         DisableRoundTripDebugTypes(false), DisableDebuggerShadowCopies(false),
         DisableConcreteTypeMetadataMangledNameAccessors(false),
         EnableGlobalISel(false), VirtualFunctionElimination(false),
+        WitnessMethodElimination(false),
         CmdArgs(),
         SanitizeCoverage(llvm::SanitizerCoverageOptions()),
         TypeInfoFilter(TypeInfoDumpFilter::All) {}

@@ -495,7 +495,11 @@ def enable_implicit_dynamic : Flag<["-"], "enable-implicit-dynamic">,
 
+// Opt-in frontend flag for LLVM IR Virtual Function Elimination on Swift class
+// vtables. It is tagged HelpHidden in the Flags list below.
 def enable_llvm_vfe : Flag<["-"], "enable-llvm-vfe">,
   Flags<[FrontendOption, NoInteractiveOption, HelpHidden]>,
-  HelpText<"Use LLVM Virtual Function Elimination on Swift class virtual tables">;
+  HelpText<"Use LLVM IR Virtual Function Elimination on Swift class virtual tables">;
+
+// Opt-in frontend flag for LLVM IR Witness Method Elimination on Swift
+// protocol witness tables. When the flag is present, argument parsing sets
+// IRGenOptions::WitnessMethodElimination. Tagged HelpHidden like the VFE flag.
+def enable_llvm_wme : Flag<["-"], "enable-llvm-wme">,
+  Flags<[FrontendOption, NoInteractiveOption, HelpHidden]>,
+  HelpText<"Use LLVM IR Witness Method Elimination on Swift protocol witness tables">;
 
 def disable_previous_implementation_calls_in_dynamic_replacements :
   Flag<["-"], "disable-previous-implementation-calls-in-dynamic-replacements">,

@@ -1908,6 +1908,10 @@ static bool ParseIRGenArgs(IRGenOptions &Opts, ArgList &Args,
     Opts.VirtualFunctionElimination = true;
   }
 
+  if (Args.hasArg(OPT_enable_llvm_wme)) {
+    Opts.WitnessMethodElimination = true;
+  }
+
   // Default to disabling swift async extended frame info on anything but
   // darwin. Other platforms are unlikely to have support for extended frame
   // pointer information.

@@ -2699,6 +2699,8 @@ static llvm::Value *emitVTableSlotLoad(IRGenFunction &IGF, Address slot,
     args.push_back(llvm::ConstantInt::get(IGF.IGM.Int32Ty, 0));
     args.push_back(llvm::MetadataAsValue::get(*IGF.IGM.LLVMContext, typeId));
 
+    // TODO/FIXME: Using @llvm.type.checked.load loses the "invariant" marker
+    // which could mean redundant loads don't get removed.
     llvm::Value *checkedLoad =
         IGF.Builder.CreateCall(checkedLoadIntrinsic, args);
     auto fnPtr = IGF.Builder.CreateExtractValue(checkedLoad, 0);

@@ -309,26 +309,29 @@ llvm::PointerType *IRGenModule::getEnumValueWitnessTablePtrTy() {
                                            "swift.enum_vwtable", true);
 }
 
-/// Load a specific witness from a known table.  The result is
-/// always an i8*.
-llvm::Value *irgen::emitInvariantLoadOfOpaqueWitness(IRGenFunction &IGF,
-                                                     llvm::Value *table,
-                                                     WitnessIndex index,
-                                                     llvm::Value **slotPtr) {
+/// Compute the address of the witness slot at \p index inside \p table.
+///
+/// No load is emitted. \p table must have the witness table pointer type. For
+/// index 0 the table pointer itself is used as the slot; any other index
+/// produces an inbounds GEP. The returned address carries pointer alignment.
+Address irgen::slotForLoadOfOpaqueWitness(IRGenFunction &IGF,
+                                          llvm::Value *table,
+                                          WitnessIndex index) {
   assert(table->getType() == IGF.IGM.WitnessTablePtrTy);
 
   // GEP to the appropriate index, avoiding spurious IR in the trivial case.
   llvm::Value *slot = table;
   if (index.getValue() != 0)
     slot = IGF.Builder.CreateConstInBoundsGEP1_32(
-       table->getType()->getPointerElementType(), table, index.getValue());
+        table->getType()->getPointerElementType(), table, index.getValue());
 
-  if (slotPtr) *slotPtr = slot;
+  return Address(slot, IGF.IGM.getPointerAlignment());
+}
 
-  auto witness =
-    IGF.Builder.CreateLoad(Address(slot, IGF.IGM.getPointerAlignment()));
-  IGF.setInvariantLoad(witness);
-  return witness;
+/// Load a specific witness from a known table.  The result is
+/// always an i8*.
+///
+/// The slot address comes from slotForLoadOfOpaqueWitness and the load is
+/// emitted as an invariant load. If \p slotPtr is non-null, it receives the
+/// address of the slot that was loaded from.
+llvm::Value *irgen::emitInvariantLoadOfOpaqueWitness(IRGenFunction &IGF,
+                                                     llvm::Value *table,
+                                                     WitnessIndex index,
+                                                     llvm::Value **slotPtr) {
+  auto slot = slotForLoadOfOpaqueWitness(IGF, table, index);
+  // Hand the raw slot pointer back to callers that asked for it.
+  if (slotPtr) *slotPtr = slot.getAddress();
+  return IGF.emitInvariantLoad(slot);
 }
 
 /// Load a specific witness from a known table.  The result is

@@ -38,6 +38,11 @@ namespace irgen {
   /// Return the alignment of a fixed buffer.
   Alignment getFixedBufferAlignment(IRGenModule &IGM);
 
+  /// Given a witness table (protocol or value), return the address of the slot
+  /// for one of the witnesses.
+  ///
+  /// Only the address is computed; no load is emitted. The result carries
+  /// pointer alignment, and for index 0 it is the table pointer itself.
+  Address slotForLoadOfOpaqueWitness(IRGenFunction &IGF, llvm::Value *table,
+                                     WitnessIndex index);
+
   /// Given a witness table (protocol or value), load one of the
   /// witnesses.
   ///

@@ -2163,6 +2163,42 @@ static bool isConstantWitnessTable(SILWitnessTable *wt) {
   return true;
 }
 
+/// Annotate the witness table global \p global with the metadata used for
+/// LLVM IR witness method elimination.
+///
+/// Each method entry of \p wt contributes one type-metadata record pairing the
+/// byte offset of its slot with the type identifier of the requirement it
+/// witnesses; entries of any other kind contribute nothing. Afterwards the
+/// vcall visibility of the global is chosen from the table's SIL linkage (as
+/// returned by stripExternalFromLinkage).
+static void addWTableTypeMetadata(IRGenModule &IGM,
+                                  llvm::GlobalVariable *global,
+                                  SILWitnessTable *wt) {
+  using VCallVisibility = llvm::GlobalObject::VCallVisibility;
+
+  auto conformance = wt->getConformance();
+  for (auto entry : wt->getEntries()) {
+    if (entry.getKind() == SILWitnessTable::WitnessKind::Method) {
+      auto requirement = entry.getMethodWitness().Requirement;
+      auto &protoInfo = IGM.getProtocolInfo(conformance->getProtocol(),
+                                            ProtocolInfoKind::Full);
+      auto slotIndex =
+          protoInfo.getFunctionIndex(requirement).forProtocolWitnessTable();
+      // Type metadata is recorded against a byte offset into the table, so
+      // scale the slot index by the pointer size.
+      auto byteOffset = slotIndex.getValue() * IGM.getPointerSize().getValue();
+      global->addTypeMetadata(byteOffset, typeIdForMethod(IGM, requirement));
+    }
+  }
+
+  // Private tables get translation-unit visibility, hidden and shared tables
+  // get linkage-unit visibility, and every other linkage is treated as public.
+  VCallVisibility visibility = VCallVisibility::VCallVisibilityPublic;
+  switch (stripExternalFromLinkage(wt->getLinkage())) {
+  case SILLinkage::Private:
+    visibility = VCallVisibility::VCallVisibilityTranslationUnit;
+    break;
+  case SILLinkage::Hidden:
+  case SILLinkage::Shared:
+    visibility = VCallVisibility::VCallVisibilityLinkageUnit;
+    break;
+  case SILLinkage::Public:
+  default:
+    break;
+  }
+  global->setVCallVisibilityMetadata(visibility);
+}
+
 void IRGenModule::emitSILWitnessTable(SILWitnessTable *wt) {
   // Don't emit a witness table if it is a declaration.
   if (wt->isDeclaration())
@@ -2207,6 +2243,10 @@ void IRGenModule::emitSILWitnessTable(SILWitnessTable *wt) {
     global->setAlignment(
         llvm::MaybeAlign(getWitnessTableAlignment().getValue()));
 
+    if (getOptions().WitnessMethodElimination) {
+      addWTableTypeMetadata(*this, global, wt);
+    }
+
     tableSize = wtableBuilder.getTableSize();
     instantiationFunction = wtableBuilder.buildInstantiationFunction();
   } else {
@@ -3378,6 +3418,35 @@ void irgen::expandTrailingWitnessSignature(IRGenModule &IGM,
   out.push_back(IGM.WitnessTablePtrTy);
 }
 
+/// Load the witness function pointer stored at \p slot.
+///
+/// Without witness method elimination this is a plain invariant load. With it
+/// enabled, the load is routed through @llvm.type.checked.load, tagged with
+/// the type identifier of \p member.
+///
+/// \param wtable the witness table that \p slot belongs to; not read here.
+/// \param member the protocol requirement whose witness is being loaded.
+/// \param slot   address of the witness table entry.
+/// \returns the loaded witness pointer.
+static llvm::Value *emitWTableSlotLoad(IRGenFunction &IGF, llvm::Value *wtable,
+                                       SILDeclRef member, Address slot) {
+  // Not doing LLVM IR WME, can just be a direct load.
+  if (!IGF.IGM.getOptions().WitnessMethodElimination)
+    return IGF.emitInvariantLoad(slot);
+
+  // For LLVM IR WME, go through @llvm.type.checked.load so the call site is
+  // associated with the type identifier of the requirement.
+  auto *typeCheckedLoad = llvm::Intrinsic::getDeclaration(
+      &IGF.IGM.Module, llvm::Intrinsic::type_checked_load);
+  auto slotAsI8Ptr = IGF.Builder.CreateBitCast(slot, IGF.IGM.Int8PtrTy);
+  auto typeId = typeIdForMethod(IGF.IGM, member);
+
+  // Operands of @llvm.type.checked.load, in order:
+  //   1) the target address,
+  //   2) the offset - always 0, the address already points at the slot,
+  //   3) the type identifier computed for the protocol requirement.
+  llvm::Value *callArgs[] = {
+      slotAsI8Ptr.getAddress(),
+      llvm::ConstantInt::get(IGF.IGM.Int32Ty, 0),
+      llvm::MetadataAsValue::get(*IGF.IGM.LLVMContext, typeId),
+  };
+
+  // TODO/FIXME: Using @llvm.type.checked.load loses the "invariant" marker
+  // which could mean redundant loads don't get removed.
+  llvm::Value *loadResult =
+      IGF.Builder.CreateCall(typeCheckedLoad, callArgs);
+  // Element 0 of the result is the loaded pointer.
+  return IGF.Builder.CreateExtractValue(loadResult, 0);
+}
+
 FunctionPointer irgen::emitWitnessMethodValue(IRGenFunction &IGF,
                                               llvm::Value *wtable,
                                               SILDeclRef member) {
@@ -3389,10 +3458,9 @@ FunctionPointer irgen::emitWitnessMethodValue(IRGenFunction &IGF,
   // Find the witness we're interested in.
   auto &fnProtoInfo = IGF.IGM.getProtocolInfo(proto, ProtocolInfoKind::Full);
   auto index = fnProtoInfo.getFunctionIndex(member);
-  llvm::Value *slot;
-  llvm::Value *witnessFnPtr =
-    emitInvariantLoadOfOpaqueWitness(IGF, wtable,
-                                     index.forProtocolWitnessTable(), &slot);
+  auto slot =
+      slotForLoadOfOpaqueWitness(IGF, wtable, index.forProtocolWitnessTable());
+  llvm::Value *witnessFnPtr = emitWTableSlotLoad(IGF, wtable, member, slot);
 
   auto fnType = IGF.IGM.getSILTypes().getConstantFunctionType(
       IGF.IGM.getMaximalTypeExpansionContext(), member);
@@ -3403,7 +3471,7 @@ FunctionPointer irgen::emitWitnessMethodValue(IRGenFunction &IGF,
   auto &schema = fnType->isAsync()
                      ? IGF.getOptions().PointerAuth.AsyncProtocolWitnesses
                      : IGF.getOptions().PointerAuth.ProtocolWitnesses;
-  auto authInfo = PointerAuthInfo::emit(IGF, schema, slot, member);
+  auto authInfo = PointerAuthInfo::emit(IGF, schema, slot.getAddress(), member);
 
   return FunctionPointer(fnType, witnessFnPtr, authInfo, signature);
 }

@@ -205,7 +205,8 @@ void setModuleFlags(IRGenModule &IGM) {
   Module->addModuleFlag(llvm::Module::Error, "Swift Version",
                         IRGenModule::swiftVersion);
 
-  if (IGM.getOptions().VirtualFunctionElimination) {
+  if (IGM.getOptions().VirtualFunctionElimination ||
+      IGM.getOptions().WitnessMethodElimination) {
     Module->addModuleFlag(llvm::Module::Error, "Virtual Function Elim", 1);
   }
 }

@@ -0,0 +1,32 @@
+// Tests that under -enable-llvm-wme, LLVM GlobalDCE is able to remove unused
+// witness methods, and that used witness methods are not removed (by running
+// the program).
+
+// RUN: %empty-directory(%t)
+// RUN: %target-build-swift -Xfrontend -disable-objc-interop -Xfrontend -enable-llvm-wme %s -emit-ir -o %t/main.ll
+// RUN: %target-clang %t/main.ll -isysroot %sdk -L%swift_obj_root/lib/swift/%target-sdk-name -flto -o %t/main
+// RUN: %target-run %t/main | %FileCheck %s
+
+// RUN: %llvm-nm --defined-only %t/main | %FileCheck %s --check-prefix=NM
+
+// REQUIRES: executable_test
+
+// Test disabled until LLVM GlobalDCE supports Swift vtables.
+// REQUIRES: rdar81868930
+
+// Protocol under test: the program below calls func1_live and never calls
+// func2_dead.
+protocol TheProtocol {
+  func func1_live()
+  func func2_dead()
+}
+
+// The only conformer. Each witness prints its own name so the program output
+// shows which implementation actually ran.
+struct MyStruct : TheProtocol {
+  func func1_live() { print("MyStruct.func1_live") }
+  func func2_dead() { print("MyStruct.func2_dead") }
+}
+
+// Call func1_live through a value typed as the protocol; func2_dead is never
+// referenced anywhere in this file.
+let x: TheProtocol = MyStruct()
+x.func1_live()
+// CHECK: MyStruct.func1_live
+
+// The symbol listing of the linked binary is expected to contain the live
+// method and to lack the dead one.
+// NM:     $s4main8MyStructV10func1_liveyyF
+// NM-NOT: $s4main8MyStructV10func2_deadyyF
@@ -0,0 +1,49 @@
+// Tests that under -enable-llvm-wme, IRGen marks wtables and wcall sites with
+// the right attributes and intrinsics.
+
+// RUN: %target-build-swift -Xfrontend -disable-objc-interop -Xfrontend -enable-llvm-wme \
+// RUN:    %s -emit-ir -o - | %FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-%target-ptrsize
+
+// Protocol with two requirements; both appear as entries of the witness table
+// matched below, after the conformance descriptor.
+protocol TheProtocol {
+  func func1_live()
+  func func2_dead()
+}
+
+// Conforming type whose witness table global is matched by the patterns that
+// follow this declaration.
+struct MyStruct : TheProtocol {
+  func func1_live() { print("MyStruct.func1_live") }
+  func func2_dead() { print("MyStruct.func2_dead") }
+}
+
+// CHECK:         @"$s4main8MyStructVAA11TheProtocolAAWP" =
+// CHECK-SAME:    i8* bitcast (%swift.protocol_conformance_descriptor* @"$s4main8MyStructVAA11TheProtocolAAMc" to i8*)
+// CHECK-SAME:    i8* bitcast (void (%T4main8MyStructV*, %swift.type*, i8**)* @"$s4main8MyStructVAA11TheProtocolA2aDP10func1_liveyyFTW" to i8*)
+// CHECK-SAME:    i8* bitcast (void (%T4main8MyStructV*, %swift.type*, i8**)* @"$s4main8MyStructVAA11TheProtocolA2aDP10func2_deadyyFTW" to i8*)
+// CHECK-64-SAME: align 8, !type !0, !type !1, !vcall_visibility !2
+// CHECK-32-SAME: align 4, !type !0, !type !1, !vcall_visibility !2
+
+// Call on a value of the concrete type: the expected IR is a direct call to
+// MyStruct.func1_live immediately after the initializer call.
+func test1() {
+  // CHECK: define hidden swiftcc void @"$s4main5test1yyF"()
+  let x: MyStruct = MyStruct()
+  x.func1_live()
+  // CHECK:      call swiftcc void @"$s4main8MyStructVACycfC"()
+  // CHECK-NEXT: call swiftcc void @"$s4main8MyStructV10func1_liveyyF"()
+  // CHECK-NEXT: ret void
+}
+
+// Call on a value typed as the protocol: the expected IR loads the witness
+// table, addresses entry 1, casts that slot to i8* and reads it through
+// @llvm.type.checked.load with the type identifier of func1_live.
+func test2() {
+  // CHECK: define hidden swiftcc void @"$s4main5test2yyF"()
+  let x: TheProtocol = MyStruct()
+  x.func1_live()
+  // CHECK:  [[WTABLE:%.*]]    = load i8**, i8*** {{.*}}
+  // CHECK:  [[SLOT:%.*]]      = getelementptr inbounds i8*, i8** [[WTABLE]], i32 1
+  // CHECK:  [[SLOTASPTR:%.*]] = bitcast i8** [[SLOT]] to i8*
+  // CHECK:                      call { i8*, i1 } @llvm.type.checked.load(i8* [[SLOTASPTR]], i32 0, metadata !"$s4main11TheProtocolP10func1_liveyyFTq")
+}
+
+// CHECK-64: !0 = !{i64 8, !"$s4main11TheProtocolP10func1_liveyyFTq"}
+// CHECK-64: !1 = !{i64 16, !"$s4main11TheProtocolP10func2_deadyyFTq"}
+// CHECK-64: !2 = !{i64 1}
+
+// CHECK-32: !0 = !{i64 4, !"$s4main11TheProtocolP10func1_liveyyFTq"}
+// CHECK-32: !1 = !{i64 8, !"$s4main11TheProtocolP10func2_deadyyFTq"}
+// CHECK-32: !2 = !{i64 1}