Skip to content

Commit 0ac6d36

Browse files
Merge pull request #72781 from aschwaighofer/large_types_c_function_forwarding
LargeTypesReg2Mem: Forward the address of large formally by-val arguments and return values of C functions
2 parents 2ee26d9 + 1aaa71f commit 0ac6d36

File tree

7 files changed

+205
-7
lines changed

7 files changed

+205
-7
lines changed

lib/IRGen/GenCall.cpp

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "clang/CodeGen/CodeGenABITypes.h"
3232
#include "clang/CodeGen/ModuleBuilder.h"
3333
#include "clang/Sema/Sema.h"
34+
#include "llvm/Analysis/ValueTracking.h"
3435
#include "llvm/IR/GlobalPtrAuthInfo.h"
3536
#include "llvm/IR/GlobalValue.h"
3637
#include "llvm/Support/Compiler.h"
@@ -3962,6 +3963,26 @@ void irgen::emitClangExpandedParameter(IRGenFunction &IGF,
39623963
swiftTI.deallocateStack(IGF, tempAlloc, swiftType);
39633964
}
39643965

3966+
/// Try to find a stack slot whose address can be forwarded for an argument
/// that was exploded into `in`.
///
/// Returns a default-constructed Address (which the caller tests with
/// isValid()) unless all of the following hold:
///   - `isForwardableArgument` is true,
///   - the first value of `in` is an llvm load,
///   - that load's pointer operand is a getelementptr, and
///   - the underlying object of that getelementptr is an alloca.
/// On success the alloca is wrapped via `TI.getAddressForPointer`.
///
/// NOTE(review): only the first element of `in` is inspected here, and
/// `in.begin()` is dereferenced without an emptiness check -- confirm that
/// callers only pass non-empty explosions whose values all load from the same
/// alloca.
Address getForwardableAlloca(const TypeInfo &TI, bool isForwardableArgument,
3967+
Explosion &in) {
3968+
if (!isForwardableArgument)
3969+
return Address();
3970+
3971+
// The first exploded value must be produced directly by a load.
auto *load = dyn_cast<llvm::LoadInst>(*in.begin());
3972+
if (!load)
3973+
return Address();
3974+
3975+
// The load has to read through a GEP (an element projection of some object).
auto *gep = dyn_cast<llvm::GetElementPtrInst>(load->getPointerOperand());
3976+
if (!gep)
3977+
return Address();
3978+
3979+
// Walk back to the object the GEP is based on; only allocas are forwarded.
auto *alloca = dyn_cast<llvm::AllocaInst>(getUnderlyingObject(gep));
3980+
if (!alloca)
3981+
return Address();
3982+
3983+
return TI.getAddressForPointer(alloca);
3984+
}
3985+
39653986
static void externalizeArguments(IRGenFunction &IGF, const Callee &callee,
39663987
Explosion &in, Explosion &out,
39673988
TemporarySet &temporaries,
@@ -4021,6 +4042,8 @@ static void externalizeArguments(IRGenFunction &IGF, const Callee &callee,
40214042
SILType paramType = silConv.getSILType(
40224043
params[i - firstParam], IGF.IGM.getMaximalTypeExpansionContext());
40234044

4045+
bool isForwardableArgument = IGF.isForwardableArgument(i - firstParam);
4046+
40244047
// In Swift, values that are foreign references types will always be
40254048
// pointers. Additionally, we only import functions which use foreign
40264049
// reference types indirectly (as pointers), so we know in every case, if
@@ -4082,8 +4105,17 @@ static void externalizeArguments(IRGenFunction &IGF, const Callee &callee,
40824105
Alignment(ABIAlign.getQuantity()));
40834106
}
40844107
}
4085-
4086-
ti.initialize(IGF, in, addr, isOutlined);
4108+
Address forwardFromAddr = getForwardableAlloca(ti, isForwardableArgument,
4109+
in);
4110+
// Try to forward the address from a `load` instruction "immediately"
4111+
// preceding the apply.
4112+
if (isForwardableArgument && forwardFromAddr.isValid()) {
4113+
ti.initializeWithTake(IGF, addr, forwardFromAddr,
4114+
paramType.getAddressType(), isOutlined);
4115+
(void)in.claim(ti.getSchema().size());
4116+
} else {
4117+
ti.initialize(IGF, in, addr, isOutlined);
4118+
}
40874119

40884120
out.add(addr.getAddress());
40894121
break;

lib/IRGen/IRGen.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
9191
#include "llvm/Transforms/ObjCARC.h"
9292
#include "llvm/Transforms/Scalar.h"
93+
#include "llvm/Transforms/Scalar/DCE.h"
9394

9495
#include <thread>
9596

@@ -329,7 +330,14 @@ void swift::performLLVMOptimizations(const IRGenOptions &Opts,
329330
MPM.addPass(InstrProfiling(options, false));
330331
});
331332
}
332-
333+
if (Opts.shouldOptimize()) {
334+
PB.registerPipelineStartEPCallback(
335+
[](ModulePassManager &MPM, OptimizationLevel level) {
336+
// Run this before SROA to avoid unnecessary expansion of dead
337+
// loads.
338+
MPM.addPass(createModuleToFunctionPassAdaptor(DCEPass()));
339+
});
340+
}
333341
bool isThinLTO = Opts.LLVMLTOKind == IRGenLLVMLTOKind::Thin;
334342
bool isFullLTO = Opts.LLVMLTOKind == IRGenLLVMLTOKind::Full;
335343
if (!Opts.shouldOptimize() || Opts.DisableLLVMOptzns) {

lib/IRGen/IRGenFunction.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,21 @@ class IRGenFunction {
808808
LocalTypeDataCache const *getLocalTypeData() { return LocalTypeData; }
809809
#endif
810810

811+
/// A forwardable argument is a load that immediately precedes the apply it is
812+
/// used as an argument to. If there are no side-effecting instructions between
813+
/// said load and the apply, we can memcpy the load's address to the apply's
814+
/// indirect argument alloca.
815+
/// Forget every argument index previously recorded as forwardable.
void clearForwardableArguments() {
816+
forwardableArguments.clear();
817+
}
818+
819+
/// Record the argument at index \p idx as forwardable.
void setForwardableArgument(unsigned idx) {
820+
forwardableArguments.insert(idx);
821+
}
822+
823+
/// Return true if the argument at index \p idx was recorded as forwardable
/// since the set was last cleared.
bool isForwardableArgument(unsigned idx) const {
824+
return forwardableArguments.contains(idx);
825+
}
811826
private:
812827
LocalTypeDataCache &getOrCreateLocalTypeData();
813828
void destroyLocalTypeData();
@@ -826,6 +841,8 @@ class IRGenFunction {
826841
CanType SelfType;
827842
bool SelfTypeIsExact = false;
828843
DynamicSelfKind SelfKind;
844+
845+
llvm::SmallSetVector<unsigned, 16> forwardableArguments;
829846
};
830847

831848
using ConditionalDominanceScope = IRGenFunction::ConditionalDominanceScope;

lib/IRGen/IRGenSIL.cpp

Lines changed: 89 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@
1616
//===----------------------------------------------------------------------===//
1717

1818
#include "GenKeyPath.h"
19-
#include "swift/AST/ExtInfo.h"
2019
#include "swift/AST/ASTContext.h"
2120
#include "swift/AST/ASTMangler.h"
2221
#include "swift/AST/DiagnosticsIRGen.h"
22+
#include "swift/AST/ExtInfo.h"
2323
#include "swift/AST/GenericEnvironment.h"
2424
#include "swift/AST/IRGenOptions.h"
2525
#include "swift/AST/ParameterList.h"
@@ -53,6 +53,7 @@
5353
#include "llvm/ADT/MapVector.h"
5454
#include "llvm/ADT/SmallBitVector.h"
5555
#include "llvm/ADT/TinyPtrVector.h"
56+
#include "llvm/Analysis/ValueTracking.h"
5657
#include "llvm/IR/Constant.h"
5758
#include "llvm/IR/Constants.h"
5859
#include "llvm/IR/DIBuilder.h"
@@ -3258,7 +3259,9 @@ void IRGenSILFunction::visitExistentialMetatypeInst(
32583259
static void emitApplyArgument(IRGenSILFunction &IGF,
32593260
SILValue arg,
32603261
SILType paramType,
3261-
Explosion &out) {
3262+
Explosion &out,
3263+
SILInstruction *apply = nullptr,
3264+
unsigned idx = 0) {
32623265
bool isSubstituted = (arg->getType() != paramType);
32633266

32643267
// For indirect arguments, we just need to pass a pointer.
@@ -3282,7 +3285,24 @@ static void emitApplyArgument(IRGenSILFunction &IGF,
32823285

32833286
// Fast path: avoid an unnecessary temporary explosion.
32843287
if (!isSubstituted) {
3288+
bool canForwardLoadToIndirect = false;
3289+
auto *load = dyn_cast<LoadInst>(arg);
3290+
[&]() {
3291+
if (apply && load && apply->getParent() == load->getParent()) {
3292+
for (auto it = std::next(load->getIterator()), e = apply->getIterator();
3293+
it != e; ++it) {
3294+
if (isa<LoadInst>(&(*it))) {
3295+
continue;
3296+
}
3297+
return;
3298+
}
3299+
canForwardLoadToIndirect = true;
3300+
}
3301+
}();
32853302
IGF.getLoweredExplosion(arg, out);
3303+
if (canForwardLoadToIndirect) {
3304+
IGF.setForwardableArgument(idx);
3305+
}
32863306
return;
32873307
}
32883308

@@ -3710,6 +3730,8 @@ void IRGenSILFunction::visitFullApplySite(FullApplySite site) {
37103730
// Lower the SIL arguments to IR arguments.
37113731

37123732
// Turn the formal SIL parameters into IR-gen things.
3733+
clearForwardableArguments();
3734+
37133735
for (auto index : indices(args)) {
37143736
if (origConv.hasIndirectSILErrorResults() &&
37153737
index == origConv.getNumIndirectSILResults()) {
@@ -3718,7 +3740,7 @@ void IRGenSILFunction::visitFullApplySite(FullApplySite site) {
37183740
continue;
37193741
}
37203742
emitApplyArgument(*this, args[index], emission->getParameterType(index),
3721-
llArgs);
3743+
llArgs, site.getInstruction(), index);
37223744
}
37233745

37243746
auto &calleeFP = emission->getCallee().getFunctionPointer();
@@ -3744,6 +3766,9 @@ void IRGenSILFunction::visitFullApplySite(FullApplySite site) {
37443766
Explosion result;
37453767
emission->emitToExplosion(result, false);
37463768

3769+
// We might have set forwardable arguments. Clear them for the next round.
3770+
clearForwardableArguments();
3771+
37473772
// For a simple apply, just bind the apply result to the result of the call.
37483773
if (auto apply = dyn_cast<ApplyInst>(i)) {
37493774
setLoweredExplosion(apply, result);
@@ -5336,12 +5361,72 @@ void IRGenSILFunction::visitLoadInst(swift::LoadInst *i) {
53365361
setLoweredExplosion(i, lowered);
53375362
}
53385363

5364+
/// Decide whether the value stored by \p store can be copied straight from
/// the stack slot it was loaded from rather than from the exploded values.
///
/// \param TI       Type info of the stored object; its schema size gives the
///                 number of exploded values that are examined.
/// \param store    The SIL store being lowered.
/// \param argSrc   The lowered explosion of the store's source value.
/// \param insertPt Out-parameter; on success it is set to the first llvm load
///                 of the explosion. It is left untouched on failure.
/// \returns the address of the alloca that every exploded value is loaded
///          from, or a default-constructed Address if the pattern does not
///          match.
static Address canForwardIndirectResultAlloca(const TypeInfo &TI,
5365+
StoreInst *store,
5366+
Explosion &argSrc,
5367+
llvm::Instruction * &insertPt) {
5368+
// Check that the store stores the result of an apply instruction immediately
5369+
// preceding the store, and that its destination is an alloc_stack.
5370+
auto *apply = dyn_cast<ApplyInst>(store->getSrc());
5371+
auto *allocStack = dyn_cast<AllocStackInst>(store->getDest());
5372+
if (!apply || !allocStack || apply->getParent() != store->getParent() ||
5373+
std::next(apply->getIterator()) != store->getIterator())
5374+
return Address();
5375+
5376+
// Bail out on an empty explosion or on a schema with fewer than 4 elements.
// NOTE(review): the threshold of 4 presumably limits this to "large" types
// -- confirm the intended cutoff.
auto explosionSize = TI.getSchema().size();
5377+
if (argSrc.size() < 1 || explosionSize < 4)
5378+
return Address();
5379+
5380+
// The first exploded value must be a load through a GEP that is based on an
// alloca.
auto *load = dyn_cast<llvm::LoadInst>(*argSrc.begin());
5381+
if (!load)
5382+
return Address();
5383+
auto *gep = dyn_cast<llvm::GetElementPtrInst>(load->getPointerOperand());
5384+
if (!gep)
5385+
return Address();
5386+
5387+
auto *alloca = dyn_cast<llvm::AllocaInst>(getUnderlyingObject(gep));
5388+
if (!alloca)
5389+
return Address();
5390+
5391+
// Check all the other loads: each remaining value must also be a load whose
// underlying object is the same alloca.
5392+
for (size_t i = 1, e = explosionSize; i != e; ++i) {
5393+
// This `load` shadows the outer one; the outer `load` still names the
// first load after the loop.
auto *load = dyn_cast<llvm::LoadInst>(*(argSrc.begin() + i));
5394+
if (!load)
5395+
return Address();
5396+
auto *alloca2 = dyn_cast<llvm::AllocaInst>(
5397+
getUnderlyingObject(load->getPointerOperand()));
5398+
if (!alloca2 || alloca2 != alloca)
5399+
return Address();
5400+
}
5401+
5402+
// Set insertPt to the first load so that we are within the lifetime of the
5403+
// alloca marked by the lifetime intrinsic.
5404+
insertPt = load;
5405+
5406+
return TI.getAddressForPointer(alloca);
5407+
}
5408+
53395409
void IRGenSILFunction::visitStoreInst(swift::StoreInst *i) {
53405410
Explosion source = getLoweredExplosion(i->getSrc());
53415411
Address dest = getLoweredAddress(i->getDest());
53425412
SILType objType = i->getSrc()->getType().getObjectType();
5343-
53445413
const auto &typeInfo = cast<LoadableTypeInfo>(getTypeInfo(objType));
5414+
5415+
5416+
llvm::Instruction *insertPt = nullptr;
5417+
auto forwardAddr = canForwardIndirectResultAlloca(typeInfo, i, source,
5418+
insertPt);
5419+
if (forwardAddr.isValid()) {
5420+
const auto &addrTI = getTypeInfo(i->getDest()->getType());
5421+
// Set the insert point to the first load instruction. We need to be within
5422+
// the lifetime of the alloca.
5423+
IRBuilder::SavedInsertionPointRAII insertRAII(this->Builder, insertPt);
5424+
addrTI.initializeWithTake(*this, dest, forwardAddr, i->getDest()->getType(),
5425+
false);
5426+
(void)source.claimAll();
5427+
return;
5428+
}
5429+
53455430
switch (i->getOwnershipQualifier()) {
53465431
case StoreOwnershipQualifier::Unqualified:
53475432
case StoreOwnershipQualifier::Init:
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#pragma once
2+
3+
typedef struct {
4+
unsigned long long arr[16];
5+
} large_thing;
6+
7+
large_thing pass_and_return(large_thing a, large_thing b);
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// RUN: %target-swift-frontend %s -Osize -import-objc-header %S/Inputs/large_argument_result_c.h -emit-ir -o - 2>&1 | %FileCheck %s
2+
3+
// REQUIRES: PTRSIZE=64
4+
5+
// Whether llvm can remove the first two memcpy's depends on the ABI (arm64's
6+
// PCS, says stack arguments might be written to; x86-64 ABI copies indirect
7+
// parameters for the call)
8+
// REQUIRES: CPU=arm64 || CPU=arm64e
9+
10+
// CHECK: define swiftcc void @"$s23large_argument_result_c7runTestyySo0A6_thingaF"(ptr {{.*}} %0)
11+
// CHECK: [[CALL_ALLOCA:%.*]] = alloca <{ %Ts6UInt64V, %Ts6UInt64V, %Ts6UInt64V
12+
// CHECK: [[TMP_ALLOCA:%.*]] = alloca %TSo11large_thinga, align 8
13+
// CHECK: [[TMP_ALLOCA2:%.*]] = alloca %TSo11large_thinga, align 8
14+
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} [[TMP_ALLOCA2]], ptr {{.*}} %0, i64 128, i1 false)
15+
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} [[TMP_ALLOCA]], ptr {{.*}} %0, i64 128, i1 false)
16+
// CHECK: call void @pass_and_return(ptr {{.*}} [[CALL_ALLOCA]], ptr nonnull [[TMP_ALLOCA]], ptr nonnull [[TMP_ALLOCA2]])
17+
// CHECK: call {{.*}} @swift_allocObject
18+
// CHECK: [[BOX:%.*]] = call noalias ptr @swift_allocObject(
19+
// CHECK: [[ADDR_IN_BOX:%.*]] = getelementptr inbounds <{ %swift.refcounted, [128 x i8] }>, ptr [[BOX]], i64 0, i32 1
20+
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} [[ADDR_IN_BOX]], ptr {{.*}} [[CALL_ALLOCA]], i64 128, i1 false)
21+
// CHECK: call void @llvm.lifetime.end.p0(i64 128, ptr nonnull [[CALL_ALLOCA]])
22+
public func runTest(_ l : large_thing) {
23+
let r = pass_and_return(l, l)
24+
print(r)
25+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// RUN: %target-swift-frontend %S/large_argument_result_c.swift -Osize -import-objc-header %S/Inputs/large_argument_result_c.h -emit-ir -o - 2>&1 | %FileCheck %s
2+
3+
// REQUIRES: PTRSIZE=64
4+
5+
// Windows has a different ABI (not byval)
6+
// UNSUPPORTED: OS=windows-msvc
7+
8+
// Whether llvm can remove the first two memcpy's depends on the ABI (arm64's
9+
// PCS, says stack arguments might be written to; x86-64 ABI copies indirect
10+
// parameters for the call)
11+
// REQUIRES: CPU=x86_64
12+
13+
// CHECK: define {{.*}}swiftcc void @"$s23large_argument_result_c7runTestyySo0A6_thingaF"(ptr {{.*}} %0)
14+
// CHECK: [[CALL_ALLOCA:%.*]] = alloca <{ %Ts6UInt64V, %Ts6UInt64V, %Ts6UInt64V
15+
// CHECK: call void @pass_and_return(ptr {{.*}} [[CALL_ALLOCA]], ptr nonnull byval{{.*}} %0, ptr nonnull byval{{.*}} %0)
16+
// CHECK: call {{.*}} @swift_allocObject
17+
// CHECK: [[BOX:%.*]] = {{.*}}call noalias ptr @swift_allocObject(
18+
// CHECK: [[ADDR_IN_BOX:%.*]] = getelementptr inbounds <{ %swift.refcounted, [128 x i8] }>, ptr [[BOX]], i64 0, i32 1
19+
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} [[ADDR_IN_BOX]], ptr {{.*}} [[CALL_ALLOCA]], i64 128, i1 false)
20+
// CHECK: call void @llvm.lifetime.end.p0(i64 128, ptr nonnull [[CALL_ALLOCA]])
21+
public func runTest(_ l : large_thing) {
22+
let r = pass_and_return(l, l)
23+
print(r)
24+
}

0 commit comments

Comments
 (0)