Skip to content

Commit f0997c0

Browse files
authored
Merge pull request #17014 from eeckstein/optimize-string-constants
Optimize string constants
2 parents 6f94f6c + 4191149 commit f0997c0

File tree

10 files changed

+427
-7
lines changed

10 files changed

+427
-7
lines changed

include/swift/AST/Builtins.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,11 @@ BUILTIN_MISC_OPERATION(StaticReport, "staticReport", "", Special)
475475
BUILTIN_MISC_OPERATION(AssertConf, "assert_configuration", "n", Special)
476476

477477
/// StringObjectOr has type (T,T) -> T.
478+
/// Sets bits in a string object. The first operand is a string literal
479+
/// pointer bit-cast to an integer. The second operand is the bit mask to be or'd into
480+
/// the high bits of the pointer.
481+
/// It is required that the or'd bits are all 0 in the first operand. So this
482+
/// or-operation is actually equivalent to an addition.
478483
BUILTIN_MISC_OPERATION(StringObjectOr, "stringObjectOr", "n", Integer)
479484

480485
/// Special truncation builtins that check for sign and overflow errors. These

lib/IRGen/GenConstant.cpp

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,32 @@ static llvm::Constant *emitConstantValue(IRGenModule &IGM, SILValue operand) {
8383
} else if (auto *SLI = dyn_cast<StringLiteralInst>(operand)) {
8484
return emitAddrOfConstantString(IGM, SLI);
8585
} else if (auto *BI = dyn_cast<BuiltinInst>(operand)) {
86-
assert(IGM.getSILModule().getBuiltinInfo(BI->getName()).ID ==
87-
BuiltinValueKind::PtrToInt);
88-
llvm::Constant *ptr = emitConstantValue(IGM, BI->getArguments()[0]);
89-
return llvm::ConstantExpr::getPtrToInt(ptr, IGM.IntPtrTy);
86+
switch (IGM.getSILModule().getBuiltinInfo(BI->getName()).ID) {
87+
case BuiltinValueKind::PtrToInt: {
88+
llvm::Constant *ptr = emitConstantValue(IGM, BI->getArguments()[0]);
89+
return llvm::ConstantExpr::getPtrToInt(ptr, IGM.IntPtrTy);
90+
}
91+
case BuiltinValueKind::ZExtOrBitCast: {
92+
llvm::Constant *value = emitConstantValue(IGM, BI->getArguments()[0]);
93+
return llvm::ConstantExpr::getZExtOrBitCast(value, IGM.getStorageType(BI->getType()));
94+
}
95+
case BuiltinValueKind::StringObjectOr: {
96+
llvm::Constant *lhs = emitConstantValue(IGM, BI->getArguments()[0]);
97+
llvm::Constant *rhs = emitConstantValue(IGM, BI->getArguments()[1]);
98+
// It is a requirement that the or'd bits in the left argument are
99+
// initialized with 0. Therefore the or-operation is equivalent to an
100+
// addition. We need an addition to generate a valid relocation.
101+
return llvm::ConstantExpr::getAdd(lhs, rhs);
102+
}
103+
default:
104+
llvm_unreachable("unsupported builtin for constant expression");
105+
}
106+
} else if (auto *VTBI = dyn_cast<ValueToBridgeObjectInst>(operand)) {
107+
auto *SI = cast<StructInst>(VTBI->getOperand());
108+
assert(SI->getElements().size() == 1);
109+
auto *val = emitConstantValue(IGM, SI->getElements()[0]);
110+
auto *sTy = IGM.getTypeInfo(VTBI->getType()).getStorageType();
111+
return llvm::ConstantExpr::getIntToPtr(val, sTy);
90112
} else {
91113
llvm_unreachable("Unsupported SILInstruction in static initializer!");
92114
}

lib/SIL/SILGlobalVariable.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,17 @@ bool SILGlobalVariable::isValidStaticInitializerInst(const SILInstruction *I,
8686
if (isa<LiteralInst>(bi->getArguments()[0]))
8787
return true;
8888
break;
89+
case BuiltinValueKind::StringObjectOr:
90+
// The first operand can be a string literal (i.e. a pointer), but the
91+
// second operand must be a constant. This enables creating a
92+
// pointer+offset relocation.
93+
// Note that StringObjectOr requires the or'd bits in the first
94+
// operand to be 0, so the operation is equivalent to an addition.
95+
if (isa<IntegerLiteralInst>(bi->getArguments()[1]))
96+
return true;
97+
break;
98+
case BuiltinValueKind::ZExtOrBitCast:
99+
return true;
89100
default:
90101
break;
91102
}
@@ -107,6 +118,7 @@ bool SILGlobalVariable::isValidStaticInitializerInst(const SILInstruction *I,
107118
case SILInstructionKind::IntegerLiteralInst:
108119
case SILInstructionKind::FloatLiteralInst:
109120
case SILInstructionKind::ObjectInst:
121+
case SILInstructionKind::ValueToBridgeObjectInst:
110122
return true;
111123
default:
112124
return false;

lib/SILOptimizer/PassManager/PassPipeline.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,12 @@ static void addClosureSpecializePassPipeline(SILPassPipelinePlan &P) {
383383
P.addDeadFunctionElimination();
384384
P.addDeadObjectElimination();
385385

386+
// These few passes are needed to clean up between loop unrolling and GlobalOpt.
387+
P.addSimplifyCFG();
388+
P.addSILCombine();
389+
P.addPerformanceConstantPropagation();
390+
P.addSimplifyCFG();
391+
386392
// Hoist globals out of loops.
387393
// Global-init functions should not be inlined until GlobalOpt is done.
388394
P.addGlobalOpt();

lib/SILOptimizer/SILCombiner/SILCombiner.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,12 +198,15 @@ class SILCombiner :
198198
SILInstruction *visitPartialApplyInst(PartialApplyInst *AI);
199199
SILInstruction *visitApplyInst(ApplyInst *AI);
200200
SILInstruction *visitTryApplyInst(TryApplyInst *AI);
201+
SILInstruction *optimizeStringObject(BuiltinInst *BI);
201202
SILInstruction *visitBuiltinInst(BuiltinInst *BI);
202203
SILInstruction *visitCondFailInst(CondFailInst *CFI);
203204
SILInstruction *visitStrongRetainInst(StrongRetainInst *SRI);
204205
SILInstruction *visitRefToRawPointerInst(RefToRawPointerInst *RRPI);
205206
SILInstruction *visitUpcastInst(UpcastInst *UCI);
207+
SILInstruction *optimizeLoadFromStringLiteral(LoadInst *LI);
206208
SILInstruction *visitLoadInst(LoadInst *LI);
209+
SILInstruction *visitIndexAddrInst(IndexAddrInst *IA);
207210
SILInstruction *visitAllocStackInst(AllocStackInst *AS);
208211
SILInstruction *visitAllocRefInst(AllocRefInst *AR);
209212
SILInstruction *visitSwitchEnumAddrInst(SwitchEnumAddrInst *SEAI);

lib/SILOptimizer/SILCombiner/SILCombinerBuiltinVisitors.cpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,87 @@ SILInstruction *optimizeBitOp(BuiltinInst *BI,
430430
return nullptr;
431431
}
432432

433+
/// Returns a 64-bit integer constant if \p op is an integer_literal instruction
434+
/// with a value which fits into 64 bits.
435+
static Optional<uint64_t> getIntConst(SILValue op) {
436+
if (auto *ILI = dyn_cast<IntegerLiteralInst>(op)) {
437+
if (ILI->getValue().getActiveBits() <= 64)
438+
return ILI->getValue().getZExtValue();
439+
}
440+
return None;
441+
}
442+
443+
/// Optimize the bit extract of a string object. Example in SIL pseudo-code,
444+
/// omitting the type-conversion instructions:
445+
///
446+
/// %0 = string_literal
447+
/// %1 = integer_literal 0x8000000000000000
448+
/// %2 = builtin "stringObjectOr_Int64" (%0, %1)
449+
/// %3 = integer_literal 0x4000000000000000
450+
/// %4 = builtin "and_Int64" (%2, %3)
451+
///
452+
/// optimizes to an integer_literal of 0.
453+
SILInstruction *SILCombiner::optimizeStringObject(BuiltinInst *BI) {
454+
assert(BI->getBuiltinInfo().ID == BuiltinValueKind::And);
455+
auto AndOp = getIntConst(BI->getArguments()[1]);
456+
if (!AndOp)
457+
return nullptr;
458+
459+
uint64_t andBits = AndOp.getValue();
460+
461+
// TODO: It's bad that we have to hardcode the payload bit mask here.
462+
// Instead we should introduce builtins (or instructions) to extract the
463+
// payload and extra bits, respectively.
464+
const uint64_t payloadBits = 0x00ffffffffffffffll;
465+
if ((andBits & payloadBits) != 0)
466+
return nullptr;
467+
468+
uint64_t setBits = 0;
469+
SILValue val = BI->getArguments()[0];
470+
while (val->getKind() != ValueKind::StringLiteralInst) {
471+
switch (val->getKind()) {
472+
// Look through all the type conversion and projection instructions.
473+
case ValueKind::StructExtractInst:
474+
case ValueKind::UncheckedTrivialBitCastInst:
475+
case ValueKind::ValueToBridgeObjectInst:
476+
val = cast<SingleValueInstruction>(val)->getOperand(0);
477+
break;
478+
case ValueKind::StructInst: {
479+
auto *SI = cast<StructInst>(val);
480+
if (SI->getNumOperands() != 1)
481+
return nullptr;
482+
val = SI->getOperand(0);
483+
break;
484+
}
485+
case ValueKind::BuiltinInst: {
486+
auto *B = cast<BuiltinInst>(val);
487+
switch (B->getBuiltinInfo().ID) {
488+
case BuiltinValueKind::StringObjectOr:
489+
// Note that it is a requirement that the or'd bits of the left
490+
// operand are initially zero.
491+
if (auto opVal = getIntConst(B->getArguments()[1])) {
492+
setBits |= opVal.getValue();
493+
} else {
494+
return nullptr;
495+
}
496+
LLVM_FALLTHROUGH;
497+
case BuiltinValueKind::ZExtOrBitCast:
498+
case BuiltinValueKind::PtrToInt:
499+
val = B->getArguments()[0];
500+
break;
501+
default:
502+
return nullptr;
503+
}
504+
break;
505+
}
506+
default:
507+
return nullptr;
508+
}
509+
}
510+
return Builder.createIntegerLiteral(BI->getLoc(), BI->getType(),
511+
setBits & andBits);
512+
}
513+
433514
SILInstruction *SILCombiner::visitBuiltinInst(BuiltinInst *I) {
434515
if (I->getBuiltinInfo().ID == BuiltinValueKind::CanBeObjCClass)
435516
return optimizeBuiltinCanBeObjCClass(I);
@@ -496,6 +577,9 @@ SILInstruction *SILCombiner::visitBuiltinInst(BuiltinInst *I) {
496577
break;
497578
}
498579
case BuiltinValueKind::And:
580+
if (SILInstruction *optimized = optimizeStringObject(I))
581+
return optimized;
582+
499583
return optimizeBitOp(I,
500584
[](APInt &left, const APInt &right) { left &= right; } /* combine */,
501585
[](const APInt &i) -> bool { return i.isAllOnesValue(); } /* isNeutral */,

lib/SILOptimizer/SILCombiner/SILCombinerMiscVisitors.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,58 @@ SILInstruction *SILCombiner::visitAllocRefInst(AllocRefInst *AR) {
465465
return nullptr;
466466
}
467467

468+
/// Returns the base address if \p val is an index_addr with constant index.
469+
static SILValue isConstIndexAddr(SILValue val, unsigned &index) {
470+
auto *IA = dyn_cast<IndexAddrInst>(val);
471+
if (!IA)
472+
return nullptr;
473+
auto *Index = dyn_cast<IntegerLiteralInst>(IA->getIndex());
474+
475+
// Limiting to 32 bits is more than enough. The reason for not limiting to 64
476+
// bits is to leave room for overflow when we add two indices.
477+
if (!Index || Index->getValue().getActiveBits() > 32)
478+
return nullptr;
479+
480+
index = Index->getValue().getZExtValue();
481+
return IA->getBase();
482+
}
483+
484+
/// Optimize loading bytes from a string literal.
485+
/// Example in SIL pseudo code:
486+
/// %0 = string_literal "abc"
487+
/// %1 = integer_literal 2
488+
/// %2 = index_addr %0, %1
489+
/// %3 = load %2
490+
/// ->
491+
/// %3 = integer_literal 'c'
492+
SILInstruction *SILCombiner::optimizeLoadFromStringLiteral(LoadInst *LI) {
493+
auto *SEA = dyn_cast<StructElementAddrInst>(LI->getOperand());
494+
if (!SEA)
495+
return nullptr;
496+
497+
SILValue addr = SEA->getOperand();
498+
unsigned index = 0;
499+
if (SILValue iaBase = isConstIndexAddr(addr, index))
500+
addr = iaBase;
501+
502+
auto *PTA = dyn_cast<PointerToAddressInst>(addr);
503+
if (!PTA)
504+
return nullptr;
505+
auto *Literal = dyn_cast<StringLiteralInst>(PTA->getOperand());
506+
if (!Literal || Literal->getEncoding() != StringLiteralInst::Encoding::UTF8)
507+
return nullptr;
508+
509+
BuiltinIntegerType *BIType = LI->getType().getAs<BuiltinIntegerType>();
510+
if (!BIType || !BIType->isFixedWidth(8))
511+
return nullptr;
512+
513+
StringRef str = Literal->getValue();
514+
if (index >= str.size())
515+
return nullptr;
516+
517+
return Builder.createIntegerLiteral(LI->getLoc(), LI->getType(), str[index]);
518+
}
519+
468520
SILInstruction *SILCombiner::visitLoadInst(LoadInst *LI) {
469521
// (load (upcast-ptr %x)) -> (upcast-ref (load %x))
470522
Builder.setCurrentDebugScope(LI->getDebugScope());
@@ -474,6 +526,9 @@ SILInstruction *SILCombiner::visitLoadInst(LoadInst *LI) {
474526
return Builder.createUpcast(LI->getLoc(), NewLI, LI->getType());
475527
}
476528

529+
if (SILInstruction *I = optimizeLoadFromStringLiteral(LI))
530+
return I;
531+
477532
// Given a load with multiple struct_extracts/tuple_extracts and no other
478533
// uses, canonicalize the load into several (struct_element_addr (load))
479534
// pairs.
@@ -535,6 +590,28 @@ SILInstruction *SILCombiner::visitLoadInst(LoadInst *LI) {
535590
return eraseInstFromFunction(*LI);
536591
}
537592

593+
/// Optimize nested index_addr instructions:
594+
/// Example in SIL pseudo code:
595+
/// %1 = index_addr %ptr, x
596+
/// %2 = index_addr %1, y
597+
/// ->
598+
/// %2 = index_addr %ptr, x+y
599+
SILInstruction *SILCombiner::visitIndexAddrInst(IndexAddrInst *IA) {
600+
unsigned index = 0;
601+
SILValue base = isConstIndexAddr(IA, index);
602+
if (!base)
603+
return nullptr;
604+
605+
unsigned index2 = 0;
606+
SILValue base2 = isConstIndexAddr(base, index2);
607+
if (!base2)
608+
return nullptr;
609+
610+
auto *newIndex = Builder.createIntegerLiteral(IA->getLoc(),
611+
IA->getIndex()->getType(), index + index2);
612+
return Builder.createIndexAddr(IA->getLoc(), base2, newIndex);
613+
}
614+
538615
SILInstruction *SILCombiner::visitReleaseValueInst(ReleaseValueInst *RVI) {
539616
SILValue Operand = RVI->getOperand();
540617
SILType OperandTy = Operand->getType();

stdlib/public/core/StringObject.swift

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,7 @@ extension _StringObject {
247247
internal
248248
static var _variantMask: UInt {
249249
@inline(__always)
250-
get { return UInt(Builtin.stringObjectOr_Int64(
251-
_isValueBit._value, _subVariantBit._value)) }
250+
get { return _isValueBit | _subVariantBit }
252251
}
253252

254253
@inlinable
@@ -861,7 +860,7 @@ extension _StringObject {
861860
self.init(.strong(Builtin.reinterpretCast(_payloadBits)), bits)
862861
#else
863862
_sanityCheck(_payloadBits & ~_StringObject._payloadMask == 0)
864-
var rawBits = _payloadBits & _StringObject._payloadMask
863+
var rawBits = _payloadBits
865864
if isValue {
866865
var rawBitsBuiltin = Builtin.stringObjectOr_Int64(
867866
rawBits._value, _StringObject._isValueBit._value)

0 commit comments

Comments
 (0)