Skip to content

Commit b3a2052

Browse files
authored
Merge pull request #21711 from apple/marcrasi-const-evaluator-strings
2 parents 1374147 + 2b6b161 commit b3a2052

File tree

5 files changed

+164
-0
lines changed

5 files changed

+164
-0
lines changed

include/swift/SIL/SILConstants.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@ class SymbolicValue {
9595
/// This value is represented with an inline integer representation.
9696
RK_IntegerInline,
9797

98+
/// This value is represented with a bump-pointer allocated char array
99+
/// representing a UTF-8 encoded string.
100+
RK_String,
101+
98102
/// This value is a struct or tuple of constants. This is tracked by the
99103
/// "aggregate" member of the value union.
100104
RK_Aggregate,
@@ -124,6 +128,10 @@ class SymbolicValue {
124128
/// This holds the bits of an integer for an inline representation.
125129
uint64_t integerInline;
126130

131+
/// When this SymbolicValue is of "String" kind, this pointer stores
132+
/// information about the StringRef value it holds.
133+
const char *string;
134+
127135
/// When this SymbolicValue is of "Aggregate" kind, this pointer stores
128136
/// information about the array elements and count.
129137
const SymbolicValue *aggregate;
@@ -147,6 +155,9 @@ class SymbolicValue {
147155
/// representation, which makes the number of entries in the list derivable.
148156
unsigned integerBitwidth;
149157

158+
/// This is the number of bytes for an RK_String representation.
159+
unsigned stringNumBytes;
160+
150161
/// This is the number of elements for an RK_Aggregate representation.
151162
unsigned aggregateNumElements;
152163
} auxInfo;
@@ -168,6 +179,10 @@ class SymbolicValue {
168179
/// This is an integer constant.
169180
Integer,
170181

182+
/// String values may have SIL type of Builtin.RawPointer or Builtin.Word
183+
/// type.
184+
String,
185+
171186
/// This can be an array, struct, tuple, etc.
172187
Aggregate,
173188

@@ -242,6 +257,13 @@ class SymbolicValue {
242257
APInt getIntegerValue() const;
243258
unsigned getIntegerValueBitWidth() const;
244259

260+
/// Returns a SymbolicValue representing a UTF-8 encoded string.
261+
static SymbolicValue getString(StringRef string,
262+
ASTContext &astContext);
263+
264+
/// Returns the UTF-8 encoded string underlying a SymbolicValue.
265+
StringRef getStringValue() const;
266+
245267
/// This returns an aggregate value with the specified elements in it. This
246268
/// copies the elements into the specified ASTContext.
247269
static SymbolicValue getAggregate(ArrayRef<SymbolicValue> elements,

lib/SIL/SILConstants.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ void SymbolicValue::print(llvm::raw_ostream &os, unsigned indent) const {
6161
case RK_IntegerInline:
6262
os << "int: " << getIntegerValue() << "\n";
6363
return;
64+
case RK_String:
65+
os << "string: \"" << getStringValue() << "\"\n";
66+
return;
6467
case RK_Aggregate: {
6568
ArrayRef<SymbolicValue> elements = getAggregateValue();
6669
switch (elements.size()) {
@@ -111,6 +114,8 @@ SymbolicValue::Kind SymbolicValue::getKind() const {
111114
case RK_Integer:
112115
case RK_IntegerInline:
113116
return Integer;
117+
case RK_String:
118+
return String;
114119
case RK_DirectAddress:
115120
case RK_DerivedAddress:
116121
return Address;
@@ -131,6 +136,8 @@ SymbolicValue::cloneInto(ASTContext &astContext) const {
131136
case RK_IntegerInline:
132137
case RK_Integer:
133138
return SymbolicValue::getInteger(getIntegerValue(), astContext);
139+
case RK_String:
140+
return SymbolicValue::getString(getStringValue(), astContext);
134141
case RK_Aggregate: {
135142
auto elts = getAggregateValue();
136143
SmallVector<SymbolicValue, 4> results;
@@ -215,6 +222,34 @@ unsigned SymbolicValue::getIntegerValueBitWidth() const {
215222
return auxInfo.integerBitwidth;
216223
}
217224

225+
//===----------------------------------------------------------------------===//
226+
// Strings
227+
//===----------------------------------------------------------------------===//
228+
229+
// Returns a SymbolicValue representing a UTF-8 encoded string.
230+
SymbolicValue SymbolicValue::getString(StringRef string,
231+
ASTContext &astContext) {
232+
// TODO: Could have an inline representation for strings if thre was demand,
233+
// just store a char[8] as the storage.
234+
235+
auto *resultPtr = astContext.Allocate<char>(string.size()).data();
236+
std::uninitialized_copy(string.begin(), string.end(), resultPtr);
237+
238+
SymbolicValue result;
239+
result.representationKind = RK_String;
240+
result.value.string = resultPtr;
241+
result.auxInfo.stringNumBytes = string.size();
242+
return result;
243+
}
244+
245+
// Returns the UTF-8 encoded string underlying a SymbolicValue.
246+
StringRef SymbolicValue::getStringValue() const {
247+
assert(getKind() == String);
248+
249+
assert(representationKind == RK_String);
250+
return StringRef(value.string, auxInfo.stringNumBytes);
251+
}
252+
218253
//===----------------------------------------------------------------------===//
219254
// Aggregates
220255
//===----------------------------------------------------------------------===//

lib/SILOptimizer/Utils/ConstExpr.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,21 @@ evaluateAndCacheCall(SILFunction &fn, SubstitutionMap substitutionMap,
3737
// ConstantFolding.h/cpp files should be subsumed by this, as this is a more
3838
// general framework.
3939

40+
/// Functions that the constant evaluator recognizes and folds directly
/// rather than stepping into their bodies.
enum class WellKnownFunction {
  /// String.init()
  StringInitEmpty,
  /// String.init(_builtinStringLiteral:utf8CodeUnitCount:isASCII:)
  StringMakeUTF8
};
46+
47+
/// Maps \p fn to the WellKnownFunction it implements, based on the semantics
/// attribute it carries. Returns None when \p fn has none of the recognized
/// attributes.
static llvm::Optional<WellKnownFunction> classifyFunction(SILFunction *fn) {
  // Checked in order; the first attribute present on the function wins.
  static const struct {
    const char *semanticsAttr;
    WellKnownFunction kind;
  } knownFunctions[] = {
      {"string.init_empty", WellKnownFunction::StringInitEmpty},
      {"string.makeUTF8", WellKnownFunction::StringMakeUTF8},
  };

  for (const auto &entry : knownFunctions)
    if (fn->hasSemanticsAttr(entry.semanticsAttr))
      return entry.kind;
  return None;
}
54+
4055
//===----------------------------------------------------------------------===//
4156
// ConstExprFunctionState implementation.
4257
//===----------------------------------------------------------------------===//
@@ -115,6 +130,9 @@ class ConstExprFunctionState {
115130
llvm::Optional<SymbolicValue> computeOpaqueCallResult(ApplyInst *apply,
116131
SILFunction *callee);
117132

133+
llvm::Optional<SymbolicValue>
134+
computeWellKnownCallResult(ApplyInst *apply, WellKnownFunction callee);
135+
118136
SymbolicValue getSingleWriterAddressValue(SILValue addr);
119137
SymbolicValue getConstAddrAndLoadResult(SILValue addr);
120138
SymbolicValue loadAddrValue(SILValue addr, SymbolicValue addrVal);
@@ -149,6 +167,8 @@ SymbolicValue ConstExprFunctionState::computeConstantValue(SILValue value) {
149167
// immediately.
150168
if (auto *ili = dyn_cast<IntegerLiteralInst>(value))
151169
return SymbolicValue::getInteger(ili->getValue(), evaluator.getASTContext());
170+
if (auto *sli = dyn_cast<StringLiteralInst>(value))
171+
return SymbolicValue::getString(sli->getValue(), evaluator.getASTContext());
152172

153173
if (auto *fri = dyn_cast<FunctionRefInst>(value))
154174
return SymbolicValue::getFunction(fri->getReferencedFunction());
@@ -523,6 +543,46 @@ ConstExprFunctionState::computeOpaqueCallResult(ApplyInst *apply,
523543
return evaluator.getUnknown((SILInstruction *)apply, UnknownReason::Default);
524544
}
525545

546+
/// Given a call to a well known function, collect its arguments as constants,
547+
/// fold it, and return None. If any of the arguments are not constants, marks
548+
/// the call's results as Unknown, and return an Unknown with information about
549+
/// the error.
550+
llvm::Optional<SymbolicValue>
551+
ConstExprFunctionState::computeWellKnownCallResult(ApplyInst *apply,
552+
WellKnownFunction callee) {
553+
auto conventions = apply->getSubstCalleeConv();
554+
switch (callee) {
555+
case WellKnownFunction::StringInitEmpty: { // String.init()
556+
assert(conventions.getNumDirectSILResults() == 1 &&
557+
conventions.getNumIndirectSILResults() == 0 &&
558+
"unexpected String.init() signature");
559+
auto result = SymbolicValue::getString("", evaluator.getASTContext());
560+
setValue(apply, result);
561+
return None;
562+
}
563+
case WellKnownFunction::StringMakeUTF8: {
564+
// String.init(_builtinStringLiteral start: Builtin.RawPointer,
565+
// utf8CodeUnitCount: Builtin.Word,
566+
// isASCII: Builtin.Int1)
567+
assert(conventions.getNumDirectSILResults() == 1 &&
568+
conventions.getNumIndirectSILResults() == 0 &&
569+
conventions.getNumParameters() == 4 && "unexpected signature");
570+
auto literal = getConstantValue(apply->getOperand(1));
571+
if (literal.getKind() != SymbolicValue::String)
572+
break;
573+
auto literalVal = literal.getStringValue();
574+
575+
auto byteCount = getConstantValue(apply->getOperand(2));
576+
if (byteCount.getKind() != SymbolicValue::Integer ||
577+
byteCount.getIntegerValue().getLimitedValue() != literalVal.size())
578+
break;
579+
setValue(apply, literal);
580+
return None;
581+
}
582+
}
583+
llvm_unreachable("unhandled WellKnownFunction");
584+
}
585+
526586
/// Given a call to a function, determine whether it is a call to a constexpr
527587
/// function. If so, collect its arguments as constants, fold it and return
528588
/// None. If not, mark the results as Unknown, and return an Unknown with
@@ -539,6 +599,10 @@ ConstExprFunctionState::computeCallResult(ApplyInst *apply) {
539599

540600
SILFunction *callee = calleeFn.getFunctionValue();
541601

602+
// If this is a well-known function, do not step into it.
603+
if (auto wellKnownFunction = classifyFunction(callee))
604+
return computeWellKnownCallResult(apply, *wellKnownFunction);
605+
542606
// Verify that we can fold all of the arguments to the call.
543607
SmallVector<SymbolicValue, 4> paramConstants;
544608
for (unsigned i = 0, e = apply->getNumOperands() - 1; i != e; ++i) {

stdlib/public/core/String.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@ public struct String {
367367
/// let empty = ""
368368
/// let alsoEmpty = String()
369369
@inlinable @inline(__always)
370+
// The "string.init_empty" semantics attribute is what the constant
// evaluator's classifyFunction matches in order to fold calls to this
// initializer without stepping into its body.
@_semantics("string.init_empty")
370371
public init() { self.init(_StringGuts()) }
371372
}
372373

test/SILOptimizer/pound_assert.swift

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,3 +455,45 @@ func testStructPassedAsProtocols() {
455455
#assert(callProtoSimpleMethod(s) == 0) // expected-error {{#assert condition not constant}}
456456
// expected-note@-1 {{could not fold operation}}
457457
}
458+
459+
//===----------------------------------------------------------------------===//
460+
// Strings
461+
//
462+
// TODO: The constant evaluator does not implement string accesses/comparisons
463+
// so these tests cannot test that the implemented string operations produce
464+
// correct values in the arrays. These tests only test that the implemented
465+
// string operations do not crash or produce unknown values. As soon as we have
466+
// string accesses/comparisons, modify these tests to check the values in the
467+
// strings.
468+
//===----------------------------------------------------------------------===//
469+
470+
/// Test fixture that stores a String next to an Int. The string tests
/// construct it with a string literal and then #assert on `x`.
struct ContainsString {
471+
let x: Int
472+
let str: String
473+
}
474+
475+
func stringInitEmptyTopLevel() {
476+
let c = ContainsString(x: 1, str: "")
477+
#assert(c.x == 1)
478+
}
479+
480+
/// Constructs a ContainsString with a non-empty string literal and #asserts
/// on its integer field within the same function.
func stringInitNonEmptyTopLevel() {
481+
let c = ContainsString(x: 1, str: "hello world")
482+
#assert(c.x == 1)
483+
}
484+
485+
/// Returns a ContainsString built with an empty string literal, so that the
/// construction happens inside a callee rather than at the #assert site.
func stringInitEmptyFlowSensitive() -> ContainsString {
486+
return ContainsString(x: 1, str: "")
487+
}
488+
489+
/// #asserts on the integer field of the value returned by
/// stringInitEmptyFlowSensitive(), which requires evaluating that call.
func invokeStringInitEmptyFlowSensitive() {
490+
#assert(stringInitEmptyFlowSensitive().x == 1)
491+
}
492+
493+
/// Returns a ContainsString built with a non-empty string literal, so that
/// the construction happens inside a callee rather than at the #assert site.
func stringInitNonEmptyFlowSensitive() -> ContainsString {
494+
return ContainsString(x: 1, str: "hello world")
495+
}
496+
497+
/// #asserts on the integer field of the value returned by
/// stringInitNonEmptyFlowSensitive(), which requires evaluating that call.
func invokeStringInitNonEmptyFlowSensitive() {
498+
#assert(stringInitNonEmptyFlowSensitive().x == 1)
499+
}

0 commit comments

Comments
 (0)