Skip to content

AST/SILGen support for constant string literals #8701

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion include/swift/AST/Expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ class alignas(8) Expr {
friend class StringLiteralExpr;
unsigned : NumLiteralExprBits;

unsigned Encoding : 2;
unsigned Encoding : 3;
unsigned IsSingleUnicodeScalar : 1;
unsigned IsSingleExtendedGraphemeCluster : 1;
};
Expand Down Expand Up @@ -940,6 +940,12 @@ class StringLiteralExpr : public LiteralExpr {
/// A UTF-16 string.
UTF16,

/// A UTF-8 constant string.
UTF8ConstString,

/// A UTF-16 constant string.
UTF16ConstString,

/// A single UnicodeScalar, passed as an integer.
OneUnicodeScalar
};
Expand Down
2 changes: 2 additions & 0 deletions include/swift/AST/KnownIdentifiers.def
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ IDENTIFIER(unicodeScalarLiteral)
IDENTIFIER(stringLiteral)
IDENTIFIER_(builtinUTF16StringLiteral)
IDENTIFIER_(builtinStringLiteral)
IDENTIFIER_(builtinConstUTF16StringLiteral)
IDENTIFIER_(builtinConstStringLiteral)
IDENTIFIER(StringLiteralType)
IDENTIFIER(stringInterpolation)
IDENTIFIER(stringInterpolationSegment)
Expand Down
2 changes: 2 additions & 0 deletions include/swift/AST/KnownProtocols.def
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ BUILTIN_EXPRESSIBLE_BY_LITERAL_PROTOCOL_(ExpressibleByBuiltinIntegerLiteral)
BUILTIN_EXPRESSIBLE_BY_LITERAL_PROTOCOL_(ExpressibleByBuiltinStringLiteral)
BUILTIN_EXPRESSIBLE_BY_LITERAL_PROTOCOL_(ExpressibleByBuiltinUTF16StringLiteral)
BUILTIN_EXPRESSIBLE_BY_LITERAL_PROTOCOL_(ExpressibleByBuiltinUnicodeScalarLiteral)
BUILTIN_EXPRESSIBLE_BY_LITERAL_PROTOCOL_(ExpressibleByBuiltinConstStringLiteral)
BUILTIN_EXPRESSIBLE_BY_LITERAL_PROTOCOL_(ExpressibleByBuiltinConstUTF16StringLiteral)

#undef EXPRESSIBLE_BY_LITERAL_PROTOCOL
#undef EXPRESSIBLE_BY_LITERAL_PROTOCOL_
Expand Down
2 changes: 1 addition & 1 deletion include/swift/Serialization/ModuleFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ const uint16_t VERSION_MAJOR = 0;
/// in source control, you should also update the comment to briefly
/// describe what change you made. The content of this comment isn't important;
/// it just ensures a conflict if two people change the module format.
const uint16_t VERSION_MINOR = 332; // Last change: constant_string_literal
const uint16_t VERSION_MINOR = 333; // Last change: AST constant_string_literal

using DeclID = PointerEmbeddedInt<unsigned, 31>;
using DeclIDField = BCFixed<31>;
Expand Down
4 changes: 4 additions & 0 deletions lib/AST/ASTDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,10 @@ getStringLiteralExprEncodingString(StringLiteralExpr::Encoding value) {
switch (value) {
case StringLiteralExpr::UTF8: return "utf8";
case StringLiteralExpr::UTF16: return "utf16";
case StringLiteralExpr::UTF8ConstString:
return "utf8_const_string";
case StringLiteralExpr::UTF16ConstString:
return "utf16_const_string";
case StringLiteralExpr::OneUnicodeScalar: return "unicodeScalar";
}

Expand Down
2 changes: 2 additions & 0 deletions lib/IRGen/GenMeta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5526,6 +5526,8 @@ SpecialProtocol irgen::getSpecialProtocolID(ProtocolDecl *P) {
case KnownProtocolKind::BridgedNSError:
case KnownProtocolKind::BridgedStoredNSError:
case KnownProtocolKind::ErrorCodeProtocol:
case KnownProtocolKind::ExpressibleByBuiltinConstStringLiteral:
case KnownProtocolKind::ExpressibleByBuiltinConstUTF16StringLiteral:
return SpecialProtocol::None;
}

Expand Down
25 changes: 23 additions & 2 deletions lib/SILGen/SILGenApply.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1519,8 +1519,9 @@ static RValue emitStringLiteral(SILGenFunction &SGF, Expr *E, StringRef Str,
break;
}
}

bool useConstantStringBuiltin = false;
StringLiteralInst::Encoding instEncoding;
ConstStringLiteralInst::Encoding constInstEncoding;
switch (encoding) {
case StringLiteralExpr::UTF8:
instEncoding = StringLiteralInst::Encoding::UTF8;
Expand All @@ -1532,6 +1533,16 @@ static RValue emitStringLiteral(SILGenFunction &SGF, Expr *E, StringRef Str,
Length = unicode::getUTF16Length(Str);
break;
}
case StringLiteralExpr::UTF8ConstString:
constInstEncoding = ConstStringLiteralInst::Encoding::UTF8;
useConstantStringBuiltin = true;
break;

case StringLiteralExpr::UTF16ConstString: {
constInstEncoding = ConstStringLiteralInst::Encoding::UTF16;
useConstantStringBuiltin = true;
break;
}
case StringLiteralExpr::OneUnicodeScalar: {
SILType Int32Ty = SILType::getBuiltinIntegerType(32, SGF.getASTContext());
SILValue UnicodeScalarValue =
Expand All @@ -1542,8 +1553,18 @@ static RValue emitStringLiteral(SILGenFunction &SGF, Expr *E, StringRef Str,
}
}

// Should we build a constant string literal?
if (useConstantStringBuiltin) {
auto *string = SGF.B.createConstStringLiteral(E, Str, constInstEncoding);
ManagedValue Elts[] = {ManagedValue::forUnmanaged(string)};
TupleTypeElt TypeElts[] = {Elts[0].getType().getSwiftRValueType()};
CanType ty =
TupleType::get(TypeElts, SGF.getASTContext())->getCanonicalType();
return RValue::withPreExplodedElements(Elts, ty);
}

// The string literal provides the data.
StringLiteralInst *string = SGF.B.createStringLiteral(E, Str, instEncoding);
auto *string = SGF.B.createStringLiteral(E, Str, instEncoding);

// The length is lowered as an integer_literal.
auto WordTy = SILType::getBuiltinWordType(SGF.getASTContext());
Expand Down
43 changes: 37 additions & 6 deletions lib/Sema/CSApply.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1958,18 +1958,49 @@ namespace {
builtinProtocol = tc.getProtocol(
expr->getLoc(),
KnownProtocolKind::ExpressibleByBuiltinUTF16StringLiteral);
auto *builtinConstUTF16StringProtocol = tc.getProtocol(
expr->getLoc(),
KnownProtocolKind::ExpressibleByBuiltinConstUTF16StringLiteral);
auto *builtinConstStringProtocol = tc.getProtocol(
expr->getLoc(),
KnownProtocolKind::ExpressibleByBuiltinConstStringLiteral);

// First try the constant string protocols.
if (!forceASCII &&
tc.conformsToProtocol(type, builtinProtocol, cs.DC,
ConformanceCheckFlags::InExpression)) {
builtinLiteralFuncName
= DeclName(tc.Context, tc.Context.Id_init,
{ tc.Context.Id_builtinUTF16StringLiteral,
tc.Context.getIdentifier("utf16CodeUnitCount") });
(tc.conformsToProtocol(type, builtinConstUTF16StringProtocol, cs.DC,
ConformanceCheckFlags::InExpression))) {
builtinProtocol = builtinConstUTF16StringProtocol;
builtinLiteralFuncName =
DeclName(tc.Context, tc.Context.Id_init,
{tc.Context.Id_builtinConstUTF16StringLiteral});

if (stringLiteral)
stringLiteral->setEncoding(StringLiteralExpr::UTF16ConstString);
else
magicLiteral->setStringEncoding(StringLiteralExpr::UTF16);
} else if (!forceASCII && (tc.conformsToProtocol(
type, builtinProtocol, cs.DC,
ConformanceCheckFlags::InExpression))) {
builtinLiteralFuncName =
DeclName(tc.Context, tc.Context.Id_init,
{tc.Context.Id_builtinUTF16StringLiteral,
tc.Context.getIdentifier("utf16CodeUnitCount")});

if (stringLiteral)
stringLiteral->setEncoding(StringLiteralExpr::UTF16);
else
magicLiteral->setStringEncoding(StringLiteralExpr::UTF16);
} else if (tc.conformsToProtocol(type, builtinConstStringProtocol,
cs.DC,
ConformanceCheckFlags::InExpression)) {
builtinProtocol = builtinConstStringProtocol;
builtinLiteralFuncName =
DeclName(tc.Context, tc.Context.Id_init,
{tc.Context.Id_builtinConstStringLiteral});
if (stringLiteral)
stringLiteral->setEncoding(StringLiteralExpr::UTF8ConstString);
else
magicLiteral->setStringEncoding(StringLiteralExpr::UTF8);
} else {
// Otherwise, fall back to UTF-8.
builtinProtocol = tc.getProtocol(
Expand Down
12 changes: 12 additions & 0 deletions stdlib/public/core/CompilerProtocols.swift
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,18 @@ public protocol _ExpressibleByBuiltinUTF16StringLiteral
utf16CodeUnitCount: Builtin.Word)
}

/// A type that can be initialized from a constant string literal supplied by
/// the compiler as a single raw pointer.
///
/// This is a compiler-support protocol (note the leading underscore) and
/// refines `_ExpressibleByBuiltinExtendedGraphemeClusterLiteral`. Unlike the
/// non-constant builtin string literal initializers, the requirement below
/// takes no separate code-unit count or ASCII flag.
///
/// NOTE(review): the pointer presumably refers to a compiler-emitted constant
/// string object (the result of the `const_string_literal utf8` SIL
/// instruction) -- confirm the exact layout against the SIL documentation.
public protocol _ExpressibleByBuiltinConstStringLiteral
: _ExpressibleByBuiltinExtendedGraphemeClusterLiteral {

/// Creates an instance from the constant string literal referenced by
/// `constantString`.
init(_builtinConstStringLiteral constantString: Builtin.RawPointer)
}

/// A type that can be initialized from a constant UTF-16 string literal
/// supplied by the compiler as a single raw pointer.
///
/// This is a compiler-support protocol (note the leading underscore) and
/// refines `_ExpressibleByBuiltinConstStringLiteral`, so a conforming type
/// must also provide `init(_builtinConstStringLiteral:)`.
///
/// NOTE(review): the pointer presumably refers to a compiler-emitted constant
/// string object (the result of the `const_string_literal utf16` SIL
/// instruction) -- confirm the exact layout against the SIL documentation.
public protocol _ExpressibleByBuiltinConstUTF16StringLiteral
: _ExpressibleByBuiltinConstStringLiteral {

/// Creates an instance from the constant UTF-16 string literal referenced by
/// `constantUTF16String`.
init(_builtinConstUTF16StringLiteral constantUTF16String: Builtin.RawPointer)
}

/// A type that can be initialized with a string literal.
///
/// The `String` and `StaticString` types conform to the
Expand Down
31 changes: 31 additions & 0 deletions test/SILGen/expressions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -58,17 +58,48 @@ struct SillyUTF16String : _ExpressibleByBuiltinUTF16StringLiteral, ExpressibleBy
init(stringLiteral value: SillyUTF16String) { }
}

// Test fixture: a string-literal-convertible type that adopts the constant
// string builtin protocol. Every initializer body is empty; the argument
// labels and parameter types are what the conformances are matched against.
struct SillyConstUTF16String
  : _ExpressibleByBuiltinConstUTF16StringLiteral, ExpressibleByStringLiteral {

  // Unicode scalar literal initializers.
  init(_builtinUnicodeScalarLiteral scalar: Builtin.Int32) {}
  init(unicodeScalarLiteral scalar: SillyString) {}

  // Extended grapheme cluster literal initializers.
  init(_builtinExtendedGraphemeClusterLiteral bytes: Builtin.RawPointer,
       utf8CodeUnitCount: Builtin.Word,
       isASCII: Builtin.Int1) {}
  init(extendedGraphemeClusterLiteral cluster: SillyString) {}

  // Constant string literal initializers (plain and UTF-16 flavors).
  init(_builtinConstStringLiteral constant: Builtin.RawPointer) {}
  init(_builtinConstUTF16StringLiteral constant: Builtin.RawPointer) {}

  // String literal initializer.
  init(stringLiteral literal: SillyUTF16String) {}
}

// Declares one variable per literal kind under test. The FileCheck
// expectations that follow this function are matched against the emitted SIL
// in order, so keep the declarations in this order and do not change the
// literal text.
func literals() {
// Integer literal.
var a = 1
// Floating-point literal.
var b = 1.25
// Non-ASCII string literal; expected to lower to `string_literal utf16`.
var d = "foö"
// ASCII literal for SillyString; expected to lower to `string_literal utf8`.
var e:SillyString = "foo"

// ASCII literal for the const-string fixture; expected to lower to
// `const_string_literal utf8`.
var f:SillyConstUTF16String = "foobar"
// Non-ASCII literal for the const-string fixture; expected to lower to
// `const_string_literal utf16`.
var non_ascii:SillyConstUTF16String = "foobarö"
}
// CHECK-LABEL: sil hidden @_T011expressions8literalsyyF
// CHECK: integer_literal $Builtin.Int2048, 1
// CHECK: float_literal $Builtin.FPIEEE{{64|80}}, {{0x3FF4000000000000|0x3FFFA000000000000000}}
// CHECK: string_literal utf16 "foö"
// CHECK: string_literal utf8 "foo"
// CHECK: [[CONST_STRING_LIT:%.*]] = const_string_literal utf8 "foobar"
// CHECK: [[METATYPE:%.*]] = metatype $@thin SillyConstUTF16String.Type
// CHECK: [[FUN:%.*]] = function_ref @_T011expressions21SillyConstUTF16StringVACBp08_builtincE7Literal_tcfC : $@convention(method) (Builtin.RawPointer, @thin SillyConstUTF16String.Type) -> SillyConstUTF16String
// CHECK: apply [[FUN]]([[CONST_STRING_LIT]], [[METATYPE]]) : $@convention(method) (Builtin.RawPointer, @thin SillyConstUTF16String.Type) -> SillyConstUTF16String
// CHECK: [[CONST_UTF16STRING_LIT:%.*]] = const_string_literal utf16 "foobarö"
// CHECK: [[FUN:%.*]] = function_ref @_T011expressions21SillyConstUTF16StringVACBp08_builtincdE7Literal_tcfC : $@convention(method) (Builtin.RawPointer, @thin SillyConstUTF16String.Type) -> SillyConstUTF16String
// CHECK: apply [[FUN]]([[CONST_UTF16STRING_LIT]], {{.*}}) : $@convention(method) (Builtin.RawPointer, @thin SillyConstUTF16String.Type) -> SillyConstUTF16String

func bar(_ x: Int) {}
func bar(_ x: Int, _ y: Int) {}
Expand Down