Skip to content

Commit 1623f42

Browse files
authored
Merge pull request #18292 from mhong/master_bytes_string
2 parents 0556005 + 9cb7f49 commit 1623f42

File tree

12 files changed

+55
-6
lines changed

12 files changed

+55
-6
lines changed

include/swift/SIL/SILInstruction.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2891,6 +2891,7 @@ class StringLiteralInst final
28912891

28922892
public:
28932893
enum class Encoding {
2894+
Bytes,
28942895
UTF8,
28952896
UTF16,
28962897
/// UTF-8 encoding of an Objective-C selector.

lib/IRGen/GenConstant.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ llvm::Constant *irgen::emitConstantFP(IRGenModule &IGM, FloatLiteralInst *FLI) {
5454
llvm::Constant *irgen::emitAddrOfConstantString(IRGenModule &IGM,
5555
StringLiteralInst *SLI) {
5656
switch (SLI->getEncoding()) {
57+
case StringLiteralInst::Encoding::Bytes:
5758
case StringLiteralInst::Encoding::UTF8:
5859
return IGM.getAddrOfGlobalString(SLI->getValue());
5960

lib/ParseSIL/ParseSIL.cpp

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2354,6 +2354,8 @@ bool SILParser::parseSILInstruction(SILBuilder &B) {
23542354
encoding = StringLiteralInst::Encoding::UTF16;
23552355
} else if (P.Tok.getText() == "objc_selector") {
23562356
encoding = StringLiteralInst::Encoding::ObjCSelector;
2357+
} else if (P.Tok.getText() == "bytes") {
2358+
encoding = StringLiteralInst::Encoding::Bytes;
23572359
} else {
23582360
P.diagnose(P.Tok, diag::sil_string_invalid_encoding, P.Tok.getText());
23592361
return true;
@@ -2368,12 +2370,37 @@ bool SILParser::parseSILInstruction(SILBuilder &B) {
23682370
// Drop the double quotes.
23692371
StringRef rawString = P.Tok.getText().drop_front().drop_back();
23702372

2371-
// Ask the lexer to interpret the entire string as a literal segment.
2372-
SmallVector<char, 128> stringBuffer;
2373-
StringRef string = P.L->getEncodedStringSegment(rawString, stringBuffer);
23742373
P.consumeToken(tok::string_literal);
23752374
if (parseSILDebugLocation(InstLoc, B))
23762375
return true;
2376+
2377+
// Ask the lexer to interpret the entire string as a literal segment.
2378+
SmallVector<char, 128> stringBuffer;
2379+
2380+
if (encoding == StringLiteralInst::Encoding::Bytes) {
2381+
// Decode hex bytes.
2382+
if (rawString.size() & 1) {
2383+
P.diagnose(P.Tok, diag::expected_tok_in_sil_instr,
2384+
"even number of hex bytes");
2385+
return true;
2386+
}
2387+
while (!rawString.empty()) {
2388+
unsigned byte1 = llvm::hexDigitValue(rawString[0]);
2389+
unsigned byte2 = llvm::hexDigitValue(rawString[1]);
2390+
if (byte1 == -1U || byte2 == -1U) {
2391+
P.diagnose(P.Tok, diag::expected_tok_in_sil_instr,
2392+
"hex bytes should contain 0-9, a-f, A-F only");
2393+
return true;
2394+
}
2395+
stringBuffer.push_back((unsigned char)(byte1 << 4) | byte2);
2396+
rawString = rawString.drop_front(2);
2397+
}
2398+
2399+
ResultVal = B.createStringLiteral(InstLoc, stringBuffer, encoding);
2400+
break;
2401+
}
2402+
2403+
StringRef string = P.L->getEncodedStringSegment(rawString, stringBuffer);
23772404
ResultVal = B.createStringLiteral(InstLoc, string, encoding);
23782405
break;
23792406
}

lib/SIL/SILGlobalVariable.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ bool SILGlobalVariable::isValidStaticInitializerInst(const SILInstruction *I,
106106
}
107107
case SILInstructionKind::StringLiteralInst:
108108
switch (cast<StringLiteralInst>(I)->getEncoding()) {
109+
case StringLiteralInst::Encoding::Bytes:
109110
case StringLiteralInst::Encoding::UTF8:
110111
case StringLiteralInst::Encoding::UTF16:
111112
return true;

lib/SIL/SILPrinter.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1184,6 +1184,7 @@ class SILPrinter : public SILInstructionVisitor<SILPrinter> {
11841184
}
11851185
static StringRef getStringEncodingName(StringLiteralInst::Encoding kind) {
11861186
switch (kind) {
1187+
case StringLiteralInst::Encoding::Bytes: return "bytes ";
11871188
case StringLiteralInst::Encoding::UTF8: return "utf8 ";
11881189
case StringLiteralInst::Encoding::UTF16: return "utf16 ";
11891190
case StringLiteralInst::Encoding::ObjCSelector: return "objc_selector ";
@@ -1192,8 +1193,17 @@ class SILPrinter : public SILInstructionVisitor<SILPrinter> {
11921193
}
11931194

11941195
void visitStringLiteralInst(StringLiteralInst *SLI) {
1195-
*this << getStringEncodingName(SLI->getEncoding())
1196-
<< QuotedString(SLI->getValue());
1196+
*this << getStringEncodingName(SLI->getEncoding());
1197+
1198+
if (SLI->getEncoding() != StringLiteralInst::Encoding::Bytes) {
1199+
// FIXME: this isn't correct: this doesn't properly handle translating
1200+
// UTF16 into UTF8, and the SIL parser always parses as UTF8.
1201+
*this << QuotedString(SLI->getValue());
1202+
return;
1203+
}
1204+
1205+
// "Bytes" are always output in a hexadecimal form.
1206+
*this << '"' << llvm::toHex(SLI->getValue()) << '"';
11971207
}
11981208

11991209
static StringRef

lib/SILGen/SILGenApply.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1522,8 +1522,9 @@ static RValue emitStringLiteral(SILGenFunction &SGF, Expr *E, StringRef Str,
15221522
TypeElts = TypeEltsArray;
15231523
break;
15241524

1525+
case StringLiteralInst::Encoding::Bytes:
15251526
case StringLiteralInst::Encoding::ObjCSelector:
1526-
llvm_unreachable("Objective-C selectors cannot be formed here");
1527+
llvm_unreachable("these cannot be formed here");
15271528
}
15281529

15291530
CanType ty =

lib/SILOptimizer/Utils/SpecializationMangler.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ FunctionSignatureSpecializationMangler::mangleConstantProp(LiteralInst *LI) {
236236

237237
ArgOpBuffer << 's';
238238
switch (SLI->getEncoding()) {
239+
case StringLiteralInst::Encoding::Bytes: ArgOpBuffer << 'B'; break;
239240
case StringLiteralInst::Encoding::UTF8: ArgOpBuffer << 'b'; break;
240241
case StringLiteralInst::Encoding::UTF16: ArgOpBuffer << 'w'; break;
241242
case StringLiteralInst::Encoding::ObjCSelector: ArgOpBuffer << 'c'; break;

lib/Serialization/DeserializeSIL.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ STATISTIC(NumDeserializedFunc, "Number of deserialized SIL functions");
5050
static Optional<StringLiteralInst::Encoding>
5151
fromStableStringEncoding(unsigned value) {
5252
switch (value) {
53+
case SIL_BYTES: return StringLiteralInst::Encoding::Bytes;
5354
case SIL_UTF8: return StringLiteralInst::Encoding::UTF8;
5455
case SIL_UTF16: return StringLiteralInst::Encoding::UTF16;
5556
case SIL_OBJC_SELECTOR: return StringLiteralInst::Encoding::ObjCSelector;

lib/Serialization/SILFormat.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ enum SILStringEncoding : uint8_t {
3333
SIL_UTF8,
3434
SIL_UTF16,
3535
SIL_OBJC_SELECTOR,
36+
SIL_BYTES
3637
};
3738

3839
enum SILLinkageEncoding : uint8_t {

lib/Serialization/SerializeSIL.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ using llvm::BCBlockRAII;
4444

4545
static unsigned toStableStringEncoding(StringLiteralInst::Encoding encoding) {
4646
switch (encoding) {
47+
case StringLiteralInst::Encoding::Bytes: return SIL_BYTES;
4748
case StringLiteralInst::Encoding::UTF8: return SIL_UTF8;
4849
case StringLiteralInst::Encoding::UTF16: return SIL_UTF16;
4950
case StringLiteralInst::Encoding::ObjCSelector: return SIL_OBJC_SELECTOR;

test/SIL/Parser/string_literal.sil

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ bb0:
88
%2 = const_string_literal utf8 "\u{0B}"
99
// CHECK: const_string_literal utf16 "\u{0B}"
1010
%3 = const_string_literal utf16 "\u{0B}"
11+
// CHECK: string_literal bytes "ABCD"
12+
%4 = string_literal bytes "ABCD"
1113
%6 = tuple () // user: %7
1214
return %6 : $()
1315
}

test/SIL/Serialization/literals.sil

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ bb0:
1414
%2 = const_string_literal utf8 "\u{0B}"
1515
// CHECK: const_string_literal utf16 "\u{0B}"
1616
%3 = const_string_literal utf16 "\u{0B}"
17+
// CHECK: string_literal bytes "ABCD"
18+
%4 = string_literal bytes "ABCD"
1719
%6 = tuple ()
1820
return %6 : $()
1921
}

0 commit comments

Comments
 (0)