Skip to content

Commit 74f2878

Browse files
authored
Punycode error handling (#31977)
* Fix NULL deref for invalid mangled input The `Qo` operator expects to consume a type name and a list (terminated with a `y` empty list marker) from the stack. After popping the list, it doesn't check whether the stack is empty, so `$syQo` crashes (it pops down to the `y` then tries to pop again). This PR just adds the obvious check to guard against this. Resolves rdar://63128307 * Audit Punycode implementation against RFC3492 Fuzz tests have revealed some weaknesses in the error handling of our Punycode implementation used to mangle Unicode identifiers. A more detailed comparison of the implementation against the algorithm detailed in RFC3492 showed that most of the arithmetic overflow checks were omitted and the ones that were present were handled as success instead of failure. A typical example: RFC3492 algorithm: ``` let w = w * (base - t), fail on overflow ``` Original implementation: ``` w = w * (base - t); ``` Corrected implementation: ``` if (w > std::numeric_limits<int>::max() / (base - t)) return false; w = w * (base - t); ``` Resolves rdar://63392615
1 parent 76ed2cd commit 74f2878

File tree

2 files changed

+38
-15
lines changed

2 files changed

+38
-15
lines changed

lib/Demangling/Punycode.cpp

Lines changed: 37 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "swift/Demangling/Punycode.h"
1414
#include "swift/Demangling/ManglingUtils.h"
15+
#include <limits>
1516
#include <vector>
1617
#include <cstdint>
1718

@@ -49,7 +50,7 @@ static int digit_index(char value) {
4950
static bool isValidUnicodeScalar(uint32_t S) {
5051
// Also accept the range of 0xD800 - 0xD880, which is used for non-symbol
5152
// ASCII characters.
52-
return (S < 0xD880) || (S >= 0xE000 && S <= 0x1FFFFF);
53+
return (S < 0xD880) || (S >= 0xE000 && S <= 0x10FFFF);
5354
}
5455

5556
// Section 6.1: Bias adaptation function
@@ -88,7 +89,7 @@ bool Punycode::decodePunycode(StringRef InputPunycode,
8889
for (char c : InputPunycode.slice(0, lastDelimiter)) {
8990
// fail on any non-basic code point
9091
if (static_cast<unsigned char>(c) > 0x7f)
91-
return true;
92+
return false;
9293
OutCodePoints.push_back(c);
9394
}
9495
// if more than zero code points were consumed then consume one more
@@ -103,28 +104,37 @@ bool Punycode::decodePunycode(StringRef InputPunycode,
103104
for (int k = base; ; k += base) {
104105
// consume a code point, or fail if there was none to consume
105106
if (InputPunycode.empty())
106-
return true;
107+
return false;
107108
char codePoint = InputPunycode.front();
108109
InputPunycode = InputPunycode.slice(1, InputPunycode.size());
109110
// let digit = the code point's digit-value, fail if it has none
110111
int digit = digit_index(codePoint);
111112
if (digit < 0)
112-
return true;
113+
return false;
113114

115+
// Fail if i + (digit * w) would overflow
116+
if (digit > (std::numeric_limits<int>::max() - i) / w)
117+
return false;
114118
i = i + digit * w;
115119
int t = k <= bias ? tmin
116120
: k >= bias + tmax ? tmax
117121
: k - bias;
118122
if (digit < t)
119123
break;
124+
// Fail if w * (base - t) would overflow
125+
if (w > std::numeric_limits<int>::max() / (base - t))
126+
return false;
120127
w = w * (base - t);
121128
}
122129
bias = adapt(i - oldi, OutCodePoints.size() + 1, oldi == 0);
130+
// Fail if n + i / (OutCodePoints.size() + 1) would overflow
131+
if (i / (OutCodePoints.size() + 1) > std::numeric_limits<int>::max() - n)
132+
return false;
123133
n = n + i / (OutCodePoints.size() + 1);
124134
i = i % (OutCodePoints.size() + 1);
125135
// if n is a basic code point then fail
126136
if (n < 0x80)
127-
return true;
137+
return false;
128138
// insert n into output at position i
129139
OutCodePoints.insert(OutCodePoints.begin() + i, n);
130140
++i;
@@ -168,11 +178,17 @@ bool Punycode::encodePunycode(const std::vector<uint32_t> &InputCodePoints,
168178
if (codePoint >= n && codePoint < m)
169179
m = codePoint;
170180
}
171-
181+
182+
if ((m - n) > (std::numeric_limits<int>::max() - delta) / (h + 1))
183+
return false;
172184
delta = delta + (m - n) * (h + 1);
173185
n = m;
174186
for (auto c : InputCodePoints) {
175-
if (c < n) ++delta;
187+
if (c < n) {
188+
if (delta == std::numeric_limits<int>::max())
189+
return false;
190+
++delta;
191+
}
176192
if (c == n) {
177193
int q = delta;
178194
for (int k = base; ; k += base) {
@@ -285,11 +301,12 @@ static bool convertUTF8toUTF32(llvm::StringRef InputUTF8,
285301
auto end = InputUTF8.end();
286302
while (ptr < end) {
287303
uint8_t first = *ptr++;
304+
uint32_t code_point = 0;
288305
if (first < 0x80) {
289306
if (Mangle::isValidSymbolChar(first) || !mapNonSymbolChars) {
290-
OutUTF32.push_back(first);
307+
code_point = first;
291308
} else {
292-
OutUTF32.push_back((uint32_t)first + 0xD800);
309+
code_point = (uint32_t)first + 0xD800;
293310
}
294311
} else if (first < 0xC0) {
295312
// Invalid continuation byte.
@@ -301,7 +318,7 @@ static bool convertUTF8toUTF32(llvm::StringRef InputUTF8,
301318
uint8_t second = *ptr++;
302319
if (!isContinuationByte(second))
303320
return false;
304-
OutUTF32.push_back(((first & 0x1F) << 6) | (second & 0x3F));
321+
code_point = ((first & 0x1F) << 6) | (second & 0x3F);
305322
} else if (first < 0xF0) {
306323
// Three-byte sequence.
307324
if (end - ptr < 2)
@@ -310,8 +327,9 @@ static bool convertUTF8toUTF32(llvm::StringRef InputUTF8,
310327
uint8_t third = *ptr++;
311328
if (!isContinuationByte(second) || !isContinuationByte(third))
312329
return false;
313-
OutUTF32.push_back(((first & 0xF) << 12) | ((second & 0x3F) << 6)
314-
| ( third & 0x3F ));
330+
code_point = ((first & 0xF) << 12)
331+
| ((second & 0x3F) << 6)
332+
| ( third & 0x3F );
315333
} else if (first < 0xF8) {
316334
// Four-byte sequence.
317335
if (end - ptr < 3)
@@ -322,13 +340,17 @@ static bool convertUTF8toUTF32(llvm::StringRef InputUTF8,
322340
if (!isContinuationByte(second) || !isContinuationByte(third)
323341
|| !isContinuationByte(fourth))
324342
return false;
325-
OutUTF32.push_back(((first & 0x7) << 18) | ((second & 0x3F) << 12)
326-
| ((third & 0x3F) << 6)
327-
| ( fourth & 0x3F ));
343+
code_point = ((first & 0x7) << 18)
344+
| ((second & 0x3F) << 12)
345+
| ((third & 0x3F) << 6)
346+
| ( fourth & 0x3F );
328347
} else {
329348
// Unused sequence length.
330349
return false;
331350
}
351+
if (!isValidUnicodeScalar(code_point))
352+
return false;
353+
OutUTF32.push_back(code_point);
332354
}
333355
return true;
334356
}

test/Demangle/Inputs/manglings.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,4 +360,5 @@ $sxq_Ilgnr_D ---> @differentiable(linear) @callee_guaranteed (@in_guaranteed A)
360360
$sS3fIedgyywd_D ---> @escaping @differentiable @callee_guaranteed (@unowned Swift.Float, @unowned @noDerivative Swift.Float) -> (@unowned Swift.Float)
361361
$sS5fIedtyyywddw_D ---> @escaping @differentiable @convention(thin) (@unowned Swift.Float, @unowned Swift.Float, @unowned @noDerivative Swift.Float) -> (@unowned Swift.Float, @unowned @noDerivative Swift.Float)
362362
$syQo ---> $syQo
363+
$s0059xxxxxxxxxxxxxxx_ttttttttBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBee -> $s0059xxxxxxxxxxxxxxx_ttttttttBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBee
363364
$sx1td_t ---> (t: A...)

0 commit comments

Comments
 (0)