Skip to content

Commit ea9ac35

Browse files
committed
An upcoming patch to LLDB will require the ability to decode base64. This patch adds support for decoding base64 and adds tests.
Resubmission of https://reviews.llvm.org/D126254, in which decodeBase64Byte is no longer a lambda but a static function. Some compilers have different errors or warnings with respect to what needs to be captured and what doesn't (see comments in https://reviews.llvm.org/D126254 for details). Differential Revision: https://reviews.llvm.org/D128560
1 parent 1877d76 commit ea9ac35

File tree

4 files changed

+156
-7
lines changed

4 files changed

+156
-7
lines changed

llvm/include/llvm/Support/Base64.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313
#ifndef LLVM_SUPPORT_BASE64_H
1414
#define LLVM_SUPPORT_BASE64_H
1515

16+
#include "llvm/Support/Error.h"
1617
#include <cstdint>
1718
#include <string>
19+
#include <vector>
1820

1921
namespace llvm {
2022

@@ -52,6 +54,8 @@ template <class InputBytes> std::string encodeBase64(InputBytes const &Bytes) {
5254
return Buffer;
5355
}
5456

57+
/// Decode the Base64 text in \p Input and store the resulting bytes in
/// \p Output. \p Output is cleared before decoding starts.
///
/// \returns Error::success() if \p Input is empty or is valid Base64 whose
/// length is a multiple of 4 (with at most two trailing '=' padding
/// characters). Otherwise returns an error describing either the bad length
/// or the first invalid character and its index.
llvm::Error decodeBase64(llvm::StringRef Input, std::vector<char> &Output);
58+
5559
} // end namespace llvm
5660

5761
#endif

llvm/lib/Support/Base64.cpp

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
//===- Base64.cpp ---------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#define INVALID_BASE64_BYTE 64
10+
#include "llvm/Support/Base64.h"
11+
12+
static char decodeBase64Byte(uint8_t Ch) {
13+
constexpr char Inv = INVALID_BASE64_BYTE;
14+
static const char DecodeTable[] = {
15+
Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
16+
Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
17+
Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
18+
Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
19+
Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
20+
Inv, Inv, Inv, 62, Inv, Inv, Inv, 63, // ...+.../
21+
52, 53, 54, 55, 56, 57, 58, 59, // 01234567
22+
60, 61, Inv, Inv, Inv, 0, Inv, Inv, // 89...=..
23+
Inv, 0, 1, 2, 3, 4, 5, 6, // .ABCDEFG
24+
7, 8, 9, 10, 11, 12, 13, 14, // HIJKLMNO
25+
15, 16, 17, 18, 19, 20, 21, 22, // PQRSTUVW
26+
23, 24, 25, Inv, Inv, Inv, Inv, Inv, // XYZ.....
27+
Inv, 26, 27, 28, 29, 30, 31, 32, // .abcdefg
28+
33, 34, 35, 36, 37, 38, 39, 40, // hijklmno
29+
41, 42, 43, 44, 45, 46, 47, 48, // pqrstuvw
30+
49, 50, 51 // xyz.....
31+
};
32+
if (Ch >= sizeof(DecodeTable))
33+
return Inv;
34+
return DecodeTable[Ch];
35+
}
36+
37+
/// Decode Base64 text into raw bytes.
///
/// \param Input  The Base64 text. Its length must be a multiple of 4. Only the
///               last two characters may be '=' padding, and if the
///               second-to-last is '=' the last must be '=' as well.
/// \param Output Receives the decoded bytes. It is cleared on entry. If an
///               error is returned it may contain the bytes decoded from the
///               4-character groups that preceded the offending one.
/// \returns Error::success() for empty or well-formed input; otherwise a
///          string error (std::errc::illegal_byte_sequence) reporting either
///          the bad length or the first invalid character and its index.
llvm::Error llvm::decodeBase64(llvm::StringRef Input,
                               std::vector<char> &Output) {
  // Sentinel returned by decodeBase64Byte() for characters outside the Base64
  // alphabet. The invalid value is 64 since valid base64 values are 0 - 63.
  constexpr char Base64InvalidByte = INVALID_BASE64_BYTE;
  Output.clear();
  const uint64_t InputLength = Input.size();
  if (InputLength == 0)
    return Error::success();
  // Make sure we have a valid input string length which must be a multiple
  // of 4.
  if ((InputLength % 4) != 0)
    return createStringError(std::errc::illegal_byte_sequence,
                             "Base64 encoded strings must be a multiple of 4 "
                             "bytes in length");
  // Every group of 4 input characters yields 3 bytes (padding bytes are
  // stripped afterwards), so allocate the whole buffer once.
  Output.reserve((Input.size() / 4) * 3);
  const uint64_t FirstValidEqualIdx = InputLength - 2;
  char Hex64Bytes[4];
  for (uint64_t Idx = 0; Idx < InputLength; Idx += 4) {
    for (uint64_t ByteOffset = 0; ByteOffset < 4; ++ByteOffset) {
      const uint64_t ByteIdx = Idx + ByteOffset;
      const char Byte = Input[ByteIdx];
      const char DecodedByte = decodeBase64Byte(Byte);
      bool Illegal = DecodedByte == Base64InvalidByte;
      if (!Illegal && Byte == '=') {
        if (ByteIdx < FirstValidEqualIdx) {
          // We have an '=' in the middle of the string which is invalid, only
          // the last two characters can be '=' characters.
          Illegal = true;
        } else if (ByteIdx == FirstValidEqualIdx && Input[ByteIdx + 1] != '=') {
          // We have an equal second to last from the end and the last character
          // is not also an equal, so the '=' character is invalid
          Illegal = true;
        }
      }
      if (Illegal)
        // Format the byte as unsigned: where plain char is signed, a byte
        // >= 0x80 would otherwise be sign-extended and print as 0xffffffXX.
        return createStringError(
            std::errc::illegal_byte_sequence,
            "Invalid Base64 character %#2.2x at index %" PRIu64,
            static_cast<uint8_t>(Byte), ByteIdx);
      Hex64Bytes[ByteOffset] = DecodedByte;
    }
    // Now we have 6 bits of 3 bytes in value in each of the Hex64Bytes bytes.
    // Extract the right bytes into the Output buffer.
    Output.push_back((Hex64Bytes[0] << 2) + ((Hex64Bytes[1] >> 4) & 0x03));
    Output.push_back((Hex64Bytes[1] << 4) + ((Hex64Bytes[2] >> 2) & 0x0f));
    Output.push_back((Hex64Bytes[2] << 6) + (Hex64Bytes[3] & 0x3f));
  }
  // If we had valid trailing '=' characters strip the right number of bytes
  // from the end of the output buffer. We already know that the Input length
  // is a multiple of 4 and is not zero, so direct character access is safe.
  if (Input.back() == '=') {
    Output.pop_back();
    if (Input[InputLength - 2] == '=')
      Output.pop_back();
  }
  return Error::success();
}

llvm/lib/Support/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ add_llvm_component_library(LLVMSupport
125125
ARMWinEH.cpp
126126
Allocator.cpp
127127
AutoConvert.cpp
128+
Base64.cpp
128129
BinaryStreamError.cpp
129130
BinaryStreamReader.cpp
130131
BinaryStreamRef.cpp

llvm/unittests/Support/Base64Test.cpp

Lines changed: 59 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "llvm/Support/Base64.h"
1515
#include "llvm/ADT/StringRef.h"
16+
#include "llvm/Testing/Support/Error.h"
1617
#include "gtest/gtest.h"
1718

1819
using namespace llvm;
@@ -24,6 +25,28 @@ void TestBase64(StringRef Input, StringRef Final) {
2425
EXPECT_EQ(Res, Final);
2526
}
2627

28+
/// Decode \p Input and check the outcome.
///
/// When \p ExpectedErrorMessage is non-empty, decoding must fail with exactly
/// that message and \p Expected is ignored. Otherwise decoding must succeed
/// and produce the bytes of \p Expected.
void TestBase64Decode(StringRef Input, StringRef Expected,
                      StringRef ExpectedErrorMessage = {}) {
  std::vector<char> Decoded;

  // Failure case first: only the error message is checked.
  if (!ExpectedErrorMessage.empty()) {
    ASSERT_THAT_ERROR(decodeBase64(Input, Decoded),
                      FailedWithMessage(ExpectedErrorMessage));
    return;
  }

  ASSERT_THAT_ERROR(decodeBase64(Input, Decoded), Succeeded());
  // Compare as byte ranges so embedded NULs and high-bit bytes are handled.
  const llvm::ArrayRef<char> Want(Expected.data(), Expected.size());
  EXPECT_EQ(llvm::ArrayRef<char>(Decoded), Want);
}
40+
41+
// Eight bytes that include NULs, a control character (0x08) and high-bit
// values (0xff, 0xee). The encode test expects "AAAARgAI/+4=" for this data.
char NonPrintableVector[] = {0x00, 0x00, 0x00, 0x46,
42+
0x00, 0x08, (char)0xff, (char)0xee};
43+
44+
// The 44 ASCII bytes of "The quick brown fox jumps over 13 lazy dogs." with no
// terminating NUL; always used together with sizeof(LargeVector).
char LargeVector[] = {0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b,
45+
0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, 0x66, 0x6f,
46+
0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70, 0x73, 0x20, 0x6f,
47+
0x76, 0x65, 0x72, 0x20, 0x31, 0x33, 0x20, 0x6c, 0x61,
48+
0x7a, 0x79, 0x20, 0x64, 0x6f, 0x67, 0x73, 0x2e};
49+
2750
} // namespace
2851

2952
TEST(Base64Test, Base64) {
@@ -37,16 +60,45 @@ TEST(Base64Test, Base64) {
3760
TestBase64("foobar", "Zm9vYmFy");
3861

3962
// With non-printable values.
40-
char NonPrintableVector[] = {0x00, 0x00, 0x00, 0x46,
41-
0x00, 0x08, (char)0xff, (char)0xee};
4263
TestBase64({NonPrintableVector, sizeof(NonPrintableVector)}, "AAAARgAI/+4=");
4364

4465
// Large test case
45-
char LargeVector[] = {0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b,
46-
0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, 0x66, 0x6f,
47-
0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70, 0x73, 0x20, 0x6f,
48-
0x76, 0x65, 0x72, 0x20, 0x31, 0x33, 0x20, 0x6c, 0x61,
49-
0x7a, 0x79, 0x20, 0x64, 0x6f, 0x67, 0x73, 0x2e};
5066
TestBase64({LargeVector, sizeof(LargeVector)},
5167
"VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIDEzIGxhenkgZG9ncy4=");
5268
}
69+
70+
// Round-trips a set of payloads through encodeBase64()/decodeBase64() and then
// checks the error reported for several malformed inputs.
TEST(Base64Test, DecodeBase64) {
  // One of every possible byte value, so we know any byte can be encoded and
  // decoded.
  std::vector<char> EveryByte;
  for (int Value = INT8_MIN; Value <= INT8_MAX; ++Value)
    EveryByte.push_back(Value);

  const std::vector<llvm::StringRef> Payloads = {
      "",
      "f",
      "fo",
      "foo",
      "foob",
      "fooba",
      "foobar",
      llvm::StringRef(NonPrintableVector, sizeof(NonPrintableVector)),
      llvm::StringRef(LargeVector, sizeof(LargeVector)),
      llvm::StringRef(EveryByte.data(), EveryByte.size())};

  // Encoding is covered by the Base64Test::Base64 test, so it is trusted here
  // to produce the input for the decoder.
  for (llvm::StringRef Payload : Payloads) {
    const std::string Encoded = encodeBase64(Payload);
    TestBase64Decode(Encoded, Payload);
  }

  // Malformed inputs paired with the exact error message they must produce.
  struct ErrorInfo {
    llvm::StringRef Input;
    llvm::StringRef ErrorMessage;
  };
  const ErrorInfo BadInputs[] = {
      {"f", "Base64 encoded strings must be a multiple of 4 bytes in length"},
      {"=abc", "Invalid Base64 character 0x3d at index 0"},
      {"a=bc", "Invalid Base64 character 0x3d at index 1"},
      {"ab=c", "Invalid Base64 character 0x3d at index 2"},
      {"fun!", "Invalid Base64 character 0x21 at index 3"},
  };
  for (const ErrorInfo &Bad : BadInputs)
    TestBase64Decode(Bad.Input, "", Bad.ErrorMessage);
}

0 commit comments

Comments
 (0)