|
| 1 | +//===- Base64.cpp ---------------------------------------------------------===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | + |
| 9 | +#define INVALID_BASE64_BYTE 64 |
| 10 | +#include "llvm/Support/Base64.h" |
| 11 | + |
/// Translate one character of the Base64 alphabet into its 6-bit value.
///
/// 'A'-'Z' yield 0-25, 'a'-'z' yield 26-51, '0'-'9' yield 52-61, '+' yields 62
/// and '/' yields 63. The padding character '=' yields 0. Any other byte value
/// yields INVALID_BASE64_BYTE.
static char decodeBase64Byte(uint8_t Ch) {
  // Upper-case letters occupy the first 26 slots of the alphabet.
  if (Ch >= 'A' && Ch <= 'Z')
    return static_cast<char>(Ch - 'A');
  // Lower-case letters follow directly after the upper-case ones.
  if (Ch >= 'a' && Ch <= 'z')
    return static_cast<char>(26 + (Ch - 'a'));
  // Digits come after both letter ranges.
  if (Ch >= '0' && Ch <= '9')
    return static_cast<char>(52 + (Ch - '0'));
  switch (Ch) {
  case '+':
    return 62;
  case '/':
    return 63;
  case '=':
    // Padding contributes zero bits to the decoded value.
    return 0;
  default:
    return INVALID_BASE64_BYTE;
  }
}
| 36 | + |
| 37 | +llvm::Error llvm::decodeBase64(llvm::StringRef Input, |
| 38 | + std::vector<char> &Output) { |
| 39 | + constexpr char Base64InvalidByte = INVALID_BASE64_BYTE; |
| 40 | + // Invalid table value with short name to fit in the table init below. The |
| 41 | + // invalid value is 64 since valid base64 values are 0 - 63. |
| 42 | + Output.clear(); |
| 43 | + const uint64_t InputLength = Input.size(); |
| 44 | + if (InputLength == 0) |
| 45 | + return Error::success(); |
| 46 | + // Make sure we have a valid input string length which must be a multiple |
| 47 | + // of 4. |
| 48 | + if ((InputLength % 4) != 0) |
| 49 | + return createStringError(std::errc::illegal_byte_sequence, |
| 50 | + "Base64 encoded strings must be a multiple of 4 " |
| 51 | + "bytes in length"); |
| 52 | + const uint64_t FirstValidEqualIdx = InputLength - 2; |
| 53 | + char Hex64Bytes[4]; |
| 54 | + for (uint64_t Idx = 0; Idx < InputLength; Idx += 4) { |
| 55 | + for (uint64_t ByteOffset = 0; ByteOffset < 4; ++ByteOffset) { |
| 56 | + const uint64_t ByteIdx = Idx + ByteOffset; |
| 57 | + const char Byte = Input[ByteIdx]; |
| 58 | + const char DecodedByte = decodeBase64Byte(Byte); |
| 59 | + bool Illegal = DecodedByte == Base64InvalidByte; |
| 60 | + if (!Illegal && Byte == '=') { |
| 61 | + if (ByteIdx < FirstValidEqualIdx) { |
| 62 | + // We have an '=' in the middle of the string which is invalid, only |
| 63 | + // the last two characters can be '=' characters. |
| 64 | + Illegal = true; |
| 65 | + } else if (ByteIdx == FirstValidEqualIdx && Input[ByteIdx + 1] != '=') { |
| 66 | + // We have an equal second to last from the end and the last character |
| 67 | + // is not also an equal, so the '=' character is invalid |
| 68 | + Illegal = true; |
| 69 | + } |
| 70 | + } |
| 71 | + if (Illegal) |
| 72 | + return createStringError( |
| 73 | + std::errc::illegal_byte_sequence, |
| 74 | + "Invalid Base64 character %#2.2x at index %" PRIu64, Byte, ByteIdx); |
| 75 | + Hex64Bytes[ByteOffset] = DecodedByte; |
| 76 | + } |
| 77 | + // Now we have 6 bits of 3 bytes in value in each of the Hex64Bytes bytes. |
| 78 | + // Extract the right bytes into the Output buffer. |
| 79 | + Output.push_back((Hex64Bytes[0] << 2) + ((Hex64Bytes[1] >> 4) & 0x03)); |
| 80 | + Output.push_back((Hex64Bytes[1] << 4) + ((Hex64Bytes[2] >> 2) & 0x0f)); |
| 81 | + Output.push_back((Hex64Bytes[2] << 6) + (Hex64Bytes[3] & 0x3f)); |
| 82 | + } |
| 83 | + // If we had valid trailing '=' characters strip the right number of bytes |
| 84 | + // from the end of the output buffer. We already know that the Input length |
| 85 | + // it a multiple of 4 and is not zero, so direct character access is safe. |
| 86 | + if (Input.back() == '=') { |
| 87 | + Output.pop_back(); |
| 88 | + if (Input[InputLength - 2] == '=') |
| 89 | + Output.pop_back(); |
| 90 | + } |
| 91 | + return Error::success(); |
| 92 | +} |
0 commit comments