Skip to content

[libc] implement a64l #128758

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.stdbit.stdc_trailing_zeros_us

# stdlib.h entrypoints
libc.src.stdlib.a64l
libc.src.stdlib.abs
libc.src.stdlib.atof
libc.src.stdlib.atoi
Expand Down
6 changes: 6 additions & 0 deletions libc/include/stdlib.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ functions:
return_type: _Noreturn void
arguments:
- type: int
# a64l: listed under the posix standard; takes a const char * and returns long.
- name: a64l
standards:
- posix
return_type: long
arguments:
- type: const char *
- name: abort
standards:
- stdc
Expand Down
11 changes: 11 additions & 0 deletions libc/src/stdlib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,17 @@ add_entrypoint_object(
libc.src.__support.str_to_integer
)

# Entrypoint object for a64l, built from a64l.cpp with a64l.h as its header.
# The DEPENDS entries correspond to the ctype_utils and size_t headers that
# a64l.cpp includes.
add_entrypoint_object(
a64l
SRCS
a64l.cpp
HDRS
a64l.h
DEPENDS
libc.src.__support.ctype_utils
libc.hdr.types.size_t
)

add_entrypoint_object(
abs
SRCS
Expand Down
64 changes: 64 additions & 0 deletions libc/src/stdlib/a64l.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//===-- Implementation of a64l --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/stdlib/a64l.h"
#include "hdr/types/size_t.h"
#include "src/__support/common.h"
#include "src/__support/ctype_utils.h"
#include "src/__support/macros/config.h"

#include <stdint.h>

namespace LIBC_NAMESPACE_DECL {

// Maps one character of the a64l radix-64 alphabet to its digit value.
//
// This helper is kept local instead of living in ctype_utils because the digit
// ordering used by a64l is specific to this interface and differs from the more
// common base64 alphabets.
//
// Returns the digit value in [0, 63], or -1 when `ch` is not part of the
// alphabet.
constexpr static int32_t b64_char_to_int(char ch) {
  // Alphabet, as laid out by the standard:
  //   '.'        -> 0
  //   '/'        -> 1
  //   '0' - '9'  -> 2  - 11
  //   'A' - 'Z'  -> 12 - 37
  //   'a' - 'z'  -> 38 - 63
  switch (ch) {
  case '.':
    return 0;
  case '/':
    return 1;
  default:
    break;
  }

  // Everything that remains in the alphabet is a letter or a decimal digit.
  if (!internal::isalnum(ch))
    return -1;

  // Number of digit values taken by '.' and '/' ahead of '0'.
  constexpr int32_t PUNCTUATION_DIGITS = 2;
  // Lowercase letters are placed after the 26 uppercase ones.
  constexpr int32_t UPPERCASE_COUNT = 26;

  // b36_char_to_int is case insensitive, so the lowercase offset has to be
  // added back by hand.
  const int32_t lowercase_offset = internal::islower(ch) ? UPPERCASE_COUNT : 0;
  return internal::b36_char_to_int(ch) + PUNCTUATION_DIGITS + lowercase_offset;
}

// Converts a radix-64 ASCII string into a long.
//
// At most six characters of `s` are consumed. The first character is the least
// significant digit and each digit contributes six bits. Decoding stops at the
// terminating null byte or at the first character that is not part of the
// radix-64 alphabet (the standard leaves that case undefined; it is treated as
// end of string here). The decoded value occupies the low 32 bits and is sign
// extended to the width of long. An empty string yields 0.
LLVM_LIBC_FUNCTION(long, a64l, (const char *s)) {
  // the standard says to only use up to 6 characters.
  constexpr size_t MAX_LENGTH = 6;
  // 6 bits per digit since 2^6 = 64.
  constexpr size_t BITS_PER_DIGIT = 6;

  // Accumulate in an unsigned type. The sixth digit is shifted left by 30 bits,
  // so any digit value above 1 does not fit in a signed 32-bit integer; doing
  // that shift on int32_t is undefined behavior before C++20. Unsigned shifts
  // simply discard the bits that fall off the top, which is the truncation to
  // 32 bits that the standard asks for.
  uint32_t result = 0;

  for (size_t i = 0; i < MAX_LENGTH && s[i] != '\0'; ++i) {
    const int32_t cur_val = b64_char_to_int(s[i]);
    // The standard says what happens on an unspecified character is undefined,
    // here we treat it as the end of the string.
    if (cur_val == -1)
      break;

    // The first digit is the least significant, so each subsequent digit is
    // shifted further. Digits occupy disjoint 6-bit fields, so OR-ing them
    // together is equivalent to adding them.
    result |= static_cast<uint32_t>(cur_val) << (BITS_PER_DIGIT * i);
  }

  // standard says to sign extend from 32 bits: reinterpret the accumulated bits
  // as a signed 32-bit value first, then widen to long.
  return static_cast<long>(static_cast<int32_t>(result));
}

} // namespace LIBC_NAMESPACE_DECL
20 changes: 20 additions & 0 deletions libc/src/stdlib/a64l.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//===-- Implementation header for a64l --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STDLIB_A64L_H
#define LLVM_LIBC_SRC_STDLIB_A64L_H

#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Declaration of the a64l entrypoint; the definition lives in a64l.cpp.
// Takes a radix-64 encoded C string and returns the decoded value as a long.
long a64l(const char *s);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_STDLIB_A64L_H
10 changes: 10 additions & 0 deletions libc/test/src/stdlib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,16 @@ add_libc_test(
${strfrom_test_copts}
)

# Unit tests for a64l (a64l_test.cpp), registered in the libc-stdlib-tests
# suite and depending on the a64l entrypoint.
add_libc_test(
a64l_test
SUITE
libc-stdlib-tests
SRCS
a64l_test.cpp
DEPENDS
libc.src.stdlib.a64l
)

add_libc_test(
abs_test
SUITE
Expand Down
87 changes: 87 additions & 0 deletions libc/test/src/stdlib/a64l_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
//===-- Unittests for a64l ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/stdlib/a64l.h"
#include "test/UnitTest/Test.h"

// A string with no digits decodes to zero.
TEST(LlvmLibcA64lTest, EmptyString) {
  const long expected = 0;
  ASSERT_EQ(LIBC_NAMESPACE::a64l(""), expected);
}
// A full six-character string uses every digit position.
TEST(LlvmLibcA64lTest, FullString) {
  // "AbC12/" holds the digits 12, 39, 14, 3, 4, 1 (least significant first):
  //   12 + 39*64 + 14*64^2 + 3*64^3 + 4*64^4 + 1*64^5 = 1141696972
  const long expected = 1141696972L;
  ASSERT_EQ(LIBC_NAMESPACE::a64l("AbC12/"), expected);
}

// The a64l radix-64 alphabet, ordered so that the index of each character is
// the digit value it represents.
constexpr char B64_CHARS[64] = {
    // 0 - 1: punctuation
    '.', '/',
    // 2 - 11: decimal digits
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    // 12 - 37: uppercase letters
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    // 38 - 63: lowercase letters
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
};

// Every single-character string must decode to that character's digit value,
// i.e. its index in B64_CHARS.
TEST(LlvmLibcA64lTest, OneCharacter) {
  // One digit followed by the null terminator.
  char buffer[2] = {'\0', '\0'};

  for (size_t digit = 0; digit < 64; ++digit) {
    buffer[0] = B64_CHARS[digit];

    const long expected = static_cast<long>(digit);
    ASSERT_EQ(LIBC_NAMESPACE::a64l(buffer), expected);
  }
}

// Exhaustively checks every two-character string. The first character is the
// least significant digit, the second is worth 64 times as much.
TEST(LlvmLibcA64lTest, TwoCharacters) {
  // Two digits followed by the null terminator.
  char buffer[3] = {'\0', '\0', '\0'};

  for (size_t low_digit = 0; low_digit < 64; ++low_digit) {
    buffer[0] = B64_CHARS[low_digit];

    for (size_t high_digit = 0; high_digit < 64; ++high_digit) {
      buffer[1] = B64_CHARS[high_digit];

      // low_digit < 64, so OR-ing it below the shifted high digit equals
      // low_digit + high_digit * 64.
      const long expected = static_cast<long>((high_digit << 6) | low_digit);
      ASSERT_EQ(LIBC_NAMESPACE::a64l(buffer), expected);
    }
  }
}

// Fills the first five digit positions with the same character and checks the
// combined value. The sixth digit only contributes its two lowest bits to the
// 32-bit result, so it is deliberately left out of this test.
TEST(LlvmLibcA64lTest, FiveSameCharacters) {
  constexpr size_t DIGIT_COUNT = 5;

  // Five digits followed by the null terminator.
  char buffer[DIGIT_COUNT + 1] = {
      '\0', '\0', '\0', '\0', '\0', '\0',
  };

  // A single 1 in the lowest bit of each of the five 6-bit digit fields, so
  // multiplying by a digit value replicates that value into every field.
  const long ONE_PER_DIGIT =
      (1l << 0) | (1l << 6) | (1l << 12) | (1l << 18) | (1l << 24);

  for (size_t digit = 0; digit < 64; ++digit) {
    for (size_t pos = 0; pos < DIGIT_COUNT; ++pos)
      buffer[pos] = B64_CHARS[digit];

    const long expected = ONE_PER_DIGIT * digit;
    ASSERT_EQ(LIBC_NAMESPACE::a64l(buffer), expected);
  }
}

// Walks a single non-zero digit through each of the six positions while the
// other five positions hold the zero digit, covering every digit value at
// every position (including the sixth, where the value is cut to 32 bits).
TEST(LlvmLibcA64lTest, OneOfSixCharacters) {
  constexpr size_t DIGIT_COUNT = 6;

  // Six digits followed by the null terminator.
  char buffer[DIGIT_COUNT + 1] = {'\0', '\0', '\0', '\0', '\0', '\0', '\0'};

  for (size_t pos = 0; pos < DIGIT_COUNT; ++pos) {
    // Reset every position to the character that encodes 0.
    for (size_t i = 0; i < DIGIT_COUNT; ++i)
      buffer[i] = B64_CHARS[0];

    const size_t shift = 6 * pos;

    for (size_t digit = 0; digit < 64; ++digit) {
      buffer[pos] = B64_CHARS[digit];

      // The standard says the result is limited to 32 bits, so the expected
      // value is narrowed to int32_t before being widened back to long.
      const long expected = static_cast<int32_t>(digit << shift);
      ASSERT_EQ(LIBC_NAMESPACE::a64l(buffer), expected);
    }
  }
}