Skip to content

[libc] implement a64l #128758

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into llvm:main from libcParseBase64
Feb 25, 2025
Merged

Conversation

michaelrj-google
Copy link
Contributor

Implement the POSIX function a64l.
Standard: https://pubs.opengroup.org/onlinepubs/9799919799/functions/a64l.html

@llvmbot
Copy link
Member

llvmbot commented Feb 25, 2025

@llvm/pr-subscribers-libc

Author: Michael Jones (michaelrj-google)

Changes

Implement the POSIX function a64l.
Standard: https://pubs.opengroup.org/onlinepubs/9799919799/functions/a64l.html


Full diff: https://github.com/llvm/llvm-project/pull/128758.diff

7 Files Affected:

  • (modified) libc/config/linux/x86_64/entrypoints.txt (+1)
  • (modified) libc/include/stdlib.yaml (+6)
  • (modified) libc/src/stdlib/CMakeLists.txt (+11)
  • (added) libc/src/stdlib/a64l.cpp (+64)
  • (added) libc/src/stdlib/a64l.h (+20)
  • (modified) libc/test/src/stdlib/CMakeLists.txt (+10)
  • (added) libc/test/src/stdlib/a64l_test.cpp (+87)
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index eaceb15c47291..22f747f24d92a 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -177,6 +177,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdbit.stdc_trailing_zeros_us
 
     # stdlib.h entrypoints
+    libc.src.stdlib.a64l
     libc.src.stdlib.abs
     libc.src.stdlib.atof
     libc.src.stdlib.atoi
diff --git a/libc/include/stdlib.yaml b/libc/include/stdlib.yaml
index 8d2b3f357e1a9..b308df98a6090 100644
--- a/libc/include/stdlib.yaml
+++ b/libc/include/stdlib.yaml
@@ -24,6 +24,12 @@ functions:
     return_type: _Noreturn void
     arguments:
       - type: int
+  - name: a64l
+    standards:
+      - posix
+    return_type: long
+    arguments:
+      - type: const char *
   - name: abort
     standards:
       - stdc
diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
index 73a9fbf1e2ddc..361f2305358c9 100644
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -184,6 +184,17 @@ add_entrypoint_object(
     libc.src.__support.str_to_integer
 )
 
+add_entrypoint_object(
+  a64l
+  SRCS
+    a64l.cpp
+  HDRS
+    a64l.h
+  DEPENDS
+    libc.src.__support.ctype_utils
+    libc.hdr.types.size_t
+)
+
 add_entrypoint_object(
   abs
   SRCS
diff --git a/libc/src/stdlib/a64l.cpp b/libc/src/stdlib/a64l.cpp
new file mode 100644
index 0000000000000..670ac43804bff
--- /dev/null
+++ b/libc/src/stdlib/a64l.cpp
@@ -0,0 +1,64 @@
+//===-- Implementation of a64l --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/a64l.h"
+#include "hdr/types/size_t.h"
+#include "src/__support/common.h"
+#include "src/__support/ctype_utils.h"
+#include "src/__support/macros/config.h"
+
+#include <stdint.h>
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Maps one character of the a64l alphabet to its 6-bit digit value.
+//
+// POSIX fixes the alphabet as: '.' -> 0, '/' -> 1, '0'-'9' -> [2,11],
+// 'A'-'Z' -> [12,37] and 'a'-'z' -> [38,63]. Any other character has no
+// assigned value, which is reported to the caller as -1.
+//
+// This helper is kept next to a64l rather than in ctype_utils because this
+// ordering of base 64 digits is unusual and very implementation specific.
+constexpr int32_t b64_char_to_int(char ch) {
+  // The two punctuation digits come first in the alphabet.
+  switch (ch) {
+  case '.':
+    return 0;
+  case '/':
+    return 1;
+  default:
+    break;
+  }
+
+  // Every remaining valid digit is a decimal digit or a letter.
+  if (!internal::isalnum(ch))
+    return -1;
+
+  // b36_char_to_int is case insensitive, so lowercase letters are moved past
+  // the 26 uppercase ones to restore case sensitivity. The constant 2 skips
+  // the slots already taken by '.' and '/'.
+  const int case_offset = internal::islower(ch) ? 26 : 0;
+  return internal::b36_char_to_int(ch) + 2 + case_offset;
+}
+
+// Converts a string in the POSIX a64l base 64 alphabet to a long.
+//
+// At most the first six characters of `s` are read. The first character is the
+// least significant digit and each digit contributes six bits, so six digits
+// describe up to 36 bits; only the low 32 bits are kept and the result is then
+// sign extended to long.
+//
+// The standard leaves the behavior on a character outside the alphabet
+// undefined; this implementation treats such a character like the end of the
+// string. An empty string yields 0.
+LLVM_LIBC_FUNCTION(long, a64l, (const char *s)) {
+  // the standard says to only use up to 6 characters.
+  constexpr size_t MAX_LENGTH = 6;
+  // 2^6 = 64, so every digit occupies 6 bits.
+  constexpr size_t BITS_PER_DIGIT = 6;
+
+  // Accumulate in an unsigned type: the sixth digit is shifted by 30 bits, so
+  // any digit value above 1 reaches or passes bit 31. Left shifting a signed
+  // int32_t that far is not well defined before C++20, whereas an unsigned
+  // shift simply drops the bits that do not fit in 32 bits.
+  uint32_t result = 0;
+
+  for (size_t i = 0; i < MAX_LENGTH && s[i] != '\0'; ++i) {
+    const int32_t cur_val = b64_char_to_int(s[i]);
+    // The standard says what happens on an unspecified character is undefined,
+    // here we treat it as the end of the string.
+    if (cur_val == -1)
+      break;
+
+    // the first digit is the least significant, so each subsequent digit is
+    // shifted further. The 6-bit fields never overlap, so OR-ing a digit into
+    // place is equivalent to adding it.
+    result |= static_cast<uint32_t>(cur_val) << (BITS_PER_DIGIT * i);
+  }
+
+  // standard says to sign extend from 32 bits: reinterpret the accumulated
+  // bits as a signed 32 bit value first, then widen to long.
+  return static_cast<long>(static_cast<int32_t>(result));
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/a64l.h b/libc/src/stdlib/a64l.h
new file mode 100644
index 0000000000000..024be058f756c
--- /dev/null
+++ b/libc/src/stdlib/a64l.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for a64l --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_A64L_H
+#define LLVM_LIBC_SRC_STDLIB_A64L_H
+
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+long a64l(const char *s);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STDLIB_A64L_H
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
index e6c8a629c71fa..848100442c88b 100644
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -221,6 +221,16 @@ add_libc_test(
     ${strfrom_test_copts}
 )
 
+add_libc_test(
+  a64l_test
+  SUITE
+    libc-stdlib-tests
+  SRCS
+    a64l_test.cpp
+  DEPENDS
+    libc.src.stdlib.a64l
+)
+
 add_libc_test(
   abs_test
   SUITE
diff --git a/libc/test/src/stdlib/a64l_test.cpp b/libc/test/src/stdlib/a64l_test.cpp
new file mode 100644
index 0000000000000..acdef5d69543d
--- /dev/null
+++ b/libc/test/src/stdlib/a64l_test.cpp
@@ -0,0 +1,87 @@
+//===-- Unittests for a64l ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/a64l.h"
+#include "test/UnitTest/Test.h"
+
+// An empty string contains no digits, so it must decode to zero.
+TEST(LlvmLibcA64lTest, EmptyString) {
+  const char *empty = "";
+  ASSERT_EQ(LIBC_NAMESPACE::a64l(empty), 0l);
+}
+// Decodes a string that uses all six permitted digits and mixes upper case,
+// lower case, decimal digit and punctuation characters.
+TEST(LlvmLibcA64lTest, FullString) {
+  // Least significant digit first: 'A'=12, 'b'=39, 'C'=14, '1'=3, '2'=4,
+  // '/'=1, so the value is
+  // 12 + 39*64 + 14*64^2 + 3*64^3 + 4*64^4 + 1*64^5 = 1141696972.
+  constexpr long EXPECTED = 1141696972l;
+  ASSERT_EQ(LIBC_NAMESPACE::a64l("AbC12/"), EXPECTED);
+}
+
+// The a64l alphabet in digit order: B64_CHARS[v] is the character that
+// represents the value v.
+constexpr char B64_CHARS[64] = {
+    // values 0-1: punctuation
+    '.', '/',
+    // values 2-11: decimal digits
+    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+    // values 12-37: upper case letters
+    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+    // values 38-63: lower case letters
+    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+};
+
+// Every single-digit string must decode to the position of its character in
+// the alphabet.
+TEST(LlvmLibcA64lTest, OneCharacter) {
+  // One digit followed by the terminator; zero-initialized up front.
+  char input[2] = {};
+
+  for (size_t value = 0; value < 64; ++value) {
+    input[0] = B64_CHARS[value];
+    const long expected = static_cast<long>(value);
+    ASSERT_EQ(LIBC_NAMESPACE::a64l(input), expected);
+  }
+}
+
+// Exhaustively checks every two-digit string. The first character is the
+// least significant digit, so the second one is weighted by 64.
+TEST(LlvmLibcA64lTest, TwoCharacters) {
+  // Two digits followed by the terminator; zero-initialized up front.
+  char input[3] = {};
+
+  for (size_t low = 0; low < 64; ++low) {
+    input[0] = B64_CHARS[low];
+    for (size_t high = 0; high < 64; ++high) {
+      input[1] = B64_CHARS[high];
+
+      const long expected = static_cast<long>(low + (high * 64));
+      ASSERT_EQ(LIBC_NAMESPACE::a64l(input), expected);
+    }
+  }
+}
+
+// Checks strings made of the same digit repeated five times, for every digit
+// of the alphabet.
+TEST(LlvmLibcA64lTest, FiveSameCharacters) {
+  // Technically the last digit can be parsed to give the last two bits. Not
+  // handling that here.
+  // Five digits followed by the terminator; zero-initialized up front.
+  char example_str[6] = {};
+
+  // set every 6th bit (bits 0, 6, 12, 18 and 24): multiplying this by a 6-bit
+  // value places a copy of that value in each of the five digit positions.
+  constexpr long BASE_NUM = 0b1000001000001000001000001;
+
+  for (size_t char_val = 0; char_val < 64; ++char_val) {
+    for (size_t i = 0; i < 5; ++i)
+      example_str[i] = B64_CHARS[char_val];
+
+    // Convert the digit to long before multiplying so the product is computed
+    // in the signed type it is stored in, instead of mixing long and size_t
+    // and narrowing the unsigned product implicitly.
+    const long expected_result = BASE_NUM * static_cast<long>(char_val);
+
+    ASSERT_EQ(LIBC_NAMESPACE::a64l(example_str), expected_result);
+  }
+}
+
+// Walks a single non-zero digit through each of the six positions while the
+// other five positions hold the character for zero.
+TEST(LlvmLibcA64lTest, OneOfSixCharacters) {
+  // Six digits followed by the terminator; zero-initialized up front.
+  char input[7] = {};
+
+  for (size_t position = 0; position < 6; ++position) {
+    // Reset all six digits to b64(0) before varying the current position.
+    for (size_t i = 0; i < 6; ++i)
+      input[i] = B64_CHARS[0];
+
+    for (size_t digit = 0; digit < 64; ++digit) {
+      input[position] = B64_CHARS[digit];
+
+      // The standard says the function produces a 32 bit value, so the
+      // expected result is limited to 32 bits before widening to long.
+      const size_t shifted = digit << (6 * position);
+      const long expected = static_cast<int32_t>(shifted);
+
+      ASSERT_EQ(LIBC_NAMESPACE::a64l(input), expected);
+    }
+  }
+}

@michaelrj-google michaelrj-google merged commit 8beec9f into llvm:main Feb 25, 2025
16 checks passed
@michaelrj-google michaelrj-google deleted the libcParseBase64 branch February 25, 2025 21:57
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants