Skip to content

[libc] mbrtowc implementation #144760

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 20, 2025
Merged

[libc] mbrtowc implementation #144760

merged 10 commits into from
Jun 20, 2025

Conversation

sribee8
Copy link
Contributor

@sribee8 sribee8 commented Jun 18, 2025

Implemented the internal and public mbrtowc functions, as well as tests for the public function.

Sriya Pratipati added 2 commits June 17, 2025 20:19
implemented the internal and public mbrtowc as well as tests for the public function.
@llvmbot llvmbot added the libc label Jun 18, 2025
@llvmbot
Copy link
Member

llvmbot commented Jun 18, 2025

@llvm/pr-subscribers-libc

Author: None (sribee8)

Changes

implemented the internal and public mbrtowc as well as tests for the public function.


Full diff: https://github.com/llvm/llvm-project/pull/144760.diff

13 Files Affected:

  • (modified) libc/config/linux/x86_64/entrypoints.txt (+1)
  • (modified) libc/hdr/types/CMakeLists.txt (+8)
  • (added) libc/hdr/types/mbstate_t.h (+22)
  • (modified) libc/include/llvm-libc-types/mbstate_t.h (+5-1)
  • (modified) libc/include/wchar.yaml (+9)
  • (modified) libc/src/__support/wchar/CMakeLists.txt (+16)
  • (added) libc/src/__support/wchar/mbrtowc.cpp (+50)
  • (added) libc/src/__support/wchar/mbrtowc.h (+29)
  • (modified) libc/src/wchar/CMakeLists.txt (+17)
  • (added) libc/src/wchar/mbrtowc.cpp (+40)
  • (added) libc/src/wchar/mbrtowc.h (+24)
  • (modified) libc/test/src/wchar/CMakeLists.txt (+14)
  • (added) libc/test/src/wchar/mbrtowc_test.cpp (+170)
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index aa2079faed409..10509a0c25835 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -365,6 +365,7 @@ set(TARGET_LIBC_ENTRYPOINTS
 
     # wchar.h entrypoints
     libc.src.wchar.btowc
+    libc.src.wchar.mbrtowc
     libc.src.wchar.wcslen
     libc.src.wchar.wctob
     libc.src.wchar.wmemmove
diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt
index c88c357009072..e4b3cb0faa820 100644
--- a/libc/hdr/types/CMakeLists.txt
+++ b/libc/hdr/types/CMakeLists.txt
@@ -20,6 +20,14 @@ add_proxy_header_library(
     libc.include.uchar
 )
 
+# Proxy header target for hdr/types/mbstate_t.h.
+add_proxy_header_library(
+  mbstate_t
+  HDRS
+    mbstate_t.h
+  DEPENDS
+    libc.include.llvm-libc-types.mbstate_t
+)
+
 add_proxy_header_library(
   div_t
   HDRS
diff --git a/libc/hdr/types/mbstate_t.h b/libc/hdr/types/mbstate_t.h
new file mode 100644
index 0000000000000..15b2614341d7d
--- /dev/null
+++ b/libc/hdr/types/mbstate_t.h
@@ -0,0 +1,22 @@
+//===-- Proxy header for mbstate_t ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
+#define LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/mbstate_t.h"
+
+#else // Overlay mode
+
+#include "hdr/wchar_overlay.h"
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
diff --git a/libc/include/llvm-libc-types/mbstate_t.h b/libc/include/llvm-libc-types/mbstate_t.h
index 540d50975a264..009fe57da50e2 100644
--- a/libc/include/llvm-libc-types/mbstate_t.h
+++ b/libc/include/llvm-libc-types/mbstate_t.h
@@ -9,8 +9,12 @@
 #ifndef LLVM_LIBC_TYPES_MBSTATE_T_H
 #define LLVM_LIBC_TYPES_MBSTATE_T_H
 
-// TODO: Complete this once we implement functions that operate on this type.
+#include "../llvm-libc-macros/stdint-macros.h"
+
 typedef struct {
+  // NOTE(review): mbrtowc casts mbstate_t * to internal::mbstate *, so this
+  // layout presumably has to match that struct -- confirm.
+  uint32_t __field1;
+  uint8_t __field2;
+  uint8_t __field3;
 } mbstate_t;
 
 #endif // LLVM_LIBC_TYPES_MBSTATE_T_H
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 84db73d8f01ea..06c621f59b462 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -29,6 +29,15 @@ functions:
     return_type: wint_t
     arguments:
       - type: int
+  # mbrtowc: converts the next multibyte character to a wide character.
+  - name: mbrtowc
+    standards:
+      - stdc
+    return_type: size_t
+    arguments:
+      - type: wchar_t * __restrict
+      - type: const char * __restrict
+      - type: size_t
+      - type: mbstate_t * __restrict
   - name: wmemset
     standards:
       - stdc
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index 6715e354e23e5..479c1dff2c6e0 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -19,3 +19,19 @@ add_object_library(
     libc.src.__support.math_extras
     .mbstate
 )
+
+# Internal, ErrorOr-returning mbrtowc used by the public entrypoint.
+add_object_library(
+  mbrtowc
+  HDRS
+    mbrtowc.h
+  SRCS
+    mbrtowc.cpp
+  DEPENDS
+    libc.hdr.types.mbstate_t
+    libc.hdr.types.size_t
+    libc.hdr.types.wchar_t
+    libc.src.__support.common
+    libc.src.__support.error_or
+    libc.src.__support.macros.config
+    .character_converter
+    .mbstate
+)
diff --git a/libc/src/__support/wchar/mbrtowc.cpp b/libc/src/__support/wchar/mbrtowc.cpp
new file mode 100644
index 0000000000000..969448ee60e81
--- /dev/null
+++ b/libc/src/__support/wchar/mbrtowc.cpp
@@ -0,0 +1,50 @@
+//===-- Implementation for mbrtowc function ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/wchar/mbrtowc.h"
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/error_or.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/character_converter.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Converts the next multibyte character in s to a wide character.
+//
+// Pushes at most n bytes from s into the converter backed by ps until a
+// complete character is available. Returns:
+//   - the number of bytes consumed by this call when a non-null character is
+//     completed,
+//   - 0 when s is null or the completed character is the null wide character,
+//   - Error(-1) when a pushed byte is rejected as an encoding error,
+//   - Error(-2) when the bytes read so far do not complete a character.
+// The converted character is stored in *pwc only if pwc is non-null.
+ErrorOr<size_t> mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
+                        size_t n, mbstate *__restrict ps) {
+  CharacterConverter char_conv(ps);
+  if (s == nullptr)
+    return 0;
+  size_t i = 0;
+  auto wc = char_conv.pop_utf32();
+  // Reading in bytes until we have a complete wc or error
+  for (; i < n && !wc.has_value(); ++i) {
+    int err = char_conv.push(static_cast<char8_t>(s[i]));
+    // Encoding error
+    if (err == -1)
+      return Error(-1);
+    wc = char_conv.pop_utf32();
+  }
+  if (wc.has_value()) {
+    // The C standard permits a null pwc; in that case nothing is stored.
+    if (pwc != nullptr)
+      *pwc = wc.value();
+    // null terminator -> return 0
+    if (wc.value() == L'\0')
+      return 0;
+    return i;
+  }
+  // Incomplete but potentially valid
+  return Error(-2);
+}
+
+} // namespace internal
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/mbrtowc.h b/libc/src/__support/wchar/mbrtowc.h
new file mode 100644
index 0000000000000..37329ee61beac
--- /dev/null
+++ b/libc/src/__support/wchar/mbrtowc.h
@@ -0,0 +1,29 @@
+//===-- Implementation header for mbrtowc function --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBRTOWC
+#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBRTOWC
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/error_or.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Internal mbrtowc. Reads at most n bytes from s using the conversion state
+// ps. Yields the number of bytes consumed (0 when s is null or the character
+// is the null wide character), Error(-1) on an encoding error, or Error(-2)
+// when the bytes read so far do not complete a character.
+ErrorOr<size_t> mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
+                        size_t n, mbstate *__restrict ps);
+
+} // namespace internal
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBRTOWC
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 491dd5b34340a..163c29847e6a2 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -34,6 +34,23 @@ add_entrypoint_object(
     libc.src.__support.wctype_utils
 )
 
+# Public mbrtowc entrypoint; wraps libc.src.__support.wchar.mbrtowc.
+add_entrypoint_object(
+  mbrtowc
+  SRCS
+    mbrtowc.cpp
+  HDRS
+    mbrtowc.h
+  DEPENDS
+    libc.hdr.types.size_t
+    libc.hdr.types.mbstate_t
+    libc.hdr.types.wchar_t
+    libc.src.__support.common
+    libc.src.__support.macros.config
+    libc.src.__support.wchar.mbrtowc
+    libc.src.__support.libc_errno
+    libc.src.__support.wchar.mbstate
+)
+
 add_entrypoint_object(
   wmemset
   SRCS
diff --git a/libc/src/wchar/mbrtowc.cpp b/libc/src/wchar/mbrtowc.cpp
new file mode 100644
index 0000000000000..c29c5ee161e32
--- /dev/null
+++ b/libc/src/wchar/mbrtowc.cpp
@@ -0,0 +1,40 @@
+//===-- Implementation of mbrtowc -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/mbrtowc.h"
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbrtowc.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(size_t, mbrtowc,
+                   (wchar_t *__restrict pwc, const char *__restrict s, size_t n,
+                    mbstate_t *__restrict ps)) {
+  static mbstate_t internal_mbstate{0, 0, 0};
+  auto ret = internal::mbrtowc(
+      pwc, s, n, (internal::mbstate *)(ps == nullptr ? &internal_mbstate : ps));
+  if (!ret.has_value()) {
+    // Encoding failure
+    if (ret.error() == -1) {
+      libc_errno = EILSEQ;
+      return -1;
+    }
+    // Could potentially read a valid wide character.
+    return -2;
+  }
+  return ret.value();
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/mbrtowc.h b/libc/src/wchar/mbrtowc.h
new file mode 100644
index 0000000000000..e2e3d3ebd2853
--- /dev/null
+++ b/libc/src/wchar/mbrtowc.h
@@ -0,0 +1,24 @@
+//===-- Implementation header for mbrtowc ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_MBRTOWC_H
+#define LLVM_LIBC_SRC_WCHAR_MBRTOWC_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Public mbrtowc entrypoint. When ps is null an internal static conversion
+// state is used. Returns the internal conversion's result on success,
+// (size_t)-1 with errno set to EILSEQ on an encoding error, and (size_t)-2
+// when the bytes read so far do not complete a character.
+size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s, size_t n,
+               mbstate_t *__restrict ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_MBRTOWC_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 4990b6953348b..d4cae1f6228bd 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -23,6 +23,20 @@ add_libc_test(
     libc.src.wchar.btowc
 )
 
+# Unit tests for the public mbrtowc entrypoint.
+add_libc_test(
+  mbrtowc_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    mbrtowc_test.cpp
+  DEPENDS
+    libc.src.__support.libc_errno
+    libc.src.string.memset
+    libc.src.wchar.mbrtowc
+    libc.hdr.types.mbstate_t
+    libc.hdr.types.wchar_t
+)
+
 add_libc_test(
   wctob_test
   SUITE
diff --git a/libc/test/src/wchar/mbrtowc_test.cpp b/libc/test/src/wchar/mbrtowc_test.cpp
new file mode 100644
index 0000000000000..6e96e7ac31f49
--- /dev/null
+++ b/libc/test/src/wchar/mbrtowc_test.cpp
@@ -0,0 +1,170 @@
+//===-- Unittests for mbrtowc ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/libc_errno.h"
+#include "src/string/memset.h"
+#include "src/wchar/mbrtowc.h"
+#include "test/UnitTest/Test.h"
+
+// Single-byte input, exercised through the null-mbstate_t code path.
+TEST(LlvmLibcMBRToWC, OneByte) {
+  // A null state pointer makes mbrtowc use its internal static state.
+  mbstate_t *state = nullptr;
+  const char *src = "A";
+  wchar_t out[2];
+
+  size_t consumed = LIBC_NAMESPACE::mbrtowc(out, src, 1, state);
+  ASSERT_EQ(static_cast<char>(out[0]), 'A');
+  ASSERT_EQ(static_cast<int>(consumed), 1);
+
+  // With n == 0 nothing can be read, so the character is reported incomplete
+  consumed = LIBC_NAMESPACE::mbrtowc(out, src, 0, state);
+  ASSERT_EQ(static_cast<int>(consumed), -2);
+}
+
+// Two-byte sequence, read in one call and then split across two calls.
+TEST(LlvmLibcMBRToWC, TwoByte) {
+  const char ch[2] = {static_cast<char>(0xC2),
+                      static_cast<char>(0x8E)}; // U+008E (decimal 142)
+  wchar_t dest[2];
+  // Use a real, zeroed state object so the caller-supplied path is tested.
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 2, &mb);
+  ASSERT_EQ(static_cast<int>(*dest), 142);
+  ASSERT_EQ(static_cast<int>(n), 2);
+
+  // Should fail since we have not read enough
+  n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, &mb);
+  ASSERT_EQ(static_cast<int>(n), -2);
+  // Should pass after reading one more byte
+  n = LIBC_NAMESPACE::mbrtowc(dest, ch + 1, 1, &mb);
+  ASSERT_EQ(static_cast<int>(n), 1);
+  ASSERT_EQ(static_cast<int>(*dest), 142);
+}
+
+// Three-byte sequence, read in one call and then split across two calls.
+TEST(LlvmLibcMBRToWC, ThreeByte) {
+  const char ch[3] = {static_cast<char>(0xE2), static_cast<char>(0x88),
+                      static_cast<char>(0x91)}; // ∑ sigma symbol
+  wchar_t dest[2];
+  // Use a real, zeroed state object so the caller-supplied path is tested.
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 3, &mb);
+  ASSERT_EQ(static_cast<int>(*dest), 8721);
+  ASSERT_EQ(static_cast<int>(n), 3);
+
+  // Should fail since we have not read enough
+  n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, &mb);
+  ASSERT_EQ(static_cast<int>(n), -2);
+  // Should pass after reading two more bytes
+  n = LIBC_NAMESPACE::mbrtowc(dest, ch + 1, 2, &mb);
+  ASSERT_EQ(static_cast<int>(n), 2);
+  ASSERT_EQ(static_cast<int>(*dest), 8721);
+}
+
+// Four-byte sequence, read in one call and then split across two calls.
+TEST(LlvmLibcMBRToWC, FourByte) {
+  const char ch[4] = {static_cast<char>(0xF0), static_cast<char>(0x9F),
+                      static_cast<char>(0xA4),
+                      static_cast<char>(0xA1)}; // 🤡 clown emoji
+  wchar_t dest[2];
+  // Use a real, zeroed state object so the caller-supplied path is tested.
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 4, &mb);
+  ASSERT_EQ(static_cast<int>(*dest), 129313);
+  ASSERT_EQ(static_cast<int>(n), 4);
+
+  // Should fail since we have not read enough
+  n = LIBC_NAMESPACE::mbrtowc(dest, ch, 2, &mb);
+  ASSERT_EQ(static_cast<int>(n), -2);
+  // Should pass after reading two more bytes
+  n = LIBC_NAMESPACE::mbrtowc(dest, ch + 2, 2, &mb);
+  ASSERT_EQ(static_cast<int>(n), 2);
+  ASSERT_EQ(static_cast<int>(*dest), 129313);
+}
+
+// A lone continuation byte must be rejected with EILSEQ.
+TEST(LlvmLibcMBRToWC, InvalidByte) {
+  const char ch[1] = {static_cast<char>(0x80)};
+  wchar_t dest[2];
+  // Use a real, zeroed state object so the caller-supplied path is tested.
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  // Clear errno so the EILSEQ check cannot pass on a stale value.
+  libc_errno = 0;
+  size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, &mb);
+  ASSERT_EQ(static_cast<int>(n), -1);
+  ASSERT_EQ(static_cast<int>(libc_errno), EILSEQ);
+}
+
+// An invalid leading byte must fail however many bytes are offered.
+TEST(LlvmLibcMBRToWC, InvalidMultiByte) {
+  const char ch[4] = {static_cast<char>(0x80), static_cast<char>(0x00),
+                      static_cast<char>(0x80),
+                      static_cast<char>(0x00)}; // invalid sequence of bytes
+  wchar_t dest[2];
+  // Use a real, zeroed state object so the caller-supplied path is tested.
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  // Trying to push all 4 should error
+  libc_errno = 0;
+  size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 4, &mb);
+  ASSERT_EQ(static_cast<int>(n), -1);
+  ASSERT_EQ(static_cast<int>(libc_errno), EILSEQ);
+  // Trying to push just the first one should error
+  // (errno is cleared first so this check is independent of the one above)
+  libc_errno = 0;
+  n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, &mb);
+  ASSERT_EQ(static_cast<int>(n), -1);
+  ASSERT_EQ(static_cast<int>(libc_errno), EILSEQ);
+  // Trying to push the second and third should correspond to null wc
+  n = LIBC_NAMESPACE::mbrtowc(dest, ch + 1, 2, &mb);
+  ASSERT_EQ(static_cast<int>(n), 0);
+}
+
+// A four-byte sequence whose final byte is not a continuation byte.
+TEST(LlvmLibcMBRToWC, InvalidLastByte) {
+  // Last byte is invalid since it does not have correct starting sequence.
+  // 0xC0 --> 11000000 starting sequence should be 10xxxxxx
+  const char ch[4] = {static_cast<char>(0xF1), static_cast<char>(0x80),
+                      static_cast<char>(0x80), static_cast<char>(0xC0)};
+  wchar_t dest[2];
+  // Use a real, zeroed state object so the caller-supplied path is tested.
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  // Clear errno so the EILSEQ check cannot pass on a stale value.
+  libc_errno = 0;
+  // Trying to push all 4 should error
+  size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 4, &mb);
+  ASSERT_EQ(static_cast<int>(n), -1);
+  ASSERT_EQ(static_cast<int>(libc_errno), EILSEQ);
+}
+
+// Conversion must stop after the first complete character even if n is larger.
+TEST(LlvmLibcMBRToWC, ValidTwoByteWithExtraRead) {
+  const char ch[3] = {static_cast<char>(0xC2), static_cast<char>(0x8E),
+                      static_cast<char>(0x80)};
+  wchar_t dest[2];
+  // Use a real, zeroed state object so the caller-supplied path is tested.
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  // Trying to push all 3 should return valid 2 byte
+  size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 3, &mb);
+  ASSERT_EQ(static_cast<int>(n), 2);
+  ASSERT_EQ(static_cast<int>(*dest), 142);
+}
+
+// Two consecutive two-byte characters converted with the same state object.
+TEST(LlvmLibcMBRToWC, TwoValidTwoBytes) {
+  const char ch[4] = {static_cast<char>(0xC2), static_cast<char>(0x8E),
+                      static_cast<char>(0xC7), static_cast<char>(0x8C)};
+  wchar_t dest[2];
+  // Use a real, zeroed state object so the caller-supplied path is tested.
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  // mbstate should reset after reading first one
+  size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 2, &mb);
+  ASSERT_EQ(static_cast<int>(n), 2);
+  ASSERT_EQ(static_cast<int>(*dest), 142);
+  n = LIBC_NAMESPACE::mbrtowc(dest + 1, ch + 2, 2, &mb);
+  ASSERT_EQ(static_cast<int>(n), 2);
+  ASSERT_EQ(static_cast<int>(*(dest + 1)), 460);
+}
+
+// A null source pointer and a null terminator both yield a return value of 0.
+TEST(LlvmLibcMBRToWC, NullString) {
+  wchar_t dest[2];
+  // Use a real, zeroed state object so the caller-supplied path is tested.
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  // reading on nullptr should return 0
+  size_t n = LIBC_NAMESPACE::mbrtowc(dest, nullptr, 2, &mb);
+  ASSERT_EQ(static_cast<int>(n), 0);
+  // reading a null terminator should return 0
+  const char *ch = "\0";
+  n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, &mb);
+  ASSERT_EQ(static_cast<int>(n), 0);
+}

Copy link
Contributor

@uzairnawaz uzairnawaz left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mostly good, just a few small suggestions‼️‼️

Copy link
Contributor

@michaelrj-google michaelrj-google left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

overall looks good

Copy link
Contributor

@michaelrj-google michaelrj-google left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Overall looks good, while you're doing the last fix I'll run the tests on my machine

Copy link
Contributor

@michaelrj-google michaelrj-google left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests passed. I found one small thing but once that's fixed this is good to merge.

@sribee8 sribee8 merged commit d078ce7 into llvm:main Jun 20, 2025
13 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jun 20, 2025

LLVM Buildbot has detected a new failure on builder libc-x86_64-debian-gcc-fullbuild-dbg running on libc-x86_64-debian-fullbuild while building libc at step 4 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/131/builds/24439

Here is the relevant piece of the build log for the reference
Step 4 (annotate) failure: 'python ../llvm-zorg/zorg/buildbot/builders/annotated/libc-linux.py ...' (failure)
...
[52/82] Generating header sys/statvfs.h from /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/runtimes/../libc/include/sys/statvfs.yaml
[53/82] Generating header sys/types.h from /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/runtimes/../libc/include/sys/types.yaml
[54/80] Generating header termios.h from /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/runtimes/../libc/include/termios.yaml
[55/69] Generating header poll.h from /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/runtimes/../libc/include/poll.yaml
[56/69] Generating header uchar.h from /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/runtimes/../libc/include/uchar.yaml
[57/69] Generating header sys/wait.h from /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/runtimes/../libc/include/sys/wait.yaml
[58/66] Generating header sys/socket.h from /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/runtimes/../libc/include/sys/socket.yaml
[59/64] Generating header wchar.h from /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/runtimes/../libc/include/wchar.yaml
[60/64] Generating header math.h from /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/runtimes/../libc/include/math.yaml
[61/64] Building CXX object libc/src/wchar/CMakeFiles/libc.src.wchar.mbrtowc.dir/mbrtowc.cpp.o
FAILED: libc/src/wchar/CMakeFiles/libc.src.wchar.mbrtowc.dir/mbrtowc.cpp.o 
/usr/bin/g++ -DLIBC_NAMESPACE=__llvm_libc_20_0_0_git -D_DEBUG -I/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc -isystem /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/build/libc/include -fvisibility-inlines-hidden -Werror=date-time -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -Wimplicit-fallthrough -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -g -DLIBC_QSORT_IMPL=LIBC_QSORT_QUICK_SORT -DLIBC_ADD_NULL_CHECKS -DLIBC_ERRNO_MODE=LIBC_ERRNO_MODE_DEFAULT -fpie -ffreestanding -DLIBC_FULL_BUILD -isystem/usr/lib/gcc/x86_64-linux-gnu/12//include -nostdinc -idirafter/usr/include -fno-builtin -fno-exceptions -fno-lax-vector-conversions -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-rtti -ftrivial-auto-var-init=pattern -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Wall -Wextra -Werror -Wconversion -Wno-sign-conversion -Wdeprecated -fext-numeric-literals -Wno-pedantic -Wimplicit-fallthrough -Wwrite-strings -Wextra-semi -DLIBC_COPT_PUBLIC_PACKAGING -std=gnu++17 -MD -MT libc/src/wchar/CMakeFiles/libc.src.wchar.mbrtowc.dir/mbrtowc.cpp.o -MF libc/src/wchar/CMakeFiles/libc.src.wchar.mbrtowc.dir/mbrtowc.cpp.o.d -o libc/src/wchar/CMakeFiles/libc.src.wchar.mbrtowc.dir/mbrtowc.cpp.o -c /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/wchar/mbrtowc.cpp
In file included from /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/wchar/mbrtowc.h:12,
                 from /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/wchar/mbrtowc.cpp:9:
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/hdr/types/mbstate_t.h:18:8: error: missing terminating " character [-Werror]
   18 | #error "Cannot overlay mbstate_t
      |        ^
cc1plus: all warnings being treated as errors
[62/64] Building CXX object libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.mbrtowc.dir/mbrtowc.cpp.o
FAILED: libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.mbrtowc.dir/mbrtowc.cpp.o 
/usr/bin/g++ -DLIBC_NAMESPACE=__llvm_libc_20_0_0_git -D_DEBUG -I/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc -isystem /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/build/libc/include -fvisibility-inlines-hidden -Werror=date-time -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -Wimplicit-fallthrough -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -g -DLIBC_QSORT_IMPL=LIBC_QSORT_QUICK_SORT -DLIBC_ADD_NULL_CHECKS -DLIBC_ERRNO_MODE=LIBC_ERRNO_MODE_DEFAULT -fpie -ffreestanding -DLIBC_FULL_BUILD -isystem/usr/lib/gcc/x86_64-linux-gnu/12//include -nostdinc -idirafter/usr/include -fno-builtin -fno-exceptions -fno-lax-vector-conversions -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-rtti -ftrivial-auto-var-init=pattern -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Wall -Wextra -Werror -Wconversion -Wno-sign-conversion -Wdeprecated -fext-numeric-literals -Wno-pedantic -Wimplicit-fallthrough -Wwrite-strings -Wextra-semi -std=gnu++17 -MD -MT libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.mbrtowc.dir/mbrtowc.cpp.o -MF libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.mbrtowc.dir/mbrtowc.cpp.o.d -o libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.mbrtowc.dir/mbrtowc.cpp.o -c /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/mbrtowc.cpp
In file included from /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/mbrtowc.cpp:10:
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/hdr/types/mbstate_t.h:18:8: error: missing terminating " character [-Werror]
   18 | #error "Cannot overlay mbstate_t
      |        ^
cc1plus: all warnings being treated as errors
[63/64] Building CXX object libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.character_converter.dir/character_converter.cpp.o
ninja: build stopped: subcommand failed.
['ninja', 'libc'] exited with return code 1.
The build step threw an exception...
Traceback (most recent call last):
  File "/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/build/../llvm-zorg/zorg/buildbot/builders/annotated/libc-linux.py", line 176, in step
    yield
  File "/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/build/../llvm-zorg/zorg/buildbot/builders/annotated/libc-linux.py", line 138, in main
    run_command(['ninja', 'libc'])
  File "/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/build/../llvm-zorg/zorg/buildbot/builders/annotated/libc-linux.py", line 191, in run_command
    util.report_run_cmd(cmd, cwd=directory)
  File "/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-zorg/zorg/buildbot/builders/annotated/util.py", line 49, in report_run_cmd
    subprocess.check_call(cmd, shell=shell, *args, **kwargs)
  File "/usr/lib/python3.11/subprocess.py", line 413, in check_call
    raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['ninja', 'libc']' returned non-zero exit status 1.
@@@STEP_FAILURE@@@
@@@BUILD_STEP build libc-startup@@@
Running: ninja libc-startup
ninja: no work to do.
@@@BUILD_STEP libc-unit-tests@@@
Running: ninja libc-unit-tests

Jaddyen pushed a commit to Jaddyen/llvm-project that referenced this pull request Jun 23, 2025
implemented the internal and public mbrtowc as well as tests for the
public function.

---------

Co-authored-by: Sriya Pratipati <[email protected]>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

6 participants