Skip to content

[libc] mbrtowc implementation #144760

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1244,6 +1244,9 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.sys.socket.recv
libc.src.sys.socket.recvfrom
libc.src.sys.socket.recvmsg

# wchar.h entrypoints
libc.src.wchar.mbrtowc
)
endif()

Expand Down
8 changes: 8 additions & 0 deletions libc/hdr/types/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ add_proxy_header_library(
libc.include.uchar
)

# Proxy header target for mbstate_t (hdr/types/mbstate_t.h). It depends on the
# LLVM-libc definition of the type in include/llvm-libc-types.
add_proxy_header_library(
mbstate_t
HDRS
mbstate_t.h
DEPENDS
libc.include.llvm-libc-types.mbstate_t
)

add_proxy_header_library(
div_t
HDRS
Expand Down
22 changes: 22 additions & 0 deletions libc/hdr/types/mbstate_t.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Proxy header for mbstate_t ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
#define LLVM_LIBC_HDR_TYPES_MBSTATE_T_H

#ifdef LIBC_FULL_BUILD

#include "include/llvm-libc-types/mbstate_t.h"

#else // Overlay mode

#error "Cannot overlay mbstate_t

#endif // LLVM_LIBC_FULL_BUILD

#endif // LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
6 changes: 5 additions & 1 deletion libc/include/llvm-libc-types/mbstate_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@
#ifndef LLVM_LIBC_TYPES_MBSTATE_T_H
#define LLVM_LIBC_TYPES_MBSTATE_T_H

// TODO: Complete this once we implement functions that operate on this type.
#include "../llvm-libc-macros/stdint-macros.h"

// Public multibyte conversion state object.
// The member names are deliberately opaque. NOTE(review): the field widths
// (one 32-bit, two 8-bit) match internal::mbstate in
// libc/src/__support/wchar/mbstate.h, and the mbrtowc entrypoint
// reinterpret_casts mbstate_t * to internal::mbstate *, so the two layouts
// presumably have to be kept in sync -- confirm before changing either one.
typedef struct {
uint32_t __field1;
uint8_t __field2;
uint8_t __field3;
} mbstate_t;

#endif // LLVM_LIBC_TYPES_MBSTATE_T_H
9 changes: 9 additions & 0 deletions libc/include/wchar.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ functions:
return_type: wint_t
arguments:
- type: int
- name: mbrtowc
standards:
- stdc
return_type: size_t
arguments:
- type: wchar_t *__restrict
- type: const char *__restrict
- type: size_t
- type: mbstate_t *__restrict
- name: wmemset
standards:
- stdc
Expand Down
16 changes: 16 additions & 0 deletions libc/src/__support/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,19 @@ add_object_library(
libc.src.__support.math_extras
.mbstate
)

# Internal implementation of mbrtowc (reports failures through ErrorOr). The
# public entrypoint in libc/src/wchar is layered on top of this library.
add_object_library(
  mbrtowc
  HDRS
    mbrtowc.h
  SRCS
    mbrtowc.cpp
  DEPENDS
    # mbrtowc.cpp includes hdr/types/mbstate_t.h, so its target is listed too.
    libc.hdr.types.mbstate_t
    libc.hdr.types.wchar_t
    libc.hdr.types.size_t
    libc.src.__support.common
    libc.src.__support.error_or
    libc.src.__support.macros.config
    .character_converter
    .mbstate
)
49 changes: 49 additions & 0 deletions libc/src/__support/wchar/mbrtowc.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
//===-- Implementation for mbrtowc function ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/wchar/mbrtowc.h"
#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/error_or.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/character_converter.h"
#include "src/__support/wchar/mbstate.h"

namespace LIBC_NAMESPACE_DECL {
namespace internal {

// Decodes the next multibyte character from `s`, reading at most `n` bytes,
// through a CharacterConverter constructed on the conversion state `ps`.
//
// Returns:
//   - 0 if `s` is null, or if the decoded character is the null wide
//     character;
//   - the number of bytes read from `s` by this call if a non-null character
//     was completed;
//   - static_cast<size_t>(-2) if no complete character is available after
//     the bytes were pushed (incomplete but potentially valid input);
//   - Error(-1) if the converter rejects a byte (encoding error).
//
// The decoded character is written through `pwc` only when `pwc` is non-null.
// The C standard allows callers to pass a null `pwc` when they only need the
// return value, so it must not be dereferenced unconditionally.
ErrorOr<size_t> mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
                        size_t n, mbstate *__restrict ps) {
  CharacterConverter char_conv(ps);
  if (s == nullptr)
    return 0;
  size_t i = 0;
  // Reading in bytes until we have a complete wc or error
  for (; i < n && !char_conv.isFull(); ++i) {
    int err = char_conv.push(static_cast<char8_t>(s[i]));
    // Encoding error
    if (err == -1)
      return Error(-1);
  }
  auto wc = char_conv.pop_utf32();
  if (wc.has_value()) {
    // A null destination is valid: the character is consumed but not stored.
    if (pwc != nullptr)
      *pwc = wc.value();
    // null terminator -> return 0
    if (wc.value() == L'\0')
      return 0;
    return i;
  }
  // Incomplete but potentially valid
  return -2;
}

} // namespace internal

} // namespace LIBC_NAMESPACE_DECL
29 changes: 29 additions & 0 deletions libc/src/__support/wchar/mbrtowc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//===-- Implementation header for mbrtowc function --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBRTOWC
#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBRTOWC

#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/error_or.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbstate.h"

namespace LIBC_NAMESPACE_DECL {
namespace internal {

// Internal mbrtowc: decodes the next multibyte character from `s` (reading at
// most `n` bytes) using the conversion state `ps`, and stores the result in
// `*pwc`.
// Per the implementation in mbrtowc.cpp, it returns 0 when `s` is null or the
// decoded character is the null wide character, the number of bytes read when
// a character is completed, static_cast<size_t>(-2) when the input is
// incomplete but potentially valid, and Error(-1) on an encoding error.
ErrorOr<size_t> mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
size_t n, mbstate *__restrict ps);

} // namespace internal

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBRTOWC
6 changes: 3 additions & 3 deletions libc/src/__support/wchar/mbstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,17 @@ namespace internal {

// Conversion state carried between successive multibyte <-> wide character
// conversion calls. Every member is zero-initialized, so a default-constructed
// mbstate holds no partial code point and no pending bytes.
struct mbstate {
  // store a partial codepoint (in UTF-32)
  char32_t partial = 0;

  /*
  Progress towards a conversion
    Increases with each push(...) until it reaches total_bytes
    Decreases with each pop(...) until it reaches 0
  */
  uint8_t bytes_stored = 0;

  // Total number of bytes that will be needed to represent this character
  uint8_t total_bytes = 0;
};

} // namespace internal
Expand Down
17 changes: 17 additions & 0 deletions libc/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,23 @@ add_entrypoint_object(
libc.src.__support.wctype_utils
)

# Public mbrtowc entrypoint. It is built on the internal implementation in
# libc.src.__support.wchar.mbrtowc and uses libc_errno for error reporting.
add_entrypoint_object(
mbrtowc
SRCS
mbrtowc.cpp
HDRS
mbrtowc.h
DEPENDS
libc.hdr.types.size_t
libc.hdr.types.mbstate_t
libc.hdr.types.wchar_t
libc.src.__support.common
libc.src.__support.macros.config
libc.src.__support.wchar.mbrtowc
libc.src.__support.libc_errno
libc.src.__support.wchar.mbstate
)

add_entrypoint_object(
wmemset
SRCS
Expand Down
38 changes: 38 additions & 0 deletions libc/src/wchar/mbrtowc.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
//===-- Implementation of mbrtowc -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/mbrtowc.h"

#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbrtowc.h"
#include "src/__support/wchar/mbstate.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(size_t, mbrtowc,
(wchar_t *__restrict pwc, const char *__restrict s, size_t n,
mbstate_t *__restrict ps)) {
static internal::mbstate internal_mbstate;
auto ret = internal::mbrtowc(pwc, s, n,
ps == nullptr
? &internal_mbstate
: reinterpret_cast<internal::mbstate *>(ps));
if (!ret.has_value()) {
// Encoding failure
libc_errno = EILSEQ;
return -1;
}
return ret.value();
}

} // namespace LIBC_NAMESPACE_DECL
24 changes: 24 additions & 0 deletions libc/src/wchar/mbrtowc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//===-- Implementation header for mbrtowc ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_MBRTOWC_H
#define LLVM_LIBC_SRC_WCHAR_MBRTOWC_H

#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Public mbrtowc entrypoint. Per the implementation in mbrtowc.cpp, it
// forwards to internal::mbrtowc, using a function-local static conversion
// state when `ps` is null. If the internal call reports an error, it sets
// libc_errno to EILSEQ and returns static_cast<size_t>(-1); otherwise it
// returns the internal result unchanged.
size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s, size_t n,
mbstate_t *__restrict ps);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_MBRTOWC_H
14 changes: 14 additions & 0 deletions libc/test/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,20 @@ add_libc_test(
libc.src.wchar.btowc
)

# Unit test for the mbrtowc entrypoint, registered in the libc_wchar_unittests
# suite.
add_libc_test(
mbrtowc_test
SUITE
libc_wchar_unittests
SRCS
mbrtowc_test.cpp
DEPENDS
libc.src.__support.libc_errno
libc.src.string.memset
libc.src.wchar.mbrtowc
libc.hdr.types.mbstate_t
libc.hdr.types.wchar_t
)

add_libc_test(
wctob_test
SUITE
Expand Down
Loading
Loading