Skip to content

Commit d078ce7

Browse files
sribee8Sriya Pratipati
andauthored
[libc] mbrtowc implementation (#144760)
implemented the internal and public mbrtowc as well as tests for the public function. --------- Co-authored-by: Sriya Pratipati <[email protected]>
1 parent 3a66e20 commit d078ce7

File tree

14 files changed

+409
-4
lines changed

14 files changed

+409
-4
lines changed

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,6 +1244,9 @@ if(LLVM_LIBC_FULL_BUILD)
12441244
libc.src.sys.socket.recv
12451245
libc.src.sys.socket.recvfrom
12461246
libc.src.sys.socket.recvmsg
1247+
1248+
# wchar.h entrypoints
1249+
libc.src.wchar.mbrtowc
12471250
)
12481251
endif()
12491252

libc/hdr/types/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ add_proxy_header_library(
2020
libc.include.uchar
2121
)
2222

23+
add_proxy_header_library(
24+
mbstate_t
25+
HDRS
26+
mbstate_t.h
27+
DEPENDS
28+
libc.include.llvm-libc-types.mbstate_t
29+
)
30+
2331
add_proxy_header_library(
2432
div_t
2533
HDRS

libc/hdr/types/mbstate_t.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===-- Proxy header for mbstate_t ----------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
10+
#define LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
11+
12+
#ifdef LIBC_FULL_BUILD
13+
14+
#include "include/llvm-libc-types/mbstate_t.h"
15+
16+
#else // Overlay mode
17+
18+
#error "Cannot overlay mbstate_t"
19+
20+
#endif // LIBC_FULL_BUILD
21+
22+
#endif // LLVM_LIBC_HDR_TYPES_MBSTATE_T_H

libc/include/llvm-libc-types/mbstate_t.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@
99
#ifndef LLVM_LIBC_TYPES_MBSTATE_T_H
1010
#define LLVM_LIBC_TYPES_MBSTATE_T_H
1111

12-
// TODO: Complete this once we implement functions that operate on this type.
12+
#include "../llvm-libc-macros/stdint-macros.h"
13+
1314
// Public multibyte conversion-state type.
// The mbrtowc entrypoint reinterprets a pointer to this struct as a pointer to
// the library's internal mbstate, whose members are a char32_t partial code
// point followed by two uint8_t byte counters.
// NOTE(review): the field widths and order here presumably have to stay in
// sync with that internal struct -- confirm before changing either one.
typedef struct {
15+
uint32_t __field1;
16+
uint8_t __field2;
17+
uint8_t __field3;
1418
} mbstate_t;
1519

1620
#endif // LLVM_LIBC_TYPES_MBSTATE_T_H

libc/include/wchar.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,15 @@ functions:
2929
return_type: wint_t
3030
arguments:
3131
- type: int
32+
- name: mbrtowc
33+
standards:
34+
- stdc
35+
return_type: size_t
36+
arguments:
37+
- type: wchar_t *__restrict
38+
- type: const char *__restrict
39+
- type: size_t
40+
- type: mbstate_t *__restrict
3241
- name: wmemset
3342
standards:
3443
- stdc

libc/src/__support/wchar/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,19 @@ add_object_library(
1919
libc.src.__support.math_extras
2020
.mbstate
2121
)
22+
23+
add_object_library(
24+
mbrtowc
25+
HDRS
26+
mbrtowc.h
27+
SRCS
28+
mbrtowc.cpp
29+
DEPENDS
30+
libc.hdr.types.wchar_t
31+
libc.hdr.types.size_t
32+
libc.src.__support.common
33+
libc.src.__support.error_or
34+
libc.src.__support.macros.config
35+
.character_converter
36+
.mbstate
37+
)

libc/src/__support/wchar/mbrtowc.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
//===-- Implementation for mbrtowc function ---------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/__support/wchar/mbrtowc.h"
10+
#include "hdr/types/mbstate_t.h"
11+
#include "hdr/types/size_t.h"
12+
#include "hdr/types/wchar_t.h"
13+
#include "src/__support/common.h"
14+
#include "src/__support/error_or.h"
15+
#include "src/__support/macros/config.h"
16+
#include "src/__support/wchar/character_converter.h"
17+
#include "src/__support/wchar/mbstate.h"
18+
19+
namespace LIBC_NAMESPACE_DECL {
20+
namespace internal {
21+
22+
// Internal implementation of mbrtowc.
//
// Feeds up to `n` bytes from `s` (as char8_t) into a CharacterConverter that
// is bound to the conversion state `ps`, then tries to pop one UTF-32 code
// point from it.
//
// Parameters:
//   pwc - destination for the decoded wide character; may be null, in which
//         case the character is still consumed but not stored.
//   s   - input bytes; if null the function returns 0 without reading input.
//   n   - maximum number of bytes to read from `s`.
//   ps  - conversion state carried across calls.
//
// Returns:
//   0                - `s` was null, or the decoded character is L'\0'.
//   bytes consumed   - a complete non-null character was decoded.
//   (size_t)-2       - the bytes read so far form an incomplete character.
//   Error(-1)        - the converter rejected a byte (encoding error).
ErrorOr<size_t> mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
                        size_t n, mbstate *__restrict ps) {
  CharacterConverter char_conv(ps);
  if (s == nullptr)
    return 0;
  size_t i = 0;
  // Reading in bytes until we have a complete wc or error
  for (; i < n && !char_conv.isFull(); ++i) {
    int err = char_conv.push(static_cast<char8_t>(s[i]));
    // Encoding error
    if (err == -1)
      return Error(-1);
  }
  auto wc = char_conv.pop_utf32();
  if (wc.has_value()) {
    // The C standard allows a null pwc: the character is converted and
    // counted, but there is nowhere to store it, so skip the write.
    if (pwc != nullptr)
      *pwc = wc.value();
    // null terminator -> return 0
    if (wc.value() == L'\0')
      return 0;
    return i;
  }
  // Incomplete but potentially valid
  return static_cast<size_t>(-2);
}
46+
47+
} // namespace internal
48+
49+
} // namespace LIBC_NAMESPACE_DECL

libc/src/__support/wchar/mbrtowc.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//===-- Implementation header for mbrtowc function --------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBRTOWC
10+
#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBRTOWC
11+
12+
#include "hdr/types/size_t.h"
13+
#include "hdr/types/wchar_t.h"
14+
#include "src/__support/common.h"
15+
#include "src/__support/error_or.h"
16+
#include "src/__support/macros/config.h"
17+
#include "src/__support/wchar/mbstate.h"
18+
19+
namespace LIBC_NAMESPACE_DECL {
20+
namespace internal {
21+
22+
// Internal mbrtowc: reads at most `n` bytes from `s`, using and updating the
// conversion state `ps`, and delivers a decoded wide character through `pwc`.
// Returns 0 when `s` is null or the decoded character is L'\0', the number of
// bytes read when a complete character was decoded, (size_t)-2 when the input
// is incomplete so far, and an Error holding -1 on an encoding error.
ErrorOr<size_t> mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
23+
size_t n, mbstate *__restrict ps);
24+
25+
} // namespace internal
26+
27+
} // namespace LIBC_NAMESPACE_DECL
28+
29+
#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBRTOWC

libc/src/__support/wchar/mbstate.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,17 @@ namespace internal {
1818

1919
struct mbstate {
2020
// store a partial codepoint (in UTF-32)
21-
char32_t partial;
21+
char32_t partial = 0;
2222

2323
/*
2424
Progress towards a conversion
2525
Increases with each push(...) until it reaches total_bytes
2626
Decreases with each pop(...) until it reaches 0
2727
*/
28-
uint8_t bytes_stored;
28+
uint8_t bytes_stored = 0;
2929

3030
// Total number of bytes that will be needed to represent this character
31-
uint8_t total_bytes;
31+
uint8_t total_bytes = 0;
3232
};
3333

3434
} // namespace internal

libc/src/wchar/CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,23 @@ add_entrypoint_object(
3434
libc.src.__support.wctype_utils
3535
)
3636

37+
add_entrypoint_object(
38+
mbrtowc
39+
SRCS
40+
mbrtowc.cpp
41+
HDRS
42+
mbrtowc.h
43+
DEPENDS
44+
libc.hdr.types.size_t
45+
libc.hdr.types.mbstate_t
46+
libc.hdr.types.wchar_t
47+
libc.src.__support.common
48+
libc.src.__support.macros.config
49+
libc.src.__support.wchar.mbrtowc
50+
libc.src.__support.libc_errno
51+
libc.src.__support.wchar.mbstate
52+
)
53+
3754
add_entrypoint_object(
3855
wmemset
3956
SRCS

libc/src/wchar/mbrtowc.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
//===-- Implementation of mbrtowc -----------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/wchar/mbrtowc.h"
10+
11+
#include "hdr/types/mbstate_t.h"
12+
#include "hdr/types/size_t.h"
13+
#include "hdr/types/wchar_t.h"
14+
#include "src/__support/common.h"
15+
#include "src/__support/libc_errno.h"
16+
#include "src/__support/macros/config.h"
17+
#include "src/__support/wchar/mbrtowc.h"
18+
#include "src/__support/wchar/mbstate.h"
19+
20+
namespace LIBC_NAMESPACE_DECL {
21+
22+
LLVM_LIBC_FUNCTION(size_t, mbrtowc,
23+
(wchar_t *__restrict pwc, const char *__restrict s, size_t n,
24+
mbstate_t *__restrict ps)) {
25+
static internal::mbstate internal_mbstate;
26+
auto ret = internal::mbrtowc(pwc, s, n,
27+
ps == nullptr
28+
? &internal_mbstate
29+
: reinterpret_cast<internal::mbstate *>(ps));
30+
if (!ret.has_value()) {
31+
// Encoding failure
32+
libc_errno = EILSEQ;
33+
return -1;
34+
}
35+
return ret.value();
36+
}
37+
38+
} // namespace LIBC_NAMESPACE_DECL

libc/src/wchar/mbrtowc.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
//===-- Implementation header for mbrtowc ---------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_WCHAR_MBRTOWC_H
10+
#define LLVM_LIBC_SRC_WCHAR_MBRTOWC_H
11+
12+
#include "hdr/types/mbstate_t.h"
13+
#include "hdr/types/size_t.h"
14+
#include "hdr/types/wchar_t.h"
15+
#include "src/__support/macros/config.h"
16+
17+
namespace LIBC_NAMESPACE_DECL {
18+
19+
// Public mbrtowc entrypoint. Delegates to the internal implementation; when
// `ps` is null an internal static conversion state is used. On an encoding
// failure errno is set to EILSEQ and (size_t)-1 is returned; otherwise the
// value produced by the internal implementation is returned.
size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s, size_t n,
20+
mbstate_t *__restrict ps);
21+
22+
} // namespace LIBC_NAMESPACE_DECL
23+
24+
#endif // LLVM_LIBC_SRC_WCHAR_MBRTOWC_H

libc/test/src/wchar/CMakeLists.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,20 @@ add_libc_test(
2323
libc.src.wchar.btowc
2424
)
2525

26+
add_libc_test(
27+
mbrtowc_test
28+
SUITE
29+
libc_wchar_unittests
30+
SRCS
31+
mbrtowc_test.cpp
32+
DEPENDS
33+
libc.src.__support.libc_errno
34+
libc.src.string.memset
35+
libc.src.wchar.mbrtowc
36+
libc.hdr.types.mbstate_t
37+
libc.hdr.types.wchar_t
38+
)
39+
2640
add_libc_test(
2741
wctob_test
2842
SUITE

0 commit comments

Comments
 (0)