Skip to content

Commit 7a33b70

Browse files
authored
[libc] wcstok implementation (#145989)
Implemented wcstok and added tests
1 parent 790bc5b commit 7a33b70

File tree

7 files changed

+282
-0
lines changed

7 files changed

+282
-0
lines changed

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ set(TARGET_LIBC_ENTRYPOINTS
387387
libc.src.wchar.wmemchr
388388
libc.src.wchar.wcpcpy
389389
libc.src.wchar.wcpncpy
390+
libc.src.wchar.wcstok
390391

391392
# sys/uio.h entrypoints
392393
libc.src.sys.uio.writev

libc/include/wchar.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,14 @@ functions:
196196
arguments:
197197
- type: wchar_t *__restrict
198198
- type: const wchar_t *__restrict
199+
- name: wcstok
200+
standards:
201+
- stdc
202+
return_type: wchar_t *
203+
arguments:
204+
- type: wchar_t *__restrict
205+
- type: const wchar_t *__restrict
206+
- type: wchar_t** __restrict
199207
- name: wcpcpy
200208
standards:
201209
- stdc

libc/src/wchar/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,16 @@ add_entrypoint_object(
4545
libc.src.__support.wctype_utils
4646
)
4747

48+
add_entrypoint_object(
49+
wcstok
50+
SRCS
51+
wcstok.cpp
52+
HDRS
53+
wcstok.h
54+
DEPENDS
55+
libc.hdr.types.wchar_t
56+
)
57+
4858
add_entrypoint_object(
4959
wcrtomb
5060
SRCS

libc/src/wchar/wcstok.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
//===-- Implementation of wcstok ------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/wchar/wcstok.h"
10+
11+
#include "hdr/types/wchar_t.h"
12+
#include "src/__support/common.h"
13+
14+
namespace LIBC_NAMESPACE_DECL {
15+
16+
// Returns true if `wc` is equal to any wide character in the null-terminated
// string `delimiters`, and false otherwise. The terminating L'\0' of
// `delimiters` is never compared against, so L'\0' is never reported as a
// delimiter.
//
// This helper is only used inside this translation unit, so it is given
// internal linkage to avoid exporting an extra symbol from the library.
static bool isADelimeter(wchar_t wc, const wchar_t *delimiters) {
  for (; *delimiters != L'\0'; ++delimiters)
    if (wc == *delimiters)
      return true;
  return false;
}
22+
23+
LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
24+
(wchar_t *__restrict str, const wchar_t *__restrict delim,
25+
wchar_t **__restrict context)) {
26+
if (str == nullptr) {
27+
if (*context == nullptr)
28+
return nullptr;
29+
30+
str = *context;
31+
}
32+
33+
wchar_t *tok_start, *tok_end;
34+
for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim);
35+
++tok_start)
36+
;
37+
38+
for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim);
39+
++tok_end)
40+
;
41+
42+
if (*tok_end != L'\0') {
43+
*tok_end = L'\0';
44+
++tok_end;
45+
}
46+
*context = tok_end;
47+
return *tok_start == L'\0' ? nullptr : tok_start;
48+
}
49+
50+
} // namespace LIBC_NAMESPACE_DECL

libc/src/wchar/wcstok.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===-- Implementation header for wcstok ----------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_WCHAR_WCSTOK_H
10+
#define LLVM_LIBC_SRC_WCHAR_WCSTOK_H
11+
12+
#include "hdr/types/wchar_t.h"
13+
#include "src/__support/macros/config.h"
14+
15+
namespace LIBC_NAMESPACE_DECL {
16+
17+
wchar_t *wcstok(wchar_t *__restrict str, const wchar_t *__restrict delim,
18+
wchar_t **__restrict context);
19+
20+
} // namespace LIBC_NAMESPACE_DECL
21+
22+
#endif // LLVM_LIBC_SRC_WCHAR_WCSTOK_H

libc/test/src/wchar/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,16 @@ add_libc_test(
123123
libc.src.wchar.wcschr
124124
)
125125

126+
add_libc_test(
127+
wcstok_test
128+
SUITE
129+
libc_wchar_unittests
130+
SRCS
131+
wcstok_test.cpp
132+
DEPENDS
133+
libc.src.wchar.wcstok
134+
)
135+
126136
add_libc_test(
127137
wcsncmp_test
128138
SUITE

libc/test/src/wchar/wcstok_test.cpp

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
//===-- Unittests for wcstok ----------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "hdr/types/size_t.h"
10+
#include "hdr/types/wchar_t.h"
11+
#include "src/wchar/wcstok.h"
12+
#include "test/UnitTest/Test.h"
13+
14+
// Covers inputs with no delimiter-separated split: empty sources, a source
// made only of the delimiter, and a source in which the delimiter never
// appears.
TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
  { // Empty source and delimiter string.
    wchar_t empty[] = L"";
    wchar_t *reserve = nullptr;
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
    // Another call to ensure that 'reserve' is not in a bad state.
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"", &reserve), nullptr);
  }
  { // Empty source and single character delimiter string.
    wchar_t empty[] = L"";
    wchar_t *reserve = nullptr;
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
    // Another call to ensure that 'reserve' is not in a bad state.
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
  }
  { // Same character source and delimiter string.
    wchar_t single[] = L"_";
    wchar_t *reserve = nullptr;
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
    // Another call to ensure that 'reserve' is not in a bad state.
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
  }
  { // Delimiter never appears in the source: the whole string is one token.
    wchar_t multiple[] = L"1,2";
    wchar_t *reserve = nullptr;
    wchar_t *tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
    // Guard the dereferences below so a regression fails instead of crashing.
    ASSERT_TRUE(tok != nullptr);
    ASSERT_TRUE(tok[0] == L'1');
    ASSERT_TRUE(tok[1] == L',');
    ASSERT_TRUE(tok[2] == L'2');
    ASSERT_TRUE(tok[3] == L'\0');
    // Another call to ensure that 'reserve' is not in a bad state.
    tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
    ASSERT_TRUE(tok != nullptr);
    ASSERT_TRUE(tok[0] == L'1');
    ASSERT_TRUE(tok[1] == L',');
    ASSERT_TRUE(tok[2] == L'2');
    ASSERT_TRUE(tok[3] == L'\0');
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
  }
}
56+
57+
// A delimiter at the very start of the source must be skipped, so the token
// begins at the first non-delimiter character.
TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsFirstCharacterShouldBeIgnored) {
  wchar_t src[] = L".123";
  wchar_t *reserve = nullptr;
  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
  // Guard the dereferences below so a regression fails instead of crashing.
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'3');
  ASSERT_TRUE(tok[3] == L'\0');
  // Another call to ensure that 'reserve' is not in a bad state.
  tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'3');
  ASSERT_TRUE(tok[3] == L'\0');
  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L".", &reserve), nullptr);
}
73+
74+
// A delimiter in the middle of the source ends the first token there.
TEST(LlvmLibcWCSTokReentrantTest, DelimiterIsMiddleCharacter) {
  wchar_t src[] = L"12,34";
  wchar_t *reserve = nullptr;
  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
  // Guard the dereferences below so a regression fails instead of crashing.
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'\0');
  // Another call to ensure that 'reserve' is not in a bad state.
  tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'\0');
  // The second call re-scanned the already terminated "12", so nothing is
  // left to resume from.
  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
}
88+
89+
// A trailing delimiter must not become part of the token, nor produce an
// extra empty token afterwards.
TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsLastCharacterShouldBeIgnored) {
  wchar_t src[] = L"1234:";
  wchar_t *reserve = nullptr;
  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
  // Guard the dereferences below so a regression fails instead of crashing.
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'3');
  ASSERT_TRUE(tok[3] == L'4');
  ASSERT_TRUE(tok[4] == L'\0');
  // Another call to ensure that 'reserve' is not in a bad state.
  tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'3');
  ASSERT_TRUE(tok[3] == L'4');
  ASSERT_TRUE(tok[4] == L'\0');
  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
}
107+
108+
// Scanning must stop at the first L'\0' even when the array holds further
// characters (including a delimiter) after it.
TEST(LlvmLibcWCSTokReentrantTest, ShouldNotGoPastNullTerminator) {
  wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'};
  wchar_t *reserve = nullptr;
  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
  // Guard the dereferences below so a regression fails instead of crashing.
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'\0');
  // Another call to ensure that 'reserve' is not in a bad state.
  tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'\0');
  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
}
122+
123+
// With neither an input string nor a saved position there is nothing to scan.
TEST(LlvmLibcWCSTokReentrantTest,
     ShouldReturnNullptrWhenBothSrcAndSaveptrAreNull) {
  wchar_t *reserve = nullptr;
  wchar_t *src = nullptr;
  // The call must report "no token" by returning nullptr instead of crashing
  // on the null pointers.
  wchar_t *result = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
  ASSERT_EQ(result, nullptr);
  // Both pointers must still be null after the call.
  ASSERT_EQ(src, nullptr);
  ASSERT_EQ(reserve, nullptr);
}
134+
135+
// Resuming with a null source must continue from the saved position and
// return each following token in turn, then nullptr once the input is
// exhausted.
TEST(LlvmLibcWCSTokReentrantTest,
     SubsequentCallsShouldFindFollowingDelimiters) {
  wchar_t src[] = L"12,34.56";
  wchar_t *reserve = nullptr;
  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &reserve);
  // Guard the dereferences below so a regression fails instead of crashing.
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'1');
  ASSERT_TRUE(token[1] == L'2');
  ASSERT_TRUE(token[2] == L'\0');

  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'3');
  ASSERT_TRUE(token[1] == L'4');
  ASSERT_TRUE(token[2] == L'\0');

  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'5');
  ASSERT_TRUE(token[1] == L'6');
  ASSERT_TRUE(token[2] == L'\0');
  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
  ASSERT_EQ(token, nullptr);
  // Subsequent calls after hitting the end of the string should also return
  // nullptr.
  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
  ASSERT_EQ(token, nullptr);
}
160+
161+
// Runs of mixed delimiters around each token must be stripped entirely, even
// when the delimiter set changes between calls.
TEST(LlvmLibcWCSTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
  wchar_t src[] = L"__ab__:_cd__:__ef__:__";
  wchar_t *reserve = nullptr;
  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &reserve);
  // Guard the dereferences below so a regression fails instead of crashing.
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'a');
  ASSERT_TRUE(token[1] == L'b');
  ASSERT_TRUE(token[2] == L'\0');

  token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &reserve);
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'c');
  ASSERT_TRUE(token[1] == L'd');
  ASSERT_TRUE(token[2] == L'\0');

  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &reserve);
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'e');
  ASSERT_TRUE(token[1] == L'f');
  ASSERT_TRUE(token[2] == L'\0');

  // Only delimiters remain after "ef", so no further token exists.
  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
  ASSERT_EQ(token, nullptr);
}

0 commit comments

Comments
 (0)