-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libc] add wmemchr, wcslen, wcschr, wcsrchr, wcspbrk, wcsstr #121183
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-libc Author: Tristan Ross (RossComputerGuy) Changes: Adds a few wchar functions which are missing. More functions will come in follow-up PRs or may be tacked onto this one. The goal is to start getting LLVM libc to work with libcxx. Full diff: https://github.com/llvm/llvm-project/pull/121183.diff 14 Files Affected:
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index b008e0e6684fdd..2720ecc2f9f772 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -261,6 +261,10 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.time.nanosleep
# wchar.h entrypoints
+ libc.src.wchar.wcsrchr
+ libc.src.wchar.wcschr
+ libc.src.wchar.wcslen
+ libc.src.wchar.wmemchr
libc.src.wchar.wctob
# locale.h entrypoints
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 00f0c6a8bfb8e4..bfc8224660dad0 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -349,6 +349,10 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.write
# wchar.h entrypoints
+ libc.src.wchar.wcsrchr
+ libc.src.wchar.wcschr
+ libc.src.wchar.wcslen
+ libc.src.wchar.wmemchr
libc.src.wchar.wctob
)
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index 49a8d61b938027..50f1789bc60677 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -346,6 +346,10 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.write
# wchar.h entrypoints
+ libc.src.wchar.wcsrchr
+ libc.src.wchar.wcschr
+ libc.src.wchar.wcslen
+ libc.src.wchar.wmemchr
libc.src.wchar.wctob
)
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 7e549607716c02..82e6fe688e619f 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -348,6 +348,10 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.write
# wchar.h entrypoints
+ libc.src.wchar.wcsrchr
+ libc.src.wchar.wcschr
+ libc.src.wchar.wcslen
+ libc.src.wchar.wmemchr
libc.src.wchar.wctob
libc.src.wchar.btowc
)
diff --git a/libc/hdrgen/yaml/wchar.yaml b/libc/hdrgen/yaml/wchar.yaml
index bc824b21d8be17..a1ef7c41e1492f 100644
--- a/libc/hdrgen/yaml/wchar.yaml
+++ b/libc/hdrgen/yaml/wchar.yaml
@@ -14,3 +14,31 @@ functions:
return_type: int
arguments:
- type: wint_t
+ - name: wmemchr
+ standards:
+ - stdc
+ return_type: const wchar_t *
+ arguments:
+ - type: const wchar_t *
+ - type: wchar_t
+ - type: size_t
+ - name: wcslen
+ standards:
+ - stdc
+ return_type: size_t
+ arguments:
+ - type: const wchar_t *
+ - name: wcschr
+ standards:
+ - stdc
+ return_type: const wchar_t *
+ arguments:
+ - type: const wchar_t *
+ - type: wchar_t
+ - name: wcsrchr
+ standards:
+ - stdc
+ return_type: const wchar_t *
+ arguments:
+ - type: const wchar_t *
+ - type: wchar_t
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index d4c98ea527a8f9..f5a9158d28e330 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -22,3 +22,50 @@ add_entrypoint_object(
libc.hdr.wchar_macros
libc.src.__support.wctype_utils
)
+
+add_entrypoint_object(
+ wmemchr
+ SRCS
+ wmemchr.cpp
+ HDRS
+ wmemchr.h
+ DEPENDS
+ libc.hdr.types.size_t
+ libc.hdr.types.wchar_t
+ libc.src.__support.wctype_utils
+)
+
+add_entrypoint_object(
+ wcslen
+ SRCS
+ wcslen.cpp
+ HDRS
+ wcslen.h
+ DEPENDS
+ libc.hdr.types.size_t
+ libc.hdr.types.wchar_t
+ libc.src.__support.wctype_utils
+)
+
+add_entrypoint_object(
+ wcschr
+ SRCS
+ wcschr.cpp
+ HDRS
+ wcschr.h
+ DEPENDS
+ .wcslen
+ .wmemchr
+ libc.hdr.types.wchar_t
+)
+
+add_entrypoint_object(
+ wcsrchr
+ SRCS
+ wcsrchr.cpp
+ HDRS
+ wcsrchr.h
+ DEPENDS
+ .wcslen
+ libc.hdr.types.wchar_t
+)
diff --git a/libc/src/wchar/wcschr.cpp b/libc/src/wchar/wcschr.cpp
new file mode 100644
index 00000000000000..ebb24a64f639e1
--- /dev/null
+++ b/libc/src/wchar/wcschr.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of wcschr ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcschr.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "wcslen.h"
+#include "wmemchr.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Returns a pointer to the first occurrence of `c` in the null-terminated wide
+// string `s`, or nullptr if `c` does not occur. The terminating null wide
+// character counts as part of the string, so searching for L'\0' yields a
+// pointer to the terminator.
+//
+// The scan is self-contained: it does not call the wcslen or wmemchr
+// entrypoints, which may be excluded from a given libc configuration.
+LLVM_LIBC_FUNCTION(const wchar_t *, wcschr, (const wchar_t *s, wchar_t c)) {
+  // Compare against `c` before testing for the terminator so that
+  // c == L'\0' matches the terminator itself.
+  for (; *s != c; ++s) {
+    if (*s == L'\0')
+      return nullptr;
+  }
+  return s;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcschr.h b/libc/src/wchar/wcschr.h
new file mode 100644
index 00000000000000..6466d8e2ec2d62
--- /dev/null
+++ b/libc/src/wchar/wcschr.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for wcschr ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSCHR_H
+#define LLVM_LIBC_SRC_WCHAR_WCSCHR_H
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+const wchar_t *wcschr(const wchar_t *s, wchar_t c);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSCHR_H
diff --git a/libc/src/wchar/wcslen.cpp b/libc/src/wchar/wcslen.cpp
new file mode 100644
index 00000000000000..2e711af8b12bf7
--- /dev/null
+++ b/libc/src/wchar/wcslen.cpp
@@ -0,0 +1,22 @@
+//===-- Implementation of wcslen ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcslen.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Returns the number of wide characters in the null-terminated wide string
+// `s`, not counting the terminating null wide character.
+LLVM_LIBC_FUNCTION(size_t, wcslen, (const wchar_t *s)) {
+  size_t length = 0;
+  // Advance only while the current character is not the terminator; a
+  // post-increment inside the condition would also count the terminator.
+  while (s[length] != L'\0')
+    ++length;
+  return length;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcslen.h b/libc/src/wchar/wcslen.h
new file mode 100644
index 00000000000000..f472a1d5e96315
--- /dev/null
+++ b/libc/src/wchar/wcslen.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for wcslen ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSLEN_H
+#define LLVM_LIBC_SRC_WCHAR_WCSLEN_H
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t wcslen(const wchar_t *s);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSLEN_H
diff --git a/libc/src/wchar/wcsrchr.cpp b/libc/src/wchar/wcsrchr.cpp
new file mode 100644
index 00000000000000..10894bcb987032
--- /dev/null
+++ b/libc/src/wchar/wcsrchr.cpp
@@ -0,0 +1,26 @@
+//===-- Implementation of wcsrchr -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsrchr.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "wcslen.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Returns a pointer to the last occurrence of `c` in the null-terminated wide
+// string `s`, or nullptr if `c` does not occur. The terminating null wide
+// character counts as part of the string, so searching for L'\0' yields a
+// pointer to the terminator.
+//
+// Implemented as a single forward pass that remembers the most recent match,
+// so it needs neither a separate length computation nor any other entrypoint.
+LLVM_LIBC_FUNCTION(const wchar_t *, wcsrchr, (const wchar_t *s, wchar_t c)) {
+  const wchar_t *last = nullptr;
+  for (;; ++s) {
+    // Test for a match before testing for the end so the terminator itself
+    // can be matched when c == L'\0'.
+    if (*s == c)
+      last = s;
+    if (*s == L'\0')
+      return last;
+  }
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsrchr.h b/libc/src/wchar/wcsrchr.h
new file mode 100644
index 00000000000000..8b4a3ef11c6b49
--- /dev/null
+++ b/libc/src/wchar/wcsrchr.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for wcsrchr -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSRCHR_H
+#define LLVM_LIBC_SRC_WCHAR_WCSRCHR_H
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+const wchar_t *wcsrchr(const wchar_t *s, wchar_t c);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSRCHR_H
diff --git a/libc/src/wchar/wmemchr.cpp b/libc/src/wchar/wmemchr.cpp
new file mode 100644
index 00000000000000..62191454343a8a
--- /dev/null
+++ b/libc/src/wchar/wmemchr.cpp
@@ -0,0 +1,26 @@
+//===-- Implementation of wmemchr -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wmemchr.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Scans the first `n` wide characters starting at `s` and returns a pointer to
+// the first one equal to `c`, or nullptr if none of them matches.
+LLVM_LIBC_FUNCTION(const wchar_t *, wmemchr,
+                   (const wchar_t *s, wchar_t c, size_t n)) {
+  const wchar_t *cursor = s;
+  // Count down the remaining elements; `cursor` is only dereferenced while
+  // at least one element is left, so n == 0 never touches `s`.
+  for (size_t remaining = n; remaining != 0; --remaining, ++cursor) {
+    if (*cursor == c)
+      return cursor;
+  }
+  return nullptr;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wmemchr.h b/libc/src/wchar/wmemchr.h
new file mode 100644
index 00000000000000..e47a0a75735939
--- /dev/null
+++ b/libc/src/wchar/wmemchr.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for wmemchr -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WMEMCHR_H
+#define LLVM_LIBC_SRC_WCHAR_WMEMCHR_H
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+const wchar_t *wmemchr(const wchar_t *s, wchar_t c, size_t n);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WMEMCHR_H
|
a32e502
to
9dd063a
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Mind rebasing?
for (size_t i = 0; i < n; i++) { | ||
if (s[i] == c) { | ||
return &s[i]; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
libc.src.wchar.wcsrchr | ||
libc.src.wchar.wcschr | ||
libc.src.wchar.wcslen | ||
libc.src.wchar.wmemchr |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sort alphabetically, here and below
(const wchar_t *wcs, const wchar_t *accept)) { | ||
size_t n_accept = wcslen(accept); | ||
|
||
for (size_t i = 0; i < wcslen(wcs); i++) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we can implement wcspbrk
in terms of wcslen
; what if the user has configured llvm-libc to not include wcslen
? wcsrchr
also has this issue, maybe others.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, LLVM libc is written in such a way that entrypoints should be independent from each other. The correct way to do this is to move the function into an internal utility header and then make both call that instead.
LLVM_LIBC_FUNCTION(size_t, wcslen, (const wchar_t *s)) { | ||
size_t length = 0; | ||
while (s[length++]) | ||
; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't actually know what's special about wchars, is this significantly different from the existing byte-by-byte implementation in string_utils.h
? We probably want wide string utils or something.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
wchar_t has canonically been 2B on Windows and 4B on Unixes, so we can read more than one byte at a time just by having the vanilla implementation as written here.
This impl has an off by one bug in it; length
will get post incremented regardless of whether s[length]
is truthy or not. So my repost will look slightly different.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hm, we could possibly just template the implementation instead of using char *
, that way the pointer arithmetic would work, I'd think.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I like the template implementation idea, some sort of generic string type that both char strings and wide-char strings can utilize.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
in general we don't allow including and using libc functions by their public names within our code. The recommended pattern is to take pieces that are going to be reused and put them in a utility file. See string utils for an example: https://github.com/llvm/llvm-project/blob/main/libc/src/string/string_utils.h
We can probably better document this code pattern in our first party developer docs, if it's not already... |
Thanks for the reviews, I'll make these changes once I have the bandwidth. Until then, I'll mark as a draft. |
Looks like these are necessary for building libcxxabi #124027. |
@RossComputerGuy may I take over this PR? I'd like to split it up into one PR per fn (so that any issues in code review don't block everything from landing). I'll keep you as the author and note myself in the commit message as co author, unless you'd prefer for me to do something different? |
Sure, I'm limited on bandwidth so I know I won't have time to get back to this. |
Add internal helper, which may be reusable when implementing wmemchr, wcspbrk, wcsrchr, wcsstr. Link: llvm#121183 Link: llvm#124027 Co-authored-by: Nick Desaulniers <[email protected]>
Update string_utils' string_length to work with char* or wchar_t*, so that it may be reusable when implementing wmemchr, wcspbrk, wcsrchr, wcsstr. Link: #121183 Link: #124027 Co-authored-by: Nick Desaulniers <[email protected]> --------- Co-authored-by: Tristan Ross <[email protected]>
While wcslen was easy to spin out, the others are running into an issue in testing; our testing framework needs to be able to print the expected vs actual if the test fails. It looks like we'd need to implement wcstombs FIRST in order to use that from the testing framework, to convert the wide character strings into something we could print. wcstombs can be implemented in terms of wcsrtombs, which can be implemented in terms of wcsnrtombs, which can be implemented in terms of wcrtomb, which can be implemented in terms of |
Implemented wmemchr and tests. Fixes: #121183 --------- Co-authored-by: Sriya Pratipati <[email protected]>
Implemented wmemchr and tests. Fixes: llvm#121183 --------- Co-authored-by: Sriya Pratipati <[email protected]>
Implemented wmemchr and tests. Fixes: llvm#121183 --------- Co-authored-by: Sriya Pratipati <[email protected]>
Adds a few wchar functions which are missing. More functions to come in follow up PR's or may be tacked onto this one. Goal is to start getting LLVM libc to work with libcxx.