Skip to content

Commit 718729e

Browse files
committed
[libc] Add memmem implementation
Introduce the `memmem` libc string function. `memmem_implementation` performs shared logic for `strstr`, `strcasestr`, and `memmem`; essentially reconfiguring what was the `strstr_implementation` to support length parameters. Differential Revision: https://reviews.llvm.org/D147822
1 parent 1fa26e6 commit 718729e

File tree

19 files changed

+266
-12
lines changed

19 files changed

+266
-12
lines changed

libc/config/baremetal/arm/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ set(TARGET_LIBC_ENTRYPOINTS
2828
libc.src.string.memchr
2929
libc.src.string.memcmp
3030
libc.src.string.memcpy
31+
libc.src.string.memmem
3132
libc.src.string.memmove
3233
libc.src.string.mempcpy
3334
libc.src.string.memrchr

libc/config/darwin/arm/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ set(TARGET_LIBC_ENTRYPOINTS
2828
libc.src.string.memchr
2929
libc.src.string.memcmp
3030
libc.src.string.memcpy
31+
libc.src.string.memmem
3132
libc.src.string.memmove
3233
libc.src.string.mempcpy
3334
libc.src.string.memrchr

libc/config/darwin/x86_64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ set(TARGET_LIBC_ENTRYPOINTS
2424
libc.src.string.memchr
2525
libc.src.string.memcmp
2626
libc.src.string.memcpy
27+
libc.src.string.memmem
2728
libc.src.string.memmove
2829
libc.src.string.mempcpy
2930
libc.src.string.memrchr

libc/config/gpu/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ set(TARGET_LIBC_ENTRYPOINTS
2424
libc.src.string.memchr
2525
libc.src.string.memcmp
2626
libc.src.string.memcpy
27+
libc.src.string.memmem
2728
libc.src.string.memmove
2829
libc.src.string.mempcpy
2930
libc.src.string.memrchr

libc/config/linux/aarch64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ set(TARGET_LIBC_ENTRYPOINTS
3838
libc.src.string.memchr
3939
libc.src.string.memcmp
4040
libc.src.string.memcpy
41+
libc.src.string.memmem
4142
libc.src.string.memmove
4243
libc.src.string.mempcpy
4344
libc.src.string.memrchr

libc/config/linux/arm/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ set(TARGET_LIBC_ENTRYPOINTS
2929
libc.src.string.memchr
3030
libc.src.string.memcmp
3131
libc.src.string.memcpy
32+
libc.src.string.memmem
3233
libc.src.string.memmove
3334
libc.src.string.mempcpy
3435
libc.src.string.memrchr

libc/config/linux/riscv64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ set(TARGET_LIBC_ENTRYPOINTS
3838
libc.src.string.memchr
3939
libc.src.string.memcmp
4040
libc.src.string.memcpy
41+
libc.src.string.memmem
4142
libc.src.string.memmove
4243
libc.src.string.mempcpy
4344
libc.src.string.memrchr

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ set(TARGET_LIBC_ENTRYPOINTS
3838
libc.src.string.memchr
3939
libc.src.string.memcmp
4040
libc.src.string.memcpy
41+
libc.src.string.memmem
4142
libc.src.string.memmove
4243
libc.src.string.mempcpy
4344
libc.src.string.memrchr

libc/config/windows/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ set(TARGET_LIBC_ENTRYPOINTS
2525
libc.src.string.memchr
2626
libc.src.string.memcmp
2727
libc.src.string.memcpy
28+
libc.src.string.memmem
2829
libc.src.string.memmove
2930
libc.src.string.mempcpy
3031
libc.src.string.memrchr

libc/spec/gnu_ext.td

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,12 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
5757
[], // Macros
5858
[], // Types
5959
[], // Enumerations
60-
[
60+
[
61+
// memmem(haystack, haystack_len, needle, needle_len): GNU extension that
// searches a byte range for a byte sequence and returns a pointer into the
// haystack.
FunctionSpec<
    "memmem",
    RetValSpec<VoidPtr>,
    [ArgSpec<ConstVoidPtr>, ArgSpec<SizeTType>, ArgSpec<ConstVoidPtr>, ArgSpec<SizeTType>]
>,
6166
FunctionSpec<
6267
"memrchr",
6368
RetValSpec<VoidPtr>,

libc/src/string/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,16 @@ add_entrypoint_object(
5959
.memory_utils.memcpy_implementation
6060
)
6161

62+
add_entrypoint_object(
63+
memmem
64+
SRCS
65+
memmem.cpp
66+
HDRS
67+
memmem.h
68+
DEPENDS
69+
.memory_utils.memmem_implementation
70+
)
71+
6272
add_entrypoint_object(
6373
memchr
6474
SRCS

libc/src/string/memmem.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
//===-- Implementation of memmem ------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/string/memmem.h"
10+
#include "src/__support/common.h"
11+
#include "src/string/memory_utils/memmem_implementations.h"
12+
13+
namespace __llvm_libc {
14+
15+
LLVM_LIBC_FUNCTION(void *, memmem,
16+
(const void *haystack, size_t haystack_len,
17+
const void *needle, size_t needle_len)) {
18+
constexpr auto comp = [](unsigned char l, unsigned char r) -> int {
19+
return l - r;
20+
};
21+
return memmem_implementation(haystack, haystack_len, needle, needle_len,
22+
comp);
23+
}
24+
25+
} // namespace __llvm_libc

libc/src/string/memmem.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//===-- Implementation header for memmem ------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_STRING_MEMMEM_H
10+
#define LLVM_LIBC_SRC_STRING_MEMMEM_H
11+
12+
#include <stddef.h> // For size_t
13+
14+
namespace __llvm_libc {
15+
16+
void *memmem(const void *haystack, size_t haystack_len, const void *needle,
17+
size_t needle_len);
18+
19+
} // namespace __llvm_libc
20+
21+
#endif // LLVM_LIBC_SRC_STRING_MEMMEM_H

libc/src/string/memory_utils/CMakeLists.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,13 @@ add_header_library(
7272
)
7373

7474
add_header_library(
75-
strstr_implementation
75+
strstr_implementation
7676
HDRS
7777
strstr_implementations.h
7878
)
79+
80+
add_header_library(
81+
memmem_implementation
82+
HDRS
83+
memmem_implementations.h
84+
)
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
//===-- memmem implementation -----------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMMEM_IMPLEMENTATIONS_H
10+
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMMEM_IMPLEMENTATIONS_H
11+
12+
#include <stddef.h>
13+
14+
namespace __llvm_libc {
15+
16+
// Searches the `haystack_len` bytes starting at `haystack` for the first
// occurrence of the `needle_len` bytes starting at `needle`.
//
// `comp` is called as comp(haystack_byte, needle_byte) with unsigned char
// arguments; a zero result means the two bytes match.
//
// Returns a pointer to the start of the first match, `haystack` itself when
// `needle_len` is zero, or nullptr when the needle is longer than the haystack
// or does not occur in it.
template <typename Comp>
constexpr static void *
memmem_implementation(const void *haystack, size_t haystack_len,
                      const void *needle, size_t needle_len, Comp &&comp) {
  // TODO: simple brute force implementation. This can be
  // improved upon using well known string matching algorithms.
  if (needle_len == 0)
    return const_cast<void *>(haystack);

  if (haystack_len < needle_len)
    return nullptr;

  const auto *hay = static_cast<const unsigned char *>(haystack);
  const auto *pattern = static_cast<const unsigned char *>(needle);

  // Last offset at which the whole needle still fits inside the haystack.
  const size_t last_start = haystack_len - needle_len;
  for (size_t start = 0; start <= last_start; ++start) {
    // Count how many leading needle bytes match at this offset.
    size_t matched = 0;
    while (matched < needle_len &&
           !comp(hay[start + matched], pattern[matched]))
      ++matched;
    if (matched == needle_len)
      return const_cast<unsigned char *>(hay + start);
  }
  return nullptr;
}
39+
40+
} // namespace __llvm_libc
41+
42+
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMMEM_IMPLEMENTATIONS_H

libc/src/string/memory_utils/strstr_implementations.h

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,19 @@
99
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_STRSTR_IMPLEMENTATIONS_H
1010
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_STRSTR_IMPLEMENTATIONS_H
1111

12+
#include "src/string/memory_utils/memmem_implementations.h"
13+
#include "src/string/string_utils.h"
1214
#include <stddef.h>
1315

1416
namespace __llvm_libc {
1517

1618
// Substring search over NUL-terminated strings, parameterized on the
// character comparator `comp`. This span is a diff hunk: gutter lines ending
// in '-' mark the removed brute-force loop, and gutter lines ending in '+'
// mark the replacement body.
template <typename Comp>
1719
constexpr static char *strstr_implementation(const char *haystack,
1820
const char *needle, Comp &&comp) {
19-
// TODO: This is a simple brute force implementation. This can be
20-
// improved upon using well known string matching algorithms.
21-
for (size_t i = 0; comp(haystack[i], 0); ++i) {
22-
size_t j = 0;
23-
for (; comp(haystack[i + j], 0) && !comp(haystack[i + j], needle[j]); ++j)
24-
;
25-
if (!comp(needle[j], 0))
26-
return const_cast<char *>(haystack + i);
27-
}
28-
return nullptr;
21+
// Replacement body: measure both strings with internal::string_length and
// delegate the search to memmem_implementation, passing `comp` through.
void *result = memmem_implementation(
22+
static_cast<const void *>(haystack), internal::string_length(haystack),
23+
static_cast<const void *>(needle), internal::string_length(needle), comp);
24+
// Convert the untyped result back to the char * this function returns.
return static_cast<char *>(result);
2925
}
3026

3127
} // namespace __llvm_libc

libc/test/src/string/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,16 @@ add_libc_unittest(
5151
libc.src.string.mempcpy
5252
)
5353

54+
add_libc_unittest(
55+
memmem_test
56+
SUITE
57+
libc_string_unittests
58+
SRCS
59+
memmem_test.cpp
60+
DEPENDS
61+
libc.src.string.memmem
62+
)
63+
5464
add_libc_unittest(
5565
memchr_test
5666
SUITE

libc/test/src/string/memmem_test.cpp

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
//===-- Unittests for memmem ----------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/string/memmem.h"
10+
#include "test/UnitTest/Test.h"
11+
12+
#include "src/string/string_utils.h"
13+
14+
namespace __llvm_libc {
15+
16+
// With a zero-length haystack and a zero-length needle, memmem is expected to
// return the haystack pointer it was given (a null pointer here).
TEST(LlvmLibcMemmemTest, EmptyHaystackEmptyNeedleReturnsHaystack) {
  char *h = nullptr;
  char *n = nullptr;
  void *result = __llvm_libc::memmem(h, 0, n, 0);
  ASSERT_EQ(static_cast<char *>(result), h);
}
22+
23+
// A non-empty needle searched for in a zero-length haystack yields nullptr.
TEST(LlvmLibcMemmemTest, EmptyHaystackNonEmptyNeedleReturnsNull) {
  char *empty_haystack = nullptr;
  char needle[] = {'a', 'b', 'c'};
  void *found =
      __llvm_libc::memmem(empty_haystack, 0, needle, sizeof(needle));
  ASSERT_EQ(found, static_cast<void *>(nullptr));
}
29+
30+
// A zero-length needle matches at the very start of a non-empty haystack.
TEST(LlvmLibcMemmemTest, EmptyNeedleReturnsHaystack) {
  char haystack[] = {'a', 'b', 'c'};
  char *empty_needle = nullptr;
  void *found =
      __llvm_libc::memmem(haystack, sizeof(haystack), empty_needle, 0);
  ASSERT_EQ(static_cast<char *>(found), haystack);
}
36+
37+
// When needle and haystack hold the same bytes, the match starts at offset 0.
TEST(LlvmLibcMemmemTest, ExactMatchReturnsHaystack) {
  char haystack[] = {'a', 'b', 'c'};
  char needle[] = {'a', 'b', 'c'};
  void *found = __llvm_libc::memmem(haystack, sizeof(haystack), needle,
                                    sizeof(needle));
  ASSERT_EQ(static_cast<char *>(found), haystack);
}
43+
44+
// With several candidate positions, the earliest one is reported.
TEST(LlvmLibcMemmemTest, ReturnFirstMatchOfNeedle) {
  char haystack[] = {'a', 'a', 'b', 'c'};
  char needle[] = {'a'};
  void *found = __llvm_libc::memmem(haystack, sizeof(haystack), needle,
                                    sizeof(needle));
  ASSERT_EQ(static_cast<char *>(found), haystack);
}
50+
51+
// Partial matches must be skipped; only a complete needle match counts.
TEST(LlvmLibcMemmemTest, ReturnFirstExactMatchOfNeedle) {
  {
    // Lone 'a' bytes at offsets 0 and 2 are not followed by a second 'a'.
    char haystack[] = {'a', 'b', 'a', 'c', 'a', 'a'};
    char needle[] = {'a', 'a'};
    void *found = __llvm_libc::memmem(haystack, sizeof(haystack), needle,
                                      sizeof(needle));
    ASSERT_EQ(static_cast<char *>(found), haystack + 4);
  }
  {
    // The attempt at offset 0 fails on its second byte; offset 1 succeeds.
    char haystack[] = {'a', 'a', 'b', 'a', 'b', 'a'};
    char needle[] = {'a', 'b', 'a'};
    void *found = __llvm_libc::memmem(haystack, sizeof(haystack), needle,
                                      sizeof(needle));
    ASSERT_EQ(static_cast<char *>(found), haystack + 1);
  }
}
65+
66+
// A leading NUL byte in the haystack does not stop the search.
TEST(LlvmLibcMemmemTest, NullTerminatorDoesNotInterruptMatch) {
  char haystack[] = {'\0', 'a', 'b'};
  char needle[] = {'a', 'b'};
  void *found = __llvm_libc::memmem(haystack, sizeof(haystack), needle,
                                    sizeof(needle));
  ASSERT_EQ(static_cast<char *>(found), haystack + 1);
}
72+
73+
// Inputs for which no complete match exists must produce nullptr.
TEST(LlvmLibcMemmemTest, ReturnNullIfNoExactMatch) {
  {
    // Needle is longer than the haystack.
    char haystack[] = {'a'};
    char needle[] = {'a', 'a'};
    void *found = __llvm_libc::memmem(haystack, sizeof(haystack), needle,
                                      sizeof(needle));
    ASSERT_EQ(found, static_cast<void *>(nullptr));
  }
  {
    // Comparison is case sensitive: 'A' does not match 'a'.
    char haystack[] = {'a', 'A'};
    char needle[] = {'a', 'a'};
    void *found = __llvm_libc::memmem(haystack, sizeof(haystack), needle,
                                      sizeof(needle));
    ASSERT_EQ(found, static_cast<void *>(nullptr));
  }
  {
    // A NUL byte in the needle counts toward its length.
    char haystack[] = {'a'};
    char needle[] = {'a', '\0'};
    void *found = __llvm_libc::memmem(haystack, sizeof(haystack), needle,
                                      sizeof(needle));
    ASSERT_EQ(found, static_cast<void *>(nullptr));
  }
  {
    // Two NUL bytes cannot be found in a single NUL byte.
    char haystack[] = {'\0'};
    char needle[] = {'\0', '\0'};
    void *found = __llvm_libc::memmem(haystack, sizeof(haystack), needle,
                                      sizeof(needle));
    ASSERT_EQ(found, static_cast<void *>(nullptr));
  }
}
99+
100+
// Only the first `needle_len` bytes of the needle buffer take part in the
// search.
TEST(LlvmLibcMemmemTest, ReturnMatchOfSpecifiedNeedleLength) {
  {
    // Length 0: the needle buffer contents are irrelevant.
    char haystack[] = {'a', 'b', 'c'};
    char needle[] = {'x', 'y', 'z'};
    void *found = __llvm_libc::memmem(haystack, sizeof(haystack), needle, 0);
    ASSERT_EQ(static_cast<char *>(found), haystack);
  }
  {
    // Length 2: only {'b', 'c'} is searched for; the trailing 'a' is ignored.
    char haystack[] = {'a', 'b', 'c'};
    char needle[] = {'b', 'c', 'a'};
    void *found = __llvm_libc::memmem(haystack, sizeof(haystack), needle, 2);
    ASSERT_EQ(static_cast<char *>(found), haystack + 1);
  }
}
114+
115+
// Bytes past the stated haystack length must not be considered.
TEST(LlvmLibcMemmemTest, ReturnNullIfInadequateHaystackLength) {
  {
    // 'c' sits at index 2, outside the two bytes memmem is told to search.
    char haystack[] = {'a', 'b', 'c'};
    char needle[] = {'c'};
    void *found = __llvm_libc::memmem(haystack, 2, needle, sizeof(needle));
    ASSERT_EQ(found, static_cast<void *>(nullptr));
  }
  {
    // The buffers are identical, but the stated haystack length is one short.
    char haystack[] = {'a', 'b', 'c'};
    char needle[] = {'a', 'b', 'c'};
    void *found = __llvm_libc::memmem(haystack, 2, needle, sizeof(needle));
    ASSERT_EQ(found, static_cast<void *>(nullptr));
  }
}
129+
} // namespace __llvm_libc

utils/bazel/llvm-project-overlay/libc/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1591,6 +1591,7 @@ libc_support_library(
15911591
"src/string/memory_utils/memset_implementations.h",
15921592
"src/string/memory_utils/strcmp_implementations.h",
15931593
"src/string/memory_utils/strstr_implementations.h",
1594+
"src/string/memory_utils/memmem_implementations.h",
15941595
"src/string/memory_utils/x86_64/memcmp_implementations.h",
15951596
"src/string/memory_utils/x86_64/memcpy_implementations.h",
15961597
],

0 commit comments

Comments
 (0)