-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libcxx] Add testing configuration for GPU targets #104515
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-libcxx Author: Joseph Huber (jhuber6) ChangesSummary: We force serial exeuction here, because In the future this can likely be refined to accept user specified This currently fails ~1% of the tests on AMDGPU and ~3% of the tests on Future support will likely want to allow passing in a custom Full diff: https://github.com/llvm/llvm-project/pull/104515.diff 6 Files Affected:
diff --git a/libcxx/cmake/caches/AMDGPU.cmake b/libcxx/cmake/caches/AMDGPU.cmake
new file mode 100644
index 00000000000000..00549c69af00fb
--- /dev/null
+++ b/libcxx/cmake/caches/AMDGPU.cmake
@@ -0,0 +1,40 @@
+# Configuration options for libcxx.
+set(LIBCXX_ABI_VERSION 2 CACHE STRING "")
+set(LIBCXX_CXX_ABI none CACHE STRING "")
+set(LIBCXX_ENABLE_SHARED OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_STATIC ON CACHE BOOL "")
+set(LIBCXX_ENABLE_FILESYSTEM OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_RANDOM_DEVICE OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_LOCALIZATION OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_UNICODE OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_WIDE_CHARACTERS OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
+set(LIBCXX_HAS_TERMINAL_AVAILABLE OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_RTTI OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "")
+set(LIBCXX_STATICALLY_LINK_ABI_IN_STATIC_LIBRARY ON CACHE BOOL "")
+set(LIBCXX_ENABLE_THREADS OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_MONOTONIC_CLOCK ON CACHE BOOL "")
+set(LIBCXX_INSTALL_LIBRARY ON CACHE BOOL "")
+set(LIBCXX_LIBC "llvm-libc" CACHE STRING "")
+set(LIBCXX_USE_COMPILER_RT ON CACHE BOOL "")
+set(LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS ON CACHE BOOL "")
+
+# Configuration options for libcxxabi.
+set(LIBCXXABI_BAREMETAL ON CACHE BOOL "")
+set(LIBCXXABI_ENABLE_SHARED OFF CACHE BOOL "")
+set(LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
+set(LIBCXXABI_ENABLE_THREADS OFF CACHE BOOL "")
+set(LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "")
+set(LIBCXXABI_USE_LLVM_UNWINDER OFF CACHE BOOL "")
+
+# Test configuration.
+set(LIBCXX_TEST_CONFIG "amdgpu-libc++-shared.cfg.in" CACHE STRING "")
+set(LIBCXX_TEST_PARAMS "long_tests=False;executor=amdhsa-loader" CACHE STRING "")
+
+# Necessary compile flags for AMDGPU.
+set(LIBCXX_ADDITIONAL_COMPILE_FLAGS
+ "-nogpulib;-flto;-fconvergent-functions;-Xclang;-mcode-object-version=none" CACHE STRING "")
+set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS
+ "-nogpulib;-flto;-fconvergent-functions;-Xclang;-mcode-object-version=none" CACHE STRING "")
+set(CMAKE_REQUIRED_FLAGS "-nogpulib -nodefaultlibs" CACHE STRING "")
diff --git a/libcxx/cmake/caches/NVPTX.cmake b/libcxx/cmake/caches/NVPTX.cmake
new file mode 100644
index 00000000000000..dae83940af5b04
--- /dev/null
+++ b/libcxx/cmake/caches/NVPTX.cmake
@@ -0,0 +1,40 @@
+# Configuration options for libcxx.
+set(LIBCXX_ABI_VERSION 2 CACHE STRING "")
+set(LIBCXX_CXX_ABI none CACHE STRING "")
+set(LIBCXX_ENABLE_SHARED OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_STATIC ON CACHE BOOL "")
+set(LIBCXX_ENABLE_FILESYSTEM OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_RANDOM_DEVICE OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_LOCALIZATION OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_UNICODE OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_WIDE_CHARACTERS OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
+set(LIBCXX_HAS_TERMINAL_AVAILABLE OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_RTTI OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "")
+set(LIBCXX_STATICALLY_LINK_ABI_IN_STATIC_LIBRARY ON CACHE BOOL "")
+set(LIBCXX_ENABLE_THREADS OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_MONOTONIC_CLOCK ON CACHE BOOL "")
+set(LIBCXX_INSTALL_LIBRARY ON CACHE BOOL "")
+set(LIBCXX_LIBC "llvm-libc" CACHE STRING "")
+set(LIBCXX_USE_COMPILER_RT ON CACHE BOOL "")
+set(LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS ON CACHE BOOL "")
+
+# Configuration options for libcxxabi.
+set(LIBCXXABI_BAREMETAL ON CACHE BOOL "")
+set(LIBCXXABI_ENABLE_SHARED OFF CACHE BOOL "")
+set(LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
+set(LIBCXXABI_ENABLE_THREADS OFF CACHE BOOL "")
+set(LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "")
+set(LIBCXXABI_USE_LLVM_UNWINDER OFF CACHE BOOL "")
+
+# Test configuration.
+set(LIBCXX_TEST_CONFIG "nvptx-libc++-shared.cfg.in" CACHE STRING "")
+set(LIBCXX_TEST_PARAMS "long_tests=False;executor=nvptx-loader" CACHE STRING "")
+
+# Necessary compile flags for NVPTX.
+set(LIBCXX_ADDITIONAL_COMPILE_FLAGS
+ "-nogpulib;-flto;-fconvergent-functions;--cuda-feature=+ptx63" CACHE STRING "")
+set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS
+ "-nogpulib;-flto;-fconvergent-functions;--cuda-feature=+ptx63" CACHE STRING "")
+set(CMAKE_REQUIRED_FLAGS "-nogpulib -nodefaultlibs -flto -c" CACHE STRING "")
diff --git a/libcxx/test/configs/amdgpu-libc++-shared.cfg.in b/libcxx/test/configs/amdgpu-libc++-shared.cfg.in
new file mode 100644
index 00000000000000..9b37a81f8de5d4
--- /dev/null
+++ b/libcxx/test/configs/amdgpu-libc++-shared.cfg.in
@@ -0,0 +1,29 @@
+lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg')
+
+config.substitutions.append(('%{flags}',
+ f'--target={config.target_triple} -Wno-multi-gpu -flto -mcpu=native'))
+config.substitutions.append(('%{compile_flags}',
+ '-nogpulib -fno-builtin-printf -nogpuinc -nostdlibinc '
+ '-I %{include-dir} -I %{target-include-dir}/../../ '
+ '-I %{target-include-dir} -I %{libcxx-dir}/test/support'
+))
+config.substitutions.append(('%{link_flags}',
+ '-O1 -nostdinc++ -nostdlib++ %{lib-dir}/crt1.o '
+ '-L %{lib-dir} -lc++ -lc++abi -lclang_rt.builtins '
+))
+
+config.substitutions.append(('%{exec}',
+ '%{executor} --no-parallelism'
+))
+
+config.stdlib = 'llvm-libc++'
+
+import os, site
+site.addsitedir(os.path.join('@LIBCXX_SOURCE_DIR@', 'utils'))
+import libcxx.test.params, libcxx.test.config
+libcxx.test.config.configure(
+ libcxx.test.params.DEFAULT_PARAMETERS,
+ libcxx.test.features.DEFAULT_FEATURES,
+ config,
+ lit_config
+)
diff --git a/libcxx/test/configs/nvptx-libc++-shared.cfg.in b/libcxx/test/configs/nvptx-libc++-shared.cfg.in
new file mode 100644
index 00000000000000..26d93b29183f72
--- /dev/null
+++ b/libcxx/test/configs/nvptx-libc++-shared.cfg.in
@@ -0,0 +1,31 @@
+lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg')
+
+config.substitutions.append(('%{flags}',
+ f'--target={config.target_triple} -Wno-multi-gpu -flto -march=native'))
+config.substitutions.append(('%{compile_flags}',
+ '-nogpulib -fno-builtin-printf -nogpuinc -nostdlibinc '
+ '-I %{include-dir} -I %{target-include-dir}/../../ '
+ '-I %{target-include-dir} -I %{libcxx-dir}/test/support'
+))
+config.substitutions.append(('%{link_flags}',
+ '-nostdinc++ -nostdlib++ %{lib-dir}/crt1.o '
+ '-L %{lib-dir} -lc++ -lc++abi -lclang_rt.builtins '
+ '-Wl,--suppress-stack-size-warning '
+ '-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1 '
+ '-Wl,-mllvm,-nvptx-emit-init-fini-kernel'
+))
+config.substitutions.append(('%{exec}',
+ '%{executor} --no-parallelism'
+))
+
+config.stdlib = 'llvm-libc++'
+
+import os, site
+site.addsitedir(os.path.join('@LIBCXX_SOURCE_DIR@', 'utils'))
+import libcxx.test.params, libcxx.test.config
+libcxx.test.config.configure(
+ libcxx.test.params.DEFAULT_PARAMETERS,
+ libcxx.test.features.DEFAULT_FEATURES,
+ config,
+ lit_config
+)
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_range.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_range.pass.cpp
index a5f5455297ad44..b0218cb75aca93 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_range.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_range.pass.cpp
@@ -6,6 +6,10 @@
//
//===----------------------------------------------------------------------===//
+// FIXME: This takes over an hour to compile, disable for now.
+// UNSUPPORTED: target=amdgcn-amd-amdhsa
+// UNSUPPORTED: target=nvptx64-nvidia-cuda
+
// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/replace_with_range.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/replace_with_range.pass.cpp
index 03e82590ed4ef6..d4b16b79a0b8dc 100644
--- a/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/replace_with_range.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/replace_with_range.pass.cpp
@@ -6,6 +6,10 @@
//
//===----------------------------------------------------------------------===//
+// FIXME: This takes over an hour to compile, disable for now.
+// UNSUPPORTED: target=amdgcn-amd-amdhsa
+// UNSUPPORTED: target=nvptx64-nvidia-cuda
+
// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=10000000
// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-ops-limit): -fconstexpr-ops-limit=70000000
|
ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to get the added CI setup alongside this?
What would that look like? I've talked w/ @jplehr and @Artem-B about getting a build for these targets at least set-up. I don't think a full CI tester will be available for awhile -- there's still lots of failing tests and it takes a long time to run. Right now I need to redo this since the |
How long does it take to run the tests? What's the reason for it being that slow? |
There's a lot of factors that contribute to it being really slow.
From my current configuration, I'd say an average test run takes about an hour on my server. The bot that runs the NVPTX tests for example is nowhere near as powerful as my computer, so expect that to take like 10 hours? |
I added some CMake that gets the |
ping |
2 similar comments
ping |
ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A few comments, should be fine to merge once addressed.
libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_range.pass.cpp
Outdated
Show resolved
Hide resolved
It seems that some changes moved
Thanks, made some updates since I apparently didn't merge any of my changes last time. Since the recent changes made the |
Okay, CI's green and I updated the issues I had previously. Would it be possible to land this? |
ping |
# Available if building libc++ on top of llvm-libc. | ||
config.substitutions.append(('%{libc-dir}', '@LIBCXX_LIBC_LIBRARY_DIR@')) | ||
config.substitutions.append(('%{libc-include-dir}', '@LIBCXX_LIBC_INCLUDE_DIR@')) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We want to keep the cmake-bridge
as lean as possible, and adding "optional" substitutions like this goes against that principle.
It seems like a smell that you need to pass %{libc-dir}/crt1.o -lc -lm
explicitly when linking: why do you need that? You didn't pass -nostdlib
so I would expect Clang to use the default stdlib on the system. It looks like the underlying problem is really that your compiler is not configured properly to use the just-built llvm-libc and you're working around that in the libc++ testing configuration.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a GPU, executing stuff this way is the exceptions exception. I actually used to link -lc
by default but that turned out to be a huge pain to deal with when working with the build systems so I removed it. I could possibly make an argument like -stdlib
and -startfiles
that adds that in this case, since it should know where to find them. However these positive versions of those flags don't really exist, so it'd need to be a new clang flag that's really specific.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Think of it this way: this configuration file is like a hello world for running code on this platform. It's the bare minimum that's required by anyone trying to compile and run on this platform. When doing that requires manually specifying the C library to use, IMO there's something wrong.
If you're dead set on keeping these substitutions, the right way of passing that information without impacting the general-purpose libc++ configuration would be to add additional parameters to your Lit configuration, like:
ADDITIONAL_PARAMETERS = [
libcxx.test.dsl.Parameter(name='libc_lib_dir', type=str,
actions=lambda path: [libcxx.test.dsl.AddSubstitution('%{libc-lib-dir}', path)],
help="""
TODO explain what this is
"""),
libcxx.test.dsl.Parameter(name='libc_include_dir', type=str,
actions=lambda path: [libcxx.test.dsl.AddSubstitution('%{libc-include-dir}', path)],
help="""
TODO explain what this is
"""),
]
...
libcxx.test.config.configure(
libcxx.test.params.DEFAULT_PARAMETERS + ADDITIONAL_PARAMETERS,
...
)
Then you'd call lit
with --param libc_lib_dir=<path> --param libc_include_dir=<path>
when running the tests, and the rest of libc++ doesn't have to know about this particularity of your setup.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I made #112025. If this lands will adding -gpustartfiles
to the commands be acceptable?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Either way is fine by me. I care mostly that the general-purpose libc++ configuration doesn't get impacted by this complexity, and on the side I'm trying to vouch for naive users trying to just do a hello world. I think the ADDITIONAL_PARAMETERs
approach above is also acceptable in the meantime just to unblock you.
I think -gpustartfiles
might be the better approach, but I'm not strongly attached to it. If you want to use ADDITIONAL_PARAMETERS
for now, that's fine by me too.
Summary: The C library for GPUs provides the ability to target regular C/C++ programs by providing the C library and a file containing kernels that call the `main` function. This is mostly used for unit tests, this patch provides a quick way to add them without needing to know the paths. I currently do this explicitly, but according to the libc++ contributors we don't want to need to specify these paths manually. See the discussion in llvm#104515. I just default to `lib/` if the target-specific one isn't found because the linker will handle giving a reasonable error message if it's not found. Basically the use-case looks like this. ```console $ clang test.c --target=amdgcn-amd-amdhsa -mcpu=native -gpustartfiles $ amdhsa-loader a.out PASS! ```
Summary: The C library for GPUs provides the ability to target regular C/C++ programs by providing the C library and a file containing kernels that call the `main` function. This is mostly used for unit tests, this patch provides a quick way to add them without needing to know the paths. I currently do this explicitly, but according to the libc++ contributors we don't want to need to specify these paths manually. See the discussion in llvm#104515. I just default to `lib/` if the target-specific one isn't found because the linker will handle giving a reasonable error message if it's not found. Basically the use-case looks like this. ```console $ clang test.c --target=amdgcn-amd-amdhsa -mcpu=native -gpustartfiles $ amdhsa-loader a.out PASS! ```
Summary: The GPU runs these tests using the files built from the `libc` project. These will be placed in `include/<triple>` and `lib/<triple>`. We use the `amdhsa-loader` and `nvptx-loader` tools, which are also provided by `libc`. These launch a kernel called `_start` which calls `main` so we can pretend like GPU programs are normal terminal applications. We force serial exeuction here, because `llvm-lit` runs way too many processes in parallel, which has a bad habit of making the GPU drivers hang or run out of resources. This allows the compilation to be run in parallel while the jobs themselves are serialized via a file lock. In the future this can likely be refined to accept user specified architectures, or better handle including the root directory by exposing that instead of just `include/<triple>/c++/v1/`. This currently fails ~1% of the tests on AMDGPU and ~3% of the tests on NVPTX. This will hopefully be reduced further, and later patches can XFAIL a lot of them once it's down to a reasonable number. Future support will likely want to allow passing in a custom architecture instead of simply relying on `-mcpu=native`.
a8a9ba2
to
83c207e
Compare
Summary: The C library for GPUs provides the ability to target regular C/C++ programs by providing the C library and a file containing kernels that call the `main` function. This is mostly used for unit tests, this patch provides a quick way to add them without needing to know the paths. I currently do this explicitly, but according to the libc++ contributors we don't want to need to specify these paths manually. See the discussion in llvm#104515. I just default to `lib/` if the target-specific one isn't found because the linker will handle giving a reasonable error message if it's not found. Basically the use-case looks like this. ```console $ clang test.c --target=amdgcn-amd-amdhsa -mcpu=native -gpustartfiles $ amdhsa-loader a.out PASS! ```
Summary: The C library for GPUs provides the ability to target regular C/C++ programs by providing the C library and a file containing kernels that call the `main` function. This is mostly used for unit tests, this patch provides a quick way to add them without needing to know the paths. I currently do this explicitly, but according to the libc++ contributors we don't want to need to specify these paths manually. See the discussion in #104515. I just default to `lib/` if the target-specific one isn't found because the linker will handle giving a reasonable error message if it's not found. Basically the use-case looks like this. ```console $ clang test.c --target=amdgcn-amd-amdhsa -mcpu=native -startfiles -stdlib $ amdhsa-loader a.out PASS! ```
Summary: The C library for GPUs provides the ability to target regular C/C++ programs by providing the C library and a file containing kernels that call the `main` function. This is mostly used for unit tests, this patch provides a quick way to add them without needing to know the paths. I currently do this explicitly, but according to the libc++ contributors we don't want to need to specify these paths manually. See the discussion in llvm#104515. I just default to `lib/` if the target-specific one isn't found because the linker will handle giving a reasonable error message if it's not found. Basically the use-case looks like this. ```console $ clang test.c --target=amdgcn-amd-amdhsa -mcpu=native -startfiles -stdlib $ amdhsa-loader a.out PASS! ```
@ldionne The previous |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Summary: The GPU runs these tests using the files built from the `libc` project. These will be placed in `include/<triple>` and `lib/<triple>`. We use the `amdhsa-loader` and `nvptx-loader` tools, which are also provided by `libc`. These launch a kernel called `_start` which calls `main` so we can pretend like GPU programs are normal terminal applications. We force serial exeuction here, because `llvm-lit` runs way too many processes in parallel, which has a bad habit of making the GPU drivers hang or run out of resources. This allows the compilation to be run in parallel while the jobs themselves are serialized via a file lock. In the future this can likely be refined to accept user specified architectures, or better handle including the root directory by exposing that instead of just `include/<triple>/c++/v1/`. This currently fails ~1% of the tests on AMDGPU and ~3% of the tests on NVPTX. This will hopefully be reduced further, and later patches can XFAIL a lot of them once it's down to a reasonable number. Future support will likely want to allow passing in a custom architecture instead of simply relying on `-mcpu=native`.
Summary:
The GPU runs these tests using the files built from the
libc
project.These will be placed in
include/<triple>
andlib/<triple>
. We usethe
amdhsa-loader
andnvptx-loader
tools, which are also provided bylibc
. These launch a kernel called_start
which callsmain
so wecan pretend like GPU programs are normal terminal applications.
We force serial exeuction here, because
llvm-lit
runs way too manyprocesses in parallel, which has a bad habit of making the GPU drivers
hang or run out of resources. This allows the compilation to be run in
parallel while the jobs themselves are serialized via a file lock.
In the future this can likely be refined to accept user specified
architectures, or better handle including the root directory by exposing
that instead of just
include/<triple>/c++/v1/
.This currently fails ~1% of the tests on AMDGPU and ~3% of the tests on
NVPTX. This will hopefully be reduced further, and later patches can
XFAIL a lot of them once it's down to a reasonable number.
Future support will likely want to allow passing in a custom
architecture instead of simply relying on
-mcpu=native
.