-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[libc] Clean up GPU math implementations #83133
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-libc @llvm/pr-subscribers-backend-amdgpu
Author: Joseph Huber (jhuber6)
Changes
Summary: (given in the PR description at the end of this page)
Patch is 187.80 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/83133.diff
223 Files Affected:
diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake
index 8a84c82206ba6c..0649e9f7a76709 100644
--- a/libc/cmake/modules/LLVMLibCObjectRules.cmake
+++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake
@@ -307,7 +307,7 @@ function(create_entrypoint_object fq_target_name)
${fq_target_name}
PROPERTIES
ENTRYPOINT_NAME ${ADD_ENTRYPOINT_OBJ_NAME}
- TARGET_TYPE ${ENTRYPOINT_OBJ_TARGET_TYPE}
+ TARGET_TYPE ${entrypoint_target_type}
OBJECT_FILE "$<TARGET_OBJECTS:${fq_target_name}>"
CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
DEPS "${fq_deps_list}"
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 33dc1fc97c5680..fcb29e72a2d56a 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -1,9 +1,6 @@
add_subdirectory(generic)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE})
add_subdirectory(${LIBC_TARGET_ARCHITECTURE})
-elseif(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
- # TODO: We should split this into 'nvptx' and 'amdgpu' for the GPU build.
- add_subdirectory(${LIBC_TARGET_OS})
endif()
function(add_math_entrypoint_object name)
@@ -11,7 +8,6 @@ function(add_math_entrypoint_object name)
# that first and return early if we are able to add an alias target for the
# machine specific implementation.
get_fq_target_name("${LIBC_TARGET_ARCHITECTURE}.${name}" fq_machine_specific_target_name)
- get_fq_target_name("${LIBC_TARGET_OS}.${name}" fq_os_specific_target_name)
if(TARGET ${fq_machine_specific_target_name})
add_entrypoint_object(
${name}
@@ -20,28 +16,6 @@ function(add_math_entrypoint_object name)
.${LIBC_TARGET_ARCHITECTURE}.${name}
)
return()
- elseif(TARGET ${fq_os_specific_target_name})
- add_entrypoint_object(
- ${name}
- ALIAS
- DEPENDS
- .${LIBC_TARGET_OS}.${name}
- )
- return()
- endif()
-
- # The GPU optionally depends on vendor libraries. If we emitted one of these
- # entrypoints it means the user requested it and we should use it instead.
- get_fq_target_name("${LIBC_TARGET_OS}.vendor.${name}" fq_vendor_specific_target_name)
- if(TARGET ${fq_vendor_specific_target_name})
- add_entrypoint_object(
- ${name}
- ALIAS
- DEPENDS
- .${LIBC_TARGET_OS}.vendor.${name}
- VENDOR
- )
- return()
endif()
get_fq_target_name("generic.${name}" fq_generic_target_name)
diff --git a/libc/src/math/gpu/vendor/CMakeLists.txt b/libc/src/math/amdgpu/CMakeLists.txt
similarity index 59%
rename from libc/src/math/gpu/vendor/CMakeLists.txt
rename to libc/src/math/amdgpu/CMakeLists.txt
index 36087ade63bfcd..cb77341aa50522 100644
--- a/libc/src/math/gpu/vendor/CMakeLists.txt
+++ b/libc/src/math/amdgpu/CMakeLists.txt
@@ -1,39 +1,360 @@
+# Math functions not yet available in the libc project, or those not yet tuned
+# for GPU workloads are provided as wrappers over vendor libraries. If we find
+# them ahead of time we will import them statically. Otherwise, we will keep
+# them as external references and expect them to be resolved by the user when
+# they compile. In the future, we will use implementations from the 'libc'
+# project and not provide these wrappers.
find_package(AMDDeviceLibs QUIET HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
if(AMDDeviceLibs_FOUND)
message(STATUS "Found the ROCm device library. Implementations falling back "
"to the vendor libraries will be resolved statically.")
get_target_property(ocml_path ocml IMPORTED_LOCATION)
- list(APPEND bitcode_link_flags
- "SHELL:-Xclang -mlink-builtin-bitcode -Xclang ${ocml_path}")
+ set(bitcode_link_flags
+ "SHELL:-Xclang -mlink-builtin-bitcode -Xclang ${ocml_path}")
else()
message(STATUS "Could not find the ROCm device library. Unimplemented "
"functions will be an external reference to the vendor libraries.")
endif()
-if(CUDAToolkit_FOUND)
- set(libdevice_path ${CUDAToolkit_BIN_DIR}/../nvvm/libdevice/libdevice.10.bc)
- if (EXISTS ${libdevice_path})
- message(STATUS "Found the CUDA device library. Implementations falling back "
- "to the vendor libraries will be resolved statically.")
- list(APPEND bitcode_link_flags
- "SHELL:-Xclang -mlink-builtin-bitcode -Xclang ${libdevice_path}")
- endif()
-else()
- message(STATUS "Could not find the CUDA device library. Unimplemented "
- "functions will be an external reference to the vendor libraries.")
-endif()
+add_entrypoint_object(
+ ceil
+ SRCS
+ ceil.cpp
+ HDRS
+ ../ceil.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ ceilf
+ SRCS
+ ceilf.cpp
+ HDRS
+ ../ceilf.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ copysign
+ SRCS
+ copysign.cpp
+ HDRS
+ ../copysign.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ copysignf
+ SRCS
+ copysignf.cpp
+ HDRS
+ ../copysignf.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ fabs
+ SRCS
+ fabs.cpp
+ HDRS
+ ../fabs.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ fabsf
+ SRCS
+ fabsf.cpp
+ HDRS
+ ../fabsf.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ floor
+ SRCS
+ floor.cpp
+ HDRS
+ ../floor.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ floorf
+ SRCS
+ floorf.cpp
+ HDRS
+ ../floorf.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ fma
+ SRCS
+ fma.cpp
+ HDRS
+ ../fma.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ fmaf
+ SRCS
+ fmaf.cpp
+ HDRS
+ ../fmaf.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ fmax
+ SRCS
+ fmax.cpp
+ HDRS
+ ../fmax.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ fmaxf
+ SRCS
+ fmaxf.cpp
+ HDRS
+ ../fmaxf.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ fmin
+ SRCS
+ fmin.cpp
+ HDRS
+ ../fmin.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ fminf
+ SRCS
+ fminf.cpp
+ HDRS
+ ../fminf.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ fmod
+ SRCS
+ fmod.cpp
+ HDRS
+ ../fmod.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ fmodf
+ SRCS
+ fmodf.cpp
+ HDRS
+ ../fmodf.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ lround
+ SRCS
+ lround.cpp
+ HDRS
+ ../lround.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ lroundf
+ SRCS
+ lroundf.cpp
+ HDRS
+ ../lroundf.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ llround
+ SRCS
+ llround.cpp
+ HDRS
+ ../llround.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ llroundf
+ SRCS
+ llroundf.cpp
+ HDRS
+ ../llroundf.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ modf
+ SRCS
+ modf.cpp
+ HDRS
+ ../modf.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ modff
+ SRCS
+ modff.cpp
+ HDRS
+ ../modff.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ nearbyint
+ SRCS
+ nearbyint.cpp
+ HDRS
+ ../nearbyint.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ nearbyintf
+ SRCS
+ nearbyintf.cpp
+ HDRS
+ ../nearbyintf.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ remainder
+ SRCS
+ remainder.cpp
+ HDRS
+ ../remainder.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ remainderf
+ SRCS
+ remainderf.cpp
+ HDRS
+ ../remainderf.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ rint
+ SRCS
+ rint.cpp
+ HDRS
+ ../rint.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ rintf
+ SRCS
+ rintf.cpp
+ HDRS
+ ../rintf.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ round
+ SRCS
+ round.cpp
+ HDRS
+ ../round.h
+ COMPILE_OPTIONS
+ -O2
+)
-# FIXME: We need a way to pass the library to only the NVTPX / AMDGPU build.
-# This shouldn't cause issues because we only link in needed symbols, but it
-# will link in identity metadata from both libraries. This silences the warning.
-list(APPEND bitcode_link_flags "-Wno-linker-warnings")
+add_entrypoint_object(
+ sqrt
+ SRCS
+ sqrt.cpp
+ HDRS
+ ../sqrt.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ sqrtf
+ SRCS
+ sqrtf.cpp
+ HDRS
+ ../sqrtf.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ trunc
+ SRCS
+ trunc.cpp
+ HDRS
+ ../trunc.h
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ truncf
+ SRCS
+ truncf.cpp
+ HDRS
+ ../truncf.h
+ COMPILE_OPTIONS
+ -O2
+)
+# The following functions currently are not implemented natively and borrow from
+# existing implementations. This will be removed in the future.
add_entrypoint_object(
acos
SRCS
acos.cpp
HDRS
- ../../acos.h
+ ../acos.h
+ VENDOR
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
@@ -44,10 +365,11 @@ add_entrypoint_object(
SRCS
acosf.cpp
HDRS
- ../../acosf.h
+ ../acosf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -55,10 +377,11 @@ add_entrypoint_object(
SRCS
acosh.cpp
HDRS
- ../../acosh.h
+ ../acosh.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -66,10 +389,11 @@ add_entrypoint_object(
SRCS
acoshf.cpp
HDRS
- ../../acoshf.h
+ ../acoshf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -77,10 +401,11 @@ add_entrypoint_object(
SRCS
asin.cpp
HDRS
- ../../asin.h
+ ../asin.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -88,10 +413,11 @@ add_entrypoint_object(
SRCS
asinf.cpp
HDRS
- ../../asinf.h
+ ../asinf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -99,10 +425,11 @@ add_entrypoint_object(
SRCS
asinh.cpp
HDRS
- ../../asinh.h
+ ../asinh.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -110,10 +437,11 @@ add_entrypoint_object(
SRCS
atan.cpp
HDRS
- ../../atan.h
+ ../atan.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -121,10 +449,11 @@ add_entrypoint_object(
SRCS
atanf.cpp
HDRS
- ../../atanf.h
+ ../atanf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -132,10 +461,11 @@ add_entrypoint_object(
SRCS
atan2.cpp
HDRS
- ../../atan2.h
+ ../atan2.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -143,10 +473,11 @@ add_entrypoint_object(
SRCS
atan2f.cpp
HDRS
- ../../atan2f.h
+ ../atan2f.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -154,10 +485,11 @@ add_entrypoint_object(
SRCS
atanh.cpp
HDRS
- ../../atanh.h
+ ../atanh.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -165,10 +497,11 @@ add_entrypoint_object(
SRCS
atanhf.cpp
HDRS
- ../../atanhf.h
+ ../atanhf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -176,10 +509,11 @@ add_entrypoint_object(
SRCS
cos.cpp
HDRS
- ../../cos.h
+ ../cos.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -187,10 +521,11 @@ add_entrypoint_object(
SRCS
cosf.cpp
HDRS
- ../../cosf.h
+ ../cosf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -198,10 +533,11 @@ add_entrypoint_object(
SRCS
cosh.cpp
HDRS
- ../../cosh.h
+ ../cosh.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -209,10 +545,11 @@ add_entrypoint_object(
SRCS
coshf.cpp
HDRS
- ../../coshf.h
+ ../coshf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -220,10 +557,11 @@ add_entrypoint_object(
SRCS
erf.cpp
HDRS
- ../../erf.h
+ ../erf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -231,10 +569,11 @@ add_entrypoint_object(
SRCS
erff.cpp
HDRS
- ../../erff.h
+ ../erff.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -242,10 +581,11 @@ add_entrypoint_object(
SRCS
exp.cpp
HDRS
- ../../exp.h
+ ../exp.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -253,10 +593,11 @@ add_entrypoint_object(
SRCS
exp10.cpp
HDRS
- ../../exp10.h
+ ../exp10.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -264,10 +605,11 @@ add_entrypoint_object(
SRCS
exp10f.cpp
HDRS
- ../../exp10f.h
+ ../exp10f.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -275,10 +617,11 @@ add_entrypoint_object(
SRCS
exp2.cpp
HDRS
- ../../exp2.h
+ ../exp2.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -286,10 +629,11 @@ add_entrypoint_object(
SRCS
exp2f.cpp
HDRS
- ../../exp2f.h
+ ../exp2f.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -297,10 +641,11 @@ add_entrypoint_object(
SRCS
expf.cpp
HDRS
- ../../expf.h
+ ../expf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -308,10 +653,11 @@ add_entrypoint_object(
SRCS
expm1.cpp
HDRS
- ../../expm1.h
+ ../expm1.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -319,10 +665,11 @@ add_entrypoint_object(
SRCS
expm1f.cpp
HDRS
- ../../expm1f.h
+ ../expm1f.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -330,10 +677,11 @@ add_entrypoint_object(
SRCS
fdim.cpp
HDRS
- ../../fdim.h
+ ../fdim.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -341,10 +689,11 @@ add_entrypoint_object(
SRCS
fdimf.cpp
HDRS
- ../../fdimf.h
+ ../fdimf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -352,10 +701,11 @@ add_entrypoint_object(
SRCS
hypot.cpp
HDRS
- ../../hypot.h
+ ../hypot.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -363,10 +713,11 @@ add_entrypoint_object(
SRCS
hypotf.cpp
HDRS
- ../../hypotf.h
+ ../hypotf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -374,10 +725,11 @@ add_entrypoint_object(
SRCS
ilogb.cpp
HDRS
- ../../ilogb.h
+ ../ilogb.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -385,10 +737,11 @@ add_entrypoint_object(
SRCS
ilogbf.cpp
HDRS
- ../../ilogbf.h
+ ../ilogbf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -396,10 +749,11 @@ add_entrypoint_object(
SRCS
log10.cpp
HDRS
- ../../log10.h
+ ../log10.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -407,10 +761,11 @@ add_entrypoint_object(
SRCS
log10f.cpp
HDRS
- ../../log10f.h
+ ../log10f.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -418,10 +773,11 @@ add_entrypoint_object(
SRCS
log2.cpp
HDRS
- ../../log2.h
+ ../log2.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -429,10 +785,11 @@ add_entrypoint_object(
SRCS
log2f.cpp
HDRS
- ../../log2f.h
+ ../log2f.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -440,10 +797,11 @@ add_entrypoint_object(
SRCS
log.cpp
HDRS
- ../../log.h
+ ../log.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -451,10 +809,11 @@ add_entrypoint_object(
SRCS
logf.cpp
HDRS
- ../../logf.h
+ ../logf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -462,10 +821,11 @@ add_entrypoint_object(
SRCS
lrint.cpp
HDRS
- ../../lrint.h
+ ../lrint.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -473,10 +833,11 @@ add_entrypoint_object(
SRCS
lrintf.cpp
HDRS
- ../../lrintf.h
+ ../lrintf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -484,10 +845,11 @@ add_entrypoint_object(
SRCS
ldexp.cpp
HDRS
- ../../ldexp.h
+ ../ldexp.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -495,10 +857,11 @@ add_entrypoint_object(
SRCS
ldexpf.cpp
HDRS
- ../../ldexpf.h
+ ../ldexpf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -506,10 +869,11 @@ add_entrypoint_object(
SRCS
log1p.cpp
HDRS
- ../../log1p.h
+ ../log1p.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -517,10 +881,11 @@ add_entrypoint_object(
SRCS
log1pf.cpp
HDRS
- ../../log1pf.h
+ ../log1pf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -528,10 +893,11 @@ add_entrypoint_object(
SRCS
llrint.cpp
HDRS
- ../../llrint.h
+ ../llrint.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -539,10 +905,11 @@ add_entrypoint_object(
SRCS
llrintf.cpp
HDRS
- ../../llrintf.h
+ ../llrintf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -550,10 +917,11 @@ add_entrypoint_object(
SRCS
remquo.cpp
HDRS
- ../../remquo.h
+ ../remquo.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -561,10 +929,11 @@ add_entrypoint_object(
SRCS
remquof.cpp
HDRS
- ../../remquof.h
+ ../remquof.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -572,10 +941,11 @@ add_entrypoint_object(
SRCS
scalbn.cpp
HDRS
- ../../scalbn.h
+ ../scalbn.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -583,10 +953,11 @@ add_entrypoint_object(
SRCS
scalbnf.cpp
HDRS
- ../../scalbnf.h
+ ../scalbnf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
@@ -595,10 +966,11 @@ add_entrypoint_object(
SRCS
nextafter.cpp
HDRS
- ../../nextafter.h
+ ../nextafter.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -606,10 +978,11 @@ add_entrypoint_object(
SRCS
nextafterf.cpp
HDRS
- ../../nextafterf.h
+ ../nextafterf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -617,10 +990,11 @@ add_entrypoint_object(
SRCS
pow.cpp
HDRS
- ../../pow.h
+ ../pow.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -628,10 +1002,11 @@ add_entrypoint_object(
SRCS
powf.cpp
HDRS
- ../../powf.h
+ ../powf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -639,10 +1014,11 @@ add_entrypoint_object(
SRCS
sin.cpp
HDRS
- ../../sin.h
+ ../sin.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -650,10 +1026,11 @@ add_entrypoint_object(
SRCS
sinf.cpp
HDRS
- ../../sinf.h
+ ../sinf.h
COMPILE_OPTIONS
${bitcode_link_flags}
-O2
+ VENDOR
)
add_entrypoint_object(
@@ -661,10 +1038,11...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
Summary: The math directory provides architecture-specific implementations of these math functions. For the GPU case this was complicated by the fact that both NVPTX and AMDGPU had to go through the same code paths. Since the GPU target was reworked, this is no longer the case and we can simply use the same scheme. This patch moves all the old code into two separate directories. This likely results in a net increase in code, but it is easier to reason about.
namespace LIBC_NAMESPACE {

LLVM_LIBC_FUNCTION(double, trunc, (double x)) { return __builtin_trunc(x); }
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why aren't the simple cases like this in a common place still?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Laziness. The targets support different subsets of the math builtins, so at a high level it makes sense to split them. The rest of the math library doesn't use built-ins, so I don't think there's any point in making a common builtin/ directory. The long-term goal is to collapse a lot of this, so I wasn't overly concerned with redundancy for now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Interested in @lntue's perspective on a lot of these built-ins as well. Most of what's special about these built-ins is that they use more hardware specific instructions, though a lot of the time this comes at the cost of precision. We could possibly wrap some of the hardware specific stuff in utility functions kind of like FMA (though that's hardly unsupported these days) and try to have a more generic implementation if we really want everything to be correctly rounded even on the GPU.
Summary:
The math directory provides architecture-specific implementations of
these math functions. For the GPU case this was complicated by the fact
that both NVPTX and AMDGPU had to go through the same code paths. Since
the GPU target was reworked, this is no longer the case and we can simply
use the same scheme. This patch moves all the old code into two separate
directories. This likely results in a net increase in code, but it is
easier to reason about.