-
Notifications
You must be signed in to change notification settings - Fork 14.3k
clang/HIP: Do not call ocml in scalbln implementations #129639
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
clang/HIP: Do not call ocml in scalbln implementations #129639
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86 Author: Matt Arsenault (arsenm) ChangesI do not understand why this was calling the float version with Somehow INT_MIN was also not defined, so deal with that. Full diff: https://github.com/llvm/llvm-project/pull/129639.diff 3 Files Affected:
diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
index 51d9acbb87270..f6c06eaf4afe0 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -639,8 +639,11 @@ float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
__DEVICE__
float scalblnf(float __x, long int __n) {
- return (__n < INT_MAX) ? __builtin_amdgcn_ldexpf(__x, __n)
- : __ocml_scalb_f32(__x, __n);
+ if (__n > INT_MAX)
+ __n = INT_MAX;
+ else if (__n < INT_MIN)
+ __n = INT_MIN;
+ return __builtin_ldexpf(__x, (int)__n);
}
__DEVICE__
@@ -1044,8 +1047,11 @@ double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
__DEVICE__
double scalbln(double __x, long int __n) {
- return (__n < INT_MAX) ? __builtin_amdgcn_ldexp(__x, __n)
- : __ocml_scalb_f64(__x, __n);
+ if (__n > INT_MAX)
+ __n = INT_MAX;
+ else if (__n < INT_MIN)
+ __n = INT_MIN;
+ return __builtin_ldexp(__x, (int)__n);
}
__DEVICE__
double scalbn(double __x, int __n) { return __builtin_amdgcn_ldexp(__x, __n); }
diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h
index ed1550038e63e..da1e39ac7270e 100644
--- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h
@@ -125,11 +125,13 @@ typedef __SIZE_TYPE__ size_t;
#pragma push_macro("uint64_t")
#pragma push_macro("CHAR_BIT")
#pragma push_macro("INT_MAX")
+#pragma push_macro("INT_MIN")
#define NULL (void *)0
#define uint32_t __UINT32_TYPE__
#define uint64_t __UINT64_TYPE__
#define CHAR_BIT __CHAR_BIT__
#define INT_MAX __INTMAX_MAX__
+#define INT_MIN (-__INT_MAX__ - 1)
#endif // __HIPCC_RTC__
#include <__clang_hip_libdevice_declares.h>
@@ -154,6 +156,7 @@ typedef __SIZE_TYPE__ size_t;
#pragma pop_macro("uint64_t")
#pragma pop_macro("CHAR_BIT")
#pragma pop_macro("INT_MAX")
+#pragma pop_macro("INT_MIN")
#endif // __HIPCC_RTC__
#endif // __HIP__
#endif // __CLANG_HIP_RUNTIME_WRAPPER_H__
diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index ff9f55a8e0710..e879fec0ebe5a 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -4984,63 +4984,31 @@ extern "C" __device__ double test_rsqrt(double x) {
// DEFAULT-LABEL: @test_scalblnf(
// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// DEFAULT-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// DEFAULT: cond.true.i:
-// DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
-// DEFAULT-NEXT: br label [[_ZL8SCALBLNFFL_EXIT:%.*]]
-// DEFAULT: cond.false.i:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_scalb_f32(float noundef [[X]], float noundef 0x43E0000000000000) #[[ATTR12]]
-// DEFAULT-NEXT: br label [[_ZL8SCALBLNFFL_EXIT]]
-// DEFAULT: _ZL8scalblnffl.exit:
-// DEFAULT-NEXT: [[COND_I:%.*]] = phi contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// DEFAULT-NEXT: ret float [[COND_I]]
+// DEFAULT-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
+// DEFAULT-NEXT: ret float [[TMP0]]
//
// FINITEONLY-LABEL: @test_scalblnf(
// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// FINITEONLY-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// FINITEONLY: cond.true.i:
-// FINITEONLY-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
-// FINITEONLY-NEXT: br label [[_ZL8SCALBLNFFL_EXIT:%.*]]
-// FINITEONLY: cond.false.i:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_scalb_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) 0x43E0000000000000) #[[ATTR12]]
-// FINITEONLY-NEXT: br label [[_ZL8SCALBLNFFL_EXIT]]
-// FINITEONLY: _ZL8scalblnffl.exit:
-// FINITEONLY-NEXT: [[COND_I:%.*]] = phi nnan ninf contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// FINITEONLY-NEXT: ret float [[COND_I]]
+// FINITEONLY-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// FINITEONLY-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
+// FINITEONLY-NEXT: ret float [[TMP0]]
//
// APPROX-LABEL: @test_scalblnf(
// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// APPROX-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// APPROX: cond.true.i:
-// APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
-// APPROX-NEXT: br label [[_ZL8SCALBLNFFL_EXIT:%.*]]
-// APPROX: cond.false.i:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_scalb_f32(float noundef [[X]], float noundef 0x43E0000000000000) #[[ATTR12]]
-// APPROX-NEXT: br label [[_ZL8SCALBLNFFL_EXIT]]
-// APPROX: _ZL8scalblnffl.exit:
-// APPROX-NEXT: [[COND_I:%.*]] = phi contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// APPROX-NEXT: ret float [[COND_I]]
+// APPROX-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
+// APPROX-NEXT: ret float [[TMP0]]
//
// AMDGCNSPIRV-LABEL: @test_scalblnf(
// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// AMDGCNSPIRV: cond.true.i:
-// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
-// AMDGCNSPIRV-NEXT: br label [[_ZL8SCALBLNFFL_EXIT:%.*]]
-// AMDGCNSPIRV: cond.false.i:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func addrspace(4) float @__ocml_scalb_f32(float noundef [[X]], float noundef 0x43E0000000000000) #[[ATTR12]]
-// AMDGCNSPIRV-NEXT: br label [[_ZL8SCALBLNFFL_EXIT]]
-// AMDGCNSPIRV: _ZL8scalblnffl.exit:
-// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = phi contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// AMDGCNSPIRV-NEXT: ret float [[COND_I]]
+// AMDGCNSPIRV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call addrspace(4) i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
+// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
//
extern "C" __device__ float test_scalblnf(float x, long int y) {
return scalblnf(x, y);
@@ -5048,63 +5016,31 @@ extern "C" __device__ float test_scalblnf(float x, long int y) {
// DEFAULT-LABEL: @test_scalbln(
// DEFAULT-NEXT: entry:
-// DEFAULT-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// DEFAULT-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// DEFAULT: cond.true.i:
-// DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
-// DEFAULT-NEXT: br label [[_ZL7SCALBLNDL_EXIT:%.*]]
-// DEFAULT: cond.false.i:
-// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract double @__ocml_scalb_f64(double noundef [[X]], double noundef 0x43E0000000000000) #[[ATTR12]]
-// DEFAULT-NEXT: br label [[_ZL7SCALBLNDL_EXIT]]
-// DEFAULT: _ZL7scalblndl.exit:
-// DEFAULT-NEXT: [[COND_I:%.*]] = phi contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// DEFAULT-NEXT: ret double [[COND_I]]
+// DEFAULT-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
+// DEFAULT-NEXT: ret double [[TMP0]]
//
// FINITEONLY-LABEL: @test_scalbln(
// FINITEONLY-NEXT: entry:
-// FINITEONLY-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// FINITEONLY-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// FINITEONLY: cond.true.i:
-// FINITEONLY-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
-// FINITEONLY-NEXT: br label [[_ZL7SCALBLNDL_EXIT:%.*]]
-// FINITEONLY: cond.false.i:
-// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) double @__ocml_scalb_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) 0x43E0000000000000) #[[ATTR12]]
-// FINITEONLY-NEXT: br label [[_ZL7SCALBLNDL_EXIT]]
-// FINITEONLY: _ZL7scalblndl.exit:
-// FINITEONLY-NEXT: [[COND_I:%.*]] = phi nnan ninf contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// FINITEONLY-NEXT: ret double [[COND_I]]
+// FINITEONLY-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// FINITEONLY-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
+// FINITEONLY-NEXT: ret double [[TMP0]]
//
// APPROX-LABEL: @test_scalbln(
// APPROX-NEXT: entry:
-// APPROX-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// APPROX-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// APPROX: cond.true.i:
-// APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
-// APPROX-NEXT: br label [[_ZL7SCALBLNDL_EXIT:%.*]]
-// APPROX: cond.false.i:
-// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract double @__ocml_scalb_f64(double noundef [[X]], double noundef 0x43E0000000000000) #[[ATTR12]]
-// APPROX-NEXT: br label [[_ZL7SCALBLNDL_EXIT]]
-// APPROX: _ZL7scalblndl.exit:
-// APPROX-NEXT: [[COND_I:%.*]] = phi contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// APPROX-NEXT: ret double [[COND_I]]
+// APPROX-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
+// APPROX-NEXT: ret double [[TMP0]]
//
// AMDGCNSPIRV-LABEL: @test_scalbln(
// AMDGCNSPIRV-NEXT: entry:
-// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// AMDGCNSPIRV: cond.true.i:
-// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
-// AMDGCNSPIRV-NEXT: br label [[_ZL7SCALBLNDL_EXIT:%.*]]
-// AMDGCNSPIRV: cond.false.i:
-// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func addrspace(4) double @__ocml_scalb_f64(double noundef [[X]], double noundef 0x43E0000000000000) #[[ATTR12]]
-// AMDGCNSPIRV-NEXT: br label [[_ZL7SCALBLNDL_EXIT]]
-// AMDGCNSPIRV: _ZL7scalblndl.exit:
-// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = phi contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// AMDGCNSPIRV-NEXT: ret double [[COND_I]]
+// AMDGCNSPIRV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call addrspace(4) i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
+// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
//
extern "C" __device__ double test_scalbln(double x, long int y) {
return scalbln(x, y);
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks.
I do not understand why this was calling the float version with an implicit cast from the long. Just clamp to the bounds of int, and use the generic ldexp (this is also how musl does it (except scalbnf is the base implementation there). Somehow INT_MIN was also not defined, so deal with that.
2c0e09d
to
5aa9386
Compare
I do not understand why this was calling the float version with an implicit cast from the long. Just clamp to the bounds of int, and use the generic ldexp (this is also how musl does it, except scalbnf is the base implementation there). Somehow INT_MIN was also not defined, so deal with that.
I do not understand why this was calling the float version with
an implicit cast from the long. Just clamp to the bounds of int,
and use the generic ldexp (this is also how musl does it, except
scalbnf is the base implementation there).
Somehow INT_MIN was also not defined, so deal with that.