Skip to content

Commit 5aa9386

Browse files
committed
clang/HIP: Do not call ocml in scalbln implementations
I do not understand why this was calling the float version with an implicit cast from the long. Just clamp to the bounds of int, and use the generic ldexp (this is also how musl does it (except scalbnf is the base implementation there). Somehow INT_MIN was also not defined, so deal with that.
1 parent 4703f8b commit 5aa9386

File tree

3 files changed

+45
-100
lines changed

3 files changed

+45
-100
lines changed

clang/lib/Headers/__clang_hip_math.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -639,8 +639,11 @@ float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
639639

640640
__DEVICE__
641641
float scalblnf(float __x, long int __n) {
642-
return (__n < INT_MAX) ? __builtin_amdgcn_ldexpf(__x, __n)
643-
: __ocml_scalb_f32(__x, __n);
642+
if (__n > INT_MAX)
643+
__n = INT_MAX;
644+
else if (__n < INT_MIN)
645+
__n = INT_MIN;
646+
return __builtin_ldexpf(__x, (int)__n);
644647
}
645648

646649
__DEVICE__
@@ -1044,8 +1047,11 @@ double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
10441047

10451048
__DEVICE__
10461049
double scalbln(double __x, long int __n) {
1047-
return (__n < INT_MAX) ? __builtin_amdgcn_ldexp(__x, __n)
1048-
: __ocml_scalb_f64(__x, __n);
1050+
if (__n > INT_MAX)
1051+
__n = INT_MAX;
1052+
else if (__n < INT_MIN)
1053+
__n = INT_MIN;
1054+
return __builtin_ldexp(__x, (int)__n);
10491055
}
10501056
__DEVICE__
10511057
double scalbn(double __x, int __n) { return __builtin_amdgcn_ldexp(__x, __n); }

clang/lib/Headers/__clang_hip_runtime_wrapper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,13 @@ typedef __SIZE_TYPE__ size_t;
125125
#pragma push_macro("uint64_t")
126126
#pragma push_macro("CHAR_BIT")
127127
#pragma push_macro("INT_MAX")
128+
#pragma push_macro("INT_MIN")
128129
#define NULL (void *)0
129130
#define uint32_t __UINT32_TYPE__
130131
#define uint64_t __UINT64_TYPE__
131132
#define CHAR_BIT __CHAR_BIT__
132133
#define INT_MAX __INTMAX_MAX__
134+
#define INT_MIN (-__INT_MAX__ - 1)
133135
#endif // __HIPCC_RTC__
134136

135137
#include <__clang_hip_libdevice_declares.h>
@@ -154,6 +156,7 @@ typedef __SIZE_TYPE__ size_t;
154156
#pragma pop_macro("uint64_t")
155157
#pragma pop_macro("CHAR_BIT")
156158
#pragma pop_macro("INT_MAX")
159+
#pragma pop_macro("INT_MIN")
157160
#endif // __HIPCC_RTC__
158161
#endif // __HIP__
159162
#endif // __CLANG_HIP_RUNTIME_WRAPPER_H__

clang/test/Headers/__clang_hip_math.hip

Lines changed: 32 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -4984,127 +4984,63 @@ extern "C" __device__ double test_rsqrt(double x) {
49844984

49854985
// DEFAULT-LABEL: @test_scalblnf(
49864986
// DEFAULT-NEXT: entry:
4987-
// DEFAULT-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
4988-
// DEFAULT-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
4989-
// DEFAULT: cond.true.i:
4990-
// DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
4991-
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
4992-
// DEFAULT-NEXT: br label [[_ZL8SCALBLNFFL_EXIT:%.*]]
4993-
// DEFAULT: cond.false.i:
4994-
// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_scalb_f32(float noundef [[X]], float noundef 0x43E0000000000000) #[[ATTR12]]
4995-
// DEFAULT-NEXT: br label [[_ZL8SCALBLNFFL_EXIT]]
4996-
// DEFAULT: _ZL8scalblnffl.exit:
4997-
// DEFAULT-NEXT: [[COND_I:%.*]] = phi contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
4998-
// DEFAULT-NEXT: ret float [[COND_I]]
4987+
// DEFAULT-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
4988+
// DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
4989+
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
4990+
// DEFAULT-NEXT: ret float [[TMP0]]
49994991
//
50004992
// FINITEONLY-LABEL: @test_scalblnf(
50014993
// FINITEONLY-NEXT: entry:
5002-
// FINITEONLY-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
5003-
// FINITEONLY-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
5004-
// FINITEONLY: cond.true.i:
5005-
// FINITEONLY-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
5006-
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
5007-
// FINITEONLY-NEXT: br label [[_ZL8SCALBLNFFL_EXIT:%.*]]
5008-
// FINITEONLY: cond.false.i:
5009-
// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_scalb_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) 0x43E0000000000000) #[[ATTR12]]
5010-
// FINITEONLY-NEXT: br label [[_ZL8SCALBLNFFL_EXIT]]
5011-
// FINITEONLY: _ZL8scalblnffl.exit:
5012-
// FINITEONLY-NEXT: [[COND_I:%.*]] = phi nnan ninf contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
5013-
// FINITEONLY-NEXT: ret float [[COND_I]]
4994+
// FINITEONLY-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
4995+
// FINITEONLY-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
4996+
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
4997+
// FINITEONLY-NEXT: ret float [[TMP0]]
50144998
//
50154999
// APPROX-LABEL: @test_scalblnf(
50165000
// APPROX-NEXT: entry:
5017-
// APPROX-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
5018-
// APPROX-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
5019-
// APPROX: cond.true.i:
5020-
// APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
5021-
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
5022-
// APPROX-NEXT: br label [[_ZL8SCALBLNFFL_EXIT:%.*]]
5023-
// APPROX: cond.false.i:
5024-
// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract float @__ocml_scalb_f32(float noundef [[X]], float noundef 0x43E0000000000000) #[[ATTR12]]
5025-
// APPROX-NEXT: br label [[_ZL8SCALBLNFFL_EXIT]]
5026-
// APPROX: _ZL8scalblnffl.exit:
5027-
// APPROX-NEXT: [[COND_I:%.*]] = phi contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
5028-
// APPROX-NEXT: ret float [[COND_I]]
5001+
// APPROX-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
5002+
// APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
5003+
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
5004+
// APPROX-NEXT: ret float [[TMP0]]
50295005
//
50305006
// AMDGCNSPIRV-LABEL: @test_scalblnf(
50315007
// AMDGCNSPIRV-NEXT: entry:
5032-
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
5033-
// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
5034-
// AMDGCNSPIRV: cond.true.i:
5035-
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
5036-
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
5037-
// AMDGCNSPIRV-NEXT: br label [[_ZL8SCALBLNFFL_EXIT:%.*]]
5038-
// AMDGCNSPIRV: cond.false.i:
5039-
// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func addrspace(4) float @__ocml_scalb_f32(float noundef [[X]], float noundef 0x43E0000000000000) #[[ATTR12]]
5040-
// AMDGCNSPIRV-NEXT: br label [[_ZL8SCALBLNFFL_EXIT]]
5041-
// AMDGCNSPIRV: _ZL8scalblnffl.exit:
5042-
// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = phi contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
5043-
// AMDGCNSPIRV-NEXT: ret float [[COND_I]]
5008+
// AMDGCNSPIRV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call addrspace(4) i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
5009+
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
5010+
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
5011+
// AMDGCNSPIRV-NEXT: ret float [[TMP0]]
50445012
//
50455013
extern "C" __device__ float test_scalblnf(float x, long int y) {
50465014
return scalblnf(x, y);
50475015
}
50485016

50495017
// DEFAULT-LABEL: @test_scalbln(
50505018
// DEFAULT-NEXT: entry:
5051-
// DEFAULT-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
5052-
// DEFAULT-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
5053-
// DEFAULT: cond.true.i:
5054-
// DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
5055-
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
5056-
// DEFAULT-NEXT: br label [[_ZL7SCALBLNDL_EXIT:%.*]]
5057-
// DEFAULT: cond.false.i:
5058-
// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract double @__ocml_scalb_f64(double noundef [[X]], double noundef 0x43E0000000000000) #[[ATTR12]]
5059-
// DEFAULT-NEXT: br label [[_ZL7SCALBLNDL_EXIT]]
5060-
// DEFAULT: _ZL7scalblndl.exit:
5061-
// DEFAULT-NEXT: [[COND_I:%.*]] = phi contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
5062-
// DEFAULT-NEXT: ret double [[COND_I]]
5019+
// DEFAULT-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
5020+
// DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
5021+
// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
5022+
// DEFAULT-NEXT: ret double [[TMP0]]
50635023
//
50645024
// FINITEONLY-LABEL: @test_scalbln(
50655025
// FINITEONLY-NEXT: entry:
5066-
// FINITEONLY-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
5067-
// FINITEONLY-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
5068-
// FINITEONLY: cond.true.i:
5069-
// FINITEONLY-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
5070-
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
5071-
// FINITEONLY-NEXT: br label [[_ZL7SCALBLNDL_EXIT:%.*]]
5072-
// FINITEONLY: cond.false.i:
5073-
// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) double @__ocml_scalb_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) 0x43E0000000000000) #[[ATTR12]]
5074-
// FINITEONLY-NEXT: br label [[_ZL7SCALBLNDL_EXIT]]
5075-
// FINITEONLY: _ZL7scalblndl.exit:
5076-
// FINITEONLY-NEXT: [[COND_I:%.*]] = phi nnan ninf contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
5077-
// FINITEONLY-NEXT: ret double [[COND_I]]
5026+
// FINITEONLY-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
5027+
// FINITEONLY-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
5028+
// FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
5029+
// FINITEONLY-NEXT: ret double [[TMP0]]
50785030
//
50795031
// APPROX-LABEL: @test_scalbln(
50805032
// APPROX-NEXT: entry:
5081-
// APPROX-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
5082-
// APPROX-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
5083-
// APPROX: cond.true.i:
5084-
// APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
5085-
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
5086-
// APPROX-NEXT: br label [[_ZL7SCALBLNDL_EXIT:%.*]]
5087-
// APPROX: cond.false.i:
5088-
// APPROX-NEXT: [[CALL_I:%.*]] = tail call contract double @__ocml_scalb_f64(double noundef [[X]], double noundef 0x43E0000000000000) #[[ATTR12]]
5089-
// APPROX-NEXT: br label [[_ZL7SCALBLNDL_EXIT]]
5090-
// APPROX: _ZL7scalblndl.exit:
5091-
// APPROX-NEXT: [[COND_I:%.*]] = phi contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
5092-
// APPROX-NEXT: ret double [[COND_I]]
5033+
// APPROX-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
5034+
// APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
5035+
// APPROX-NEXT: [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
5036+
// APPROX-NEXT: ret double [[TMP0]]
50935037
//
50945038
// AMDGCNSPIRV-LABEL: @test_scalbln(
50955039
// AMDGCNSPIRV-NEXT: entry:
5096-
// AMDGCNSPIRV-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
5097-
// AMDGCNSPIRV-NEXT: br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
5098-
// AMDGCNSPIRV: cond.true.i:
5099-
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
5100-
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
5101-
// AMDGCNSPIRV-NEXT: br label [[_ZL7SCALBLNDL_EXIT:%.*]]
5102-
// AMDGCNSPIRV: cond.false.i:
5103-
// AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = tail call contract spir_func addrspace(4) double @__ocml_scalb_f64(double noundef [[X]], double noundef 0x43E0000000000000) #[[ATTR12]]
5104-
// AMDGCNSPIRV-NEXT: br label [[_ZL7SCALBLNDL_EXIT]]
5105-
// AMDGCNSPIRV: _ZL7scalblndl.exit:
5106-
// AMDGCNSPIRV-NEXT: [[COND_I:%.*]] = phi contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
5107-
// AMDGCNSPIRV-NEXT: ret double [[COND_I]]
5040+
// AMDGCNSPIRV-NEXT: [[SPEC_STORE_SELECT_I:%.*]] = tail call addrspace(4) i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
5041+
// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
5042+
// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
5043+
// AMDGCNSPIRV-NEXT: ret double [[TMP0]]
51085044
//
51095045
extern "C" __device__ double test_scalbln(double x, long int y) {
51105046
return scalbln(x, y);

0 commit comments

Comments
 (0)