Skip to content

Commit c4312cb

Browse files
Scale down arguments and scale back the result
This improves accuracy at extremes of supported range. Use sycl:: namespace ldexp and ilogb to prevent problem with VS 2017 headers.
1 parent ff3c680 commit c4312cb

File tree

1 file changed

+18
-8
lines changed
  • dpctl/tensor/libtensor/include/kernels/elementwise_functions

1 file changed

+18
-8
lines changed

dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -148,16 +148,26 @@ template <typename argT, typename resT> struct SqrtFunctor
148148
constexpr realT half = realT(0x1.0p-1f); // 1/2
149149
constexpr realT zero = realT(0);
150150

151-
if (std::signbit(x)) {
152-
realT m = std::hypot(x, y);
153-
realT d = std::sqrt((m - x) * half);
154-
return {(d == zero ? zero : std::abs(y) / d * half),
155-
std::copysign(d, y)};
151+
const int exp_x = sycl::ilogb(x);
152+
const int exp_y = sycl::ilogb(y);
153+
154+
int sc = std::max<int>(exp_x, exp_y) / 2;
155+
const realT xx = sycl::ldexp(x, -sc * 2);
156+
const realT yy = sycl::ldexp(y, -sc * 2);
157+
158+
if (std::signbit(xx)) {
159+
const realT m = std::hypot(xx, yy);
160+
const realT d = std::sqrt((m - xx) * half);
161+
const realT res_re = (d == zero ? zero : std::abs(yy) / d * half);
162+
const realT res_im = std::copysign(d, yy);
163+
return {sycl::ldexp(res_re, sc), sycl::ldexp(res_im, sc)};
156164
}
157165
else {
158-
realT m = std::hypot(x, y);
159-
realT d = std::sqrt((m + x) * half);
160-
return {d, (d == zero) ? std::copysign(zero, y) : y * half / d};
166+
const realT m = std::hypot(xx, yy);
167+
const realT d = std::sqrt((m + xx) * half);
168+
const realT res_im =
169+
(d == zero) ? std::copysign(zero, yy) : yy * half / d;
170+
return {sycl::ldexp(d, sc), sycl::ldexp(res_im, sc)};
161171
}
162172
}
163173
};

0 commit comments

Comments
 (0)