IntelPython · ndgrigorian · Jul 30, 2023 · Jun 30, 2023 · Jul 29, 2023 · oleksandr-pavlyk
@@ -52,62 +52,60 @@ namespace tu_ns = dpctl::tensor::type_utils;
 template <typename argT1, typename argT2, typename resT>
 struct FloorDivideFunctor
 {
-
-    using supports_sg_loadstore = std::negation<
-        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
-    using supports_vec = std::negation<
-        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
+    using supports_sg_loadstore = std::true_type;
+    using supports_vec = std::true_type;
 
     resT operator()(const argT1 &in1, const argT2 &in2)
     {
-        auto tmp = in1 / in2;
-        if constexpr (std::is_integral_v<decltype(tmp)>) {
-            if constexpr (std::is_unsigned_v<decltype(tmp)>) {
-                return (in2 == argT2(0)) ? resT(0) : tmp;
+        if constexpr (std::is_integral_v<argT1> || std::is_integral_v<argT2>) {
+            if (in2 == argT2(0)) {
+                return resT(0);
+            }
+            if constexpr (std::is_signed_v<argT1> || std::is_signed_v<argT2>) {
+                auto div = in1 / in2;
+                auto mod = in1 % in2;
+                auto corr = (mod != 0 && l_xor(mod < 0, in2 < 0));
+                return (div - corr);
             }
             else {
-                if (in2 == argT2(0)) {
-                    return resT(0);
-                }
-                else {
-                    auto rem = in1 % in2;
-                    auto corr = (rem != 0 && ((rem < 0) != (in2 < 0)));
-                    return (tmp - corr);
-                }
+                return (in1 / in2);
             }
         }
         else {
-            return sycl::floor(tmp);
+            auto div = in1 / in2;
+            return (div == resT(0)) ? div : resT(std::floor(div));
         }
     }
 
     template <int vec_sz>
     sycl::vec<resT, vec_sz> operator()(const sycl::vec<argT1, vec_sz> &in1,
                                        const sycl::vec<argT2, vec_sz> &in2)
     {
-        auto tmp = in1 / in2;
-        using tmpT = typename decltype(tmp)::element_type;
-        if constexpr (std::is_integral_v<tmpT>) {
-            if constexpr (std::is_signed_v<tmpT>) {
-                auto rem_tmp = in1 % in2;
+        if constexpr (std::is_integral_v<resT>) {
+            sycl::vec<resT, vec_sz> res;
 #pragma unroll
-                for (int i = 0; i < vec_sz; ++i) {
-                    if (in2[i] == argT2(0)) {
-                        tmp[i] = tmpT(0);
-                    }
-                    else {
-                        tmpT corr = (rem_tmp[i] != 0 &&
-                                     ((rem_tmp[i] < 0) != (in2[i] < 0)));
-                        tmp[i] -= corr;
+            for (int i = 0; i < vec_sz; ++i) {
+                if (in2[i] == argT2(0)) {
+                    res[i] = resT(0);
+                }
+                else {
+                    res[i] = in1[i] / in2[i];
+                    if constexpr (std::is_signed_v<resT>) {
+                        auto mod = in1[i] % in2[i];
+                        auto corr = (mod != 0 && l_xor(mod < 0, in2[i] < 0));
+                        res[i] -= corr;
                     }
                 }
             }
-            else {
+            return res;
+        }
+        else {
+            auto tmp = in1 / in2;
+            using tmpT = typename decltype(tmp)::element_type;
 #pragma unroll
-                for (int i = 0; i < vec_sz; ++i) {
-                    if (in2[i] == argT2(0)) {
-                        tmp[i] = tmpT(0);
-                    }
+            for (int i = 0; i < vec_sz; ++i) {
+                if (in2[i] != argT2(0)) {
+                    tmp[i] = std::floor(tmp[i]);
                 }
             }
             if constexpr (std::is_same_v<resT, tmpT>) {
@@ -118,19 +116,12 @@ struct FloorDivideFunctor
                 return vec_cast<resT, tmpT, vec_sz>(tmp);
             }
         }
-        else {
-            sycl::vec<resT, vec_sz> res = sycl::floor(tmp);
-            if constexpr (std::is_same_v<resT,
-                                         typename decltype(res)::element_type>)
-            {
-                return res;
-            }
-            else {
-                using dpctl::tensor::type_utils::vec_cast;
-                return vec_cast<resT, typename decltype(res)::element_type,
-                                vec_sz>(res);
-            }
-        }
+    }
+
+private:
+    bool l_xor(bool b1, bool b2) const
+    {
+        return (b1 != b2);
     }
 };
 

@@ -203,16 +203,6 @@ def test_floor_divide_gh_1247():
         dpt.asnumpy(res), np.full(res.shape, -1, dtype=res.dtype)
     )
 
-    # attempt to invoke sycl::vec overload using a larger array
-    x = dpt.arange(-64, 65, 1, dtype="i4")
-    np.testing.assert_array_equal(
-        dpt.asnumpy(dpt.floor_divide(x, 3)), np.floor_divide(dpt.asnumpy(x), 3)
-    )
-    np.testing.assert_array_equal(
-        dpt.asnumpy(dpt.floor_divide(x, -3)),
-        np.floor_divide(dpt.asnumpy(x), -3),
-    )
-
 
 @pytest.mark.parametrize("dtype", _no_complex_dtypes[1:9])
 def test_floor_divide_integer_zero(dtype):
@@ -226,10 +216,42 @@ def test_floor_divide_integer_zero(dtype):
         dpt.asnumpy(res), np.zeros(x.shape, dtype=res.dtype)
     )
 
-    # attempt to invoke sycl::vec overload using a larger array
-    x = dpt.arange(129, dtype=dtype, sycl_queue=q)
-    y = dpt.zeros_like(x, sycl_queue=q)
+
+def test_floor_divide_special_cases():
+    q = get_queue_or_skip()
+
+    x = dpt.empty(1, dtype="f4", sycl_queue=q)
+    y = dpt.empty_like(x)
+    x[0], y[0] = dpt.inf, dpt.inf
+    res = dpt.floor_divide(x, y)
+    with np.errstate(all="ignore"):
+        res_np = np.floor_divide(dpt.asnumpy(x), dpt.asnumpy(y))
+        np.testing.assert_array_equal(dpt.asnumpy(res), res_np)
+
+    x[0], y[0] = 0.0, -1.0
+    res = dpt.floor_divide(x, y)
+    x_np = dpt.asnumpy(x)
+    y_np = dpt.asnumpy(y)
+    res_np = np.floor_divide(x_np, y_np)
+    np.testing.assert_array_equal(dpt.asnumpy(res), res_np)
+
+    res = dpt.floor_divide(y, x)
+    with np.errstate(all="ignore"):
+        res_np = np.floor_divide(y_np, x_np)
+        np.testing.assert_array_equal(dpt.asnumpy(res), res_np)
+
+    x[0], y[0] = -1.0, dpt.inf
     res = dpt.floor_divide(x, y)
     np.testing.assert_array_equal(
-        dpt.asnumpy(res), np.zeros(x.shape, dtype=res.dtype)
+        dpt.asnumpy(res), np.asarray([-0.0], dtype="f4")
     )
+
+    res = dpt.floor_divide(y, x)
+    np.testing.assert_array_equal(
+        dpt.asnumpy(res), np.asarray([-dpt.inf], dtype="f4")
+    )
+
+    x[0], y[0] = 1.0, dpt.nan
+    res = dpt.floor_divide(x, y)
+    res_np = np.floor_divide(dpt.asnumpy(x), dpt.asnumpy(y))
+    np.testing.assert_array_equal(dpt.asnumpy(res), res_np)