Skip to content

Commit c992e77

Browse files
author
Rohan Jain
committed
fix uint64 overflow
1 parent 80a6976 commit c992e77

File tree

2 files changed

+18
-3
lines changed

2 files changed

+18
-3
lines changed

pandas/core/arrays/arrow/array.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,15 +129,22 @@ def floordiv_compat(
129129
) -> pa.ChunkedArray:
130130
divided = pc.divide(left, right)
131131
if pa.types.is_integer(divided.type):
132-
# GH 56676, avoid storing intermediate calculating in floating point type.
132+
# GH 56676: avoid storing the intermediate calculation in a floating point type.
133133
has_remainder = pc.not_equal(pc.multiply(divided, right), left)
134134
result = pc.if_else(
135-
pc.and_(pc.less(divided, 0), has_remainder),
135+
# Pass a typed arrow scalar rather than stdlib int
136+
# which is always inferred as int64, to prevent overflow
137+
# in case of large uint64 values.
138+
pc.and_(
139+
pc.less(divided, pa.scalar(0, type=divided.type)), has_remainder
140+
),
141+
# GH 55561: floordiv should round towards negative infinity.
142+
# pc.divide for integral types rounds towards 0.
136143
# Avoid using subtract_checked which would incorrectly raise
137144
# for -9223372036854775808 // 1, because if integer overflow
138145
# occurs, then has_remainder should be false, and overflowed
139146
# value is discarded.
140-
pc.subtract(divided, 1),
147+
pc.subtract(divided, pa.scalar(1, type=divided.type)),
141148
divided,
142149
)
143150
# Ensure compatibility with older versions of pandas where

pandas/tests/extension/test_arrow.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3253,6 +3253,14 @@ def test_arrow_floordiv_large_integral_result():
32533253
tm.assert_series_equal(result, a)
32543254

32553255

3256+
def test_arrow_floordiv_no_overflow():
3257+
# GH 56676
3258+
a = pd.Series([9223372036854775808], dtype="uint64[pyarrow]")
3259+
b = pd.Series([1], dtype="uint64[pyarrow]")
3260+
result = a // b
3261+
tm.assert_series_equal(result, a)
3262+
3263+
32563264
def test_string_to_datetime_parsing_cast():
32573265
# GH 56266
32583266
string_dates = ["2020-01-01 04:30:00", "2020-01-02 00:00:00", "2020-01-03 00:00:00"]

0 commit comments

Comments
 (0)