-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
BUG: where gives incorrect results when upcasting (GH 9731) #9743
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1081,12 +1081,23 @@ def _infer_dtype_from_scalar(val): | |
return dtype, val | ||
|
||
|
||
def _maybe_cast_scalar(dtype, value): | ||
""" if we a scalar value and are casting to a dtype that needs nan -> NaT | ||
conversion | ||
def _maybe_cast(dtype, value): | ||
""" | ||
if np.isscalar(value) and dtype in _DATELIKE_DTYPES and isnull(value): | ||
return tslib.iNaT | ||
If `dtype` is date-like, then: | ||
if `value` == nan, then convert to NaT | ||
if `value` is an integer or integer array, convert to `dtype` | ||
""" | ||
if dtype in _DATELIKE_DTYPES: | ||
if np.isscalar(value): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use |
||
if isnull(value): | ||
return tslib.iNaT | ||
elif is_integer(value): | ||
return np.array(value, dtype=dtype) | ||
|
||
elif isinstance(value, np.ndarray): | ||
if issubclass(dtype.type, np.integer): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use |
||
return np.array(value, dtype=dtype) | ||
|
||
return value | ||
|
||
|
||
|
@@ -1154,16 +1165,29 @@ def _maybe_promote(dtype, fill_value=np.nan): | |
return dtype, fill_value | ||
|
||
|
||
def _maybe_upcast_putmask(result, mask, other, dtype=None, change=None): | ||
""" a safe version of put mask that (potentially upcasts the result | ||
return the result | ||
if change is not None, then MUTATE the change (and change the dtype) | ||
return a changed flag | ||
def _maybe_upcast_putmask(result, mask, other): | ||
""" | ||
A safe version of putmask that potentially upcasts the result | ||
|
||
Parameters | ||
---------- | ||
result : ndarray | ||
The destination array. This will be mutated in-place if no upcasting is | ||
necessary. | ||
mask : boolean ndarray | ||
other : ndarray or scalar | ||
The source array or value | ||
|
||
Returns | ||
------- | ||
result : ndarray | ||
changed : boolean | ||
Set to true if the result array was upcasted | ||
""" | ||
|
||
if mask.any(): | ||
|
||
other = _maybe_cast_scalar(result.dtype, other) | ||
other = _maybe_cast(result.dtype, other) | ||
|
||
def changeit(): | ||
|
||
|
@@ -1173,39 +1197,26 @@ def changeit(): | |
om = other[mask] | ||
om_at = om.astype(result.dtype) | ||
if (om == om_at).all(): | ||
new_other = result.values.copy() | ||
new_other[mask] = om_at | ||
result[:] = new_other | ||
new_result = result.values.copy() | ||
new_result[mask] = om_at | ||
result[:] = new_result | ||
return result, False | ||
except: | ||
pass | ||
|
||
# we are forced to change the dtype of the result as the input | ||
# isn't compatible | ||
r, fill_value = _maybe_upcast( | ||
result, fill_value=other, dtype=dtype, copy=True) | ||
np.putmask(r, mask, other) | ||
|
||
# we need to actually change the dtype here | ||
if change is not None: | ||
|
||
# if we are trying to do something unsafe | ||
# like put a bigger dtype in a smaller one, use the smaller one | ||
# pragma: no cover | ||
if change.dtype.itemsize < r.dtype.itemsize: | ||
raise AssertionError( | ||
"cannot change dtype of input to smaller size") | ||
change.dtype = r.dtype | ||
change[:] = r | ||
r, _ = _maybe_upcast(result, fill_value=other, copy=True) | ||
np.place(r, mask, other) | ||
|
||
return r, True | ||
|
||
# we want to decide whether putmask will work | ||
# we want to decide whether place will work | ||
# if we have nans in the False portion of our mask then we need to | ||
# upcast (possibily) otherwise we DON't want to upcast (e.g. if we are | ||
# have values, say integers in the success portion then its ok to not | ||
# upcast (possibly), otherwise we DON't want to upcast (e.g. if we | ||
# have values, say integers, in the success portion then it's ok to not | ||
# upcast) | ||
new_dtype, fill_value = _maybe_promote(result.dtype, other) | ||
new_dtype, _ = _maybe_promote(result.dtype, other) | ||
if new_dtype != result.dtype: | ||
|
||
# we have a scalar or len 0 ndarray | ||
|
@@ -1222,7 +1233,7 @@ def changeit(): | |
return changeit() | ||
|
||
try: | ||
np.putmask(result, mask, other) | ||
np.place(result, mask, other) | ||
except: | ||
return changeit() | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1688,6 +1688,14 @@ def test_where(self): | |
assert_series_equal(s, expected) | ||
self.assertEqual(s.dtype, expected.dtype) | ||
|
||
# GH 9731 | ||
s = Series(np.arange(10)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do this as |
||
mask = s > 5 | ||
values = [2.5, 3.5, 4.5, 5.5] | ||
s[mask] = values | ||
expected = Series(lrange(6) + values, dtype='float64') | ||
assert_series_equal(s, expected) | ||
|
||
# can't do these as we are forced to change the itemsize of the input | ||
# to something we cannot | ||
for dtype in [np.int8, np.int16, np.int32, np.float16, np.float32]: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
actually why don't you roll this routine into
_maybe_upcast_putmask
as its only used there (you can just make it an in-line defined function)