Skip to content

Commit 346cfc8

Browse files
committed
Review (jreback)
1 parent a41bd16 commit 346cfc8

File tree

6 files changed

+107
-50
lines changed

6 files changed

+107
-50
lines changed

doc/source/whatsnew/v0.24.0.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,8 @@ Other Enhancements
233233
all instances of ``set`` will not be considered "list-like" anymore (:issue:`23061`)
234234
- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`).
235235
- New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`).
236-
- :meth:`Series.update` now supports the same keywords and functionality as :meth:`DataFrame.update` (:issue:`22358`)
236+
- :meth:`Series.update` now supports the same keywords and functionality as :meth:`DataFrame.update`.
237+
In particular, it has gained the keywords ``overwrite``, ``filter_func`` and ``errors`` (:issue:`22358`)
237238
- Compatibility with Matplotlib 3.0 (:issue:`22790`).
238239
- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`)
239240
- :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`)

pandas/core/frame.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5230,6 +5230,14 @@ def combiner(x, y):
52305230

52315231
return self.combine(other, combiner, overwrite=False)
52325232

5233+
@Appender(NDFrame.update.__doc__)
@deprecate_kwarg(old_arg_name='raise_conflict', new_arg_name='errors',
                 mapping={False: 'ignore', True: 'raise'})
def update(self, other, join='left', overwrite=True, filter_func=None,
           errors='ignore'):
    # Deliberately no docstring here: the Appender decorator attaches
    # NDFrame.update's docstring to this method.
    # The decorator above maps the old boolean ``raise_conflict`` keyword
    # onto ``errors`` (False -> 'ignore', True -> 'raise').
    options = dict(join=join, overwrite=overwrite,
                   filter_func=filter_func, errors=errors)
    super(DataFrame, self).update(other, **options)
5240+
52335241
# ----------------------------------------------------------------------
52345242
# Data reshaping
52355243

pandas/core/generic.py

Lines changed: 18 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -101,32 +101,6 @@ def _single_replace(self, to_replace, method, inplace, limit):
101101
return result
102102

103103

104-
def _update_column(this, that, overwrite=True, filter_func=None,
105-
raise_conflict=False):
106-
import pandas.core.computation.expressions as expressions
107-
108-
if filter_func is not None:
109-
with np.errstate(all='ignore'):
110-
mask = ~filter_func(this) | isna(that)
111-
else:
112-
if raise_conflict:
113-
mask_this = notna(that)
114-
mask_that = notna(this)
115-
if any(mask_this & mask_that):
116-
raise ValueError("Data overlaps.")
117-
118-
if overwrite:
119-
mask = isna(that)
120-
else:
121-
mask = notna(this)
122-
123-
# don't overwrite columns unnecessarily
124-
if mask.all():
125-
return None
126-
127-
return expressions.where(mask, this, that)
128-
129-
130104
class NDFrame(PandasObject, SelectionMixin):
131105
"""
132106
N-dimensional analogue of DataFrame. Store multi-dimensional in a
@@ -4199,7 +4173,7 @@ def _reindex_with_indexers(self, reindexers, fill_value=None, copy=False,
41994173
return self._constructor(new_data).__finalize__(self)
42004174

42014175
def update(self, other, join='left', overwrite=True, filter_func=None,
4202-
raise_conflict=False):
4176+
errors='ignore'):
42034177
"""
42044178
Modify in place using non-NA values from another DataFrame.
42054179
@@ -4226,8 +4200,8 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
42264200
filter_func : callable(1d-array) -> boolean 1d-array, optional
42274201
Can choose to replace values other than NA. Return True for values
42284202
that should be updated.
4229-
raise_conflict : bool, default False
4230-
If True, will raise a ValueError if the DataFrame and `other`
4203+
errors : {'raise', 'ignore'}, default 'ignore'
4204+
If 'raise', will raise a ValueError if the DataFrame and `other`
42314205
both contain non-NA data in the same place.
42324206
42334207
Raises
@@ -4316,13 +4290,15 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
43164290
other = other.reindex_like(self)
43174291
this = self.values
43184292
that = other.values
4319-
updated = _update_column(this, that, overwrite=overwrite,
4320-
filter_func=filter_func,
4321-
raise_conflict=raise_conflict)
4322-
if updated is None:
4323-
# don't overwrite Series unnecessarily
4324-
return
4325-
self._data._block.values = updated
4293+
4294+
# missing.update_array returns an np.ndarray
4295+
updated_values = missing.update_array(this, that,
4296+
overwrite=overwrite,
4297+
filter_func=filter_func,
4298+
errors=errors)
4299+
# don't overwrite unnecessarily
4300+
if updated_values is not None:
4301+
self._update_inplace(Series(updated_values, index=self.index))
43264302
else: # DataFrame
43274303
if not isinstance(other, ABCDataFrame):
43284304
other = DataFrame(other)
@@ -4333,13 +4309,12 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
43334309
this = self[col].values
43344310
that = other[col].values
43354311

4336-
updated = _update_column(this, that, overwrite=overwrite,
4337-
filter_func=filter_func,
4338-
raise_conflict=raise_conflict)
4339-
# don't overwrite columns unnecessarily
4340-
if updated is None:
4341-
continue
4342-
self[col] = updated
4312+
updated = missing.update_array(this, that, overwrite=overwrite,
4313+
filter_func=filter_func,
4314+
errors=errors)
4315+
# don't overwrite unnecessarily
4316+
if updated is not None:
4317+
self[col] = updated
43434318

43444319
def filter(self, items=None, like=None, regex=None, axis=None):
43454320
"""

pandas/core/missing.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
ensure_float64)
2222

2323
from pandas.core.dtypes.cast import infer_dtype_from_array
24-
from pandas.core.dtypes.missing import isna
24+
from pandas.core.dtypes.missing import isna, notna
2525

2626

2727
def mask_missing(arr, values_to_mask):
@@ -75,6 +75,70 @@ def mask_missing(arr, values_to_mask):
7575
return mask
7676

7777

78+
def update_array(this, that, overwrite=True, filter_func=None,
                 errors='ignore'):
    """
    Update one array with non-NA values from another array.

    Parameters
    ----------
    this : np.ndarray (one-dimensional)
        The array being updated.
    that : np.ndarray (one-dimensional)
        The array being used to update.
    overwrite : bool, default True
        How to handle non-NA values for overlapping keys:

        * True: overwrite original array's values with values from `that`.
        * False: only update values that are NA in `this`.

        Ignored when `filter_func` is given.
    filter_func : callable(1d-array) -> boolean 1d-array, optional
        Can choose to replace values other than NA. Return True for values
        that should be updated. It is called on `this`.
    errors : {'raise', 'ignore'}, default 'ignore'
        If 'raise', will raise a ValueError if `this` and `that` both contain
        non-NA data in the same place. The overlap check is only performed
        when `filter_func` is None.

    Raises
    ------
    ValueError
        When `errors` is neither 'ignore' nor 'raise', or when
        `errors='raise'` and there's overlapping non-NA data.

    Returns
    -------
    updated : np.ndarray (one-dimensional) or None
        The updated array. Return None if `this` remains unchanged.

    See Also
    --------
    Series.update : Similar method for `Series`.
    DataFrame.update : Similar method for `DataFrame`.
    dict.update : Similar method for `dict`.
    """
    # NOTE(review): kept as a function-level import as in the original;
    # presumably this avoids an import cycle -- confirm before hoisting.
    import pandas.core.computation.expressions as expressions

    # Reject unknown values up front so that a typo (e.g. 'rais') does not
    # silently behave like 'ignore'.
    if errors not in ('ignore', 'raise'):
        raise ValueError("The parameter errors must be either "
                         "'ignore' or 'raise'")

    # ``mask`` is True where the value from `this` is kept.
    if filter_func is not None:
        # Silence floating-point warnings raised while evaluating the
        # user-supplied filter.
        with np.errstate(all='ignore'):
            mask = ~filter_func(this) | isna(that)
    else:
        if errors == 'raise':
            # Vectorised reduction; the builtin any() would iterate the
            # array element by element in Python.
            if (notna(this) & notna(that)).any():
                raise ValueError("Data overlaps.")

        if overwrite:
            mask = isna(that)
        else:
            mask = notna(this)

    # don't overwrite unnecessarily
    if mask.all():
        return None

    return expressions.where(mask, this, that)
140+
141+
78142
def clean_fill_method(method, allow_nearest=False):
79143
# asfreq is compat for resampling
80144
if method in [None, 'asfreq']:

pandas/core/series.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2392,7 +2392,7 @@ def combine_first(self, other):
23922392
return this.where(notna(this), other)
23932393

23942394
def update(self, other, join='left', overwrite=True, filter_func=None,
2395-
raise_conflict=False):
2395+
errors='ignore'):
23962396
"""
23972397
Modify Series in place using non-NA values from passed Series.
23982398
@@ -2406,6 +2406,8 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
24062406
join : {'left'}, default 'left'
24072407
Only left join is implemented, keeping the index and columns of the
24082408
original object.
2409+
2410+
.. versionadded:: 0.24.0
24092411
overwrite : bool, default True
24102412
How to handle non-NA values for overlapping keys:
24112413
@@ -2414,13 +2416,18 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
24142416
* False: only update values that are NA in
24152417
the original DataFrame.
24162418
2419+
.. versionadded:: 0.24.0
24172420
filter_func : callable(1d-array) -> boolean 1d-array, optional
24182421
Can choose to replace values other than NA. Return True for values
24192422
that should be updated.
2420-
raise_conflict : bool, default False
2421-
If True, will raise a ValueError if the DataFrame and `other`
2423+
2424+
.. versionadded:: 0.24.0
2425+
errors : {'raise', 'ignore'}, default 'ignore'
2426+
If 'raise', will raise a ValueError if the DataFrame and `other`
24222427
both contain non-NA data in the same place.
24232428
2429+
.. versionadded:: 0.24.0
2430+
24242431
See Also
24252432
--------
24262433
DataFrame.update : Similar method for `DataFrame`.
@@ -2465,7 +2472,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
24652472
"""
24662473
super(Series, self).update(other, join=join, overwrite=overwrite,
24672474
filter_func=filter_func,
2468-
raise_conflict=raise_conflict)
2475+
errors=errors)
24692476

24702477
# ----------------------------------------------------------------------
24712478
# Reindexing, sorting

pandas/tests/frame/test_combine_concat.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,9 @@ def test_update_raise(self):
322322
other = DataFrame([[2., nan],
323323
[nan, 7]], index=[1, 3], columns=[1, 2])
324324
with pytest.raises(ValueError, match="Data overlaps"):
325-
df.update(other, raise_conflict=True)
325+
df.update(other, errors='raise')
326+
with tm.assert_produces_warning(FutureWarning):
327+
df.update(other, raise_conflict=False)
326328

327329
def test_update_from_non_df(self):
328330
d = {'a': Series([1, 2, 3, 4]), 'b': Series([5, 6, 7, 8])}

0 commit comments

Comments
 (0)