Skip to content

Commit e1abe77

Browse files
committed
Review (jreback)
1 parent 56f569b commit e1abe77

File tree

5 files changed

+104
-49
lines changed

5 files changed

+104
-49
lines changed

doc/source/whatsnew/v0.24.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,8 @@ Other Enhancements
281281
all instances of ``set`` will not be considered "list-like" anymore (:issue:`23061`)
282282
- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`).
283283
- New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`).
284-
- :meth:`Series.update` now supports the same keywords and functionality as :meth:`DataFrame.update` (:issue:`22358`)
284+
- :meth:`Series.update` now supports the same keywords and functionality as :meth:`DataFrame.update`.
285+
In particular, it has gained the keywords ``join``, ``overwrite``, ``filter_func`` and ``errors`` (:issue:`22358`).
285286
- Compatibility with Matplotlib 3.0 (:issue:`22790`).
286287
- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`)
287288
- :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`)

pandas/core/frame.py

+8
Original file line numberDiff line numberDiff line change
@@ -5203,6 +5203,14 @@ def combiner(x, y):
52035203

52045204
return self.combine(other, combiner, overwrite=False)
52055205

5206+
@Appender(NDFrame.update.__doc__)
@deprecate_kwarg(old_arg_name='raise_conflict', new_arg_name='errors',
                 mapping={False: 'ignore', True: 'raise'})
def update(self, other, join='left', overwrite=True, filter_func=None,
           errors='ignore'):
    # No docstring is written here on purpose: the Appender decorator above
    # is handed NDFrame.update.__doc__ to attach to this method.
    #
    # deprecate_kwarg keeps the old ``raise_conflict`` keyword usable by
    # translating it to ``errors`` (False -> 'ignore', True -> 'raise').
    #
    # The method itself only forwards every option, unchanged, to the
    # parent-class implementation and returns nothing.
    forwarded = dict(join=join, overwrite=overwrite,
                     filter_func=filter_func, errors=errors)
    super(DataFrame, self).update(other, **forwarded)
5213+
52065214
# ----------------------------------------------------------------------
52075215
# Data reshaping
52085216

pandas/core/generic.py

+18-43
Original file line numberDiff line numberDiff line change
@@ -101,32 +101,6 @@ def _single_replace(self, to_replace, method, inplace, limit):
101101
return result
102102

103103

104-
def _update_column(this, that, overwrite=True, filter_func=None,
105-
raise_conflict=False):
106-
import pandas.core.computation.expressions as expressions
107-
108-
if filter_func is not None:
109-
with np.errstate(all='ignore'):
110-
mask = ~filter_func(this) | isna(that)
111-
else:
112-
if raise_conflict:
113-
mask_this = notna(that)
114-
mask_that = notna(this)
115-
if any(mask_this & mask_that):
116-
raise ValueError("Data overlaps.")
117-
118-
if overwrite:
119-
mask = isna(that)
120-
else:
121-
mask = notna(this)
122-
123-
# don't overwrite columns unnecessarily
124-
if mask.all():
125-
return None
126-
127-
return expressions.where(mask, this, that)
128-
129-
130104
class NDFrame(PandasObject, SelectionMixin):
131105
"""
132106
N-dimensional analogue of DataFrame. Store multi-dimensional in a
@@ -4200,7 +4174,7 @@ def _reindex_with_indexers(self, reindexers, fill_value=None, copy=False,
42004174
return self._constructor(new_data).__finalize__(self)
42014175

42024176
def update(self, other, join='left', overwrite=True, filter_func=None,
4203-
raise_conflict=False):
4177+
errors='ignore'):
42044178
"""
42054179
Modify in place using non-NA values from another DataFrame.
42064180
@@ -4227,8 +4201,8 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
42274201
filter_func : callable(1d-array) -> boolean 1d-array, optional
42284202
Can choose to replace values other than NA. Return True for values
42294203
that should be updated.
4230-
raise_conflict : bool, default False
4231-
If True, will raise a ValueError if the DataFrame and `other`
4204+
errors : {'raise', 'ignore'}, default 'ignore'
4205+
If 'raise', will raise a ValueError if the DataFrame and `other`
42324206
both contain non-NA data in the same place.
42334207
42344208
Raises
@@ -4317,13 +4291,15 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
43174291
other = other.reindex_like(self)
43184292
this = self.values
43194293
that = other.values
4320-
updated = _update_column(this, that, overwrite=overwrite,
4321-
filter_func=filter_func,
4322-
raise_conflict=raise_conflict)
4323-
if updated is None:
4324-
# don't overwrite Series unnecessarily
4325-
return
4326-
self._data._block.values = updated
4294+
4295+
# missing.update_array returns an np.ndarray
4296+
updated_values = missing.update_array(this, that,
4297+
overwrite=overwrite,
4298+
filter_func=filter_func,
4299+
errors=errors)
4300+
# don't overwrite unnecessarily
4301+
if updated_values is not None:
4302+
self._update_inplace(Series(updated_values, index=self.index))
43274303
else: # DataFrame
43284304
if not isinstance(other, ABCDataFrame):
43294305
other = DataFrame(other)
@@ -4334,13 +4310,12 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
43344310
this = self[col].values
43354311
that = other[col].values
43364312

4337-
updated = _update_column(this, that, overwrite=overwrite,
4338-
filter_func=filter_func,
4339-
raise_conflict=raise_conflict)
4340-
# don't overwrite columns unnecessarily
4341-
if updated is None:
4342-
continue
4343-
self[col] = updated
4313+
updated = missing.update_array(this, that, overwrite=overwrite,
4314+
filter_func=filter_func,
4315+
errors=errors)
4316+
# don't overwrite unnecessarily
4317+
if updated is not None:
4318+
self[col] = updated
43444319

43454320
def filter(self, items=None, like=None, regex=None, axis=None):
43464321
"""

pandas/core/missing.py

+65-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
ensure_float64)
2222

2323
from pandas.core.dtypes.cast import infer_dtype_from_array
24-
from pandas.core.dtypes.missing import isna
24+
from pandas.core.dtypes.missing import isna, notna
2525

2626

2727
def mask_missing(arr, values_to_mask):
@@ -75,6 +75,70 @@ def mask_missing(arr, values_to_mask):
7575
return mask
7676

7777

78+
def update_array(this, that, overwrite=True, filter_func=None,
                 errors='ignore'):
    """
    Update one array with non-NA values from another array.

    Parameters
    ----------
    this : np.ndarray (one-dimensional)
        The array being updated.
    that : np.ndarray (one-dimensional)
        The array being used to update.
    overwrite : bool, default True
        How to handle non-NA values for overlapping keys:

        * True: overwrite original array's values with values from `that`.
        * False: only update values that are NA in `this`.

        Ignored when `filter_func` is given.
    filter_func : callable(1d-array) -> boolean 1d-array, optional
        Can choose to replace values other than NA. Return True for values
        that should be updated. Positions where `that` is NA are never
        updated, regardless of what `filter_func` returns.
    errors : {'raise', 'ignore'}, default 'ignore'
        If 'raise', will raise a ValueError if `this` and `that` both contain
        non-NA data in the same place. The overlap check is only performed
        when `filter_func` is None.

    Raises
    ------
    ValueError
        * When `errors` is neither 'ignore' nor 'raise'.
        * When `errors='raise'` and there's overlapping non-NA data.

    Returns
    -------
    updated : np.ndarray (one-dimensional) or None
        The updated array, or None if `this` remains unchanged.

    See Also
    --------
    Series.update : Similar method for `Series`.
    DataFrame.update : Similar method for `DataFrame`.
    dict.update : Similar method for `dict`.
    """
    import pandas.core.computation.expressions as expressions

    # Reject unknown values up front; otherwise a typo such as 'Raise' would
    # silently behave like 'ignore' and skip the overlap check.
    if errors not in ('ignore', 'raise'):
        raise ValueError("The parameter errors must be either "
                         "'ignore' or 'raise'")

    # ``keep`` is True wherever the value from `this` must be retained.
    if filter_func is not None:
        # the user-supplied filter may compare against NaN; silence the
        # resulting floating-point warnings
        with np.errstate(all='ignore'):
            keep = ~filter_func(this) | isna(that)
    else:
        if errors == 'raise':
            # vectorised check for positions where both sides hold data
            if (notna(this) & notna(that)).any():
                raise ValueError("Data overlaps.")

        if overwrite:
            keep = isna(that)
        else:
            keep = notna(this)

    # don't overwrite unnecessarily: nothing would change
    if keep.all():
        return None

    return expressions.where(keep, this, that)
140+
141+
78142
def clean_fill_method(method, allow_nearest=False):
79143
# asfreq is compat for resampling
80144
if method in [None, 'asfreq']:

pandas/core/series.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -2386,7 +2386,7 @@ def combine_first(self, other):
23862386
return this.where(notna(this), other)
23872387

23882388
def update(self, other, join='left', overwrite=True, filter_func=None,
2389-
raise_conflict=False):
2389+
errors='ignore'):
23902390
"""
23912391
Modify Series in place using non-NA values from passed Series.
23922392
@@ -2400,6 +2400,8 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
24002400
join : {'left'}, default 'left'
24012401
Only left join is implemented, keeping the index of the
24022402
original object.
2403+
2404+
.. versionadded:: 0.24.0
24032405
overwrite : bool, default True
24042406
How to handle non-NA values for overlapping keys:
24052407
@@ -2408,13 +2410,18 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
24082410
* False: only update values that are NA in
24092411
the original Series.
24102412
2413+
.. versionadded:: 0.24.0
24112414
filter_func : callable(1d-array) -> boolean 1d-array, optional
24122415
Can choose to replace values other than NA. Return True for values
24132416
that should be updated.
2414-
raise_conflict : bool, default False
2415-
If True, will raise a ValueError if the DataFrame and `other`
2417+
2418+
.. versionadded:: 0.24.0
2419+
errors : {'raise', 'ignore'}, default 'ignore'
2420+
If 'raise', will raise a ValueError if the Series and `other`
24162421
both contain non-NA data in the same place.
24172422
2423+
.. versionadded:: 0.24.0
2424+
24182425
See Also
24192426
--------
24202427
DataFrame.update : Similar method for `DataFrame`.
@@ -2459,7 +2466,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
24592466
"""
24602467
super(Series, self).update(other, join=join, overwrite=overwrite,
24612468
filter_func=filter_func,
2462-
raise_conflict=raise_conflict)
2469+
errors=errors)
24632470

24642471
# ----------------------------------------------------------------------
24652472
# Reindexing, sorting

0 commit comments

Comments
 (0)