Skip to content

Commit 16ca125

Browse files
committed
API: Unify .update to generic
1 parent 913f71f commit 16ca125

File tree

4 files changed

+193
-146
lines changed

4 files changed

+193
-146
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ Other Enhancements
200200
- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`).
201201
- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`).
202202
- New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`).
203+
- :meth:`Series.update` now supports the same keywords and functionality as :meth:`DataFrame.update` (:issue:`22358`)
203204
- Compatibility with Matplotlib 3.0 (:issue:`22790`).
204205

205206
.. _whatsnew_0240.api_breaking:

pandas/core/frame.py

-136
Original file line numberDiff line numberDiff line change
@@ -5198,142 +5198,6 @@ def combiner(x, y, needs_i8_conversion=False):
51985198

51995199
return self.combine(other, combiner, overwrite=False)
52005200

5201-
def update(self, other, join='left', overwrite=True, filter_func=None,
5202-
raise_conflict=False):
5203-
"""
5204-
Modify in place using non-NA values from another DataFrame.
5205-
5206-
Aligns on indices. There is no return value.
5207-
5208-
Parameters
5209-
----------
5210-
other : DataFrame, or object coercible into a DataFrame
5211-
Should have at least one matching index/column label
5212-
with the original DataFrame. If a Series is passed,
5213-
its name attribute must be set, and that will be
5214-
used as the column name to align with the original DataFrame.
5215-
join : {'left'}, default 'left'
5216-
Only left join is implemented, keeping the index and columns of the
5217-
original object.
5218-
overwrite : bool, default True
5219-
How to handle non-NA values for overlapping keys:
5220-
5221-
* True: overwrite original DataFrame's values
5222-
with values from `other`.
5223-
* False: only update values that are NA in
5224-
the original DataFrame.
5225-
5226-
filter_func : callable(1d-array) -> boolean 1d-array, optional
5227-
Can choose to replace values other than NA. Return True for values
5228-
that should be updated.
5229-
raise_conflict : bool, default False
5230-
If True, will raise a ValueError if the DataFrame and `other`
5231-
both contain non-NA data in the same place.
5232-
5233-
Raises
5234-
------
5235-
ValueError
5236-
When `raise_conflict` is True and there's overlapping non-NA data.
5237-
5238-
See Also
5239-
--------
5240-
dict.update : Similar method for dictionaries.
5241-
DataFrame.merge : For column(s)-on-columns(s) operations.
5242-
5243-
Examples
5244-
--------
5245-
>>> df = pd.DataFrame({'A': [1, 2, 3],
5246-
... 'B': [400, 500, 600]})
5247-
>>> new_df = pd.DataFrame({'B': [4, 5, 6],
5248-
... 'C': [7, 8, 9]})
5249-
>>> df.update(new_df)
5250-
>>> df
5251-
A B
5252-
0 1 4
5253-
1 2 5
5254-
2 3 6
5255-
5256-
The DataFrame's length does not increase as a result of the update,
5257-
only values at matching index/column labels are updated.
5258-
5259-
>>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
5260-
... 'B': ['x', 'y', 'z']})
5261-
>>> new_df = pd.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']})
5262-
>>> df.update(new_df)
5263-
>>> df
5264-
A B
5265-
0 a d
5266-
1 b e
5267-
2 c f
5268-
5269-
For Series, it's name attribute must be set.
5270-
5271-
>>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
5272-
... 'B': ['x', 'y', 'z']})
5273-
>>> new_column = pd.Series(['d', 'e'], name='B', index=[0, 2])
5274-
>>> df.update(new_column)
5275-
>>> df
5276-
A B
5277-
0 a d
5278-
1 b y
5279-
2 c e
5280-
>>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
5281-
... 'B': ['x', 'y', 'z']})
5282-
>>> new_df = pd.DataFrame({'B': ['d', 'e']}, index=[1, 2])
5283-
>>> df.update(new_df)
5284-
>>> df
5285-
A B
5286-
0 a x
5287-
1 b d
5288-
2 c e
5289-
5290-
If `other` contains NaNs the corresponding values are not updated
5291-
in the original dataframe.
5292-
5293-
>>> df = pd.DataFrame({'A': [1, 2, 3],
5294-
... 'B': [400, 500, 600]})
5295-
>>> new_df = pd.DataFrame({'B': [4, np.nan, 6]})
5296-
>>> df.update(new_df)
5297-
>>> df
5298-
A B
5299-
0 1 4.0
5300-
1 2 500.0
5301-
2 3 6.0
5302-
"""
5303-
import pandas.core.computation.expressions as expressions
5304-
# TODO: Support other joins
5305-
if join != 'left': # pragma: no cover
5306-
raise NotImplementedError("Only left join is supported")
5307-
5308-
if not isinstance(other, DataFrame):
5309-
other = DataFrame(other)
5310-
5311-
other = other.reindex_like(self)
5312-
5313-
for col in self.columns:
5314-
this = self[col].values
5315-
that = other[col].values
5316-
if filter_func is not None:
5317-
with np.errstate(all='ignore'):
5318-
mask = ~filter_func(this) | isna(that)
5319-
else:
5320-
if raise_conflict:
5321-
mask_this = notna(that)
5322-
mask_that = notna(this)
5323-
if any(mask_this & mask_that):
5324-
raise ValueError("Data overlaps.")
5325-
5326-
if overwrite:
5327-
mask = isna(that)
5328-
else:
5329-
mask = notna(this)
5330-
5331-
# don't overwrite columns unecessarily
5332-
if mask.all():
5333-
continue
5334-
5335-
self[col] = expressions.where(mask, this, that)
5336-
53375201
# ----------------------------------------------------------------------
53385202
# Data reshaping
53395203

pandas/core/generic.py

+164
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,32 @@ def _single_replace(self, to_replace, method, inplace, limit):
9999
return result
100100

101101

102+
def _update_column(this, that, overwrite=True, filter_func=None,
103+
raise_conflict=False):
104+
import pandas.core.computation.expressions as expressions
105+
106+
if filter_func is not None:
107+
with np.errstate(all='ignore'):
108+
mask = ~filter_func(this) | isna(that)
109+
else:
110+
if raise_conflict:
111+
mask_this = notna(that)
112+
mask_that = notna(this)
113+
if any(mask_this & mask_that):
114+
raise ValueError("Data overlaps.")
115+
116+
if overwrite:
117+
mask = isna(that)
118+
else:
119+
mask = notna(this)
120+
121+
# don't overwrite columns unnecessarily
122+
if mask.all():
123+
return None
124+
125+
return expressions.where(mask, this, that)
126+
127+
102128
class NDFrame(PandasObject, SelectionMixin):
103129
"""
104130
N-dimensional analogue of DataFrame. Store multi-dimensional in a
@@ -4079,6 +4105,144 @@ def _reindex_axis(self, new_index, fill_method, axis, copy):
40794105
else:
40804106
return self._constructor(new_data).__finalize__(self)
40814107

4108+
def update(self, other, join='left', overwrite=True, filter_func=None,
           raise_conflict=False):
    """
    Modify in place using non-NA values from another object.

    Aligns on indices. There is no return value.

    Parameters
    ----------
    other : Series/DataFrame, or object coercible into one
        When the caller is a Series, `other` is coerced into a Series;
        otherwise it is coerced into a DataFrame. Should have at least
        one matching index/column label with the original object. If a
        Series is passed to a DataFrame, its name attribute must be set,
        and that will be used as the column name to align with the
        original DataFrame.
    join : {'left'}, default 'left'
        Only left join is implemented, keeping the index and columns of the
        original object.
    overwrite : bool, default True
        How to handle non-NA values for overlapping keys:

        * True: overwrite the original object's values
          with values from `other`.
        * False: only update values that are NA in
          the original object.

    filter_func : callable(1d-array) -> boolean 1d-array, optional
        Can choose to replace values other than NA. Return True for values
        that should be updated.
    raise_conflict : bool, default False
        If True, will raise a ValueError if the original object and `other`
        both contain non-NA data in the same place.

    Raises
    ------
    ValueError
        When `raise_conflict` is True and there's overlapping non-NA data.
    NotImplementedError
        When `join` is anything other than 'left'.

    See Also
    --------
    dict.update : Similar method for dictionaries.
    DataFrame.merge : For column(s)-on-columns(s) operations.

    Examples
    --------
    >>> df = pd.DataFrame({'A': [1, 2, 3],
    ...                    'B': [400, 500, 600]})
    >>> new_df = pd.DataFrame({'B': [4, 5, 6],
    ...                        'C': [7, 8, 9]})
    >>> df.update(new_df)
    >>> df
       A  B
    0  1  4
    1  2  5
    2  3  6

    The DataFrame's length does not increase as a result of the update,
    only values at matching index/column labels are updated.

    >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
    ...                    'B': ['x', 'y', 'z']})
    >>> new_df = pd.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']})
    >>> df.update(new_df)
    >>> df
       A  B
    0  a  d
    1  b  e
    2  c  f

    For Series, its name attribute must be set.

    >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
    ...                    'B': ['x', 'y', 'z']})
    >>> new_column = pd.Series(['d', 'e'], name='B', index=[0, 2])
    >>> df.update(new_column)
    >>> df
       A  B
    0  a  d
    1  b  y
    2  c  e
    >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
    ...                    'B': ['x', 'y', 'z']})
    >>> new_df = pd.DataFrame({'B': ['d', 'e']}, index=[1, 2])
    >>> df.update(new_df)
    >>> df
       A  B
    0  a  x
    1  b  d
    2  c  e

    If `other` contains NaNs the corresponding values are not updated
    in the original dataframe.

    >>> df = pd.DataFrame({'A': [1, 2, 3],
    ...                    'B': [400, 500, 600]})
    >>> new_df = pd.DataFrame({'B': [4, np.nan, 6]})
    >>> df.update(new_df)
    >>> df
       A      B
    0  1    4.0
    1  2  500.0
    2  3    6.0
    """
    from pandas import Series, DataFrame
    # TODO: Support other joins
    if join != 'left':  # pragma: no cover
        raise NotImplementedError("Only left join is supported")

    if isinstance(self, ABCSeries):
        if not isinstance(other, ABCSeries):
            other = Series(other)
        # left join: align `other` to the caller's index
        other = other.reindex_like(self)
        this = self.values
        that = other.values
        updated = _update_column(this, that, overwrite=overwrite,
                                 filter_func=filter_func,
                                 raise_conflict=raise_conflict)
        if updated is None:
            # don't overwrite Series unnecessarily
            return
        self._data._block.values = updated
        # The values were swapped in place; notify any cacher of the
        # change, as the earlier Series-specific implementation did.
        self._maybe_update_cacher()
    else:  # DataFrame
        if not isinstance(other, ABCDataFrame):
            other = DataFrame(other)

        # left join: align `other` to the caller's index and columns
        other = other.reindex_like(self)

        for col in self.columns:
            this = self[col].values
            that = other[col].values

            updated = _update_column(this, that, overwrite=overwrite,
                                     filter_func=filter_func,
                                     raise_conflict=raise_conflict)
            # don't overwrite columns unnecessarily
            if updated is None:
                continue
            self[col] = updated
40824246
def filter(self, items=None, like=None, regex=None, axis=None):
40834247
"""
40844248
Subset rows or columns of dataframe according to labels in

pandas/core/series.py

+28-10
Original file line numberDiff line numberDiff line change
@@ -2377,14 +2377,35 @@ def combine_first(self, other):
23772377

23782378
return this.where(notna(this), other)
23792379

2380-
def update(self, other):
2380+
def update(self, other, join='left', overwrite=True, filter_func=None,
2381+
raise_conflict=False):
23812382
"""
2382-
Modify Series in place using non-NA values from passed
2383-
Series. Aligns on index
2383+
Modify Series in place using non-NA values from passed Series.
2384+
2385+
Aligns on index.
23842386
23852387
Parameters
23862388
----------
2387-
other : Series
2389+
other : Series, or object coercible into a Series
2390+
Should have at least one matching index label with the calling
2391+
Series.
2392+
join : {'left'}, default 'left'
2393+
Only left join is implemented, keeping the index of the
2394+
original object.
2395+
overwrite : bool, default True
2396+
How to handle non-NA values for overlapping keys:
2397+
2398+
* True: overwrite the original Series' values
2399+
with values from `other`.
2400+
* False: only update values that are NA in
2401+
the original Series.
2402+
2403+
filter_func : callable(1d-array) -> boolean 1d-array, optional
2404+
Can choose to replace values other than NA. Return True for values
2405+
that should be updated.
2406+
raise_conflict : bool, default False
2407+
If True, will raise a ValueError if the Series and `other`
2408+
both contain non-NA data in the same place.
23882409
23892410
Examples
23902411
--------
@@ -2422,13 +2443,10 @@ def update(self, other):
24222443
1 2
24232444
2 6
24242445
dtype: int64
2425-
24262446
"""
2427-
other = other.reindex_like(self)
2428-
mask = notna(other)
2429-
2430-
self._data = self._data.putmask(mask=mask, new=other, inplace=True)
2431-
self._maybe_update_cacher()
2447+
# The bound super() proxy already supplies ``self``; passing it again
# would shift every argument by one position (``other`` into ``join``),
# so the call could never get past the ``join != 'left'`` check.
super(Series, self).update(other, join=join, overwrite=overwrite,
                           filter_func=filter_func,
                           raise_conflict=raise_conflict)
24322450

24332451
# ----------------------------------------------------------------------
24342452
# Reindexing, sorting

0 commit comments

Comments
 (0)