Skip to content

Commit 09ad308

Browse files
committed
Merge remote-tracking branch 'upstream/master' into tst_cln_str_cat
2 parents 54721b9 + a5fe9cf commit 09ad308

File tree

11 files changed

+131
-76
lines changed

11 files changed

+131
-76
lines changed

doc/source/whatsnew/v0.24.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,7 @@ Removal of prior version deprecations/changes
527527
- Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`)
528528
- Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`)
529529
- Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`)
530+
- :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats`` (:issue:`14645`)
530531
-
531532

532533
.. _whatsnew_0240.performance:
@@ -707,6 +708,7 @@ Groupby/Resample/Rolling
707708
- Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'`` and a
708709
datetime-like index leading to incorrect results and also segfault. (:issue:`21704`)
709710
- Bug in :meth:`Resampler.apply` when passing positional arguments to applied func (:issue:`14615`).
711+
- Bug in :meth:`Series.resample` when passing ``numpy.timedelta64`` to ``loffset`` kwarg (:issue:`7687`).
710712

711713
Sparse
712714
^^^^^^

pandas/core/frame.py

+50-24
Original file line numberDiff line numberDiff line change
@@ -138,12 +138,11 @@
138138
"""
139139

140140
_merge_doc = """
141-
Merge DataFrame or named Series objects by performing a database-style join
142-
operation by columns or indexes.
141+
Merge DataFrame or named Series objects with a database-style join.
143142
144-
If joining columns on columns, the DataFrame indexes *will be
145-
ignored*. Otherwise if joining indexes on indexes or indexes on a column or
146-
columns, the index will be passed on.
143+
The join is done on columns or indexes. If joining columns on
144+
columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes
145+
on indexes or indexes on a column or columns, the index will be passed on.
147146
148147
Parameters
149148
----------%s
@@ -153,13 +152,13 @@
153152
Type of merge to be performed.
154153
155154
* left: use only keys from left frame, similar to a SQL left outer join;
156-
preserve key order
155+
preserve key order.
157156
* right: use only keys from right frame, similar to a SQL right outer join;
158-
preserve key order
157+
preserve key order.
159158
* outer: use union of keys from both frames, similar to a SQL full outer
160-
join; sort keys lexicographically
159+
join; sort keys lexicographically.
161160
* inner: use intersection of keys from both frames, similar to a SQL inner
162-
join; preserve the order of the left keys
161+
join; preserve the order of the left keys.
163162
on : label or list
164163
Column or index level names to join on. These must be found in both
165164
DataFrames. If `on` is None and not merging on indexes then this defaults
@@ -172,22 +171,23 @@
172171
Column or index level names to join on in the right DataFrame. Can also
173172
be an array or list of arrays of the length of the right DataFrame.
174173
These arrays are treated as if they are columns.
175-
left_index : boolean, default False
174+
left_index : bool, default False
176175
Use the index from the left DataFrame as the join key(s). If it is a
177176
MultiIndex, the number of keys in the other DataFrame (either the index
178177
or a number of columns) must match the number of levels.
179-
right_index : boolean, default False
178+
right_index : bool, default False
180179
Use the index from the right DataFrame as the join key. Same caveats as
181180
left_index.
182-
sort : boolean, default False
181+
sort : bool, default False
183182
Sort the join keys lexicographically in the result DataFrame. If False,
184183
the order of the join keys depends on the join type (how keyword).
185-
suffixes : 2-length sequence (tuple, list, ...)
184+
suffixes : tuple of (str, str), default ('_x', '_y')
186185
Suffix to apply to overlapping column names in the left and right
187-
side, respectively.
188-
copy : boolean, default True
186+
side, respectively. To raise an exception on overlapping columns use
187+
(False, False).
188+
copy : bool, default True
189189
If False, avoid copy if possible.
190-
indicator : boolean or string, default False
190+
indicator : bool or str, default False
191191
If True, adds a column to output DataFrame called "_merge" with
192192
information on the source of each row.
193193
If string, column with information on source of each row will be added to
@@ -197,7 +197,7 @@
197197
"right_only" for observations whose merge key only appears in 'right'
198198
DataFrame, and "both" if the observation's merge key is found in both.
199199
200-
validate : string, default None
200+
validate : str, optional
201201
If specified, checks if merge is of specified type.
202202
203203
* "one_to_one" or "1:1": check if merge keys are unique in both
@@ -213,6 +213,7 @@
213213
Returns
214214
-------
215215
DataFrame
216+
A DataFrame of the two merged objects.
216217
217218
Notes
218219
-----
@@ -229,31 +230,56 @@
229230
Examples
230231
--------
231232
232-
>>> A = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'],
233-
... 'value': [1, 2, 3, 5]})
234-
>>> B = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'],
235-
... 'value': [5, 6, 7, 8]})
236-
>>> A
233+
>>> df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'],
234+
... 'value': [1, 2, 3, 5]})
235+
>>> df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'],
236+
... 'value': [5, 6, 7, 8]})
237+
>>> df1
237238
lkey value
238239
0 foo 1
239240
1 bar 2
240241
2 baz 3
241242
3 foo 5
242-
>>> B
243+
>>> df2
243244
rkey value
244245
0 foo 5
245246
1 bar 6
246247
2 baz 7
247248
3 foo 8
248249
249-
>>> A.merge(B, left_on='lkey', right_on='rkey', how='outer')
250+
Merge df1 and df2 on the lkey and rkey columns. The value columns have
251+
the default suffixes, _x and _y, appended.
252+
253+
>>> df1.merge(df2, left_on='lkey', right_on='rkey')
250254
lkey value_x rkey value_y
251255
0 foo 1 foo 5
252256
1 foo 1 foo 8
253257
2 foo 5 foo 5
254258
3 foo 5 foo 8
255259
4 bar 2 bar 6
256260
5 baz 3 baz 7
261+
262+
Merge DataFrames df1 and df2 with specified left and right suffixes
263+
appended to any overlapping columns.
264+
265+
>>> df1.merge(df2, left_on='lkey', right_on='rkey',
266+
... suffixes=('_left', '_right'))
267+
lkey value_left rkey value_right
268+
0 foo 1 foo 5
269+
1 foo 1 foo 8
270+
2 foo 5 foo 5
271+
3 foo 5 foo 8
272+
4 bar 2 bar 6
273+
5 baz 3 baz 7
274+
275+
Merge DataFrames df1 and df2, but raise an exception if the DataFrames have
276+
any overlapping columns.
277+
278+
>>> df1.merge(df2, left_on='lkey', right_on='rkey', suffixes=(False, False))
279+
Traceback (most recent call last):
280+
...
281+
ValueError: columns overlap but no suffix specified:
282+
Index(['value'], dtype='object')
257283
"""
258284

259285
# -----------------------------------------------------------------------

pandas/core/indexes/base.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
import pandas.core.common as com
5454
from pandas.core import ops
5555
from pandas.util._decorators import (
56-
Appender, Substitution, cache_readonly, deprecate_kwarg)
56+
Appender, Substitution, cache_readonly)
5757
from pandas.core.indexes.frozen import FrozenList
5858
import pandas.core.dtypes.concat as _concat
5959
import pandas.core.missing as missing
@@ -773,7 +773,6 @@ def memory_usage(self, deep=False):
773773
return result
774774

775775
# ops compat
776-
@deprecate_kwarg(old_arg_name='n', new_arg_name='repeats')
777776
def repeat(self, repeats, *args, **kwargs):
778777
"""
779778
Repeat elements of an Index.

pandas/core/indexes/multi.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from pandas.core.dtypes.missing import isna, array_equivalent
2828
from pandas.errors import PerformanceWarning, UnsortedIndexError
2929

30-
from pandas.util._decorators import Appender, cache_readonly, deprecate_kwarg
30+
from pandas.util._decorators import Appender, cache_readonly
3131
import pandas.core.common as com
3232
import pandas.core.missing as missing
3333
import pandas.core.algorithms as algos
@@ -1646,7 +1646,6 @@ def append(self, other):
16461646
def argsort(self, *args, **kwargs):
16471647
return self.values.argsort(*args, **kwargs)
16481648

1649-
@deprecate_kwarg(old_arg_name='n', new_arg_name='repeats')
16501649
def repeat(self, repeats, *args, **kwargs):
16511650
nv.validate_repeat(args, kwargs)
16521651
return MultiIndex(levels=self.levels,

pandas/core/resample.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,8 @@ def _apply_loffset(self, result):
366366
"""
367367

368368
needs_offset = (
369-
isinstance(self.loffset, (DateOffset, timedelta)) and
369+
isinstance(self.loffset, (DateOffset, timedelta,
370+
np.timedelta64)) and
370371
isinstance(result.index, DatetimeIndex) and
371372
len(result.index) > 0
372373
)

pandas/core/strings.py

+16-10
Original file line numberDiff line numberDiff line change
@@ -558,7 +558,10 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
558558
559559
Returns
560560
-------
561-
replaced : Series/Index of objects
561+
Series or Index of object
562+
A copy of the object with all matching occurrences of `pat` replaced by
563+
`repl`.
564+
562565
563566
Raises
564567
------
@@ -854,8 +857,9 @@ def str_extract(arr, pat, flags=0, expand=True):
854857
pat : string
855858
Regular expression pattern with capturing groups.
856859
flags : int, default 0 (no flags)
857-
``re`` module flags, e.g. ``re.IGNORECASE``.
858-
See :mod:`re`
860+
Flags from the ``re`` module, e.g. ``re.IGNORECASE``, that
861+
modify regular expression matching for things like case,
862+
spaces, etc. For more details, see :mod:`re`.
859863
expand : bool, default True
860864
If True, return DataFrame with one column per capture group.
861865
If False, return a Series/Index if there is one capture group
@@ -865,13 +869,15 @@ def str_extract(arr, pat, flags=0, expand=True):
865869
866870
Returns
867871
-------
868-
DataFrame with one row for each subject string, and one column for
869-
each group. Any capture group names in regular expression pat will
870-
be used for column names; otherwise capture group numbers will be
871-
used. The dtype of each result column is always object, even when
872-
no match is found. If expand=False and pat has only one capture group,
873-
then return a Series (if subject is a Series) or Index (if subject
874-
is an Index).
872+
DataFrame or Series or Index
873+
A DataFrame with one row for each subject string, and one
874+
column for each group. Any capture group names in regular
875+
expression pat will be used for column names; otherwise
876+
capture group numbers will be used. The dtype of each result
877+
column is always object, even when no match is found. If
878+
``expand=False`` and pat has only one capture group, then
879+
return a Series (if subject is a Series) or Index (if subject
880+
is an Index).
875881
876882
See Also
877883
--------

pandas/tests/indexes/multi/test_reshape.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,6 @@ def test_repeat():
100100
numbers, names.repeat(reps)], names=names)
101101
tm.assert_index_equal(m.repeat(reps), expected)
102102

103-
with tm.assert_produces_warning(FutureWarning):
104-
result = m.repeat(n=reps)
105-
tm.assert_index_equal(result, expected)
106-
107103

108104
def test_insert_base(idx):
109105

@@ -126,5 +122,5 @@ def test_delete_base(idx):
126122
assert result.name == expected.name
127123

128124
with pytest.raises((IndexError, ValueError)):
129-
# either depending on numpy version
130-
result = idx.delete(len(idx))
125+
# Exception raised depends on NumPy version.
126+
idx.delete(len(idx))

pandas/tests/indexes/test_base.py

-9
Original file line numberDiff line numberDiff line change
@@ -2402,15 +2402,6 @@ def test_repeat(self):
24022402
result = index.repeat(repeats)
24032403
tm.assert_index_equal(result, expected)
24042404

2405-
def test_repeat_warns_n_keyword(self):
2406-
index = pd.Index([1, 2, 3])
2407-
expected = pd.Index([1, 1, 2, 2, 3, 3])
2408-
2409-
with tm.assert_produces_warning(FutureWarning):
2410-
result = index.repeat(n=2)
2411-
2412-
tm.assert_index_equal(result, expected)
2413-
24142405
@pytest.mark.parametrize("index", [
24152406
pd.Index([np.nan]), pd.Index([np.nan, 1]),
24162407
pd.Index([1, 2, np.nan]), pd.Index(['a', 'b', np.nan]),

pandas/tests/test_resample.py

+6-13
Original file line numberDiff line numberDiff line change
@@ -1173,27 +1173,20 @@ def test_resample_frame_basic(self):
11731173
df.resample('M', kind='period').mean()
11741174
df.resample('W-WED', kind='period').mean()
11751175

1176-
def test_resample_loffset(self):
1176+
@pytest.mark.parametrize('loffset', [timedelta(minutes=1),
1177+
'1min', Minute(1),
1178+
np.timedelta64(1, 'm')])
1179+
def test_resample_loffset(self, loffset):
1180+
# GH 7687
11771181
rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min')
11781182
s = Series(np.random.randn(14), index=rng)
11791183

11801184
result = s.resample('5min', closed='right', label='right',
1181-
loffset=timedelta(minutes=1)).mean()
1185+
loffset=loffset).mean()
11821186
idx = date_range('1/1/2000', periods=4, freq='5min')
11831187
expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
11841188
index=idx + timedelta(minutes=1))
11851189
assert_series_equal(result, expected)
1186-
1187-
expected = s.resample(
1188-
'5min', closed='right', label='right',
1189-
loffset='1min').mean()
1190-
assert_series_equal(result, expected)
1191-
1192-
expected = s.resample(
1193-
'5min', closed='right', label='right',
1194-
loffset=Minute(1)).mean()
1195-
assert_series_equal(result, expected)
1196-
11971190
assert result.index.freq == Minute(5)
11981191

11991192
# from daily

0 commit comments

Comments
 (0)