Skip to content

Commit 42a089e

Browse files
authored
Merge branch 'master' into crosstable_margins_name
2 parents abe70e6 + d7962c5 commit 42a089e

13 files changed

+127
-15
lines changed

doc/source/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1738,6 +1738,7 @@ application to columns of a specific data type.
17381738
DataFrameGroupBy.diff
17391739
DataFrameGroupBy.ffill
17401740
DataFrameGroupBy.fillna
1741+
DataFrameGroupBy.filter
17411742
DataFrameGroupBy.hist
17421743
DataFrameGroupBy.idxmax
17431744
DataFrameGroupBy.idxmin

doc/source/whatsnew/v0.20.2.txt

+5
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,13 @@ Bug Fixes
3737
~~~~~~~~~
3838

3939
- Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`)
40+
- Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN`` values (:issue:`15593`)
4041

4142
Conversion
4243
^^^^^^^^^^
4344

4445
- Bug in ``pd.to_numeric()`` in which empty data inputs were causing Python to crash (:issue:`16302`)
46+
- Silence numpy warnings when broadcasting DataFrame to Series with comparison ops (:issue:`16378`, :issue:`16306`)
4547

4648

4749
Indexing
@@ -85,6 +87,7 @@ Reshaping
8587
- Bug in ``DataFrame.stack`` with unsorted levels in MultiIndex columns (:issue:`16323`)
8688
- Bug in ``pd.wide_to_long()`` where no error was raised when ``i`` was not a unique identifier (:issue:`16382`)
8789
- Bug in ``Series.isin(..)`` with a list of tuples (:issue:`16394`)
90+
- Bug in construction of a ``DataFrame`` with mixed dtypes including an all-NaT column. (:issue:`16395`)
8891

8992

9093
Numeric
@@ -98,3 +101,5 @@ Categorical
98101

99102
Other
100103
^^^^^
104+
105+
- Bug in ``DataFrame.drop([])`` for DataFrame with non-unique indices (:issue:`16270`)

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ Other Enhancements
3636
- :func:`to_pickle` has gained a protocol parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL <https://docs.python.org/3/library/pickle.html#data-stream-format>`__
3737
- :func:`api.types.infer_dtype` now infers decimals. (:issue:`15690`)
3838
- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`)
39+
- :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`)
3940

4041
.. _whatsnew_0210.api_breaking:
4142

pandas/core/dtypes/cast.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -837,7 +837,7 @@ def try_timedelta(v):
837837
try:
838838
return to_timedelta(v)._values.reshape(shape)
839839
except:
840-
return v
840+
return v.reshape(shape)
841841

842842
inferred_type = lib.infer_datetimelike_array(_ensure_object(v))
843843

pandas/core/frame.py

+42-7
Original file line numberDiff line numberDiff line change
@@ -2947,9 +2947,44 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
29472947
29482948
Examples
29492949
--------
2950-
>>> indexed_df = df.set_index(['A', 'B'])
2951-
>>> indexed_df2 = df.set_index(['A', [0, 1, 2, 0, 1, 2]])
2952-
>>> indexed_df3 = df.set_index([[0, 1, 2, 0, 1, 2]])
2950+
>>> df = pd.DataFrame({'month': [1, 4, 7, 10],
2951+
... 'year': [2012, 2014, 2013, 2014],
2952+
... 'sale': [55, 40, 84, 31]})
>>> df
2953+
month sale year
2954+
0 1 55 2012
2955+
1 4 40 2014
2956+
2 7 84 2013
2957+
3 10 31 2014
2958+
2959+
Set the index to become the 'month' column:
2960+
2961+
>>> df.set_index('month')
2962+
sale year
2963+
month
2964+
1 55 2012
2965+
4 40 2014
2966+
7 84 2013
2967+
10 31 2014
2968+
2969+
Create a multi-index using columns 'year' and 'month':
2970+
2971+
>>> df.set_index(['year', 'month'])
2972+
sale
2973+
year month
2974+
2012 1 55
2975+
2014 4 40
2976+
2013 7 84
2977+
2014 10 31
2978+
2979+
Create a multi-index using a set of values and a column:
2980+
2981+
>>> df.set_index([[1, 2, 3, 4], 'year'])
2982+
month sale
2983+
year
2984+
1 2012 1 55
2985+
2 2014 4 40
2986+
3 2013 7 84
2987+
4 2014 10 31
29532988
29542989
Returns
29552990
-------
@@ -3921,13 +3956,13 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
39213956

39223957
if overwrite:
39233958
mask = isnull(that)
3924-
3925-
# don't overwrite columns unecessarily
3926-
if mask.all():
3927-
continue
39283959
else:
39293960
mask = notnull(this)
39303961

3962+
# don't overwrite columns unnecessarily
3963+
if mask.all():
3964+
continue
3965+
39313966
self[col] = expressions.where(mask, this, that,
39323967
raise_on_error=True)
39333968

pandas/core/generic.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas._libs import tslib, lib
1313
from pandas.core.dtypes.common import (
1414
_ensure_int64,
15+
_ensure_object,
1516
needs_i8_conversion,
1617
is_scalar,
1718
is_number,
@@ -2076,7 +2077,7 @@ def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'):
20762077
result = dropped
20772078

20782079
else:
2079-
labels = com._index_labels_to_array(labels)
2080+
labels = _ensure_object(com._index_labels_to_array(labels))
20802081
if level is not None:
20812082
if not isinstance(axis, MultiIndex):
20822083
raise AssertionError('axis must be a MultiIndex')

pandas/core/ops.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1250,7 +1250,8 @@ def _flex_comp_method_FRAME(op, name, str_rep=None, default_axis='columns',
12501250
masker=False):
12511251
def na_op(x, y):
12521252
try:
1253-
result = op(x, y)
1253+
with np.errstate(invalid='ignore'):
1254+
result = op(x, y)
12541255
except TypeError:
12551256
xrav = x.ravel()
12561257
result = np.empty(x.size, dtype=bool)

pandas/io/feather_format.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def to_feather(df, path):
4343
df : DataFrame
4444
path : string
4545
File path
46+
4647
"""
4748
path = _stringify_path(path)
4849
if not isinstance(df, DataFrame):
@@ -83,7 +84,7 @@ def to_feather(df, path):
8384
feather.write_dataframe(df, path)
8485

8586

86-
def read_feather(path):
87+
def read_feather(path, nthreads=1):
8788
"""
8889
Load a feather-format object from the file path
8990
@@ -93,6 +94,10 @@ def read_feather(path):
9394
----------
9495
path : string
9596
File path
97+
nthreads : int, default 1
98+
Number of CPU threads to use when reading to pandas.DataFrame
99+
100+
.. versionadded:: 0.21.0
96101
97102
Returns
98103
-------
@@ -102,4 +107,8 @@ def read_feather(path):
102107

103108
feather = _try_import()
104109
path = _stringify_path(path)
105-
return feather.read_dataframe(path)
110+
111+
if feather.__version__ < LooseVersion('0.4.0'):
112+
return feather.read_dataframe(path)
113+
114+
return feather.read_dataframe(path, nthreads=nthreads)

pandas/tests/dtypes/test_cast.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from datetime import datetime, timedelta, date
1010
import numpy as np
1111

12-
from pandas import Timedelta, Timestamp, DatetimeIndex
12+
from pandas import Timedelta, Timestamp, DatetimeIndex, DataFrame, NaT
1313

1414
from pandas.core.dtypes.cast import (
1515
maybe_downcast_to_dtype,
@@ -213,6 +213,17 @@ def test_maybe_convert_scalar(self):
213213
result = maybe_convert_scalar(Timedelta('1 day 1 min'))
214214
assert result == Timedelta('1 day 1 min').value
215215

216+
def test_maybe_infer_to_datetimelike(self):
217+
# GH16362
218+
# pandas=0.20.1 raises IndexError: tuple index out of range
219+
result = DataFrame(np.array([[NaT, 'a', 'b', 0],
220+
[NaT, 'b', 'c', 1]]))
221+
assert result.size == 8
222+
# this construction was fine
223+
result = DataFrame(np.array([[NaT, 'a', 0],
224+
[NaT, 'b', 1]]))
225+
assert result.size == 6
226+
216227

217228
class TestConvert(object):
218229

pandas/tests/frame/test_analytics.py

+13
Original file line numberDiff line numberDiff line change
@@ -2081,3 +2081,16 @@ def test_n_duplicate_index(self, df_duplicates, n, order):
20812081
result = df.nlargest(n, order)
20822082
expected = df.sort_values(order, ascending=False).head(n)
20832083
tm.assert_frame_equal(result, expected)
2084+
2085+
def test_series_broadcasting(self):
2086+
# smoke test for numpy warnings
2087+
# GH 16378, GH 16306
2088+
df = DataFrame([1.0, 1.0, 1.0])
2089+
df_nan = DataFrame({'A': [np.nan, 2.0, np.nan]})
2090+
s = Series([1, 1, 1])
2091+
s_nan = Series([np.nan, np.nan, 1])
2092+
2093+
with tm.assert_produces_warning(None):
2094+
df_nan.clip_lower(s, axis=0)
2095+
for op in ['lt', 'le', 'gt', 'ge', 'eq', 'ne']:
2096+
getattr(df, op)(s_nan, axis=0)

pandas/tests/frame/test_axis_select_reindex.py

+6
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ def test_drop_names(self):
6161
expected = Index(['e', 'f'], name='second')
6262
tm.assert_index_equal(dropped.columns, expected)
6363

64+
# GH 16398
65+
dropped = df.drop([], errors='ignore')
66+
expected = Index(['a', 'b', 'c'], name='first')
67+
tm.assert_index_equal(dropped.index, expected)
68+
6469
def test_drop_col_still_multiindex(self):
6570
arrays = [['a', 'b', 'c', 'top'],
6671
['', '', '', 'OD'],
@@ -100,6 +105,7 @@ def test_drop(self):
100105
columns=['a', 'a', 'b'])
101106
assert_frame_equal(nu_df.drop('a', axis=1), nu_df[['b']])
102107
assert_frame_equal(nu_df.drop('b', axis='columns'), nu_df['a'])
108+
assert_frame_equal(nu_df.drop([]), nu_df) # GH 16398
103109

104110
nu_df = nu_df.set_index(pd.Index(['X', 'Y', 'X']))
105111
nu_df.columns = list('abc')

pandas/tests/frame/test_combine_concat.py

+23
Original file line numberDiff line numberDiff line change
@@ -763,3 +763,26 @@ def test_concat_datetime_datetime64_frame(self):
763763

764764
# it works!
765765
pd.concat([df1, df2_obj])
766+
767+
768+
class TestDataFrameUpdate(TestData):
769+
770+
def test_update_nan(self):
771+
# #15593 #15617
772+
# test 1
773+
df1 = DataFrame({'A': [1.0, 2, 3], 'B': date_range('2000', periods=3)})
774+
df2 = DataFrame({'A': [None, 2, 3]})
775+
expected = df1.copy()
776+
df1.update(df2, overwrite=False)
777+
778+
tm.assert_frame_equal(df1, expected)
779+
780+
# test 2
781+
df1 = DataFrame({'A': [1.0, None, 3],
782+
'B': date_range('2000', periods=3)})
783+
df2 = DataFrame({'A': [None, 2, 3]})
784+
expected = DataFrame({'A': [1.0, 2, 3],
785+
'B': date_range('2000', periods=3)})
786+
df1.update(df2, overwrite=False)
787+
788+
tm.assert_frame_equal(df1, expected)

pandas/tests/io/test_feather.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ def check_error_on_write(self, df, exc):
2727
with ensure_clean() as path:
2828
to_feather(df, path)
2929

30-
def check_round_trip(self, df):
30+
def check_round_trip(self, df, **kwargs):
3131

3232
with ensure_clean() as path:
3333
to_feather(df, path)
34-
result = read_feather(path)
34+
result = read_feather(path, **kwargs)
3535
assert_frame_equal(result, df)
3636

3737
def test_error(self):
@@ -98,6 +98,12 @@ def test_unsupported_other(self):
9898
df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)})
9999
self.check_error_on_write(df, ValueError)
100100

101+
@pytest.mark.skipif(fv < '0.4.0', reason='new in 0.4.0')
102+
def test_rw_nthreads(self):
103+
104+
df = pd.DataFrame({'A': np.arange(100000)})
105+
self.check_round_trip(df, nthreads=2)
106+
101107
def test_write_with_index(self):
102108

103109
df = pd.DataFrame({'A': [1, 2, 3]})

0 commit comments

Comments
 (0)