Skip to content

Commit e7ff318

Browse files
committed
Merge pull request #11588 from sinhrks/test_idxtype
TST: Enable Index dtype comparison by default
2 parents e29bf61 + 7ecb409 commit e7ff318

21 files changed

+378
-256
lines changed

doc/source/whatsnew/v0.17.1.txt

+3
Original file line numberDiff line numberDiff line change
@@ -156,3 +156,6 @@ Bug Fixes
156156
- Bug in the link-time error caused by C ``inline`` functions on FreeBSD 10+ (with ``clang``) (:issue:`10510`)
157157
- Bug in ``DataFrame.to_csv`` in passing through arguments for formatting ``MultiIndexes``, including ``date_format`` (:issue:`7791`)
158158
- Bug in ``DataFrame.join()`` with ``how='right'`` producing a ``TypeError`` (:issue:`11519`)
159+
- Bug in ``Series.quantile`` where passing an empty list returned a result whose ``Index`` had ``object`` dtype (:issue:`11588`)
160+
- Bug in ``pd.merge`` where an empty merge result had an empty ``Int64Index`` rather than ``Index(dtype=object)`` (:issue:`11588`)
161+

pandas/core/generic.py

+11-11
Original file line numberDiff line numberDiff line change
@@ -1779,7 +1779,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
17791779
avoid duplicating data
17801780
method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional
17811781
method to use for filling holes in reindexed DataFrame.
1782-
Please note: this is only applicable to DataFrames/Series with a
1782+
Please note: this is only applicable to DataFrames/Series with a
17831783
monotonically increasing/decreasing index.
17841784
* default: don't fill gaps
17851785
* pad / ffill: propagate last valid observation forward to next valid
@@ -1822,7 +1822,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
18221822
18231823
Create a new index and reindex the dataframe. By default
18241824
values in the new index that do not have corresponding
1825-
records in the dataframe are assigned ``NaN``.
1825+
records in the dataframe are assigned ``NaN``.
18261826
18271827
>>> new_index= ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10',
18281828
... 'Chrome']
@@ -1836,8 +1836,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
18361836
18371837
We can fill in the missing values by passing a value to
18381838
the keyword ``fill_value``. Because the index is not monotonically
1839-
increasing or decreasing, we cannot use arguments to the keyword
1840-
``method`` to fill the ``NaN`` values.
1839+
increasing or decreasing, we cannot use arguments to the keyword
1840+
``method`` to fill the ``NaN`` values.
18411841
18421842
>>> df.reindex(new_index, fill_value=0)
18431843
http_status response_time
@@ -1855,8 +1855,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
18551855
IE10 404 0.08
18561856
Chrome 200 0.02
18571857
1858-
To further illustrate the filling functionality in
1859-
``reindex``, we will create a dataframe with a
1858+
To further illustrate the filling functionality in
1859+
``reindex``, we will create a dataframe with a
18601860
monotonically increasing index (for example, a sequence
18611861
of dates).
18621862
@@ -1873,7 +1873,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
18731873
2010-01-06 88
18741874
18751875
Suppose we decide to expand the dataframe to cover a wider
1876-
date range.
1876+
date range.
18771877
18781878
>>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D')
18791879
>>> df2.reindex(date_index2)
@@ -1890,10 +1890,10 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
18901890
2010-01-07 NaN
18911891
18921892
The index entries that did not have a value in the original data frame
1893-
(for example, '2009-12-29') are by default filled with ``NaN``.
1893+
(for example, '2009-12-29') are by default filled with ``NaN``.
18941894
If desired, we can fill in the missing values using one of several
1895-
options.
1896-
1895+
options.
1896+
18971897
For example, to backpropagate the last valid value to fill the ``NaN``
18981898
values, pass ``bfill`` as an argument to the ``method`` keyword.
18991899
@@ -1911,7 +1911,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
19111911
2010-01-07 NaN
19121912
19131913
Please note that the ``NaN`` value present in the original dataframe
1914-
(at index value 2010-01-03) will not be filled by any of the
1914+
(at index value 2010-01-03) will not be filled by any of the
19151915
value propagation schemes. This is because filling while reindexing
19161916
does not look at dataframe values, but only compares the original and
19171917
desired indexes. If you do want to fill in the ``NaN`` values present

pandas/core/series.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
_maybe_box_datetimelike, ABCDataFrame,
2828
_dict_compat)
2929
from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
30-
_ensure_index)
30+
Float64Index, _ensure_index)
3131
from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
3232
from pandas.core import generic, base
3333
from pandas.core.internals import SingleBlockManager
@@ -1277,6 +1277,8 @@ def quantile(self, q=0.5):
12771277
def multi(values, qs):
12781278
if com.is_list_like(qs):
12791279
values = [_quantile(values, x*100) for x in qs]
1280+
# coerce qs so that an empty result still gets a Float64Index
1281+
qs = Float64Index(qs)
12801282
return self._constructor(values, index=qs, name=self.name)
12811283
else:
12821284
return _quantile(values, qs*100)

pandas/io/tests/test_excel.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -846,7 +846,8 @@ def test_int_types(self):
846846
# test with convert_float=False comes back as float
847847
float_frame = frame.astype(float)
848848
recons = read_excel(path, 'test1', convert_float=False)
849-
tm.assert_frame_equal(recons, float_frame)
849+
tm.assert_frame_equal(recons, float_frame,
850+
check_index_type=False, check_column_type=False)
850851

851852
def test_float_types(self):
852853
_skip_if_no_xlrd()
@@ -1186,9 +1187,11 @@ def test_to_excel_output_encoding(self):
11861187
_skip_if_no_xlrd()
11871188
ext = self.ext
11881189
filename = '__tmp_to_excel_float_format__.' + ext
1189-
df = DataFrame([[u('\u0192'), u('\u0193'), u('\u0194')],
1190-
[u('\u0195'), u('\u0196'), u('\u0197')]],
1191-
index=[u('A\u0192'), 'B'], columns=[u('X\u0193'), 'Y', 'Z'])
1190+
1191+
# avoid mixed inferred_type
1192+
df = DataFrame([[u'\u0192', u'\u0193', u'\u0194'],
1193+
[u'\u0195', u'\u0196', u'\u0197']],
1194+
index=[u'A\u0192', u'B'], columns=[u'X\u0193', u'Y', u'Z'])
11921195

11931196
with ensure_clean(filename) as filename:
11941197
df.to_excel(filename, sheet_name='TestSheet', encoding='utf8')

0 commit comments

Comments
 (0)