
Commit 24ef8ef

Merge branch 'master' into PR_TOOL_MERGE_PR_22952

2 parents: 2816459 + cc712c9

File tree: 10 files changed (+107 -82 lines)


doc/source/whatsnew/v0.24.0.rst (+3)

@@ -1008,6 +1008,8 @@ Other API Changes
 - Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
 - :class:`DateOffset` attribute `_cacheable` and method `_should_cache` have been removed (:issue:`23118`)
 - Comparing :class:`Timedelta` to be less or greater than unknown types now raises a ``TypeError`` instead of returning ``False`` (:issue:`20829`)
+- :meth:`Categorical.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23466`).
+- :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather than a ``ValueError`` if a searched-for key is not found in its categories (:issue:`23466`).
 - :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`).
 - The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other. (:issue:`23614`)
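
Note: the two searchsorted entries added above change both the return type and the exception. A minimal sketch of the new behavior (the categorical below is invented for illustration and is not part of the diff):

    import pandas as pd

    cat = pd.Categorical(['apple', 'bread', 'bread', 'cheese', 'milk'],
                         ordered=True)

    # A scalar search value now returns a scalar position, not a length-1 array.
    cat.searchsorted('bread')              # 1

    # Array-like input still returns an array of positions.
    cat.searchsorted(['bread', 'milk'])    # array([1, 4])

    # A value missing from the categories now raises KeyError (was ValueError).
    try:
        cat.searchsorted('cucumber')
    except KeyError as exc:
        print(exc)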

@@ -1382,6 +1384,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
 - Bug in :func:`read_csv()` in which incorrect error messages were being raised when ``skipfooter`` was passed in along with ``nrows``, ``iterator``, or ``chunksize`` (:issue:`23711`)
 - Bug in :meth:`read_csv()` in which :class:`MultiIndex` index names were being improperly handled in the cases when they were not provided (:issue:`23484`)
 - Bug in :meth:`read_html()` in which the error message was not displaying the valid flavors when an invalid one was provided (:issue:`23549`)
+- Bug in :meth:`read_excel()` in which extraneous header names were extracted, even though none were specified (:issue:`11733`)
 - Bug in :meth:`read_excel()` in which ``index_col=None`` was not being respected and parsing index columns anyway (:issue:`20480`)
 - Bug in :meth:`read_excel()` in which ``usecols`` was not being validated for proper column names when passed in as a string (:issue:`20480`)
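
Note: the read_excel entry added above (:issue:`11733`) concerns multi-row headers when no index column is present. A minimal usage sketch of the fixed behavior; the file name and sheet name here are invented for illustration:

    import pandas as pd

    # Sheet whose first two rows form a MultiIndex column header and which
    # has no index column.
    df = pd.read_excel("multi_header.xlsx", sheet_name="no_index",
                       header=[0, 1], index_col=None)

    # With the fix, no stray header name is pulled out of the data;
    # both column levels stay unnamed.
    print(df.columns.names)    # [None, None]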

pandas/core/arrays/categorical.py (+5 -7)

@@ -1344,15 +1344,13 @@ def searchsorted(self, value, side='left', sorter=None):
                              "ordered one")
 
         from pandas.core.series import Series
+        codes = _get_codes_for_values(Series(value).values, self.categories)
+        if -1 in codes:
+            raise KeyError("Value(s) to be inserted must be in categories.")
 
-        values_as_codes = _get_codes_for_values(Series(value).values,
-                                                self.categories)
+        codes = codes[0] if is_scalar(value) else codes
 
-        if -1 in values_as_codes:
-            raise ValueError("Value(s) to be inserted must be in categories.")
-
-        return self.codes.searchsorted(values_as_codes, side=side,
-                                       sorter=sorter)
+        return self.codes.searchsorted(codes, side=side, sorter=sorter)
 
     def isna(self):
         """

pandas/io/excel.py (+32 -16)

@@ -630,11 +630,12 @@ def _parse_cell(cell_contents, cell_typ):
                     if is_integer(skiprows):
                         row += skiprows
 
-                    data[row], control_row = _fill_mi_header(
-                        data[row], control_row)
-                    header_name, _ = _pop_header_name(
-                        data[row], index_col)
-                    header_names.append(header_name)
+                    data[row], control_row = _fill_mi_header(data[row],
+                                                             control_row)
+
+                    if index_col is not None:
+                        header_name, _ = _pop_header_name(data[row], index_col)
+                        header_names.append(header_name)
 
             if is_list_like(index_col):
                 # Forward fill values for MultiIndex index.
@@ -682,7 +683,8 @@ def _parse_cell(cell_contents, cell_typ):
 
                 output[asheetname] = parser.read(nrows=nrows)
 
-                if not squeeze or isinstance(output[asheetname], DataFrame):
+                if ((not squeeze or isinstance(output[asheetname], DataFrame))
+                        and header_names):
                     output[asheetname].columns = output[
                         asheetname].columns.set_names(header_names)
             except EmptyDataError:
@@ -863,16 +865,30 @@ def _fill_mi_header(row, control_row):
 
 
 def _pop_header_name(row, index_col):
-    """ (header, new_data) for header rows in MultiIndex parsing"""
-    none_fill = lambda x: None if x == '' else x
-
-    if index_col is None:
-        # no index col specified, trim data for inference path
-        return none_fill(row[0]), row[1:]
-    else:
-        # pop out header name and fill w/ blank
-        i = index_col if not is_list_like(index_col) else max(index_col)
-        return none_fill(row[i]), row[:i] + [''] + row[i + 1:]
+    """
+    Pop the header name for MultiIndex parsing.
+
+    Parameters
+    ----------
+    row : list
+        The data row to parse for the header name.
+    index_col : int, list
+        The index columns for our data. Assumed to be non-null.
+
+    Returns
+    -------
+    header_name : str
+        The extracted header name.
+    trimmed_row : list
+        The original data row with the header name removed.
+    """
+    # Pop out header name and fill w/blank.
+    i = index_col if not is_list_like(index_col) else max(index_col)
+
+    header_name = row[i]
+    header_name = None if header_name == "" else header_name
+
+    return header_name, row[:i] + [''] + row[i + 1:]
 
 
 @add_metaclass(abc.ABCMeta)
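
Note: the rewritten _pop_header_name above assumes index_col is non-null; the caller now guards the call with "if index_col is not None". A standalone sketch of the documented contract, written in plain Python (the helper name and sample rows are made up for illustration):

    def pop_header_name(row, index_col):
        # Take the cell at the (right-most) index column, treat "" as
        # "no name", and return the row with that slot blanked out.
        i = index_col if not isinstance(index_col, (list, tuple)) else max(index_col)
        header_name = None if row[i] == "" else row[i]
        return header_name, row[:i] + [""] + row[i + 1:]

    print(pop_header_name(["year", "A", "B"], 0))       # ('year', ['', 'A', 'B'])
    print(pop_header_name(["", "A", "B"], 0))           # (None, ['', 'A', 'B'])
    print(pop_header_name(["x", "y", "name"], [0, 2]))  # ('name', ['x', 'y', ''])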

pandas/io/formats/html.py (+9 -45)

@@ -78,7 +78,7 @@ def _write_cell(self, s, kind='td', indent=0, tags=None):
         self.write(u'{start}{rs}</{kind}>'
                    .format(start=start_tag, rs=rs, kind=kind), indent)
 
-    def write_tr(self, line, indent=0, indent_delta=4, header=False,
+    def write_tr(self, line, indent=0, indent_delta=0, header=False,
                  align=None, tags=None, nindex_levels=0):
         if tags is None:
             tags = {}
@@ -200,26 +200,6 @@ def _write_header(self, indent):
             # write nothing
             return indent
 
-        def _column_header():
-            if self.fmt.index:
-                row = [''] * (self.frame.index.nlevels - 1)
-            else:
-                row = []
-
-            if isinstance(self.columns, ABCMultiIndex):
-                if self.fmt.has_column_names and self.fmt.index:
-                    row.append(single_column_table(self.columns.names))
-                else:
-                    row.append('')
-                style = "text-align: {just};".format(just=self.fmt.justify)
-                row.extend([single_column_table(c, self.fmt.justify, style)
-                            for c in self.columns])
-            else:
-                if self.fmt.index:
-                    row.append(self.columns.name or '')
-                row.extend(self.columns)
-            return row
-
         self.write('<thead>', indent)
 
         indent += self.indent_delta
@@ -301,16 +281,21 @@ def _column_header():
             self.write_tr(row, indent, self.indent_delta, tags=tags,
                           header=True)
         else:
-            col_row = _column_header()
+            if self.fmt.index:
+                row = [''] * (self.frame.index.nlevels - 1)
+                row.append(self.columns.name or '')
+            else:
+                row = []
+            row.extend(self.columns)
             align = self.fmt.justify
 
             if truncate_h:
                 if not self.fmt.index:
                     row_levels = 0
                 ins_col = row_levels + self.fmt.tr_col_num
-                col_row.insert(ins_col, '...')
+                row.insert(ins_col, '...')
 
-            self.write_tr(col_row, indent, self.indent_delta, header=True,
+            self.write_tr(row, indent, self.indent_delta, header=True,
                           align=align)
 
         if all((self.fmt.has_index_names,
@@ -486,24 +471,3 @@ def _write_hierarchical_rows(self, fmt_values, indent):
                     row.insert(row_levels + self.fmt.tr_col_num, '...')
                 self.write_tr(row, indent, self.indent_delta, tags=None,
                               nindex_levels=frame.index.nlevels)
-
-
-def single_column_table(column, align=None, style=None):
-    table = '<table'
-    if align is not None:
-        table += (' align="{align}"'.format(align=align))
-    if style is not None:
-        table += (' style="{style}"'.format(style=style))
-    table += '><tbody>'
-    for i in column:
-        table += ('<tr><td>{i!s}</td></tr>'.format(i=i))
-    table += '</tbody></table>'
-    return table
-
-
-def single_row_table(row):  # pragma: no cover
-    table = '<table><tbody><tr>'
-    for i in row:
-        table += ('<td>{i!s}</td>'.format(i=i))
-    table += '</tr></tbody></table>'
-    return table

pandas/tests/arrays/categorical/test_analytics.py (+7 -7)

@@ -85,10 +85,10 @@ def test_searchsorted(self):
 
         # Searching for single item argument, side='left' (default)
         res_cat = c1.searchsorted('apple')
+        assert res_cat == 2
+
         res_ser = s1.searchsorted('apple')
-        exp = np.array([2], dtype=np.intp)
-        tm.assert_numpy_array_equal(res_cat, exp)
-        tm.assert_numpy_array_equal(res_ser, exp)
+        assert res_ser == 2
 
         # Searching for single item array, side='left' (default)
         res_cat = c1.searchsorted(['bread'])
@@ -105,13 +105,13 @@ def test_searchsorted(self):
         tm.assert_numpy_array_equal(res_ser, exp)
 
         # Searching for a single value that is not from the Categorical
-        pytest.raises(ValueError, lambda: c1.searchsorted('cucumber'))
-        pytest.raises(ValueError, lambda: s1.searchsorted('cucumber'))
+        pytest.raises(KeyError, lambda: c1.searchsorted('cucumber'))
+        pytest.raises(KeyError, lambda: s1.searchsorted('cucumber'))
 
         # Searching for multiple values one of each is not from the Categorical
-        pytest.raises(ValueError,
+        pytest.raises(KeyError,
                       lambda: c1.searchsorted(['bread', 'cucumber']))
-        pytest.raises(ValueError,
+        pytest.raises(KeyError,
                       lambda: s1.searchsorted(['bread', 'cucumber']))
 
         # searchsorted call for unordered Categorical
3 binary files changed (not shown): 10.5 KB, 2.82 KB, 2.65 KB

pandas/tests/io/test_excel.py (+11)

@@ -896,6 +896,17 @@ def test_read_excel_multiindex(self, ext):
                              header=[0, 1], skiprows=2)
         tm.assert_frame_equal(actual, expected)
 
+    def test_read_excel_multiindex_header_only(self, ext):
+        # see gh-11733.
+        #
+        # Don't try to parse a header name if there isn't one.
+        mi_file = os.path.join(self.dirpath, "testmultiindex" + ext)
+        result = read_excel(mi_file, "index_col_none", header=[0, 1])
+
+        exp_columns = MultiIndex.from_product([("A", "B"), ("key", "val")])
+        expected = DataFrame([[1, 2, 3, 4]] * 2, columns=exp_columns)
+        tm.assert_frame_equal(result, expected)
+
     @td.skip_if_no("xlsxwriter")
     def test_read_excel_multiindex_empty_level(self, ext):
         # see gh-12453

pandas/tests/scalar/timestamp/test_arithmetic.py (+40 -7)

@@ -7,25 +7,58 @@
 import pandas.util.testing as tm
 from pandas.compat import long
 from pandas.tseries import offsets
+from pandas.tseries.frequencies import to_offset
 from pandas import Timestamp, Timedelta
 
 
 class TestTimestampArithmetic(object):
     def test_overflow_offset(self):
+        # no overflow expected
+
+        stamp = Timestamp("2000/1/1")
+        offset_no_overflow = to_offset("D") * 100
+
+        expected = Timestamp("2000/04/10")
+        assert stamp + offset_no_overflow == expected
+
+        assert offset_no_overflow + stamp == expected
+
+        expected = Timestamp("1999/09/23")
+        assert stamp - offset_no_overflow == expected
+
+    def test_overflow_offset_raises(self):
         # xref https://github.com/statsmodels/statsmodels/issues/3374
         # ends up multiplying really large numbers which overflow
 
         stamp = Timestamp('2017-01-13 00:00:00', freq='D')
-        offset = 20169940 * offsets.Day(1)
+        offset_overflow = 20169940 * offsets.Day(1)
+        msg = ("the add operation between "
+               r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} "
+               "will overflow")
+
+        with pytest.raises(OverflowError, match=msg):
+            stamp + offset_overflow
+
+        with pytest.raises(OverflowError, match=msg):
+            offset_overflow + stamp
+
+        with pytest.raises(OverflowError, match=msg):
+            stamp - offset_overflow
+
+        # xref https://github.com/pandas-dev/pandas/issues/14080
+        # used to crash, so check for proper overflow exception
+
+        stamp = Timestamp("2000/1/1")
+        offset_overflow = to_offset("D") * 100 ** 25
 
-        with pytest.raises(OverflowError):
-            stamp + offset
+        with pytest.raises(OverflowError, match=msg):
+            stamp + offset_overflow
 
-        with pytest.raises(OverflowError):
-            offset + stamp
+        with pytest.raises(OverflowError, match=msg):
+            offset_overflow + stamp
 
-        with pytest.raises(OverflowError):
-            stamp - offset
+        with pytest.raises(OverflowError, match=msg):
+            stamp - offset_overflow
 
     def test_delta_preserve_nanos(self):
         val = Timestamp(long(1337299200000000123))
