From 2ca3c5e5b7ee6e4016f662c3d33b1548fc8a0432 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 18 Nov 2018 20:47:01 -0500 Subject: [PATCH 1/5] DEPS: bump xlrd min version to 1.0.0 remove numpy warnings on np.diff in boolean arrays --- ci/deps/travis-27.yaml | 2 +- doc/source/whatsnew/v0.24.0.rst | 4 +++- pandas/_libs/algos_rank_helper.pxi.in | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ci/deps/travis-27.yaml b/ci/deps/travis-27.yaml index 28bee387a4f4a..5a9e206ec2c69 100644 --- a/ci/deps/travis-27.yaml +++ b/ci/deps/travis-27.yaml @@ -35,7 +35,7 @@ dependencies: - scipy - sqlalchemy=0.9.6 - xarray=0.9.6 - - xlrd=0.9.2 + - xlrd=1.0.0 - xlsxwriter=0.5.2 - xlwt=0.7.5 # universal diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index c00503e080864..f94aa3d320b75 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -307,7 +307,7 @@ Backwards incompatible API changes Dependencies have increased minimum versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -We have updated our minimum supported versions of dependencies (:issue:`21242`, `18742`). +We have updated our minimum supported versions of dependencies (:issue:`21242`, :issue:`18742`, :issue:`23774`). If installed, we now require: +-----------------+-----------------+----------+ @@ -331,6 +331,8 @@ If installed, we now require: +-----------------+-----------------+----------+ | scipy | 0.18.1 | | +-----------------+-----------------+----------+ +| xlrd | 1.0.0 | | ++-----------------+-----------------+----------+ Additionally we no longer depend on `feather-format` for feather based storage and replaced it with references to `pyarrow` (:issue:`21639` and :issue:`23053`). diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index 329c368e13d6d..5ffc6dd578023 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -126,7 +126,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', sorted_data = values.take(_as) sorted_mask = mask.take(_as) - _indices = np.diff(sorted_mask).nonzero()[0] + _indices = np.diff(sorted_mask.astype(int)).nonzero()[0] non_na_idx = _indices[0] if len(_indices) > 0 else -1 argsorted = _as.astype('i8') From c359c4623d0ae0aeab7c87a6d1eed11e0accd8cd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 18 Nov 2018 22:17:39 -0500 Subject: [PATCH 2/5] make sure to type check on get_loc in large hashtables --- pandas/_libs/index.pyx | 2 ++ pandas/_libs/index_class_helper.pxi.in | 2 ++ 2 files changed, 4 insertions(+) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 7930f583274b5..d828c3dd8e923 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -113,6 +113,8 @@ cdef class IndexEngine: if not self.is_unique: return self._get_loc_duplicates(val) values = self._get_index_values() + + self._check_type(val) loc = _bin_search(values, val) # .searchsorted(val, side='left') if loc >= len(values): raise KeyError(val) diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in index ff95917f6643a..b393283bfd4ca 100644 --- a/pandas/_libs/index_class_helper.pxi.in +++ b/pandas/_libs/index_class_helper.pxi.in @@ -51,6 +51,8 @@ cdef class {{name}}Engine(IndexEngine): raise KeyError(val) elif util.is_float_object(val): raise KeyError(val) + elif not util.is_integer_object(val): + raise KeyError(val) {{endif}} {{if name != 'Object'}} From 19f69819b6943afe11c96d8aa9b260ec82b605e1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 18 Nov 2018 22:23:18 -0500 Subject: [PATCH 3/5] fix period with NaT --- pandas/core/indexes/period.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index f35e775fc0a28..f83687bacd72d 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -33,6 +33,7 @@ from pandas.core.arrays.period import PeriodArray, period_array from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs, ensure_index +from pandas.core.missing import isna from pandas import compat from pandas.util._decorators import ( @@ -652,7 +653,8 @@ def get_value(self, series, key): except TypeError: pass - key = Period(key, self.freq).ordinal + period = Period(key, self.freq) + key = period.value if isna(period) else period.ordinal return com.maybe_box(self, self._engine.get_value(s, key), series, key) From 063877582e58ac3e2591cccdbea09ef1adfeaf60 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 19 Nov 2018 07:12:07 -0500 Subject: [PATCH 4/5] remove old refs to < 1.0.0 --- doc/source/install.rst | 2 +- pandas/io/excel.py | 61 ++++++++++++----------------------- pandas/tests/io/test_excel.py | 40 +++++++---------------- 3 files changed, 33 insertions(+), 70 deletions(-) diff --git a/doc/source/install.rst b/doc/source/install.rst index 9a62c6a89457a..4a71dbcec17e6 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -269,7 +269,7 @@ Optional Dependencies * `matplotlib `__: for plotting, Version 2.0.0 or higher. * For Excel I/O: - * `xlrd/xlwt `__: Excel reading (xlrd) and writing (xlwt) + * `xlrd/xlwt `__: Excel reading (xlrd), version 1.0.0 or higher required, and writing (xlwt) * `openpyxl `__: openpyxl version 2.4.0 for writing .xlsx files (xlrd >= 0.9.0) * `XlsxWriter `__: Alternative Excel writer diff --git a/pandas/io/excel.py b/pandas/io/excel.py index c0e584567407a..4f9ae2e27ed8b 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -5,7 +5,7 @@ # --------------------------------------------------------------------- # ExcelFile class import abc -from datetime import MINYEAR, date, datetime, time, timedelta +from datetime import date, datetime, time, timedelta from distutils.version import LooseVersion from io import UnsupportedOperation import os @@ -375,15 +375,14 @@ class ExcelFile(object): def __init__(self, io, **kwds): - err_msg = "Install xlrd >= 0.9.0 for Excel support" + err_msg = "Install xlrd >= 1.0.0 for Excel support" try: import xlrd except ImportError: raise ImportError(err_msg) else: - ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2])) - if ver < (0, 9): # pragma: no cover + if xlrd.__VERSION__ < LooseVersion("1.0.0"): raise ImportError(err_msg + ". Current version " + xlrd.__VERSION__) @@ -515,7 +514,6 @@ def _parse_excel(self, raise NotImplementedError("chunksize keyword of read_excel " "is not implemented") - import xlrd from xlrd import (xldate, XL_CELL_DATE, XL_CELL_ERROR, XL_CELL_BOOLEAN, XL_CELL_NUMBER) @@ -528,36 +526,23 @@ def _parse_cell(cell_contents, cell_typ): if cell_typ == XL_CELL_DATE: - if xlrd_0_9_3: - # Use the newer xlrd datetime handling. - try: - cell_contents = \ - xldate.xldate_as_datetime(cell_contents, - epoch1904) - except OverflowError: - return cell_contents - # Excel doesn't distinguish between dates and time, - # so we treat dates on the epoch as times only. - # Also, Excel supports 1900 and 1904 epochs. - year = (cell_contents.timetuple())[0:3] - if ((not epoch1904 and year == (1899, 12, 31)) or - (epoch1904 and year == (1904, 1, 1))): - cell_contents = time(cell_contents.hour, - cell_contents.minute, - cell_contents.second, - cell_contents.microsecond) - else: - # Use the xlrd <= 0.9.2 date handling. - try: - dt = xldate.xldate_as_tuple(cell_contents, epoch1904) - - except xldate.XLDateTooLarge: - return cell_contents - - if dt[0] < MINYEAR: - cell_contents = time(*dt[3:]) - else: - cell_contents = datetime(*dt) + # Use the newer xlrd datetime handling. + try: + cell_contents = xldate.xldate_as_datetime( + cell_contents, epoch1904) + except OverflowError: + return cell_contents + + # Excel doesn't distinguish between dates and time, + # so we treat dates on the epoch as times only. + # Also, Excel supports 1900 and 1904 epochs. + year = (cell_contents.timetuple())[0:3] + if ((not epoch1904 and year == (1899, 12, 31)) or + (epoch1904 and year == (1904, 1, 1))): + cell_contents = time(cell_contents.hour, + cell_contents.minute, + cell_contents.second, + cell_contents.microsecond) elif cell_typ == XL_CELL_ERROR: cell_contents = np.nan @@ -571,12 +556,6 @@ def _parse_cell(cell_contents, cell_typ): cell_contents = val return cell_contents - # xlrd >= 0.9.3 can return datetime objects directly. - if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"): - xlrd_0_9_3 = True - else: - xlrd_0_9_3 = False - ret_dict = False # Keep sheetname to maintain backwards compatibility. diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index fbcd0f279a9ff..34fcb17127439 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -36,7 +36,7 @@ _mixed_frame['foo'] = 'bar' -@td.skip_if_no('xlrd', '0.9') +@td.skip_if_no('xlrd', '1.0.0') class SharedItems(object): @pytest.fixture(autouse=True) @@ -796,35 +796,19 @@ def tdf(col_sheet_name): tm.assert_frame_equal(dfs[s], dfs_returned[s]) def test_reader_seconds(self, ext): - import xlrd # Test reading times with and without milliseconds. GH5945. - if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"): - # Xlrd >= 0.9.3 can handle Excel milliseconds. - expected = DataFrame.from_dict({"Time": [time(1, 2, 3), - time(2, 45, 56, 100000), - time(4, 29, 49, 200000), - time(6, 13, 42, 300000), - time(7, 57, 35, 400000), - time(9, 41, 28, 500000), - time(11, 25, 21, 600000), - time(13, 9, 14, 700000), - time(14, 53, 7, 800000), - time(16, 37, 0, 900000), - time(18, 20, 54)]}) - else: - # Xlrd < 0.9.3 rounds Excel milliseconds. - expected = DataFrame.from_dict({"Time": [time(1, 2, 3), - time(2, 45, 56), - time(4, 29, 49), - time(6, 13, 42), - time(7, 57, 35), - time(9, 41, 29), - time(11, 25, 22), - time(13, 9, 15), - time(14, 53, 8), - time(16, 37, 1), - time(18, 20, 54)]}) + expected = DataFrame.from_dict({"Time": [time(1, 2, 3), + time(2, 45, 56, 100000), + time(4, 29, 49, 200000), + time(6, 13, 42, 300000), + time(7, 57, 35, 400000), + time(9, 41, 28, 500000), + time(11, 25, 21, 600000), + time(13, 9, 14, 700000), + time(14, 53, 7, 800000), + time(16, 37, 0, 900000), + time(18, 20, 54)]}) actual = self.get_exceldf('times_1900', ext, 'Sheet1') tm.assert_frame_equal(actual, expected) From b1cf09f62e0cabcd1cca0573ba2e5a51e464ba35 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 19 Nov 2018 08:30:01 -0500 Subject: [PATCH 5/5] update ci --- ci/deps/azure-27-compat.yaml | 2 +- ci/deps/travis-27-locale.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/deps/azure-27-compat.yaml b/ci/deps/azure-27-compat.yaml index 5b726304cf414..44c561e9c8911 100644 --- a/ci/deps/azure-27-compat.yaml +++ b/ci/deps/azure-27-compat.yaml @@ -16,7 +16,7 @@ dependencies: - pytz=2013b - scipy=0.18.1 - sqlalchemy=0.7.8 - - xlrd=0.9.2 + - xlrd=1.0.0 - xlsxwriter=0.5.2 - xlwt=0.7.5 # universal diff --git a/ci/deps/travis-27-locale.yaml b/ci/deps/travis-27-locale.yaml index dc5580ae6d287..c8d17cf190e35 100644 --- a/ci/deps/travis-27-locale.yaml +++ b/ci/deps/travis-27-locale.yaml @@ -16,7 +16,7 @@ dependencies: - pytz=2013b - scipy - sqlalchemy=0.8.1 - - xlrd=0.9.2 + - xlrd=1.0.0 - xlsxwriter=0.5.2 - xlwt=0.7.5 # universal