From a0a37ccbf4302bea5c8a58901fbcbd45531a80fb Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 4 Dec 2018 09:32:09 -0600 Subject: [PATCH 1/9] CI: remove failing line --- asv_bench/benchmarks/frame_methods.py | 1 - 1 file changed, 1 deletion(-) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index dcecaf60ed578..d89ac71ecf724 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -1,5 +1,4 @@ import string -import warnings import numpy as np import pandas.util.testing as tm From 1f15d1035ae435012ff0c7bb308c900889056951 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 4 Dec 2018 12:34:33 -0600 Subject: [PATCH 2/9] CI: ASV fixups --- asv_bench/benchmarks/algorithms.py | 11 +++-------- asv_bench/benchmarks/frame_methods.py | 13 +++++-------- asv_bench/benchmarks/groupby.py | 18 +++++++++--------- pandas/core/sparse/scipy_sparse.py | 10 +--------- pandas/io/formats/html.py | 10 +--------- .../tests/sparse/frame/test_to_from_scipy.py | 14 ++++++++++++++ 6 files changed, 33 insertions(+), 43 deletions(-) diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index 1ab88dc9f9e6d..7074a2fe2eb3a 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -1,10 +1,12 @@ -import warnings from importlib import import_module import numpy as np + import pandas as pd from pandas.util import testing as tm +from .pandas_vb_common import setup # noqa: F401 + for imp in ['pandas.util', 'pandas.tools.hashing']: try: hashing = import_module(imp) @@ -73,10 +75,6 @@ def setup(self): self.uniques = tm.makeStringIndex(1000).values self.all = self.uniques.repeat(10) - def time_match_string(self): - with warnings.catch_warnings(record=True): - pd.match(self.all, self.uniques) - class Hashing(object): @@ -114,6 +112,3 @@ def time_series_timedeltas(self, df): def time_series_dates(self, df): hashing.hash_pandas_object(df['dates']) - - -from .pandas_vb_common import setup # noqa: F401 diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index d89ac71ecf724..dfedb9f2d8f71 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -1,9 +1,12 @@ import string import numpy as np + +from pandas import ( + DataFrame, MultiIndex, NaT, Series, date_range, isnull, period_range) import pandas.util.testing as tm -from pandas import (DataFrame, Series, MultiIndex, date_range, period_range, - isnull, NaT) + +from .pandas_vb_common import setup # noqa: F401 class GetNumericData(object): @@ -60,9 +63,6 @@ def time_reindex_axis1(self): def time_reindex_both_axes(self): self.df.reindex(index=self.idx, columns=self.idx) - def time_reindex_both_axes_ix(self): - self.df.ix[self.idx, self.idx] - def time_reindex_upcast(self): self.df2.reindex(np.random.permutation(range(1200))) @@ -521,6 +521,3 @@ def time_series_describe(self): def time_dataframe_describe(self): self.df.describe() - - -from .pandas_vb_common import setup # noqa: F401 diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index dbd79185ec006..5b28e0b4cc054 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -1,13 +1,16 @@ -import warnings -from string import ascii_letters -from itertools import product from functools import partial +from itertools import product +from string import ascii_letters +import warnings import numpy as np -from pandas import (DataFrame, Series, MultiIndex, date_range, period_range, - TimeGrouper, Categorical, Timestamp) + +from pandas import ( + Categorical, DataFrame, MultiIndex, Series, TimeGrouper, Timestamp, + date_range, period_range) import pandas.util.testing as tm +from .pandas_vb_common import setup # noqa: F401 method_blacklist = { 'object': {'median', 'prod', 'sem', 'cumsum', 'sum', 'cummin', 'mean', @@ -210,7 +213,7 @@ def time_multi_int_nunique(self, df): class AggFunctions(object): - def setup_cache(): + def setup_cache(self): N = 10**5 fac1 = np.array(['A', 'B', 'C'], dtype='O') fac2 = np.array(['one', 'two'], dtype='O') @@ -535,6 +538,3 @@ def setup(self): def time_first(self): self.df_nans.groupby('key').transform('first') - - -from .pandas_vb_common import setup # noqa: F401 diff --git a/pandas/core/sparse/scipy_sparse.py b/pandas/core/sparse/scipy_sparse.py index ab4fdeb05f8f1..2d0ce2d5e5951 100644 --- a/pandas/core/sparse/scipy_sparse.py +++ b/pandas/core/sparse/scipy_sparse.py @@ -58,15 +58,7 @@ def _get_label_to_i_dict(labels, sort_labels=False): return (d) def _get_index_subset_to_coord_dict(index, subset, sort_labels=False): - def robust_get_level_values(i): - # if index has labels (that are not None) use those, - # else use the level location - try: - return index.get_level_values(index.names[i]) - except KeyError: - return index.get_level_values(i) - - ilabels = list(zip(*[robust_get_level_values(i) for i in subset])) + ilabels = list(zip(*[index._get_level_values(i) for i in subset])) labels_to_i = _get_label_to_i_dict(ilabels, sort_labels=sort_labels) labels_to_i = Series(labels_to_i) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index bf92ce7ee0f67..ae5c212d122c8 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -161,15 +161,7 @@ def write_result(self, buf): _classes.extend(self.classes) if self.notebook: - div_style = '' - try: - import IPython - if IPython.__version__ < LooseVersion('3.0.0'): - div_style = ' style="max-width:1500px;overflow:auto;"' - except (ImportError, AttributeError): - pass - - self.write(''.format(style=div_style)) + self.write('
') self.write_style() diff --git a/pandas/tests/sparse/frame/test_to_from_scipy.py b/pandas/tests/sparse/frame/test_to_from_scipy.py index 1a10ff83d3097..e94ca8c8730b3 100644 --- a/pandas/tests/sparse/frame/test_to_from_scipy.py +++ b/pandas/tests/sparse/frame/test_to_from_scipy.py @@ -1,5 +1,6 @@ import pytest import numpy as np +import pandas as pd from pandas.util import testing as tm from pandas import SparseDataFrame, SparseSeries from pandas.core.sparse.api import SparseDtype @@ -168,3 +169,16 @@ def test_from_scipy_fillna(spmatrix): expected[col].fill_value = -1 tm.assert_sp_frame_equal(sdf, expected) + + +def test_index_names_multiple_nones(): + # https://github.com/pandas-dev/pandas/pull/24092 + sparse = pytest.importorskip("scipy.sparse") + + s = (pd.Series(1, index=pd.MultiIndex.from_product([['A', 'B'], [0, 1]])) + .to_sparse()) + result, _, _ = s.to_coo() + assert isinstance(result, sparse.coo_matrix) + result = result.toarray() + expected = np.ones((2, 2), dtype=int) + tm.assert_numpy_array_equal(result, expected) From e79d52e952e06535b99db0d27b45f21f5ccc7b45 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 4 Dec 2018 12:39:51 -0600 Subject: [PATCH 3/9] debug boto --- pandas/tests/io/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index b0cdbe2b5bedb..b108b142b4f3d 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -37,6 +37,8 @@ def s3_resource(tips_file, jsonl_file): """ pytest.importorskip('s3fs') boto3 = pytest.importorskip('boto3') + # GH-24092. See if boto.plugin skips the test or fails. + pytest.importorskip("boto.plugin") moto = pytest.importorskip('moto') test_s3_files = [ From 1d0a775a7a0dc06e481a420a60dde22e9cf0c89d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 4 Dec 2018 12:59:32 -0600 Subject: [PATCH 4/9] No isorting benchmarks --- asv_bench/benchmarks/algorithms.py | 4 ++- asv_bench/benchmarks/frame_methods.py | 5 ++-- asv_bench/benchmarks/groupby.py | 4 ++- setup.cfg | 39 +++++++++++++++++++++++++-- 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index 7074a2fe2eb3a..7dcd7b284d66d 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -5,7 +5,6 @@ import pandas as pd from pandas.util import testing as tm -from .pandas_vb_common import setup # noqa: F401 for imp in ['pandas.util', 'pandas.tools.hashing']: try: @@ -112,3 +111,6 @@ def time_series_timedeltas(self, df): def time_series_dates(self, df): hashing.hash_pandas_object(df['dates']) + + +from .pandas_vb_common import setup # noqa: F401 diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index dfedb9f2d8f71..3c0dd646aa502 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -6,8 +6,6 @@ DataFrame, MultiIndex, NaT, Series, date_range, isnull, period_range) import pandas.util.testing as tm -from .pandas_vb_common import setup # noqa: F401 - class GetNumericData(object): @@ -521,3 +519,6 @@ def time_series_describe(self): def time_dataframe_describe(self): self.df.describe() + + +from .pandas_vb_common import setup # noqa: F401 diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 5b28e0b4cc054..ee5ae69555d16 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -10,7 +10,6 @@ date_range, period_range) import pandas.util.testing as tm -from .pandas_vb_common import setup # noqa: F401 method_blacklist = { 'object': {'median', 'prod', 'sem', 'cumsum', 'sum', 'cummin', 'mean', @@ -538,3 +537,6 @@ def setup(self): def time_first(self): self.df_nans.groupby('key').transform('first') + + +from .pandas_vb_common import setup # noqa: F401 diff --git a/setup.cfg b/setup.cfg index 25f713822f127..44df79d1b60d2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -361,5 +361,40 @@ skip= pandas/tests/computation/test_compat.py, pandas/tests/computation/test_eval.py, pandas/types/common.py, - pandas/tests/extension/arrow/test_bool.py - doc/source/conf.py + pandas/tests/extension/arrow/test_bool.py, + doc/source/conf.py, + asv_bench/benchmarks/algorithms.py, + asv_bench/benchmarks/attrs_caching.py, + asv_bench/benchmarks/binary_ops.py, + asv_bench/benchmarks/categoricals.py, + asv_bench/benchmarks/ctors.py, + asv_bench/benchmarks/eval.py, + asv_bench/benchmarks/frame_ctor.py, + asv_bench/benchmarks/frame_methods.py, + asv_bench/benchmarks/gil.py, + asv_bench/benchmarks/groupby.py, + asv_bench/benchmarks/index_object.py, + asv_bench/benchmarks/indexing.py, + asv_bench/benchmarks/inference.py, + asv_bench/benchmarks/io/csv.py, + asv_bench/benchmarks/io/excel.py, + asv_bench/benchmarks/io/hdf.py, + asv_bench/benchmarks/io/json.py, + asv_bench/benchmarks/io/msgpack.py, + asv_bench/benchmarks/io/pickle.py, + asv_bench/benchmarks/io/sql.py, + asv_bench/benchmarks/io/stata.py, + asv_bench/benchmarks/join_merge.py, + asv_bench/benchmarks/multiindex_object.py, + asv_bench/benchmarks/panel_ctor.py, + asv_bench/benchmarks/panel_methods.py, + asv_bench/benchmarks/plotting.py, + asv_bench/benchmarks/reindex.py, + asv_bench/benchmarks/replace.py, + asv_bench/benchmarks/reshape.py, + asv_bench/benchmarks/rolling.py, + asv_bench/benchmarks/series_methods.py, + asv_bench/benchmarks/sparse.py, + asv_bench/benchmarks/stat_ops.py, + asv_bench/benchmarks/timeseries.py + From d3df86f9601b51c70e587633a3347a9b995be068 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 4 Dec 2018 13:09:14 -0600 Subject: [PATCH 5/9] correct dtype --- pandas/tests/sparse/frame/test_to_from_scipy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/sparse/frame/test_to_from_scipy.py b/pandas/tests/sparse/frame/test_to_from_scipy.py index e94ca8c8730b3..e5c50e9574f90 100644 --- a/pandas/tests/sparse/frame/test_to_from_scipy.py +++ b/pandas/tests/sparse/frame/test_to_from_scipy.py @@ -180,5 +180,5 @@ def test_index_names_multiple_nones(): result, _, _ = s.to_coo() assert isinstance(result, sparse.coo_matrix) result = result.toarray() - expected = np.ones((2, 2), dtype=int) + expected = np.ones((2, 2), dtype="int64") tm.assert_numpy_array_equal(result, expected) From 378515a88687143b01916b12b3831c3f0a814b74 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 4 Dec 2018 14:07:49 -0600 Subject: [PATCH 6/9] more hacky workarounds --- pandas/tests/io/conftest.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index b108b142b4f3d..1b9dbd152af00 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -38,7 +38,10 @@ def s3_resource(tips_file, jsonl_file): pytest.importorskip('s3fs') boto3 = pytest.importorskip('boto3') # GH-24092. See if boto.plugin skips the test or fails. - pytest.importorskip("boto.plugin") + try: + pytest.importorskip("boto.plugin") + except AttributeError: + raise pytest.skip(reason="moto/moto error") moto = pytest.importorskip('moto') test_s3_files = [ From 092320a37c540427980a0cdee06f9ff5ebfffb2f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 4 Dec 2018 14:40:46 -0600 Subject: [PATCH 7/9] Remove unused import --- pandas/io/formats/html.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index ae5c212d122c8..6425e655959bd 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -5,7 +5,6 @@ from __future__ import print_function -from distutils.version import LooseVersion from textwrap import dedent from pandas.compat import OrderedDict, lzip, map, range, u, unichr, zip From e20850e2bd74b5ac890adde148c5e1323f725eb2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 4 Dec 2018 14:54:51 -0600 Subject: [PATCH 8/9] fix skip --- pandas/tests/io/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index 1b9dbd152af00..928519d39aed3 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -41,7 +41,7 @@ def s3_resource(tips_file, jsonl_file): try: pytest.importorskip("boto.plugin") except AttributeError: - raise pytest.skip(reason="moto/moto error") + raise pytest.skip("moto/moto error") moto = pytest.importorskip('moto') test_s3_files = [ From ce0e25cc37101a063cd8923dc7a3f5c8d69d1e4c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 4 Dec 2018 15:29:30 -0600 Subject: [PATCH 9/9] try marking single... --- pandas/tests/io/json/test_pandas.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index d047970ce2f08..ed954c76294b6 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -846,6 +846,7 @@ def test_misc_example(self): assert_frame_equal(result, expected) @network + @pytest.mark.single def test_round_trip_exception_(self): # GH 3867 csv = 'https://raw.github.com/hayd/lahman2012/master/csvs/Teams.csv' @@ -856,6 +857,7 @@ def test_round_trip_exception_(self): index=df.index, columns=df.columns), df) @network + @pytest.mark.single def test_url(self): url = 'https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5' # noqa result = read_json(url, convert_dates=True)