From 57aca9a6df2e7ee014448bb93ee79915c8b42f39 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Thu, 15 Nov 2018 21:20:23 +0000 Subject: [PATCH 01/10] Fixing possible bugs in the CI --- ci/azure/linux.yml | 3 ++- ci/deps/azure-37-locale.yaml | 2 +- ci/script_multi.sh | 5 +++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/ci/azure/linux.yml b/ci/azure/linux.yml index b5a8e36d5097d..4a521f4084430 100644 --- a/ci/azure/linux.yml +++ b/ci/azure/linux.yml @@ -15,7 +15,7 @@ jobs: CONDA_ENV: pandas TEST_ARGS: "--skip-slow --skip-network" - py36_locale: + py37_locale: ENV_FILE: ci/deps/azure-37-locale.yaml CONDA_PY: "37" CONDA_ENV: pandas @@ -27,6 +27,7 @@ jobs: CONDA_PY: "36" CONDA_ENV: pandas TEST_ARGS: "--only-slow --skip-network" + LOCALE_OVERRIDE: "zh_CN.UTF-8" steps: - script: | diff --git a/ci/deps/azure-37-locale.yaml b/ci/deps/azure-37-locale.yaml index 59c8818eaef1e..b702b08618e0a 100644 --- a/ci/deps/azure-37-locale.yaml +++ b/ci/deps/azure-37-locale.yaml @@ -18,7 +18,7 @@ dependencies: - pymysql - pytables - python-dateutil - - python=3.6* + - python=3.7* - pytz - s3fs - scipy diff --git a/ci/script_multi.sh b/ci/script_multi.sh index e56d5da7232b2..a5a77f9df1daa 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -6,6 +6,7 @@ source activate pandas if [ -n "$LOCALE_OVERRIDE" ]; then export LC_ALL="$LOCALE_OVERRIDE"; + export LANG="$LOCALE_OVERRIDE"; echo "Setting LC_ALL to $LOCALE_OVERRIDE" pycmd='import pandas; print("pandas detected console encoding: %s" % pandas.get_option("display.encoding"))' @@ -32,8 +33,8 @@ elif [ "$COVERAGE" ]; then elif [ "$SLOW" ]; then TEST_ARGS="--only-slow --skip-network" - echo pytest -m "not single and slow" -v --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas - pytest -m "not single and slow" -v --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas + echo pytest -n 2 -m "not single" -v --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS 
pandas + pytest -n 2 -m "not single" -v --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas else echo pytest -n 2 -m "not single" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas From 08ccb8010de95e9448f219237c81cdbc531fd39f Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 20 Nov 2018 17:12:55 +0000 Subject: [PATCH 02/10] Moving Python 3.7 test to 3.6, as moto is failing on 3.7 at the moment --- ci/deps/azure-37-locale.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/deps/azure-37-locale.yaml b/ci/deps/azure-37-locale.yaml index b702b08618e0a..c4735c92c465a 100644 --- a/ci/deps/azure-37-locale.yaml +++ b/ci/deps/azure-37-locale.yaml @@ -18,7 +18,10 @@ dependencies: - pymysql - pytables - python-dateutil - - python=3.7* + # XXX We should be testing ``python=3.7*`` here, but `moto` is + # failing with py3.7 at the moment. + # See: https://github.com/pandas-dev/pandas/pull/23727/files#r234240004 + - python=3.6* - pytz - s3fs - scipy From cfc89a550d8e0c5f1d62c8b02acc02c465873cb6 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 20 Nov 2018 17:15:04 +0000 Subject: [PATCH 03/10] Testing with Italian locale (was supposed to be tested, but Chinese was used instead) --- ci/azure/linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure/linux.yml b/ci/azure/linux.yml index 4a521f4084430..23800f6719bfe 100644 --- a/ci/azure/linux.yml +++ b/ci/azure/linux.yml @@ -27,7 +27,7 @@ jobs: CONDA_PY: "36" CONDA_ENV: pandas TEST_ARGS: "--only-slow --skip-network" - LOCALE_OVERRIDE: "zh_CN.UTF-8" + LOCALE_OVERRIDE: "it_IT.UTF-8" steps: - script: | From c8d20b1d84c1a4980169c841f76157c603510dac Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 20 Nov 2018 19:56:17 +0000 Subject: [PATCH 04/10] Restoring -m 'not single and slow' to see if removing it caused the very verbose log --- ci/script_multi.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/ci/script_multi.sh b/ci/script_multi.sh index a5a77f9df1daa..aab79c2a8a012 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -33,8 +33,10 @@ elif [ "$COVERAGE" ]; then elif [ "$SLOW" ]; then TEST_ARGS="--only-slow --skip-network" - echo pytest -n 2 -m "not single" -v --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas - pytest -n 2 -m "not single" -v --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas + # XXX adding `-n 2` and removing the " and slow" caused the log to be extremly verbose, and made the job fail because of it + # restoring the " and slow" to see if that is the problem + echo pytest -n 2 -m "not single and slow" -v --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas + pytest -n 2 -m "not single and slow" -v --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas else echo pytest -n 2 -m "not single" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas From 9718fd5efea86c8f7050c6bc74c9a95afdb3477e Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 21 Nov 2018 01:12:07 +0000 Subject: [PATCH 05/10] calling multi/slow tests in a consistent way with the rest --- ci/script_multi.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ci/script_multi.sh b/ci/script_multi.sh index aab79c2a8a012..ff64a8f403a89 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -33,10 +33,8 @@ elif [ "$COVERAGE" ]; then elif [ "$SLOW" ]; then TEST_ARGS="--only-slow --skip-network" - # XXX adding `-n 2` and removing the " and slow" caused the log to be extremly verbose, and made the job fail because of it - # restoring the " and slow" to see if that is the problem - echo pytest -n 2 -m "not single and slow" -v --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas - pytest -n 2 -m "not single and slow" -v --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas + echo pytest -n 2 -m 
"not single" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas + pytest -n 2 -m "not single" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas else echo pytest -n 2 -m "not single" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas From b3f57446f38e691f592a509dfda3e0c27df085b4 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 21 Nov 2018 02:28:28 +0000 Subject: [PATCH 06/10] Reducing verbosity of slow tests, to avoid reaching the maximum size of the log in travis --- ci/script_multi.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/script_multi.sh b/ci/script_multi.sh index ff64a8f403a89..13a12eb1127f1 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -33,8 +33,8 @@ elif [ "$COVERAGE" ]; then elif [ "$SLOW" ]; then TEST_ARGS="--only-slow --skip-network" - echo pytest -n 2 -m "not single" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas - pytest -n 2 -m "not single" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas + echo pytest -q -n 2 -m "not single" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas + pytest -q -n 2 -m "not single" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas else echo pytest -n 2 -m "not single" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas From 22b8fc20044b26c842094ec5f381eea4bfddb53e Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 21 Nov 2018 13:15:17 +0000 Subject: [PATCH 07/10] restoring -m slow so the build passes, and added comment explaining why --- ci/script_multi.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ci/script_multi.sh b/ci/script_multi.sh index 13a12eb1127f1..98dd5137bf5fb 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -33,8 +33,12 @@ elif [ "$COVERAGE" ]; then elif [ "$SLOW" ]; then TEST_ARGS="--only-slow --skip-network" - 
echo pytest -q -n 2 -m "not single" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas - pytest -q -n 2 -m "not single" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas + # The `-m " and slow"` is redundant here, as `--only-slow` is already used (via $TEST_ARGS). But is needed, because with + # `--only-slow` fast tests are skipped, but each of them is printed in the log (which can be avoided with `-q`), + # and also added to `test-data-multiple.xml`, and then printed in the log in the call to `ci/print_skipped.py`. + # Printing them to the log makes the log exceed the maximum size allowed by Travis and makes the build fail. + echo pytest -n 2 -m "not single and slow" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas + pytest -n 2 -m "not single and slow" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas else echo pytest -n 2 -m "not single" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas From 5afef71d6497eaad402e1ce4f954d7aefad95190 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 21 Nov 2018 14:42:03 +0000 Subject: [PATCH 08/10] Moving azure 37 build back to 3.7, and installing moto from pip, as conda version is not py37 compatible --- ci/deps/azure-37-locale.yaml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ci/deps/azure-37-locale.yaml b/ci/deps/azure-37-locale.yaml index c4735c92c465a..064886d5cd965 100644 --- a/ci/deps/azure-37-locale.yaml +++ b/ci/deps/azure-37-locale.yaml @@ -18,10 +18,7 @@ dependencies: - pymysql - pytables - python-dateutil - # XXX We should be testing ``python=3.7*`` here, but `moto` is - # failing with py3.7 at the moment. 
- # See: https://github.com/pandas-dev/pandas/pull/23727/files#r234240004 - - python=3.6* + - python=3.7* - pytz - s3fs - scipy @@ -33,6 +30,6 @@ dependencies: # universal - pytest - pytest-xdist - - moto - pip: - hypothesis>=3.58.0 + - moto # latest moto in conda-forge fails with 3.7, move to conda dependencies when this is fixed From 6f17fe53129a592dcfbe4a2a54301f4ddb234242 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 21 Nov 2018 22:45:43 +0000 Subject: [PATCH 09/10] Filtering xlrd warnings in Python 3.7 --- pandas/tests/io/test_excel.py | 40 ++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 49a3a3d58672d..13f828fcd1c06 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -115,8 +115,14 @@ def test_usecols_int(self, ext): index_col=0, usecols=3) with tm.assert_produces_warning(FutureWarning): - df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0, parse_cols=3) + with warnings.catch_warnings(): + # Ignore xlrd time.clock DeprecationWarning in py37 + warnings.filterwarnings( + action='ignore', + message='time.clock has been deprecated', + category=DeprecationWarning) + df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + index_col=0, parse_cols=3) # TODO add index to xls file) tm.assert_frame_equal(df1, dfref, check_names=False) @@ -134,8 +140,14 @@ def test_usecols_list(self, ext): index_col=0, usecols=[0, 2, 3]) with tm.assert_produces_warning(FutureWarning): - df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0, parse_cols=[0, 2, 3]) + with warnings.catch_warnings(): + # Ignore xlrd time.clock DeprecationWarning in py37 + warnings.filterwarnings( + action='ignore', + message='time.clock has been deprecated', + category=DeprecationWarning) + df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + index_col=0, parse_cols=[0, 2, 3]) # TODO add index to 
xls file) tm.assert_frame_equal(df1, dfref, check_names=False) @@ -154,8 +166,14 @@ def test_usecols_str(self, ext): index_col=0, usecols='A:D') with tm.assert_produces_warning(FutureWarning): - df4 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], - index_col=0, parse_cols='A:D') + with warnings.catch_warnings(): + # Ignore xlrd time.clock DeprecationWarning in py37 + warnings.filterwarnings( + action='ignore', + message='time.clock has been deprecated', + category=DeprecationWarning) + df4 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], + index_col=0, parse_cols='A:D') # TODO add index to xls, read xls ignores index name ? tm.assert_frame_equal(df2, df1, check_names=False) @@ -597,8 +615,14 @@ def test_sheet_name_and_sheetname(self, ext): df1 = self.get_exceldf(filename, ext, sheet_name=sheet_name, index_col=0) # doc with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - df2 = self.get_exceldf(filename, ext, index_col=0, - sheetname=sheet_name) # backward compat + with warnings.catch_warnings(): + # Ignore xlrd time.clock DeprecationWarning in py37 + warnings.filterwarnings( + action='ignore', + message='time.clock has been deprecated', + category=DeprecationWarning) + df2 = self.get_exceldf(filename, ext, index_col=0, + sheetname=sheet_name) # backward compat excel = self.get_excelfile(filename, ext) df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc From e1a6f89db248f422ddb3cd67e97efa5246868d39 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Fri, 23 Nov 2018 08:59:15 +0000 Subject: [PATCH 10/10] Using a context manager to ignore xlrd warnings --- pandas/tests/io/test_excel.py | 86 ++++++++++++++--------------------- setup.cfg | 1 - 2 files changed, 35 insertions(+), 52 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index ccb372ffe04a0..741d03a8dc0c2 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1,31 +1,31 @@ -# pylint: disable=E1101 
-import os -import warnings -from datetime import datetime, date, time, timedelta +from collections import OrderedDict +import contextlib +from datetime import date, datetime, time, timedelta from distutils.version import LooseVersion from functools import partial +import os +import warnings from warnings import catch_warnings -from collections import OrderedDict import numpy as np -import pytest from numpy import nan +import pytest -import pandas as pd -import pandas.util.testing as tm +from pandas.compat import PY36, BytesIO, iteritems, map, range, u import pandas.util._test_decorators as td + +import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series -from pandas.compat import u, range, map, BytesIO, iteritems, PY36 -from pandas.core.config import set_option, get_option +from pandas.core.config import get_option, set_option +import pandas.util.testing as tm +from pandas.util.testing import ensure_clean, makeCustomDataframe as mkdf + from pandas.io.common import URLError from pandas.io.excel import ( - ExcelFile, ExcelWriter, read_excel, _XlwtWriter, _OpenpyxlWriter, - register_writer, _XlsxWriter -) + ExcelFile, ExcelWriter, _OpenpyxlWriter, _XlsxWriter, _XlwtWriter, + read_excel, register_writer) from pandas.io.formats.excel import ExcelFormatter from pandas.io.parsers import read_csv -from pandas.util.testing import ensure_clean, makeCustomDataframe as mkdf - _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() @@ -36,6 +36,20 @@ _mixed_frame['foo'] = 'bar' +@contextlib.contextmanager +def ignore_xlrd_time_clock_warning(): + """ + Context manager to ignore warnings raised by the xlrd library, + regarding the deprecation of `time.clock` in Python 3.7. 
+ """ + with warnings.catch_warnings(): + warnings.filterwarnings( + action='ignore', + message='time.clock has been deprecated', + category=DeprecationWarning) + yield + + @td.skip_if_no('xlrd', '1.0.0') class SharedItems(object): @@ -114,36 +128,21 @@ def test_usecols_int(self, ext): # usecols as int with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - with warnings.catch_warnings(): - # Ignore xlrd time.clock DeprecationWarning in py37 - warnings.filterwarnings( - action='ignore', - message='time.clock has been deprecated', - category=DeprecationWarning) + with ignore_xlrd_time_clock_warning(): df1 = self.get_exceldf("test1", ext, "Sheet1", index_col=0, usecols=3) # usecols as int with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - with warnings.catch_warnings(): - # Ignore xlrd time.clock DeprecationWarning in py37 - warnings.filterwarnings( - action='ignore', - message='time.clock has been deprecated', - category=DeprecationWarning) + with ignore_xlrd_time_clock_warning(): df2 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1], index_col=0, usecols=3) # parse_cols instead of usecols, usecols as int with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - with warnings.catch_warnings(): - # Ignore xlrd time.clock DeprecationWarning in py37 - warnings.filterwarnings( - action='ignore', - message='time.clock has been deprecated', - category=DeprecationWarning) + with ignore_xlrd_time_clock_warning(): df3 = self.get_exceldf("test1", ext, "Sheet2", skiprows=[1], index_col=0, parse_cols=3) @@ -163,12 +162,7 @@ def test_usecols_list(self, ext): index_col=0, usecols=[0, 2, 3]) with tm.assert_produces_warning(FutureWarning): - with warnings.catch_warnings(): - # Ignore xlrd time.clock DeprecationWarning in py37 - warnings.filterwarnings( - action='ignore', - message='time.clock has been deprecated', - category=DeprecationWarning) + with ignore_xlrd_time_clock_warning(): df3 = self.get_exceldf('test1', 
ext, 'Sheet2', skiprows=[1], index_col=0, parse_cols=[0, 2, 3]) @@ -189,12 +183,7 @@ def test_usecols_str(self, ext): index_col=0, usecols='A:D') with tm.assert_produces_warning(FutureWarning): - with warnings.catch_warnings(): - # Ignore xlrd time.clock DeprecationWarning in py37 - warnings.filterwarnings( - action='ignore', - message='time.clock has been deprecated', - category=DeprecationWarning) + with ignore_xlrd_time_clock_warning(): df4 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1], index_col=0, parse_cols='A:D') @@ -648,12 +637,7 @@ def test_sheet_name_and_sheetname(self, ext): df1 = self.get_exceldf(filename, ext, sheet_name=sheet_name, index_col=0) # doc with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - with warnings.catch_warnings(): - # Ignore xlrd time.clock DeprecationWarning in py37 - warnings.filterwarnings( - action='ignore', - message='time.clock has been deprecated', - category=DeprecationWarning) + with ignore_xlrd_time_clock_warning(): df2 = self.get_exceldf(filename, ext, index_col=0, sheetname=sheet_name) # backward compat diff --git a/setup.cfg b/setup.cfg index e8db1308741aa..eed4ddb3b61f3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -200,7 +200,6 @@ skip= pandas/tests/io/test_parquet.py, pandas/tests/io/generate_legacy_storage_files.py, pandas/tests/io/test_common.py, - pandas/tests/io/test_excel.py, pandas/tests/io/test_feather.py, pandas/tests/io/test_s3.py, pandas/tests/io/test_html.py,