From 5f5fb7c96804805a61efc475b17f25734894c340 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 27 Nov 2019 12:26:23 +0000 Subject: [PATCH 01/17] CLN: Clean up of locale testing --- ci/azure/posix.yml | 9 ++++++--- ci/run_tests.sh | 15 ++++++--------- ci/setup_env.sh | 6 +++--- pandas/tests/config/test_localization.py | 21 ++++++++++----------- 4 files changed, 25 insertions(+), 26 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index a10fd402b6733..24f34e92cb7d0 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -23,14 +23,16 @@ jobs: ENV_FILE: ci/deps/azure-36-locale.yaml CONDA_PY: "36" PATTERN: "slow" - LOCALE_OVERRIDE: "zh_CN.UTF-8" + LANG: "zh_CN.GB_2312-80" + LC_ALL: "zh_CN.GB_2312-80" EXTRA_APT: "language-pack-zh-hans" py36_locale_slow: ENV_FILE: ci/deps/azure-36-locale_slow.yaml CONDA_PY: "36" PATTERN: "not slow and not network" - LOCALE_OVERRIDE: "it_IT.UTF-8" + LANG: "it_IT.ISO-8859-1" + LC_ALL: "it_IT.ISO-8859-1" py36_32bit: ENV_FILE: ci/deps/azure-36-32bit.yaml @@ -42,7 +44,8 @@ jobs: ENV_FILE: ci/deps/azure-37-locale.yaml CONDA_PY: "37" PATTERN: "not slow and not network" - LOCALE_OVERRIDE: "zh_CN.UTF-8" + LANG: "zh_CN.GB_2312-80" + LC_ALL: "zh_CN.GB_2312-80" py37_np_dev: ENV_FILE: ci/deps/azure-37-numpydev.yaml diff --git a/ci/run_tests.sh b/ci/run_tests.sh index b91cfb3bed8cc..16820922c352c 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -5,15 +5,12 @@ # https://github.com/pytest-dev/pytest/issues/1075 export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') -if [ -n "$LOCALE_OVERRIDE" ]; then - export LC_ALL="$LOCALE_OVERRIDE" - export LANG="$LOCALE_OVERRIDE" - PANDAS_LOCALE=`python -c 'import pandas; pandas.get_option("display.encoding")'` - if [[ "$LOCALE_OVERRIDE" != "$PANDAS_LOCALE" ]]; then - echo "pandas could not detect the locale. 
System locale: $LOCALE_OVERRIDE, pandas detected: $PANDAS_LOCALE" - # TODO Not really aborting the tests until https://github.com/pandas-dev/pandas/issues/23923 is fixed - # exit 1 - fi +SYSTEM_ENCODING=`echo $LANG | cut -d. -f2` +SYSTEM_ENCODING="${SYSTEM_ENCODING:-UTF-8}" +PANDAS_DETECTED_ENCODING=`python -c 'import pandas; pandas.get_option("display.encoding")'` +if [[ "$SYSTEM_ENCODING" != "$PANDAS_DETECTED_ENCODING" ]]; then + echo "pandas could not detect the encoding. System encoding: $SYSTEM_ENCODING, pandas detected: $PANDAS_DETECTED_ENCODING" + exit 1 fi if [[ "not network" == *"$PATTERN"* ]]; then diff --git a/ci/setup_env.sh b/ci/setup_env.sh index 3d79c0cfd7000..cd2014a507fad 100755 --- a/ci/setup_env.sh +++ b/ci/setup_env.sh @@ -1,15 +1,15 @@ #!/bin/bash -e # edit the locale file if needed -if [ -n "$LOCALE_OVERRIDE" ]; then +if [ -n "$LANG" ]; then echo "Adding locale to the first line of pandas/__init__.py" rm -f pandas/__init__.pyc - SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LOCALE_OVERRIDE')\n" + SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LANG')\n" sed -i "$SEDC" pandas/__init__.py echo "[head -4 pandas/__init__.py]" head -4 pandas/__init__.py echo - sudo locale-gen "$LOCALE_OVERRIDE" + sudo locale-gen "$LANG" fi MINICONDA_DIR="$HOME/miniconda3" diff --git a/pandas/tests/config/test_localization.py b/pandas/tests/config/test_localization.py index 20a5be0c8a289..edc40946a30da 100644 --- a/pandas/tests/config/test_localization.py +++ b/pandas/tests/config/test_localization.py @@ -1,6 +1,5 @@ import codecs import locale -import os import pytest @@ -56,21 +55,21 @@ def test_get_locales_prefix(): @_skip_if_only_one_locale -def test_set_locale(): +@pytest.mark.parametrize( + "lang,enc", + [ + ("it_CH", "UTF-8"), + ("en_US", "ascii"), + ("zh_CN", "GB_2312-80"), + ("it_IT", "ISO-8859-1"), + ], +) +def test_set_locale(lang, enc): if all(x is None for x in _current_locale): # Not sure why, but on some Travis runs with pytest, # 
getlocale() returned (None, None). pytest.skip("Current locale is not set.") - locale_override = os.environ.get("LOCALE_OVERRIDE", None) - - if locale_override is None: - lang, enc = "it_CH", "UTF-8" - elif locale_override == "C": - lang, enc = "en_US", "ascii" - else: - lang, enc = locale_override.split(".") - enc = codecs.lookup(enc).name new_locale = lang, enc From 231f0c7bbef3ddbbe1470cace38ae6969f08d178 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 27 Nov 2019 12:43:54 +0000 Subject: [PATCH 02/17] Moving validation in run_tests.sh to a test --- ci/run_tests.sh | 8 -------- pandas/tests/config/test_localization.py | 21 ++++++++++++++------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 16820922c352c..586757183be63 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -5,14 +5,6 @@ # https://github.com/pytest-dev/pytest/issues/1075 export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') -SYSTEM_ENCODING=`echo $LANG | cut -d. -f2` -SYSTEM_ENCODING="${SYSTEM_ENCODING:-UTF-8}" -PANDAS_DETECTED_ENCODING=`python -c 'import pandas; pandas.get_option("display.encoding")'` -if [[ "$SYSTEM_ENCODING" != "$PANDAS_DETECTED_ENCODING" ]]; then - echo "pandas could not detect the encoding. 
System encoding: $SYSTEM_ENCODING, pandas detected: $PANDAS_DETECTED_ENCODING" - exit 1 -fi - if [[ "not network" == *"$PATTERN"* ]]; then export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4; fi diff --git a/pandas/tests/config/test_localization.py b/pandas/tests/config/test_localization.py index edc40946a30da..b5a1a6fed439a 100644 --- a/pandas/tests/config/test_localization.py +++ b/pandas/tests/config/test_localization.py @@ -1,5 +1,6 @@ import codecs import locale +import os import pytest @@ -7,6 +8,8 @@ from pandas.compat import is_platform_windows +import pandas as pd + _all_locales = get_locales() or [] _current_locale = locale.getlocale() @@ -56,13 +59,7 @@ def test_get_locales_prefix(): @_skip_if_only_one_locale @pytest.mark.parametrize( - "lang,enc", - [ - ("it_CH", "UTF-8"), - ("en_US", "ascii"), - ("zh_CN", "GB_2312-80"), - ("it_IT", "ISO-8859-1"), - ], + "lang,enc", [("it_CH", "UTF-8"), ("en_US", "ascii"), ("it_IT", "ISO-8859-1"),], ) def test_set_locale(lang, enc): if all(x is None for x in _current_locale): @@ -90,3 +87,13 @@ def test_set_locale(lang, enc): # Once we exit the "with" statement, locale should be back to what it was. 
current_locale = locale.getlocale() assert current_locale == _current_locale + + +def test_encoding_detected(): + system_locale = os.environ.get("LC_ALL") + if system_locale: + system_encoding = system_locale.split(".")[-1].upper().replace("-", "") + else: + system_encoding = "UTF8" + + assert pd.options.display.encoding == system_encoding From aaf37f44489741c0752ea44c37077fd30ac43d36 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 27 Nov 2019 12:48:52 +0000 Subject: [PATCH 03/17] Restoring Chinese encoding --- pandas/tests/config/test_localization.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/tests/config/test_localization.py b/pandas/tests/config/test_localization.py index b5a1a6fed439a..194bb5f8ac4c8 100644 --- a/pandas/tests/config/test_localization.py +++ b/pandas/tests/config/test_localization.py @@ -59,7 +59,13 @@ def test_get_locales_prefix(): @_skip_if_only_one_locale @pytest.mark.parametrize( - "lang,enc", [("it_CH", "UTF-8"), ("en_US", "ascii"), ("it_IT", "ISO-8859-1"),], + "lang,enc", + [ + ("it_CH", "UTF-8"), + ("en_US", "ascii"), + ("zh_CN", "GB2312"), + ("it_IT", "ISO-8859-1"), + ], ) def test_set_locale(lang, enc): if all(x is None for x in _current_locale): From d85f539bd3501c2f0577e7465d0ae62f104b6190 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 27 Nov 2019 12:51:38 +0000 Subject: [PATCH 04/17] Setting right encoding for Chinese in the CI --- ci/azure/posix.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 24f34e92cb7d0..02acc098693d3 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -23,8 +23,8 @@ jobs: ENV_FILE: ci/deps/azure-36-locale.yaml CONDA_PY: "36" PATTERN: "slow" - LANG: "zh_CN.GB_2312-80" - LC_ALL: "zh_CN.GB_2312-80" + LANG: "zh_CN.GB2312" + LC_ALL: "zh_CN.GB2312" EXTRA_APT: "language-pack-zh-hans" py36_locale_slow: @@ -44,8 +44,8 @@ jobs: ENV_FILE: ci/deps/azure-37-locale.yaml CONDA_PY: "37" PATTERN: 
"not slow and not network" - LANG: "zh_CN.GB_2312-80" - LC_ALL: "zh_CN.GB_2312-80" + LANG: "zh_CN.GB2312" + LC_ALL: "zh_CN.GB2312" py37_np_dev: ENV_FILE: ci/deps/azure-37-numpydev.yaml From f8c4101248df5a8903f5d8872226ff26b3d1411e Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 27 Nov 2019 12:52:19 +0000 Subject: [PATCH 05/17] Using LC_ALL instead of LANG, seems more consistent --- ci/setup_env.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/setup_env.sh b/ci/setup_env.sh index cd2014a507fad..c71897f8fe88a 100755 --- a/ci/setup_env.sh +++ b/ci/setup_env.sh @@ -1,15 +1,15 @@ #!/bin/bash -e # edit the locale file if needed -if [ -n "$LANG" ]; then +if [ -n "$LC_ALL" ]; then echo "Adding locale to the first line of pandas/__init__.py" rm -f pandas/__init__.pyc - SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LANG')\n" + SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LC_ALL')\n" sed -i "$SEDC" pandas/__init__.py echo "[head -4 pandas/__init__.py]" head -4 pandas/__init__.py echo - sudo locale-gen "$LANG" + sudo locale-gen "$LC_ALL" fi MINICONDA_DIR="$HOME/miniconda3" From b5aee6ad9c2d5d01ea8cd18719b9827fd47bdb9c Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 30 Nov 2019 23:28:12 +0000 Subject: [PATCH 06/17] Fixes to the CI (installing language pack, and normalizing encoding in test --- ci/setup_env.sh | 5 ++++- pandas/tests/config/test_localization.py | 10 +++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/ci/setup_env.sh b/ci/setup_env.sh index c71897f8fe88a..125d38919f946 100755 --- a/ci/setup_env.sh +++ b/ci/setup_env.sh @@ -2,14 +2,17 @@ # edit the locale file if needed if [ -n "$LC_ALL" ]; then + echo "sudo apt-get install -y language-pack-${LC_ALL:0:2}" + sudo apt-get install -y language-pack-${LC_ALL:0:2} + echo "Adding locale to the first line of pandas/__init__.py" rm -f pandas/__init__.pyc SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LC_ALL')\n" sed -i "$SEDC" 
pandas/__init__.py + echo "[head -4 pandas/__init__.py]" head -4 pandas/__init__.py echo - sudo locale-gen "$LC_ALL" fi MINICONDA_DIR="$HOME/miniconda3" diff --git a/pandas/tests/config/test_localization.py b/pandas/tests/config/test_localization.py index 194bb5f8ac4c8..e815a90207a08 100644 --- a/pandas/tests/config/test_localization.py +++ b/pandas/tests/config/test_localization.py @@ -97,9 +97,9 @@ def test_set_locale(lang, enc): def test_encoding_detected(): system_locale = os.environ.get("LC_ALL") - if system_locale: - system_encoding = system_locale.split(".")[-1].upper().replace("-", "") - else: - system_encoding = "UTF8" + system_encoding = system_locale.split(".")[-1] if system_locale else "utf-8" - assert pd.options.display.encoding == system_encoding + assert ( + codecs.lookup(pd.options.display.encoding).name + == codecs.lookup(system_encoding).name + ) From 56f6a6602e3a8f727f4b16621558cfdf7f332d70 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 30 Nov 2019 23:36:58 +0000 Subject: [PATCH 07/17] Working on error when installing language packages --- ci/azure/posix.yml | 4 ++++ ci/setup_env.sh | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 02acc098693d3..1f6b817aedfc6 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -19,6 +19,7 @@ jobs: ENV_FILE: ci/deps/azure-36-minimum_versions.yaml CONDA_PY: "36" PATTERN: "not slow and not network" + py36_locale_slow_old_np: ENV_FILE: ci/deps/azure-36-locale.yaml CONDA_PY: "36" @@ -33,6 +34,7 @@ jobs: PATTERN: "not slow and not network" LANG: "it_IT.ISO-8859-1" LC_ALL: "it_IT.ISO-8859-1" + EXTRA_APT: "language-pack-it" py36_32bit: ENV_FILE: ci/deps/azure-36-32bit.yaml @@ -46,6 +48,7 @@ jobs: PATTERN: "not slow and not network" LANG: "zh_CN.GB2312" LC_ALL: "zh_CN.GB2312" + EXTRA_APT: "language-pack-zh-hans" py37_np_dev: ENV_FILE: ci/deps/azure-37-numpydev.yaml @@ -58,6 +61,7 @@ jobs: steps: - script: | if [ "$(uname)" == "Linux" ]; then 
sudo apt-get install -y libc6-dev-i386 $EXTRA_APT; fi + uname -a echo '##vso[task.prependpath]$(HOME)/miniconda3/bin' echo "Creating Environment" ci/setup_env.sh diff --git a/ci/setup_env.sh b/ci/setup_env.sh index 125d38919f946..9651dc22831df 100755 --- a/ci/setup_env.sh +++ b/ci/setup_env.sh @@ -2,9 +2,6 @@ # edit the locale file if needed if [ -n "$LC_ALL" ]; then - echo "sudo apt-get install -y language-pack-${LC_ALL:0:2}" - sudo apt-get install -y language-pack-${LC_ALL:0:2} - echo "Adding locale to the first line of pandas/__init__.py" rm -f pandas/__init__.pyc SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LC_ALL')\n" From 1b65902511890ba954dd527521a0a9ef06e0cdb8 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 1 Dec 2019 00:11:20 +0000 Subject: [PATCH 08/17] Properly dividing CI steps --- ci/azure/posix.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 1f6b817aedfc6..ff85eb34757f1 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -59,12 +59,16 @@ jobs: EXTRA_APT: "xsel" steps: - - script: | - if [ "$(uname)" == "Linux" ]; then sudo apt-get install -y libc6-dev-i386 $EXTRA_APT; fi - uname -a - echo '##vso[task.prependpath]$(HOME)/miniconda3/bin' - echo "Creating Environment" - ci/setup_env.sh + - script: if [ "$(uname)" == "Linux" ]; then sudo apt-get install -y libc6-dev-i386 $EXTRA_APT; fi + displayName: 'Install extra packages' + + - sctipt: locale -a + displayName: 'Show available locales' + + - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin' + displayName: 'Set conda path' + + - script: ci/setup_env.sh displayName: 'Setup environment and build pandas' - script: | From cd26d14b6af99388f86cece29ca5f1177ef23c2b Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 1 Dec 2019 00:16:48 +0000 Subject: [PATCH 09/17] Fixed typo --- ci/azure/posix.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure/posix.yml 
b/ci/azure/posix.yml index ff85eb34757f1..4058f50a25878 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -62,7 +62,7 @@ jobs: - script: if [ "$(uname)" == "Linux" ]; then sudo apt-get install -y libc6-dev-i386 $EXTRA_APT; fi displayName: 'Install extra packages' - - sctipt: locale -a + - script: locale -a displayName: 'Show available locales' - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin' From 640b4c9f76d5c085b2832472689cb353a70509d6 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 1 Dec 2019 00:21:03 +0000 Subject: [PATCH 10/17] apt-get update --- ci/azure/posix.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 4058f50a25878..90ca309995ab4 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -59,7 +59,11 @@ jobs: EXTRA_APT: "xsel" steps: - - script: if [ "$(uname)" == "Linux" ]; then sudo apt-get install -y libc6-dev-i386 $EXTRA_APT; fi + - script: | + if [ "$(uname)" == "Linux" ]; then + sudo apt-get update + sudo apt-get install -y libc6-dev-i386 $EXTRA_APT + fi displayName: 'Install extra packages' - script: locale -a From 63b38e7e22883f7e88b5540d27abc91cad3f05c2 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 1 Dec 2019 00:49:07 +0000 Subject: [PATCH 11/17] Generate locale --- ci/azure/posix.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 90ca309995ab4..65ae9226a3c0b 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -59,6 +59,9 @@ jobs: EXTRA_APT: "xsel" steps: + - script: if [ -z $LANG ]; then sudo locale-gen $LANG; fi + displayName: 'Generate locale' + - script: | if [ "$(uname)" == "Linux" ]; then sudo apt-get update From 4238fc1c4f400e1ebed95ac82231331fbcec79ce Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 1 Dec 2019 00:55:34 +0000 Subject: [PATCH 12/17] Setting encoding to utf8, and removing extra output --- ci/azure/posix.yml | 20 ++++++++------------ 1 file changed, 8 
insertions(+), 12 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 65ae9226a3c0b..5b007e91a7a05 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -24,16 +24,18 @@ jobs: ENV_FILE: ci/deps/azure-36-locale.yaml CONDA_PY: "36" PATTERN: "slow" - LANG: "zh_CN.GB2312" - LC_ALL: "zh_CN.GB2312" + # pandas ignored the language (zh_CN), but should support diferent encodings (utf8) + # we should test with encodings different than utf8, but doesn't seem like Ubuntu support any + LANG: "zh_CN.utf8" + LC_ALL: "zh_CN.utf8" EXTRA_APT: "language-pack-zh-hans" py36_locale_slow: ENV_FILE: ci/deps/azure-36-locale_slow.yaml CONDA_PY: "36" PATTERN: "not slow and not network" - LANG: "it_IT.ISO-8859-1" - LC_ALL: "it_IT.ISO-8859-1" + LANG: "it_IT.utf8" + LC_ALL: "it_IT.utf8" EXTRA_APT: "language-pack-it" py36_32bit: @@ -46,8 +48,8 @@ jobs: ENV_FILE: ci/deps/azure-37-locale.yaml CONDA_PY: "37" PATTERN: "not slow and not network" - LANG: "zh_CN.GB2312" - LC_ALL: "zh_CN.GB2312" + LANG: "zh_CN.utf8" + LC_ALL: "zh_CN.utf8" EXTRA_APT: "language-pack-zh-hans" py37_np_dev: @@ -59,9 +61,6 @@ jobs: EXTRA_APT: "xsel" steps: - - script: if [ -z $LANG ]; then sudo locale-gen $LANG; fi - displayName: 'Generate locale' - - script: | if [ "$(uname)" == "Linux" ]; then sudo apt-get update @@ -69,9 +68,6 @@ jobs: fi displayName: 'Install extra packages' - - script: locale -a - displayName: 'Show available locales' - - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin' displayName: 'Set conda path' From 74f796b7b97dbf63266c468310f5b3f6fee800bb Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 1 Dec 2019 01:27:28 +0000 Subject: [PATCH 13/17] Fixing typos in comment, and not setting locale on mac --- ci/azure/posix.yml | 4 ++-- ci/setup_env.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 5b007e91a7a05..9bbc24ecdd528 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -24,8 +24,8 @@ jobs: 
ENV_FILE: ci/deps/azure-36-locale.yaml CONDA_PY: "36" PATTERN: "slow" - # pandas ignored the language (zh_CN), but should support diferent encodings (utf8) - # we should test with encodings different than utf8, but doesn't seem like Ubuntu support any + # pandas does not use the language (zh_CN), but should support different encodings (utf8) + # we should test with encodings different than utf8, but doesn't seem like Ubuntu supports any LANG: "zh_CN.utf8" LC_ALL: "zh_CN.utf8" EXTRA_APT: "language-pack-zh-hans" diff --git a/ci/setup_env.sh b/ci/setup_env.sh index 9651dc22831df..6653991a3a09b 100755 --- a/ci/setup_env.sh +++ b/ci/setup_env.sh @@ -1,7 +1,7 @@ #!/bin/bash -e # edit the locale file if needed -if [ -n "$LC_ALL" ]; then +if [[ "$(uname)" == "Linux" && -n "$LC_ALL" ]]; then echo "Adding locale to the first line of pandas/__init__.py" rm -f pandas/__init__.pyc SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LC_ALL')\n" From db9a483a87863de192b92e5fa691fc32ec98fd08 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 30 Dec 2019 01:13:23 +0000 Subject: [PATCH 14/17] Add localized message to tests --- pandas/tests/io/test_common.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index cfcd2c9f2df95..1e7becb5b7124 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -146,11 +146,12 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): msg3 = "Expected object or value" msg4 = "path_or_buf needs to be a string file path or file-like" msg5 = ( - r"\[Errno 2\] File .+does_not_exist\.{} does not exist:" - r" '.+does_not_exist\.{}'" - ).format(fn_ext, fn_ext) + fr"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist:" + fr" '.+does_not_exist\.{fn_ext}'" + ) + msg6 = fr"[Errno 2] 没有那个文件或目录: '.+does_not_exist.{fn_ext}'" with pytest.raises( - error_class, match=r"({}|{}|{}|{}|{})".format(msg1, msg2, msg3, msg4, 
msg5) + error_class, match=fr"({msg1}|{msg2}|{msg3}|{msg4}|{msg5}|{msg6})" ): reader(path) From 8c278dc6f67962c7d2dd73c3dac59ef07847e4a4 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 30 Dec 2019 01:40:41 +0000 Subject: [PATCH 15/17] Fixing unescaped characters in regex --- pandas/tests/io/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 1e7becb5b7124..63083d99af686 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -149,7 +149,7 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): fr"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist:" fr" '.+does_not_exist\.{fn_ext}'" ) - msg6 = fr"[Errno 2] 没有那个文件或目录: '.+does_not_exist.{fn_ext}'" + msg6 = fr"\[Errno 2\] 没有那个文件或目录: '.+does_not_exist.{fn_ext}'" with pytest.raises( error_class, match=fr"({msg1}|{msg2}|{msg3}|{msg4}|{msg5}|{msg6})" ): From 9ccedd75696f6332f3de7681dabdd16a83a91de5 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 30 Dec 2019 17:52:30 +0000 Subject: [PATCH 16/17] Adding more localized messages --- pandas/tests/io/test_common.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 63083d99af686..f8576efe3d494 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -149,9 +149,12 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): fr"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist:" fr" '.+does_not_exist\.{fn_ext}'" ) - msg6 = fr"\[Errno 2\] 没有那个文件或目录: '.+does_not_exist.{fn_ext}'" + msg6 = fr"\[Errno 2\] 没有那个文件或目录: '.+does_not_exist\.{fn_ext}'" + msg7 = ( + fr"\[Errno 2\] File o directory non esistente: '.+does_not_exist\.{fn_ext}'" + ) with pytest.raises( - error_class, match=fr"({msg1}|{msg2}|{msg3}|{msg4}|{msg5}|{msg6})" + error_class, 
match=fr"({msg1}|{msg2}|{msg3}|{msg4}|{msg5}|{msg6}|{msg7})" ): reader(path) @@ -178,17 +181,18 @@ def test_read_expands_user_home_dir( path = os.path.join("~", "does_not_exist." + fn_ext) monkeypatch.setattr(icom, "_expand_user", lambda x: os.path.join("foo", x)) - msg1 = r"File (b')?.+does_not_exist\.{}'? does not exist".format(fn_ext) + msg1 = fr"File (b')?.+does_not_exist\.{fn_ext}'? does not exist" msg2 = fr"\[Errno 2\] No such file or directory: '.+does_not_exist\.{fn_ext}'" msg3 = "Unexpected character found when decoding 'false'" msg4 = "path_or_buf needs to be a string file path or file-like" msg5 = ( - r"\[Errno 2\] File .+does_not_exist\.{} does not exist:" - r" '.+does_not_exist\.{}'" - ).format(fn_ext, fn_ext) + fr"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist:" + fr" '.+does_not_exist\.{fn_ext}'" + ) + msg6 = fr"[Errno 2] 没有那个文件或目录: '.+does_not_exist\.{fn_ext}'" with pytest.raises( - error_class, match=r"({}|{}|{}|{}|{})".format(msg1, msg2, msg3, msg4, msg5) + error_class, match=fr"({msg1}|{msg2}|{msg3}|{msg4}|{msg5}|{msg6})" ): reader(path) From c785e8ea979f38f4a3bab397d09818efe8909016 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 30 Dec 2019 18:21:08 +0000 Subject: [PATCH 17/17] Add missing localized message --- pandas/tests/io/test_common.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index f8576efe3d494..59d7f6f904337 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -189,10 +189,13 @@ def test_read_expands_user_home_dir( fr"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist:" fr" '.+does_not_exist\.{fn_ext}'" ) - msg6 = fr"[Errno 2] 没有那个文件或目录: '.+does_not_exist\.{fn_ext}'" + msg6 = fr"\[Errno 2\] 没有那个文件或目录: '.+does_not_exist\.{fn_ext}'" + msg7 = ( + fr"\[Errno 2\] File o directory non esistente: '.+does_not_exist\.{fn_ext}'" + ) with pytest.raises( - error_class, 
match=fr"({msg1}|{msg2}|{msg3}|{msg4}|{msg5}|{msg6})" + error_class, match=fr"({msg1}|{msg2}|{msg3}|{msg4}|{msg5}|{msg6}|{msg7})" ): reader(path)