From d1778521ff9a5ee60e704ccfc77ea8fc4878948d Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Thu, 17 Mar 2022 14:53:49 +0100 Subject: [PATCH 01/46] Added test representative of #46319. Should fail on CI --- pandas/tests/io/formats/test_format.py | 48 +++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index adcaeba5cfd8d..51b664b0364a2 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2,9 +2,13 @@ Test output formatting for Series/DataFrame, including to_string & reprs """ -from datetime import datetime +from datetime import ( + datetime, + time, +) from io import StringIO import itertools +import locale from operator import methodcaller import os from pathlib import Path @@ -47,6 +51,13 @@ use_32bit_repr = is_platform_windows() or not IS64 +def get_local_am_pm(): + """Return the AM and PM strings returned by strftime in current locale.""" + am_local = time(1).strftime("%p") + pm_local = time(13).strftime("%p") + return am_local, pm_local + + @pytest.fixture(params=["string", "pathlike", "buffer"]) def filepath_or_buffer_id(request): """ @@ -3167,6 +3178,41 @@ def test_str(self): assert str(NaT) == "NaT" +class TestPeriodIndexFormat: + def test_period_custom_locale(self, overridden_locale): + # GH#46319 locale-specific formatting directive leads to non-utf8 str result + + # Get locale-specific reference + am_local, pm_local = get_local_am_pm() + + # Scalar + per = pd.Period("2018-03-11 13:00", freq="H") + assert per.strftime("%p") == pm_local + + # Index + per = pd.period_range("2003-01-01 01:00:00", periods=2, freq="12h") + formatted = per.format(date_format="%y %I:%M:%S%p") + assert formatted[0] == f"03 01:00:00{am_local}" + assert formatted[1] == f"03 01:00:00{pm_local}" + + +@pytest.fixture(params=[None, "fr_FR", "zh_CN"]) +def overridden_locale(request): + """A fixture used to temporarily change the locale""" + old = locale.setlocale(locale.LC_ALL) + target = request.param + if target is None: + yield old + else: + try: + locale.setlocale(locale.LC_ALL, target) + except locale.Error as e: + pytest.skip(f"Skipping as locale cannot be set. {type(e).__name__}: {e}") + else: + yield target + locale.setlocale(locale.LC_ALL, old) + + class TestDatetimeIndexFormat: def test_datetime(self): formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format() From c09b76d5c207aae3841567be50c092f75203eb43 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Thu, 17 Mar 2022 16:22:00 +0100 Subject: [PATCH 02/46] Added a gha worker with non utf 8 zh_CN encoding --- .github/workflows/posix.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 4380e8dfa2e57..249a962bef878 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -49,6 +49,12 @@ jobs: lang: "zh_CN.utf8" lc_all: "zh_CN.utf8" name: "Locale: zh_CN.utf8" + - env_file: actions-38.yaml + pattern: "not slow and not network and not single_cpu" + extra_apt: "language-pack-zh-hans" + lang: "zh_CN.GB2312" + lc_all: "zh_CN.GB2312" + name: "Locale: zh_CN.GB2312" - env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" pandas_data_manager: "array" From 9dff1533f0d36775d30229e480de094b9eaa58b5 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Thu, 17 Mar 2022 17:26:50 +0100 Subject: [PATCH 03/46] Attempt to fix the encoding so that locale works --- .github/workflows/posix.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 249a962bef878..9389469db5c0e 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -52,9 +52,9 @@ jobs: - env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" extra_apt: "language-pack-zh-hans" - lang: "zh_CN.GB2312" - lc_all: "zh_CN.GB2312" - name: "Locale: zh_CN.GB2312" + lang: "zh_CN.gb2312" + lc_all: "zh_CN.gb2312" + name: "Locale: zh_CN.gb2312" - env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" pandas_data_manager: "array" From 2fa0736352de518ac893c375d0dba038b27596fc Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Thu, 17 Mar 2022 18:18:03 +0100 Subject: [PATCH 04/46] Added the fix, but not using it for now, until CI is able to reproduce the issue. --- pandas/_libs/tslibs/period.pyx | 2 ++ pandas/_libs/tslibs/util.pxd | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 54cae834d7024..0173f98c5556f 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1256,6 +1256,8 @@ cdef str _period_strftime(int64_t value, int freq, bytes fmt): formatted = c_strftime(&dts, fmt) + # Decode result according to current locale + # TODO use char_to_string_locale once the CI is able to reproduce the issue result = util.char_to_string(formatted) free(formatted) diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index 150516aadffc6..31dac776391e9 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -27,6 +27,9 @@ cdef extern from "Python.h": const char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t* length) except NULL + object PyUnicode_DecodeLocale(const char *str, const char *errors) nogil + + from numpy cimport ( float64_t, int64_t, @@ -220,3 +223,8 @@ cdef inline const char* get_c_string_buf_and_size(str py_string, cdef inline const char* get_c_string(str py_string) except NULL: return get_c_string_buf_and_size(py_string, NULL) + + +cdef inline object char_to_string_locale(const char* data): + """As opposed to PyUnicode_FromString, use current system locale to decode.""" + return PyUnicode_DecodeLocale(data, NULL) From 6b85fb235d2b907e35754e8d5b5efd57dd6c7406 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Fri, 18 Mar 2022 15:48:47 +0100 Subject: [PATCH 05/46] Crazy idea: maybe simply removing the .utf8 modifier will use the right encoding ! --- .github/workflows/posix.yml | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 9389469db5c0e..f7a396e551d5d 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -46,15 +46,9 @@ jobs: - env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" extra_apt: "language-pack-zh-hans" - lang: "zh_CN.utf8" - lc_all: "zh_CN.utf8" - name: "Locale: zh_CN.utf8" - - env_file: actions-38.yaml - pattern: "not slow and not network and not single_cpu" - extra_apt: "language-pack-zh-hans" - lang: "zh_CN.gb2312" - lc_all: "zh_CN.gb2312" - name: "Locale: zh_CN.gb2312" + lang: "zh_CN" + lc_all: "zh_CN" + name: "Locale: zh_CN (gb2312)" - env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" pandas_data_manager: "array" From 409d196b8b5a9ff21a515b06344d00376370814a Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Fri, 18 Mar 2022 18:12:00 +0100 Subject: [PATCH 06/46] Hopefully fixing the locale not available error --- .github/actions/build_pandas/action.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/actions/build_pandas/action.yml b/.github/actions/build_pandas/action.yml index 2e4bfea165316..b472ee618a8d6 100644 --- a/.github/actions/build_pandas/action.yml +++ b/.github/actions/build_pandas/action.yml @@ -10,6 +10,18 @@ runs: conda list shell: bash -l {0} + - name: Switching (possibly gen) the locale + run: | + less /usr/share/i18n/SUPPORTED + sudo locale-gen ${LANG} + sudo update-locale LANG=${LANG} + shell: bash -l {0} + + - name: Locale test with date print + run: | + date + shell: bash -l {0} + - name: Build Pandas run: | python setup.py build_ext -j 2 From 4d09db9f7cc59e54b8533c6887873e2fdfc0daf5 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Fri, 18 Mar 2022 20:57:51 +0100 Subject: [PATCH 07/46] Now simply generating the locale, not updating the ubuntu one --- .github/actions/build_pandas/action.yml | 12 ------------ .github/workflows/posix.yml | 6 ++++++ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/.github/actions/build_pandas/action.yml b/.github/actions/build_pandas/action.yml index b472ee618a8d6..2e4bfea165316 100644 --- a/.github/actions/build_pandas/action.yml +++ b/.github/actions/build_pandas/action.yml @@ -10,18 +10,6 @@ runs: conda list shell: bash -l {0} - - name: Switching (possibly gen) the locale - run: | - less /usr/share/i18n/SUPPORTED - sudo locale-gen ${LANG} - sudo update-locale LANG=${LANG} - shell: bash -l {0} - - - name: Locale test with date print - run: | - date - shell: bash -l {0} - - name: Build Pandas run: | python setup.py build_ext -j 2 diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index f7a396e551d5d..77e89640fa00b 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -138,6 +138,12 @@ jobs: # xsel for clipboard tests run: sudo apt-get update && sudo apt-get install -y libc6-dev-i386 xsel ${{ env.EXTRA_APT }} + - name: Generate the locale + run: | + less /usr/share/i18n/SUPPORTED + sudo locale-gen ${LANG} + # sudo update-locale LANG=${LANG} + - uses: conda-incubator/setup-miniconda@v2 with: mamba-version: "*" From 3c1b80c7873834f2957d628b9bd41500ebbd7b38 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Fri, 18 Mar 2022 21:26:32 +0100 Subject: [PATCH 08/46] Trying to install the locale without enabling it --- .github/workflows/posix.yml | 72 +++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 77e89640fa00b..36222748d3c50 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -24,50 +24,52 @@ jobs: timeout-minutes: 120 strategy: matrix: - env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml] - pattern: ["not single_cpu", "single_cpu"] + env_file: [actions-38.yaml] #, actions-39.yaml, actions-310.yaml] + pattern: ["not single_cpu"] #, "single_cpu"] # Don't test pyarrow v2/3: Causes timeouts in read_csv engine # even if tests are skipped/xfailed - pyarrow_version: ["5", "7"] + pyarrow_version: ["5"] # , "7"] include: - - env_file: actions-38-downstream_compat.yaml - pattern: "not slow and not network and not single_cpu" - pytest_target: "pandas/tests/test_downstream.py" - name: "Downstream Compat" - - env_file: actions-38-minimum_versions.yaml - pattern: "not slow and not network and not single_cpu" - name: "Minimum Versions" - - env_file: actions-38.yaml - pattern: "not slow and not network and not single_cpu" - extra_apt: "language-pack-it" - lang: "it_IT.utf8" - lc_all: "it_IT.utf8" - name: "Locale: it_IT.utf8" +# - env_file: actions-38-downstream_compat.yaml +# pattern: "not slow and not network and not single_cpu" +# pytest_target: "pandas/tests/test_downstream.py" +# name: "Downstream Compat" +# - env_file: actions-38-minimum_versions.yaml +# pattern: "not slow and not network and not single_cpu" +# name: "Minimum Versions" +# - env_file: actions-38.yaml +# pattern: "not slow and not network and not single_cpu" +# extra_apt: "language-pack-it" +# lang: "it_IT.utf8" +# lc_all: "it_IT.utf8" +# name: "Locale: it_IT.utf8" - env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" extra_apt: "language-pack-zh-hans" - lang: "zh_CN" - lc_all: "zh_CN" - name: "Locale: zh_CN (gb2312)" - - env_file: actions-38.yaml - pattern: "not slow and not network and not single_cpu" - pandas_data_manager: "array" - name: "Data Manager" - - env_file: actions-pypy-38.yaml - pattern: "not slow and not network and not single_cpu" - test_args: "--max-worker-restart 0" - name: "Pypy" - - env_file: actions-310-numpydev.yaml - pattern: "not slow and not network and not single_cpu" - pandas_testing_mode: "deprecate" - test_args: "-W error" - name: "Numpy Dev" + extra_loc: "zh_CN" + lang: "zh_CN.utf-8" + lc_all: "zh_CN.utf-8" + name: "Locale: zh_CN.utf-8" +# - env_file: actions-38.yaml +# pattern: "not slow and not network and not single_cpu" +# pandas_data_manager: "array" +# name: "Data Manager" +# - env_file: actions-pypy-38.yaml +# pattern: "not slow and not network and not single_cpu" +# test_args: "--max-worker-restart 0" +# name: "Pypy" +# - env_file: actions-310-numpydev.yaml +# pattern: "not slow and not network and not single_cpu" +# pandas_testing_mode: "deprecate" +# test_args: "-W error" +# name: "Numpy Dev" fail-fast: false name: ${{ matrix.name || format('{0} pyarrow={1} {2}', matrix.env_file, matrix.pyarrow_version, matrix.pattern) }} env: ENV_FILE: ci/deps/${{ matrix.env_file }} PATTERN: ${{ matrix.pattern }} EXTRA_APT: ${{ matrix.extra_apt || '' }} + EXTRA_LOC: ${{ matrix.extra_loc || '' }} LANG: ${{ matrix.lang || '' }} LC_ALL: ${{ matrix.lc_all || '' }} PANDAS_TESTING_MODE: ${{ matrix.pandas_testing_mode || '' }} @@ -138,10 +140,10 @@ jobs: # xsel for clipboard tests run: sudo apt-get update && sudo apt-get install -y libc6-dev-i386 xsel ${{ env.EXTRA_APT }} - - name: Generate the locale + - name: Generate locales run: | - less /usr/share/i18n/SUPPORTED - sudo locale-gen ${LANG} + sudo locale-gen en_EN ${{ env.EXTRA_LOC }} + # less /usr/share/i18n/SUPPORTED # sudo update-locale LANG=${LANG} - uses: conda-incubator/setup-miniconda@v2 From 4c687a8ad96d365b12b5814100a3bc490522ed44 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Fri, 18 Mar 2022 21:31:38 +0100 Subject: [PATCH 09/46] Stupid mistake --- .github/workflows/posix.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 36222748d3c50..99df31c835e32 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -47,9 +47,9 @@ jobs: pattern: "not slow and not network and not single_cpu" extra_apt: "language-pack-zh-hans" extra_loc: "zh_CN" - lang: "zh_CN.utf-8" - lc_all: "zh_CN.utf-8" - name: "Locale: zh_CN.utf-8" + lang: "zh_CN.utf8" + lc_all: "zh_CN.utf8" + name: "Locale: zh_CN.utf8" # - env_file: actions-38.yaml # pattern: "not slow and not network and not single_cpu" # pandas_data_manager: "array" @@ -142,7 +142,7 @@ jobs: - name: Generate locales run: | - sudo locale-gen en_EN ${{ env.EXTRA_LOC }} + sudo locale-gen ${{ env.EXTRA_LOC }} # less /usr/share/i18n/SUPPORTED # sudo update-locale LANG=${LANG} From e01e051a4742a3ec05e6b6796cc9270fde16a82a Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Sat, 19 Mar 2022 11:04:08 +0100 Subject: [PATCH 10/46] Testing the optional locale generator condition --- .github/workflows/posix.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 99df31c835e32..052998aa95a30 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -30,10 +30,10 @@ jobs: # even if tests are skipped/xfailed pyarrow_version: ["5"] # , "7"] include: -# - env_file: actions-38-downstream_compat.yaml -# pattern: "not slow and not network and not single_cpu" -# pytest_target: "pandas/tests/test_downstream.py" -# name: "Downstream Compat" + - env_file: actions-38-downstream_compat.yaml + pattern: "not slow and not network and not single_cpu" + pytest_target: "pandas/tests/test_downstream.py" + name: "Downstream Compat" # - env_file: actions-38-minimum_versions.yaml # pattern: "not slow and not network and not single_cpu" # name: "Minimum Versions" @@ -145,6 +145,7 @@ jobs: sudo locale-gen ${{ env.EXTRA_LOC }} # less /usr/share/i18n/SUPPORTED # sudo update-locale LANG=${LANG} + if: ${{ env.EXTRA_LOC != '' }} - uses: conda-incubator/setup-miniconda@v2 with: From a53524992e0d2694997d87078a9e196552543a78 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Sat, 19 Mar 2022 11:25:56 +0100 Subject: [PATCH 11/46] Put back all runners --- .github/workflows/posix.yml | 52 ++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 052998aa95a30..f33b5de2d1b70 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -24,25 +24,25 @@ jobs: timeout-minutes: 120 strategy: matrix: - env_file: [actions-38.yaml] #, actions-39.yaml, actions-310.yaml] - pattern: ["not single_cpu"] #, "single_cpu"] + env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml] + pattern: ["not single_cpu", "single_cpu"] # Don't test pyarrow v2/3: Causes timeouts in read_csv engine # even if tests are skipped/xfailed - pyarrow_version: ["5"] # , "7"] + pyarrow_version: ["5", "7"] include: - env_file: actions-38-downstream_compat.yaml pattern: "not slow and not network and not single_cpu" pytest_target: "pandas/tests/test_downstream.py" name: "Downstream Compat" -# - env_file: actions-38-minimum_versions.yaml -# pattern: "not slow and not network and not single_cpu" -# name: "Minimum Versions" -# - env_file: actions-38.yaml -# pattern: "not slow and not network and not single_cpu" -# extra_apt: "language-pack-it" -# lang: "it_IT.utf8" -# lc_all: "it_IT.utf8" -# name: "Locale: it_IT.utf8" + - env_file: actions-38-minimum_versions.yaml + pattern: "not slow and not network and not single_cpu" + name: "Minimum Versions" + - env_file: actions-38.yaml + pattern: "not slow and not network and not single_cpu" + extra_apt: "language-pack-it" + lang: "it_IT.utf8" + lc_all: "it_IT.utf8" + name: "Locale: it_IT.utf8" - env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" extra_apt: "language-pack-zh-hans" @@ -50,19 +50,19 @@ jobs: lang: "zh_CN.utf8" lc_all: "zh_CN.utf8" name: "Locale: zh_CN.utf8" -# - env_file: actions-38.yaml -# pattern: "not slow and not network and not single_cpu" -# pandas_data_manager: "array" -# name: "Data Manager" -# - env_file: actions-pypy-38.yaml -# pattern: "not slow and not network and not single_cpu" -# test_args: "--max-worker-restart 0" -# name: "Pypy" -# - env_file: actions-310-numpydev.yaml -# pattern: "not slow and not network and not single_cpu" -# pandas_testing_mode: "deprecate" -# test_args: "-W error" -# name: "Numpy Dev" + - env_file: actions-38.yaml + pattern: "not slow and not network and not single_cpu" + pandas_data_manager: "array" + name: "Data Manager" + - env_file: actions-pypy-38.yaml + pattern: "not slow and not network and not single_cpu" + test_args: "--max-worker-restart 0" + name: "Pypy" + - env_file: actions-310-numpydev.yaml + pattern: "not slow and not network and not single_cpu" + pandas_testing_mode: "deprecate" + test_args: "-W error" + name: "Numpy Dev" fail-fast: false name: ${{ matrix.name || format('{0} pyarrow={1} {2}', matrix.env_file, matrix.pyarrow_version, matrix.pattern) }} env: @@ -140,7 +140,7 @@ jobs: # xsel for clipboard tests run: sudo apt-get update && sudo apt-get install -y libc6-dev-i386 xsel ${{ env.EXTRA_APT }} - - name: Generate locales + - name: Generate extra locales run: | sudo locale-gen ${{ env.EXTRA_LOC }} # less /usr/share/i18n/SUPPORTED From b4ad6dd19daec1b62c0f31afbe18b88085edfce6 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Sat, 19 Mar 2022 11:26:20 +0100 Subject: [PATCH 12/46] Added whatsnew --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index b90a877012e6f..d6bd27fa89803 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -443,6 +443,7 @@ Period ^^^^^^ - Bug in subtraction of :class:`Period` from :class:`PeriodArray` returning wrong results (:issue:`45999`) - Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, directives ``%l`` and ``%u`` were giving wrong results (:issue:`46252`) +- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, Unicode decoding error when a locale-specific directive is used (:issue:`46319`) - Plotting From 0eafd80e638ab2f4652c3ff9274d316c9385093a Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Sat, 19 Mar 2022 11:26:39 +0100 Subject: [PATCH 13/46] Now using the fix --- pandas/_libs/tslibs/period.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index f7b6d82970554..be4f290a03f05 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1237,8 +1237,7 @@ cdef str _period_strftime(int64_t value, int freq, bytes fmt): formatted = c_strftime(&dts, fmt) # Decode result according to current locale - # TODO use char_to_string_locale once the CI is able to reproduce the issue - result = util.char_to_string(formatted) + result = util.char_to_string_locale(formatted) free(formatted) # Now we will fill the placeholders corresponding to our additional directives From b721238be8e0d7d03e4182322803797e74b0feda Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Sat, 19 Mar 2022 11:27:16 +0100 Subject: [PATCH 14/46] As per code review: moved locale-switching fixture `overridden_locale` to conftest --- pandas/conftest.py | 34 ++++++++++++++++++++++++++ pandas/tests/io/formats/test_format.py | 17 ------------- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 8c10a0375d4da..24ba45a06170e 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -28,6 +28,7 @@ timezone, ) from decimal import Decimal +import locale import operator import os @@ -1202,6 +1203,39 @@ def utc_fixture(request): utc_fixture2 = utc_fixture +@pytest.fixture( + params=[ + pytest.param(None, id=str(locale.getlocale())), + "it_IT.utf8", + "zh_CN", + ] +) +def overridden_locale(request): + """ + Fixture to temporarily change the locale. + + If a locale cannot be set (because it is not available on the host) + the test is skipped. + """ + old = locale.setlocale(locale.LC_ALL) + target = request.param + if target is None: + # Current locale - don't change + yield old + else: + try: + # Try changing the locale. + locale.setlocale(locale.LC_ALL, target) + except locale.Error as e: + # Not available on this host. Skip test. + pytest.skip(f"Skipping as locale cannot be set. {type(e).__name__}: {e}") + else: + # Run test with the temporary local + yield target + # Set back to normal + locale.setlocale(locale.LC_ALL, old) + + # ---------------------------------------------------------------- # Dtypes # ---------------------------------------------------------------- diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 6cd4337b080be..b953b8dd86ee5 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3252,23 +3252,6 @@ def test_period_custom_locale(self, overridden_locale): assert formatted[1] == f"03 01:00:00{pm_local}" -@pytest.fixture(params=[None, "fr_FR", "zh_CN"]) -def overridden_locale(request): - """A fixture used to temporarily change the locale""" - old = locale.setlocale(locale.LC_ALL) - target = request.param - if target is None: - yield old - else: - try: - locale.setlocale(locale.LC_ALL, target) - except locale.Error as e: - pytest.skip(f"Skipping as locale cannot be set. {type(e).__name__}: {e}") - else: - yield target - locale.setlocale(locale.LC_ALL, old) - - class TestDatetimeIndexFormat: def test_datetime(self): formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format() From cebed786322e0fd5de1ced09de9735f3e3a6c85c Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Sat, 19 Mar 2022 14:12:47 +0100 Subject: [PATCH 15/46] Flake8 --- pandas/tests/io/formats/test_format.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index b953b8dd86ee5..c41f69575103c 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -7,7 +7,6 @@ ) from io import StringIO import itertools -import locale from operator import methodcaller import os from pathlib import Path From a25f3a9e3bd024b43c0073059a4b480b4b443dd4 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Sat, 19 Mar 2022 14:20:23 +0100 Subject: [PATCH 16/46] Added comments on the runner --- .github/workflows/posix.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index f33b5de2d1b70..18d80de888c73 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -46,10 +46,13 @@ jobs: - env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" extra_apt: "language-pack-zh-hans" - extra_loc: "zh_CN" + # Use the utf8 version as the default, it has no bad side-effect. lang: "zh_CN.utf8" lc_all: "zh_CN.utf8" - name: "Locale: zh_CN.utf8" + # Also install zh_CN (its encoding is gb2312) but do not activate it. + # It will be temporarily activated during tests with locale.setlocale + extra_loc: "zh_CN" + name: "Locale: zh_CN.utf8 + extra zh_CN (gb2312)" - env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" pandas_data_manager: "array" @@ -141,10 +144,9 @@ jobs: run: sudo apt-get update && sudo apt-get install -y libc6-dev-i386 xsel ${{ env.EXTRA_APT }} - name: Generate extra locales + # These extra locales will be available for locale.setlocale() calls in tests run: | sudo locale-gen ${{ env.EXTRA_LOC }} - # less /usr/share/i18n/SUPPORTED - # sudo update-locale LANG=${LANG} if: ${{ env.EXTRA_LOC != '' }} - uses: conda-incubator/setup-miniconda@v2 From d175c36cf1b97f3a7ff0534aa62be4c998366d07 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Sun, 20 Mar 2022 16:21:31 +0100 Subject: [PATCH 17/46] Added a non-utf8 locale in the `it_IT` runner. Added the zh_CN.utf8 locale in the tests --- .github/workflows/posix.yml | 8 ++++++-- pandas/conftest.py | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 18d80de888c73..3a8fad02b35d2 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -40,9 +40,13 @@ jobs: - env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" extra_apt: "language-pack-it" + # Use the utf8 version as the default, it has no bad side-effect. lang: "it_IT.utf8" lc_all: "it_IT.utf8" - name: "Locale: it_IT.utf8" + # Also install it_IT (its encoding is ISO8859-1) but do not activate it. + # It will be temporarily activated during tests with locale.setlocale + extra_loc: "it_IT" + name: "Locale: it_IT" - env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" extra_apt: "language-pack-zh-hans" @@ -52,7 +56,7 @@ jobs: # Also install zh_CN (its encoding is gb2312) but do not activate it. # It will be temporarily activated during tests with locale.setlocale extra_loc: "zh_CN" - name: "Locale: zh_CN.utf8 + extra zh_CN (gb2312)" + name: "Locale: zh_CN" - env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" pandas_data_manager: "array" diff --git a/pandas/conftest.py b/pandas/conftest.py index 24ba45a06170e..fc2128e90906f 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1207,6 +1207,8 @@ def utc_fixture(request): params=[ pytest.param(None, id=str(locale.getlocale())), "it_IT.utf8", + "it_IT", + "zh_CN.utf8", "zh_CN", ] ) From 184e4804717720d0b972a9ab733d8777a5de317d Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Sun, 20 Mar 2022 16:23:03 +0100 Subject: [PATCH 18/46] Improved readability of fixture `overridden_locale` as per code review --- pandas/conftest.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index fc2128e90906f..3d1f5a1b34931 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1219,23 +1219,18 @@ def overridden_locale(request): If a locale cannot be set (because it is not available on the host) the test is skipped. """ - old = locale.setlocale(locale.LC_ALL) target = request.param if target is None: - # Current locale - don't change - yield old + # Use current locale for this test. + yield locale.setlocale(locale.LC_ALL) else: - try: - # Try changing the locale. - locale.setlocale(locale.LC_ALL, target) - except locale.Error as e: - # Not available on this host. Skip test. - pytest.skip(f"Skipping as locale cannot be set. {type(e).__name__}: {e}") + if tm.can_set_locale(target, locale.LC_ALL): + # Change locale temporarily for this test. + with tm.set_locale(target, locale.LC_ALL): + yield target else: - # Run test with the temporary local - yield target - # Set back to normal - locale.setlocale(locale.LC_ALL, old) + # Not available on this host. Skip test. + pytest.skip(f"Skipping as locale {repr(locale)} cannot be set on host.") # ---------------------------------------------------------------- From e5f306207543910a0a0d51d1354cb53ba3e26833 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Mon, 21 Mar 2022 09:23:03 +0100 Subject: [PATCH 19/46] Added two comments on default encoding --- pandas/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 3d1f5a1b34931..15c4e3a7dfe34 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1207,9 +1207,9 @@ def utc_fixture(request): params=[ pytest.param(None, id=str(locale.getlocale())), "it_IT.utf8", - "it_IT", + "it_IT", # Note: encoding will be 'ISO8859-1' "zh_CN.utf8", - "zh_CN", + "zh_CN", # Note: encoding will be 'gb2312' ] ) def overridden_locale(request): From 22b0ae49e3b90f4c02b3b028253ec7c1ea5e9e73 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Mon, 21 Mar 2022 09:26:48 +0100 Subject: [PATCH 20/46] Fixed #46319 by adding a new `char_to_string_locale` function in the `tslibs.util` module, able to decode char* using the current locale. --- pandas/tests/io/formats/test_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index c41f69575103c..ad81b5e8e93b4 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3235,7 +3235,7 @@ def test_period_tz(self): assert per.format()[0] == "2013-01-01 00:00" def test_period_custom_locale(self, overridden_locale): - # GH#46319 locale-specific formatting directive leads to non-utf8 str result + # GH#46319 locale-specific directive leads to non-utf8 c strftime char* result # Get locale-specific reference am_local, pm_local = get_local_am_pm() From bbaa0c1130bdc03ae10e98537790471394cecc71 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Mon, 21 Mar 2022 10:01:08 +0100 Subject: [PATCH 21/46] As per code review: modified the test to contain non-utf8 chars. Fixed the resulting issue. --- pandas/_libs/tslibs/period.pyx | 4 +++- pandas/_libs/tslibs/util.pxd | 6 ++++++ pandas/tests/io/formats/test_format.py | 6 +++--- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index be4f290a03f05..fa616e6869bf6 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1167,7 +1167,9 @@ cdef str period_format(int64_t value, int freq, object fmt=None): return "NaT" if isinstance(fmt, str): - fmt = fmt.encode("utf-8") + # Encode using current locale, in case fmt contains non-utf8 chars + # fmt = fmt.encode("utf-8") + fmt = util.string_encode_locale(fmt) if fmt is None: freq_group = get_freq_group(freq) diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index 31dac776391e9..62a55db9d916b 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -27,6 +27,7 @@ cdef extern from "Python.h": const char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t* length) except NULL + object PyUnicode_EncodeLocale(object obj, const char *errors) nogil object PyUnicode_DecodeLocale(const char *str, const char *errors) nogil @@ -225,6 +226,11 @@ cdef inline const char* get_c_string(str py_string) except NULL: return get_c_string_buf_and_size(py_string, NULL) +cdef inline bytes string_encode_locale(str py_string): + """As opposed to PyUnicode_Encode, use current system locale to encode.""" + return PyUnicode_EncodeLocale(py_string, NULL) + + cdef inline object char_to_string_locale(const char* data): """As opposed to PyUnicode_FromString, use current system locale to decode.""" return PyUnicode_DecodeLocale(data, NULL) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index ad81b5e8e93b4..a446244028bb9 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3246,9 +3246,9 @@ def test_period_custom_locale(self, overridden_locale): # Index per = pd.period_range("2003-01-01 01:00:00", periods=2, freq="12h") - formatted = per.format(date_format="%y %I:%M:%S%p") - assert formatted[0] == f"03 01:00:00{am_local}" - assert formatted[1] == f"03 01:00:00{pm_local}" + formatted = per.format(date_format="%y é %I:%M:%S%p") + assert formatted[0] == f"03 é 01:00:00{am_local}" + assert formatted[1] == f"03 é 01:00:00{pm_local}" class TestDatetimeIndexFormat: From f6857d692cc8e0140e19dfd861802a0ca672f57d Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Mon, 21 Mar 2022 10:12:26 +0100 Subject: [PATCH 22/46] Split the test in two for clarity --- pandas/tests/io/formats/test_format.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index a446244028bb9..96904e54b2fb6 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3234,7 +3234,20 @@ def test_period_tz(self): per = dt.to_period(freq="H") assert per.format()[0] == "2013-01-01 00:00" - def test_period_custom_locale(self, overridden_locale): + def test_period_non_utf8_fmt(self, overridden_locale): + # GH#46319 non-utf8 input format string leads to wrong output + + # Scalar + per = pd.Period("2018-03-11 13:00", freq="H") + assert per.strftime("%y é") == "03 é" + + # Index + per = pd.period_range("2003-01-01 01:00:00", periods=2, freq="12h") + formatted = per.format(date_format="%y é") + assert formatted[0] == f"03 é" + assert formatted[1] == f"03 é" + + def test_period_custom_locale_directive(self, overridden_locale): # GH#46319 locale-specific directive leads to non-utf8 c strftime char* result # Get locale-specific reference @@ -3246,9 +3259,9 @@ def test_period_custom_locale(self, overridden_locale): # Index per = pd.period_range("2003-01-01 01:00:00", periods=2, freq="12h") - formatted = per.format(date_format="%y é %I:%M:%S%p") - assert formatted[0] == f"03 é 01:00:00{am_local}" - assert formatted[1] == f"03 é 01:00:00{pm_local}" + formatted = per.format(date_format="%y %I:%M:%S%p") + assert formatted[0] == f"03 01:00:00{am_local}" + assert formatted[1] == f"03 01:00:00{pm_local}" class TestDatetimeIndexFormat: From 60c9e695825792dc86a666a4d45ded9bf7e75ab6 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Mon, 21 Mar 2022 10:14:34 +0100 Subject: [PATCH 23/46] Fixed test and flake8 error. --- pandas/tests/io/formats/test_format.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 96904e54b2fb6..f7c598acc286a 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3239,13 +3239,13 @@ def test_period_non_utf8_fmt(self, overridden_locale): # Scalar per = pd.Period("2018-03-11 13:00", freq="H") - assert per.strftime("%y é") == "03 é" + assert per.strftime("%y é") == "18 é" # Index per = pd.period_range("2003-01-01 01:00:00", periods=2, freq="12h") formatted = per.format(date_format="%y é") - assert formatted[0] == f"03 é" - assert formatted[1] == f"03 é" + assert formatted[0] == "03 é" + assert formatted[1] == "03 é" def test_period_custom_locale_directive(self, overridden_locale): # GH#46319 locale-specific directive leads to non-utf8 c strftime char* result From 06a956751defeca40d6db9b3cad4ce15c21bc137 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Tue, 22 Mar 2022 09:42:15 +0100 Subject: [PATCH 24/46] Updated whatsnew to ref #46468 . Updated test name --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/tests/io/formats/test_format.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index a38f88d05a724..4a969752b907e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -483,6 +483,7 @@ Period - Bug in subtraction of :class:`Period` from :class:`PeriodArray` returning wrong results (:issue:`45999`) - Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, directives ``%l`` and ``%u`` were giving wrong results (:issue:`46252`) - Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, Unicode decoding error when a locale-specific directive is used (:issue:`46319`) +- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, output incorrect results when non-ascii char is present in the formatting string (:issue:`46468`) - Plotting diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index f7c598acc286a..187a75bd16469 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3234,8 +3234,8 @@ def test_period_tz(self): per = dt.to_period(freq="H") assert per.format()[0] == "2013-01-01 00:00" - def test_period_non_utf8_fmt(self, overridden_locale): - # GH#46319 non-utf8 input format string leads to wrong output + def test_period_non_ascii_fmt(self, overridden_locale): + # GH#46468 non-ascii char in input format string leads to wrong output # Scalar per = pd.Period("2018-03-11 13:00", freq="H") From e5ab44e114f9d2f370d1dc5012a6064a9c9c9750 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sylvain=20Mari=C3=A9?= Date: Tue, 22 Mar 2022 21:55:10 +0100 Subject: [PATCH 25/46] Removing wrong whatsnew bullet --- doc/source/whatsnew/v1.5.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index b0b9b1fe402f2..cfb5898b95e92 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -490,7 +490,6 @@ Period - Bug in subtraction of :class:`Period` from :class:`PeriodArray` returning wrong results (:issue:`45999`) - Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, directives ``%l`` and ``%u`` were giving wrong results (:issue:`46252`) - Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, Unicode decoding error when a locale-specific directive is used (:issue:`46319`) -- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, output incorrect results when non-ascii char is present in the formatting string (:issue:`46468`) - Plotting From 0790e723aaba32831d0fae837396d13edd9fa33d Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Wed, 23 Mar 2022 09:28:07 +0100 Subject: [PATCH 26/46] Nitpick on whatsnew as per code review --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index e7a6ee5196333..f1a443f2a4f0c 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -504,7 +504,7 @@ Period ^^^^^^ - Bug in subtraction of :class:`Period` from :class:`PeriodArray` returning wrong results (:issue:`45999`) - Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, directives ``%l`` and ``%u`` were giving wrong results (:issue:`46252`) -- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, Unicode decoding error when a locale-specific directive is used (:issue:`46319`) +- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, raising `UnicodeDecodeError` when a locale-specific directive was passed (:issue:`46319`) - Plotting From d6914f42f75cb5a045b9fa43815699d254bc8a33 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Wed, 23 Mar 2022 12:33:22 +0100 Subject: [PATCH 27/46] Fixed build error rst directive --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index f1a443f2a4f0c..6f2055bddf696 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -504,7 +504,7 @@ Period ^^^^^^ - Bug in subtraction of :class:`Period` from :class:`PeriodArray` returning wrong results (:issue:`45999`) - Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, directives ``%l`` and ``%u`` were giving wrong results (:issue:`46252`) -- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, raising `UnicodeDecodeError` when a locale-specific directive was passed (:issue:`46319`) +- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, raising ``UnicodeDecodeError`` when a locale-specific directive was passed (:issue:`46319`) - Plotting From 0105ac903fa0f2eb077dc3ff87f4e44e8790cf30 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Fri, 1 Apr 2022 11:17:12 +0200 Subject: [PATCH 28/46] Names incorrectly reverted in last merge commit --- .github/workflows/posix.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 88e409a5f2692..c4306a063c8a1 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -37,7 +37,7 @@ jobs: - name: "Minimum Versions" env_file: actions-38-minimum_versions.yaml pattern: "not slow and not network and not single_cpu" - - name: "Locale: it_IT.utf8" + - name: "Locale: it_IT" env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" extra_apt: "language-pack-it" @@ -47,7 +47,7 @@ jobs: # Also install it_IT (its encoding is ISO8859-1) but do not activate it. # It will be temporarily activated during tests with locale.setlocale extra_loc: "it_IT" - - name: "Locale: zh_CN.utf8" + - name: "Locale: zh_CN" env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" extra_apt: "language-pack-zh-hans" From a1f3773ea008aa89477a549725ff49c77010a2d0 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Fri, 1 Apr 2022 16:09:55 +0200 Subject: [PATCH 29/46] Fixed test_localization so that #46595 can be demonstrated on windows targets (even if today these do not run on windows targets, see #46597) --- pandas/tests/config/test_localization.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/config/test_localization.py b/pandas/tests/config/test_localization.py index 21b1b7ed6ee65..3cc03ecc51f86 100644 --- a/pandas/tests/config/test_localization.py +++ b/pandas/tests/config/test_localization.py @@ -15,7 +15,7 @@ import pandas as pd _all_locales = get_locales() or [] -_current_locale = locale.getlocale() +_current_locale = locale.setlocale(locale.LC_ALL) # getlocale() is wrong, see GH#46595 # Don't run any of these tests if we are on Windows or have no locales. pytestmark = pytest.mark.skipif( @@ -95,7 +95,8 @@ def test_set_locale(lang, enc): assert normalized_locale == new_locale # Once we exit the "with" statement, locale should be back to what it was. - current_locale = locale.getlocale() + # current_locale = locale.getlocale() is wrong, see GH#46595 + current_locale = locale.setlocale(locale.LC_ALL) assert current_locale == _current_locale From 0fcea6c8039818deb90c1beea37d10503a5578e4 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Fri, 1 Apr 2022 16:20:01 +0200 Subject: [PATCH 30/46] Fixed `tm.set_locale` context manager, it could error and leak when category LC_ALL was used. Fixed #46595 --- pandas/_config/localization.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index 2a487fa4b6877..cfe3a408b992d 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -39,7 +39,8 @@ def set_locale( particular locale, without globally setting the locale. This probably isn't thread-safe. """ - current_locale = locale.getlocale() + # getlocale is wrong, see GH#46595 + current_locale = locale.setlocale(lc_var) try: locale.setlocale(lc_var, new_locale) From 7e183757118c02692fb3c19e473ec79c5a20f0dc Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Fri, 1 Apr 2022 16:24:12 +0200 Subject: [PATCH 31/46] Removed the fixture as per code review, and added corresponding parametrization in tests. --- pandas/conftest.py | 31 ----------- pandas/tests/io/formats/test_format.py | 74 +++++++++++++++++++------- 2 files changed, 54 insertions(+), 51 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 15c4e3a7dfe34..8c10a0375d4da 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -28,7 +28,6 @@ timezone, ) from decimal import Decimal -import locale import operator import os @@ -1203,36 +1202,6 @@ def utc_fixture(request): utc_fixture2 = utc_fixture -@pytest.fixture( - params=[ - pytest.param(None, id=str(locale.getlocale())), - "it_IT.utf8", - "it_IT", # Note: encoding will be 'ISO8859-1' - "zh_CN.utf8", - "zh_CN", # Note: encoding will be 'gb2312' - ] -) -def overridden_locale(request): - """ - Fixture to temporarily change the locale. - - If a locale cannot be set (because it is not available on the host) - the test is skipped. - """ - target = request.param - if target is None: - # Use current locale for this test. - yield locale.setlocale(locale.LC_ALL) - else: - if tm.can_set_locale(target, locale.LC_ALL): - # Change locale temporarily for this test. - with tm.set_locale(target, locale.LC_ALL): - yield target - else: - # Not available on this host. Skip test. - pytest.skip(f"Skipping as locale {repr(locale)} cannot be set on host.") - - # ---------------------------------------------------------------- # Dtypes # ---------------------------------------------------------------- diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 187a75bd16469..523d2bbdbe59e 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1,12 +1,14 @@ """ Test output formatting for Series/DataFrame, including to_string & reprs """ +from contextlib import nullcontext from datetime import ( datetime, time, ) from io import StringIO import itertools +import locale from operator import methodcaller import os from pathlib import Path @@ -3234,34 +3236,66 @@ def test_period_tz(self): per = dt.to_period(freq="H") assert per.format()[0] == "2013-01-01 00:00" - def test_period_non_ascii_fmt(self, overridden_locale): + @pytest.mark.parametrize( + "locale_str", + [ + pytest.param(None, id=str(locale.getlocale())), + "it_IT.utf8", + "it_IT", # Note: encoding will be 'ISO8859-1' + "zh_CN.utf8", + "zh_CN", # Note: encoding will be 'gb2312' + ], + ) + def test_period_non_ascii_fmt(self, locale_str): # GH#46468 non-ascii char in input format string leads to wrong output - # Scalar - per = pd.Period("2018-03-11 13:00", freq="H") - assert per.strftime("%y é") == "18 é" + # Skip if locale cannot be set + if locale_str is not None and not tm.can_set_locale(locale_str, locale.LC_ALL): + pytest.skip(f"Skipping as locale '{locale_str}' cannot be set on host.") + + # Change locale temporarily for this test. + with tm.set_locale(locale_str, locale.LC_ALL) if locale_str else nullcontext(): + # Scalar + per = pd.Period("2018-03-11 13:00", freq="H") + assert per.strftime("%y é") == "18 é" - # Index - per = pd.period_range("2003-01-01 01:00:00", periods=2, freq="12h") - formatted = per.format(date_format="%y é") - assert formatted[0] == "03 é" - assert formatted[1] == "03 é" + # Index + per = pd.period_range("2003-01-01 01:00:00", periods=2, freq="12h") + formatted = per.format(date_format="%y é") + assert formatted[0] == "03 é" + assert formatted[1] == "03 é" - def test_period_custom_locale_directive(self, overridden_locale): + @pytest.mark.parametrize( + "locale_str", + [ + pytest.param(None, id=str(locale.getlocale())), + "it_IT.utf8", + "it_IT", # Note: encoding will be 'ISO8859-1' + "zh_CN.utf8", + "zh_CN", # Note: encoding will be 'gb2312' + ], + ) + def test_period_custom_locale_directive(self, locale_str): # GH#46319 locale-specific directive leads to non-utf8 c strftime char* result - # Get locale-specific reference - am_local, pm_local = get_local_am_pm() + # Skip if locale cannot be set + if locale_str is not None and not tm.can_set_locale(locale_str, locale.LC_ALL): + pytest.skip(f"Skipping as locale '{locale_str}' cannot be set on host.") + + # Change locale temporarily for this test. + with tm.set_locale(locale_str, locale.LC_ALL) if locale_str else nullcontext(): + # Get locale-specific reference + am_local, pm_local = get_local_am_pm() - # Scalar - per = pd.Period("2018-03-11 13:00", freq="H") - assert per.strftime("%p") == pm_local + # Scalar + per = pd.Period("2018-03-11 13:00", freq="H") + assert per.strftime("%p") == pm_local - # Index - per = pd.period_range("2003-01-01 01:00:00", periods=2, freq="12h") - formatted = per.format(date_format="%y %I:%M:%S%p") - assert formatted[0] == f"03 01:00:00{am_local}" - assert formatted[1] == f"03 01:00:00{pm_local}" + # Index + per = pd.period_range("2003-01-01 01:00:00", periods=2, freq="12h") + formatted = per.format(date_format="%y %I:%M:%S%p") + assert formatted[0] == f"03 01:00:00{am_local}" + assert formatted[1] == f"03 01:00:00{pm_local}" class TestDatetimeIndexFormat: From a1c6d83853fbf57936d81c984d16678b1500f080 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Fri, 1 Apr 2022 16:34:42 +0200 Subject: [PATCH 32/46] Dummy mod to trigger CI again --- pandas/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index 8c10a0375d4da..688fdeafed015 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -19,6 +19,7 @@ """ # pyright: reportUntypedFunctionDecorator = false + from collections import abc from datetime import ( date, From 78ac13d39b03bc0b1119574ce1f2856d6742c57f Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Fri, 1 Apr 2022 16:34:56 +0200 Subject: [PATCH 33/46] reverted dummy mod --- pandas/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 688fdeafed015..8c10a0375d4da 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -19,7 +19,6 @@ """ # pyright: reportUntypedFunctionDecorator = false - from collections import abc from datetime import ( date, From fcd2ce25c0e2b5860a03d7fb41de2af6882833f8 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Tue, 12 Jul 2022 15:49:22 +0200 Subject: [PATCH 34/46] Attempt to fix the remaining error on the numpy worker --- pandas/_libs/tslibs/period.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index c41f865e3c345..844b61a58b2e4 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1646,7 +1646,7 @@ cdef class _Period(PeriodMixin): return freq @classmethod - def _from_ordinal(cls, ordinal: int, freq) -> "Period": + def _from_ordinal(cls, ordinal: int64_t, freq) -> "Period": """ Fast creation from an ordinal and freq that are already validated! """ From b69b0744a04012a0f07a3c58b12c9a64bab66551 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Tue, 12 Jul 2022 16:06:17 +0200 Subject: [PATCH 35/46] Fixed issue in `_from_ordinal` --- pandas/_libs/tslibs/period.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 844b61a58b2e4..7605c91361b2c 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1654,7 +1654,7 @@ cdef class _Period(PeriodMixin): return NaT else: freq = cls._maybe_convert_freq(freq) - self = _Period.__new__(cls, ordinal, freq) + self = _Period.__new__(cls, ordinal=ordinal, freq=freq) return self def __richcmp__(self, other, op): From 65c3a1df28fd95b3cb6749d2842dd89164594eba Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Tue, 12 Jul 2022 18:40:47 +0200 Subject: [PATCH 36/46] Added asserts to try to understand --- pandas/tests/extension/base/methods.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 838c9f5b8a35f..6d59cb090984e 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -500,6 +500,7 @@ def test_where_series(self, data, na_value, as_frame): @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]]) def test_repeat(self, data, repeats, as_series, use_numpy): arr = type(data)._from_sequence(data[:3], dtype=data.dtype) + assert type(arr) is type(data) if as_series: arr = pd.Series(arr) @@ -508,6 +509,7 @@ def test_repeat(self, data, repeats, as_series, use_numpy): repeats = [repeats] * 3 if isinstance(repeats, int) else repeats expected = [x for x, n in zip(arr, repeats) for _ in range(n)] expected = type(data)._from_sequence(expected, dtype=data.dtype) + assert type(expected) is type(data) if as_series: expected = pd.Series(expected, index=arr.index.repeat(repeats)) From e5bca9f15d340414f518834723a1dae82af4f3a4 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Wed, 13 Jul 2022 14:35:50 +0200 Subject: [PATCH 37/46] Reverted debugging asserts and applied fix for numpy repeat from #47670. --- pandas/_libs/arrays.pyx | 2 +- pandas/tests/extension/base/methods.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 8895a2bcfca89..97631e1bfe078 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -157,7 +157,7 @@ cdef class NDArrayBacked: return self._from_backing_data(res_values) # TODO: pass NPY_MAXDIMS equiv to axis=None? - def repeat(self, repeats, axis: int = 0): + def repeat(self, repeats, axis = 0): if axis is None: axis = 0 res_values = cnp.PyArray_Repeat(self._ndarray, repeats, axis) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 6d59cb090984e..838c9f5b8a35f 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -500,7 +500,6 @@ def test_where_series(self, data, na_value, as_frame): @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]]) def test_repeat(self, data, repeats, as_series, use_numpy): arr = type(data)._from_sequence(data[:3], dtype=data.dtype) - assert type(arr) is type(data) if as_series: arr = pd.Series(arr) @@ -509,7 +508,6 @@ def test_repeat(self, data, repeats, as_series, use_numpy): repeats = [repeats] * 3 if isinstance(repeats, int) else repeats expected = [x for x, n in zip(arr, repeats) for _ in range(n)] expected = type(data)._from_sequence(expected, dtype=data.dtype) - assert type(expected) is type(data) if as_series: expected = pd.Series(expected, index=arr.index.repeat(repeats)) From fbfefecc08c59d81923fc52481bd1805c2388be4 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Wed, 13 Jul 2022 18:31:38 +0200 Subject: [PATCH 38/46] Fixed the last issue on numpy dev: a TypeError message had changed --- pandas/tests/io/parser/test_quoting.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_quoting.py b/pandas/tests/io/parser/test_quoting.py index 456dd049d2f4a..0869eba1a87fb 100644 --- a/pandas/tests/io/parser/test_quoting.py +++ b/pandas/tests/io/parser/test_quoting.py @@ -5,6 +5,7 @@ import csv from io import StringIO +import re import pytest @@ -38,7 +39,14 @@ def test_bad_quote_char(all_parsers, kwargs, msg): @pytest.mark.parametrize( "quoting,msg", [ - ("foo", '"quoting" must be an integer'), + ( + "foo", + # This error message changed across versions + '"quoting" must be an integer|' + + re.escape( + "Argument 'quoting' has incorrect type (expected int, got str)" + ), + ), (5, 'bad "quoting" value'), # quoting must be in the range [0, 3] ], ) From 4d026cf81d966dab9e372deb4566fc2c963841ef Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Sat, 3 Sep 2022 14:39:08 +0200 Subject: [PATCH 39/46] Code review: Removed `EXTRA_LOC` --- .github/workflows/ubuntu.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index ec53e84f51321..fb2e7f0bb069e 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -88,7 +88,6 @@ jobs: ENV_FILE: ci/deps/${{ matrix.env_file }} PATTERN: ${{ matrix.pattern }} EXTRA_APT: ${{ matrix.extra_apt || '' }} - EXTRA_LOC: ${{ matrix.extra_loc || '' }} LANG: ${{ matrix.lang || '' }} LC_ALL: ${{ matrix.lc_all || '' }} PANDAS_TESTING_MODE: ${{ matrix.pandas_testing_mode || '' }} @@ -153,8 +152,8 @@ jobs: - name: Generate extra locales # These extra locales will be available for locale.setlocale() calls in tests run: | - sudo locale-gen ${{ env.EXTRA_LOC }} - if: ${{ env.EXTRA_LOC != '' }} + sudo locale-gen ${{ matrix.extra_loc }} + if: ${{ matrix.extra_loc }} - name: Set up Conda uses: ./.github/actions/setup-conda From cf3be808e806ad164ec5d14d1847a945785ea6d7 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Sat, 3 Sep 2022 14:44:48 +0200 Subject: [PATCH 40/46] Code review: removed commented line --- pandas/_libs/tslibs/period.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 7605c91361b2c..a47c69ea9939f 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1159,7 +1159,6 @@ cdef str period_format(int64_t value, int freq, object fmt=None): if isinstance(fmt, str): # Encode using current locale, in case fmt contains non-utf8 chars - # fmt = fmt.encode("utf-8") fmt = util.string_encode_locale(fmt) if fmt is None: From 8f65bd98a0e7f41eb638493747912518f966091e Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Sat, 3 Sep 2022 14:47:48 +0200 Subject: [PATCH 41/46] Code review: reverted out of scope change --- pandas/_libs/tslibs/period.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index a47c69ea9939f..7e3883065ee5e 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1653,7 +1653,7 @@ cdef class _Period(PeriodMixin): return NaT else: freq = cls._maybe_convert_freq(freq) - self = _Period.__new__(cls, ordinal=ordinal, freq=freq) + self = _Period.__new__(cls, ordinal, freq) return self def __richcmp__(self, other, op): From 7b414a9f0dd7dfb82f223b4d2bc329747553b33f Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Sat, 3 Sep 2022 14:49:44 +0200 Subject: [PATCH 42/46] Code review: reverted out of scope change --- pandas/tests/io/parser/test_quoting.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/pandas/tests/io/parser/test_quoting.py b/pandas/tests/io/parser/test_quoting.py index 0869eba1a87fb..5205a08d4c53f 100644 --- a/pandas/tests/io/parser/test_quoting.py +++ b/pandas/tests/io/parser/test_quoting.py @@ -39,15 +39,7 @@ def test_bad_quote_char(all_parsers, kwargs, msg): @pytest.mark.parametrize( "quoting,msg", [ - ( - "foo", - # This error message changed across versions - '"quoting" must be an integer|' - + re.escape( - "Argument 'quoting' has incorrect type (expected int, got str)" - ), - ), - (5, 'bad "quoting" value'), # quoting must be in the range [0, 3] + ("foo", '"quoting" must be an integer|Argument'), ], ) def test_bad_quoting(all_parsers, quoting, msg): From 59b4fe31713c51871b224b42fc91cf09132e56ba Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Sat, 3 Sep 2022 22:32:43 +0200 Subject: [PATCH 43/46] Fixed unused import --- pandas/tests/io/parser/test_quoting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/parser/test_quoting.py b/pandas/tests/io/parser/test_quoting.py index 1e6e07859f5bf..990480746667b 100644 --- a/pandas/tests/io/parser/test_quoting.py +++ b/pandas/tests/io/parser/test_quoting.py @@ -5,7 +5,6 @@ import csv from io import StringIO -import re import pytest From 4950e06ffecb9da622036d03973fffc830e67f7f Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Tue, 6 Sep 2022 23:03:25 +0200 Subject: [PATCH 44/46] Fixed revert mistake --- pandas/tests/io/parser/test_quoting.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/parser/test_quoting.py b/pandas/tests/io/parser/test_quoting.py index 990480746667b..37bd41e2bea6c 100644 --- a/pandas/tests/io/parser/test_quoting.py +++ b/pandas/tests/io/parser/test_quoting.py @@ -40,6 +40,7 @@ def test_bad_quote_char(all_parsers, kwargs, msg): "quoting,msg", [ ("foo", '"quoting" must be an integer|Argument'), + (5, 'bad "quoting" value'), # quoting must be in the range [0, 3] ], ) def test_bad_quoting(all_parsers, quoting, msg): From 14f854ba9076452806d633a0274a500693b6f7f7 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Tue, 6 Sep 2022 23:06:31 +0200 Subject: [PATCH 45/46] Moved whatsnew to 1.6.0 --- doc/source/whatsnew/v1.5.0.rst | 1 - doc/source/whatsnew/v1.6.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 375645c0a8d5d..8671b73526f80 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -1144,7 +1144,6 @@ Period - Bug in adding ``np.timedelta64("NaT", "ns")`` to a :class:`Period` with a timedelta-like freq incorrectly raising ``IncompatibleFrequency`` instead of returning ``NaT`` (:issue:`47196`) - Bug in adding an array of integers to an array with :class:`PeriodDtype` giving incorrect results when ``dtype.freq.n > 1`` (:issue:`47209`) - Bug in subtracting a :class:`Period` from an array with :class:`PeriodDtype` returning incorrect results instead of raising ``OverflowError`` when the operation overflows (:issue:`47538`) -- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, raising ``UnicodeDecodeError`` when a locale-specific directive was passed (:issue:`46319`) - Plotting diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 848e87f0bc029..42d251277df7b 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -174,7 +174,7 @@ I/O Period ^^^^^^ -- +- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, raising ``UnicodeDecodeError`` when a locale-specific directive was passed (:issue:`46319`) - Plotting From 90c31cb9e4df81e4b674e72bff428b230bea1a4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sylvain=20Mari=C3=A9?= Date: Tue, 6 Sep 2022 23:07:58 +0200 Subject: [PATCH 46/46] Update pandas/tests/io/parser/test_quoting.py --- pandas/tests/io/parser/test_quoting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_quoting.py b/pandas/tests/io/parser/test_quoting.py index 37bd41e2bea6c..025a612dc47d2 100644 --- a/pandas/tests/io/parser/test_quoting.py +++ b/pandas/tests/io/parser/test_quoting.py @@ -40,7 +40,7 @@ def test_bad_quote_char(all_parsers, kwargs, msg): "quoting,msg", [ ("foo", '"quoting" must be an integer|Argument'), - (5, 'bad "quoting" value'), # quoting must be in the range [0, 3] + (5, 'bad "quoting" value'), # quoting must be in the range [0, 3] ], ) def test_bad_quoting(all_parsers, quoting, msg):