From 707c3ef79323bb7b58c6712358e883c7574316e6 Mon Sep 17 00:00:00 2001 From: Tim Cera Date: Fri, 12 Jul 2019 16:34:26 -0400 Subject: [PATCH 1/3] BUG: Need 'windows-1252' encoding for locale names. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://github.com/pandas-dev/pandas/issues/24760 https://github.com/pandas-dev/pandas/issues/23638 There are some special characters encoded with 'windows-1252' in lists created by 'locale -a'. The two known locales with this problem are Norwegian 'bokmål', and 'français'. --- pandas/_config/localization.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index 9f750d8447c6a..46fccf03a55d7 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -142,10 +142,19 @@ def get_locales(prefix=None, normalize=True, locale_getter=_default_locale_gette # raw_locales is "\n" separated list of locales # it may contain non-decodable parts, so split # extract what we can and then rejoin. - raw_locales = raw_locales.split(b"\n") + raw_locales = raw_locales.split(b'\n') out_locales = [] for x in raw_locales: - out_locales.append(str(x, encoding=options.display.encoding)) + try: + out_locales.append(str( + x, encoding=options.display.encoding)) + except UnicodeError: + # 'locale -a' is used to populated 'raw_locales' and on + # Redhat 7 Linux (and maybe others) prints locale names + # using windows-1252 encoding. Bug only triggered by + # a few special characters and when there is an + # extensive list of installed locales. 
+ out_locales.append(str(x, encoding='windows-1252')) except TypeError: pass From 064507aceb62704882df8b9e2e894af6377de131 Mon Sep 17 00:00:00 2001 From: Tim Cera Date: Mon, 15 Jul 2019 22:19:08 -0400 Subject: [PATCH 2/3] black pandas --- pandas/_config/localization.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index 46fccf03a55d7..ba60b1e003004 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -142,19 +142,18 @@ def get_locales(prefix=None, normalize=True, locale_getter=_default_locale_gette # raw_locales is "\n" separated list of locales # it may contain non-decodable parts, so split # extract what we can and then rejoin. - raw_locales = raw_locales.split(b'\n') + raw_locales = raw_locales.split(b"\n") out_locales = [] for x in raw_locales: try: - out_locales.append(str( - x, encoding=options.display.encoding)) + out_locales.append(str(x, encoding=options.display.encoding)) except UnicodeError: # 'locale -a' is used to populated 'raw_locales' and on # Redhat 7 Linux (and maybe others) prints locale names # using windows-1252 encoding. Bug only triggered by # a few special characters and when there is an # extensive list of installed locales. 
- out_locales.append(str(x, encoding='windows-1252')) + out_locales.append(str(x, encoding="windows-1252")) except TypeError: pass From d7a1e004f4a7cd0593e1c4c3b2abbae286b1e966 Mon Sep 17 00:00:00 2001 From: Tim Cera Date: Fri, 11 Oct 2019 15:34:27 -0400 Subject: [PATCH 3/3] DOC: Added comment to whats new 1.0.0 --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index ea52736cb11a7..cc33d1120b51e 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -239,6 +239,7 @@ Datetimelike - Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`) - Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`) - Bug in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`) +- Bug in :func:`pandas._config.localization.get_locales` where the ``locale -a`` command encodes the locales list as windows-1252 (:issue:`23638`, :issue:`24760`, :issue:`27368`) Timedelta ^^^^^^^^^