From 34788572b229df05e57db0bf30c51a18ad0bf165 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 24 Mar 2019 11:54:02 -0700 Subject: [PATCH 1/7] implement _config.display, move remaning locale utilities; update tests --- pandas/_config/display.py | 72 +++++++++++++++++++++++ pandas/_config/localization.py | 69 ++++++++++++++++++++++ pandas/core/config_init.py | 24 -------- pandas/io/formats/console.py | 36 ------------ pandas/tests/config/test_localization.py | 10 +--- pandas/tests/io/formats/test_console.py | 3 +- pandas/util/testing.py | 74 +----------------------- 7 files changed, 147 insertions(+), 141 deletions(-) create mode 100644 pandas/_config/display.py diff --git a/pandas/_config/display.py b/pandas/_config/display.py new file mode 100644 index 0000000000000..0b7043f0bb1c7 --- /dev/null +++ b/pandas/_config/display.py @@ -0,0 +1,72 @@ +""" +Unopinionated display configuration. +""" +import locale +import sys + +from pandas._config import config as cf + +# ----------------------------------------------------------------------------- +# Global formatting options +_initial_defencoding = None + + +def detect_console_encoding(): + """ + Try to find the most capable encoding supported by the console. + slightly modified from the way IPython handles the same issue. + """ + global _initial_defencoding + + encoding = None + try: + encoding = sys.stdout.encoding or sys.stdin.encoding + except (AttributeError, IOError): + pass + + # try again for something better + if not encoding or 'ascii' in encoding.lower(): + try: + encoding = locale.getpreferredencoding() + except Exception: + pass + + # when all else fails. this will usually be "ascii" + if not encoding or 'ascii' in encoding.lower(): + encoding = sys.getdefaultencoding() + + # GH#3360, save the reported defencoding at import time + # MPL backends may change it. Make available for debugging. + if not _initial_defencoding: + _initial_defencoding = sys.getdefaultencoding() + + return encoding + + +pc_encoding_doc = """ +: str/unicode + Defaults to the detected encoding of the console. + Specifies the encoding to be used for strings returned by to_string, + these are generally strings meant to be displayed on the console. +""" + +pc_date_dayfirst_doc = """ +: boolean + When True, prints and parses dates with the day first, eg 20/01/2005 +""" + +pc_date_yearfirst_doc = """ +: boolean + When True, prints and parses dates with the year first, eg 2005/01/20 +""" + + +with cf.config_prefix('display'): + cf.register_option('encoding', detect_console_encoding(), pc_encoding_doc, + validator=cf.is_text) + + # Needed upstream of `_libs` because these are used in tslibs.parsing + cf.register_option('date_dayfirst', False, pc_date_dayfirst_doc, + validator=cf.is_bool) + cf.register_option('date_yearfirst', False, pc_date_yearfirst_doc, + validator=cf.is_bool) diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index a5f348241e4d7..1ca6d073f18c4 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -5,6 +5,10 @@ """ from contextlib import contextmanager import locale +import re +import subprocess + +from pandas._config.config import options @contextmanager @@ -91,3 +95,68 @@ def _valid_locales(locales, normalize): normalizer = lambda x: x.strip() return list(filter(can_set_locale, map(normalizer, locales))) + + +def _default_locale_getter(): + try: + raw_locales = subprocess.check_output(['locale -a'], shell=True) + except subprocess.CalledProcessError as e: + raise type(e)("{exception}, the 'locale -a' command cannot be found " + "on your system".format(exception=e)) + return raw_locales + + +def get_locales(prefix=None, normalize=True, + locale_getter=_default_locale_getter): + """ + Get all the locales that are available on the system. + + Parameters + ---------- + prefix : str + If not ``None`` then return only those locales with the prefix + provided. For example to get all English language locales (those that + start with ``"en"``), pass ``prefix="en"``. + normalize : bool + Call ``locale.normalize`` on the resulting list of available locales. + If ``True``, only locales that can be set without throwing an + ``Exception`` are returned. + locale_getter : callable + The function to use to retrieve the current locales. This should return + a string with each locale separated by a newline character. + + Returns + ------- + locales : list of strings + A list of locale strings that can be set with ``locale.setlocale()``. + For example:: + + locale.setlocale(locale.LC_ALL, locale_string) + + On error will return None (no locale available, e.g. Windows) + + """ + try: + raw_locales = locale_getter() + except Exception: + return None + + try: + # raw_locales is "\n" separated list of locales + # it may contain non-decodable parts, so split + # extract what we can and then rejoin. + raw_locales = raw_locales.split(b'\n') + out_locales = [] + for x in raw_locales: + out_locales.append(str( + x, encoding=options.display.encoding)) + + except TypeError: + pass + + if prefix is None: + return _valid_locales(out_locales, normalize) + + pattern = re.compile('{prefix}.*'.format(prefix=prefix)) + found = pattern.findall('\n'.join(out_locales)) + return _valid_locales(found, normalize) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 19af321f3d88b..01bab472b06d0 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -14,7 +14,6 @@ is_bool, is_callable, is_instance_factory, is_int, is_one_of_factory, is_text) -from pandas.io.formats.console import detect_console_encoding from pandas.io.formats.terminal import is_terminal # compute @@ -110,16 +109,6 @@ def use_numexpr_cb(key): pandas objects (if it is available). """ -pc_date_dayfirst_doc = """ -: boolean - When True, prints and parses dates with the day first, eg 20/01/2005 -""" - -pc_date_yearfirst_doc = """ -: boolean - When True, prints and parses dates with the year first, eg 2005/01/20 -""" - pc_pprint_nest_depth = """ : int Controls the number of nested levels to process when pretty-printing @@ -131,13 +120,6 @@ def use_numexpr_cb(key): elements in outer levels within groups) """ -pc_encoding_doc = """ -: str/unicode - Defaults to the detected encoding of the console. - Specifies the encoding to be used for strings returned by to_string, - these are generally strings meant to be displayed on the console. -""" - float_format_doc = """ : callable The callable should accept a floating point number and return @@ -331,16 +313,10 @@ def table_schema_cb(key): validator=is_text) cf.register_option('notebook_repr_html', True, pc_nb_repr_h_doc, validator=is_bool) - cf.register_option('date_dayfirst', False, pc_date_dayfirst_doc, - validator=is_bool) - cf.register_option('date_yearfirst', False, pc_date_yearfirst_doc, - validator=is_bool) cf.register_option('pprint_nest_depth', 3, pc_pprint_nest_depth, validator=is_int) cf.register_option('multi_sparse', True, pc_multi_sparse_doc, validator=is_bool) - cf.register_option('encoding', detect_console_encoding(), pc_encoding_doc, - validator=is_text) cf.register_option('expand_frame_repr', True, pc_expand_repr_doc) cf.register_option('show_dimensions', 'truncate', pc_show_dimensions_doc, validator=is_one_of_factory([True, False, 'truncate'])) diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index 0a035b52d6d42..3ade74693d654 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -7,42 +7,6 @@ from pandas.io.formats.terminal import get_terminal_size -# ----------------------------------------------------------------------------- -# Global formatting options -_initial_defencoding = None - - -def detect_console_encoding(): - """ - Try to find the most capable encoding supported by the console. - slightly modified from the way IPython handles the same issue. - """ - global _initial_defencoding - - encoding = None - try: - encoding = sys.stdout.encoding or sys.stdin.encoding - except (AttributeError, IOError): - pass - - # try again for something better - if not encoding or 'ascii' in encoding.lower(): - try: - encoding = locale.getpreferredencoding() - except Exception: - pass - - # when all else fails. this will usually be "ascii" - if not encoding or 'ascii' in encoding.lower(): - encoding = sys.getdefaultencoding() - - # GH3360, save the reported defencoding at import time - # MPL backends may change it. Make available for debugging. - if not _initial_defencoding: - _initial_defencoding = sys.getdefaultencoding() - - return encoding - def get_console_size(): """Return console size as tuple = (width, height). diff --git a/pandas/tests/config/test_localization.py b/pandas/tests/config/test_localization.py index fa7b46da4af92..15556948ad9b8 100644 --- a/pandas/tests/config/test_localization.py +++ b/pandas/tests/config/test_localization.py @@ -5,15 +5,11 @@ import pytest -from pandas._config.localization import can_set_locale, set_locale +from pandas._config.localization import can_set_locale, get_locales, set_locale from pandas.compat import is_platform_windows -# TODO: move get_locales into localization, making `tm` import unnecessary. -# This is blocked by the need for core.config to be moved to _config. -import pandas.util.testing as tm - -_all_locales = tm.get_locales() or [] +_all_locales = get_locales() or [] _current_locale = locale.getlocale() # Don't run any of these tests if we are on Windows or have no locales. @@ -55,7 +51,7 @@ def test_get_locales_at_least_one(): @_skip_if_only_one_locale def test_get_locales_prefix(): first_locale = _all_locales[0] - assert len(tm.get_locales(prefix=first_locale[:2])) > 0 + assert len(get_locales(prefix=first_locale[:2])) > 0 @_skip_if_only_one_locale diff --git a/pandas/tests/io/formats/test_console.py b/pandas/tests/io/formats/test_console.py index a3e0e195f4864..ccb282249e6a1 100644 --- a/pandas/tests/io/formats/test_console.py +++ b/pandas/tests/io/formats/test_console.py @@ -2,7 +2,8 @@ import pytest -from pandas.io.formats.console import detect_console_encoding +from pandas._config.display import detect_console_encoding + from pandas.io.formats.terminal import _get_terminal_size_tput diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 20c6e2644cda9..5e162298be865 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -8,7 +8,6 @@ import re from shutil import rmtree import string -import subprocess import sys import tempfile import traceback @@ -18,7 +17,7 @@ from numpy.random import rand, randn from pandas._config.localization import ( # noqa:F401 - _valid_locales, can_set_locale, set_locale) + can_set_locale, get_locales, set_locale) from pandas._libs import testing as _testing import pandas.compat as compat @@ -424,77 +423,6 @@ def close(fignum=None): _close(fignum) -# ----------------------------------------------------------------------------- -# locale utilities - - -def _default_locale_getter(): - try: - raw_locales = subprocess.check_output(['locale -a'], shell=True) - except subprocess.CalledProcessError as e: - raise type(e)("{exception}, the 'locale -a' command cannot be found " - "on your system".format(exception=e)) - return raw_locales - - -def get_locales(prefix=None, normalize=True, - locale_getter=_default_locale_getter): - """Get all the locales that are available on the system. - - Parameters - ---------- - prefix : str - If not ``None`` then return only those locales with the prefix - provided. For example to get all English language locales (those that - start with ``"en"``), pass ``prefix="en"``. - normalize : bool - Call ``locale.normalize`` on the resulting list of available locales. - If ``True``, only locales that can be set without throwing an - ``Exception`` are returned. - locale_getter : callable - The function to use to retrieve the current locales. This should return - a string with each locale separated by a newline character. - - Returns - ------- - locales : list of strings - A list of locale strings that can be set with ``locale.setlocale()``. - For example:: - - locale.setlocale(locale.LC_ALL, locale_string) - - On error will return None (no locale available, e.g. Windows) - - """ - try: - raw_locales = locale_getter() - except Exception: - return None - - try: - # raw_locales is "\n" separated list of locales - # it may contain non-decodable parts, so split - # extract what we can and then rejoin. - raw_locales = raw_locales.split(b'\n') - out_locales = [] - for x in raw_locales: - if PY3: - out_locales.append(str( - x, encoding=pd.options.display.encoding)) - else: - out_locales.append(str(x)) - - except TypeError: - pass - - if prefix is None: - return _valid_locales(out_locales, normalize) - - pattern = re.compile('{prefix}.*'.format(prefix=prefix)) - found = pattern.findall('\n'.join(out_locales)) - return _valid_locales(found, normalize) - - # ----------------------------------------------------------------------------- # Stdout / stderr decorators From c093df1f7d3247225a8496dce2563ac4a0acf65d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 24 Mar 2019 12:16:58 -0700 Subject: [PATCH 2/7] update imports --- pandas/_config/__init__.py | 7 +++++-- pandas/tests/io/formats/test_console.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index 78434876b29fb..7a8961772a227 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -2,9 +2,12 @@ pandas._config is considered explicitly upstream of everything else in pandas, should have no intra-pandas dependencies. """ -__all__ = ["config", "get_option", "set_option", "reset_option", - "describe_option", "option_context", "options"] +__all__ = ["config", "detect_console_encoding", "get_option", "set_option", + "reset_option", "describe_option", "option_context", "options"] from pandas._config import config from pandas._config.config import ( describe_option, get_option, option_context, options, reset_option, set_option) + +# importing .display causes display.encoding and other keys to be initialized +from pandas._config.display import detect_console_encoding \ No newline at end of file diff --git a/pandas/tests/io/formats/test_console.py b/pandas/tests/io/formats/test_console.py index ccb282249e6a1..a633ae670eed5 100644 --- a/pandas/tests/io/formats/test_console.py +++ b/pandas/tests/io/formats/test_console.py @@ -2,7 +2,7 @@ import pytest -from pandas._config.display import detect_console_encoding +from pandas._config import detect_console_encoding from pandas.io.formats.terminal import _get_terminal_size_tput From 1bb6dfd953c9eda11656b663eb2858ea98fde41e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 24 Mar 2019 13:26:53 -0700 Subject: [PATCH 3/7] flake8 fixup --- pandas/_config/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index 7a8961772a227..3c8a7fda6a089 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -10,4 +10,4 @@ set_option) # importing .display causes display.encoding and other keys to be initialized -from pandas._config.display import detect_console_encoding \ No newline at end of file +from pandas._config.display import detect_console_encoding From da3d3edeb6a4d3960fbf10e132079de36f8cf3b0 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 24 Mar 2019 14:55:45 -0700 Subject: [PATCH 4/7] flake8 fixup --- pandas/_config/__init__.py | 3 +-- pandas/io/formats/console.py | 3 --- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index 3c8a7fda6a089..5c8029abdddff 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -8,6 +8,5 @@ from pandas._config.config import ( describe_option, get_option, option_context, options, reset_option, set_option) - -# importing .display causes display.encoding and other keys to be initialized from pandas._config.display import detect_console_encoding +# importing .display causes display.encoding and other keys to be initialized diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index 3ade74693d654..c914de387413c 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -2,9 +2,6 @@ Internal module for console introspection """ -import locale -import sys - from pandas.io.formats.terminal import get_terminal_size From e9f59621045afa660aee0f0ddce6f514f2ed33d3 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 24 Mar 2019 19:36:00 -0700 Subject: [PATCH 5/7] implement dates.py, isort fixups --- dates.py | 22 ++++++++++++++++++++++ pandas/_config/__init__.py | 5 ++++- pandas/_config/display.py | 17 ----------------- 3 files changed, 26 insertions(+), 18 deletions(-) create mode 100644 dates.py diff --git a/dates.py b/dates.py new file mode 100644 index 0000000000000..a6d630211fedd --- /dev/null +++ b/dates.py @@ -0,0 +1,22 @@ +""" +config for datetime formatting +""" +from pandas._config import config as cf + + +pc_date_dayfirst_doc = """ +: boolean + When True, prints and parses dates with the day first, eg 20/01/2005 +""" + +pc_date_yearfirst_doc = """ +: boolean + When True, prints and parses dates with the year first, eg 2005/01/20 +""" + +with cf.config_prefix('display'): + # Needed upstream of `_libs` because these are used in tslibs.parsing + cf.register_option('date_dayfirst', False, pc_date_dayfirst_doc, + validator=cf.is_bool) + cf.register_option('date_yearfirst', False, pc_date_yearfirst_doc, + validator=cf.is_bool) diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index 5c8029abdddff..bf221ea444288 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -1,12 +1,15 @@ """ pandas._config is considered explicitly upstream of everything else in pandas, should have no intra-pandas dependencies. + +importing `dates` and `display` ensures that keys needed by _libs +are initialized. """ __all__ = ["config", "detect_console_encoding", "get_option", "set_option", "reset_option", "describe_option", "option_context", "options"] from pandas._config import config +from pandas._config import dates # noqa:F401 from pandas._config.config import ( describe_option, get_option, option_context, options, reset_option, set_option) from pandas._config.display import detect_console_encoding -# importing .display causes display.encoding and other keys to be initialized diff --git a/pandas/_config/display.py b/pandas/_config/display.py index 0b7043f0bb1c7..7997d12e06aa9 100644 --- a/pandas/_config/display.py +++ b/pandas/_config/display.py @@ -50,23 +50,6 @@ def detect_console_encoding(): these are generally strings meant to be displayed on the console. """ -pc_date_dayfirst_doc = """ -: boolean - When True, prints and parses dates with the day first, eg 20/01/2005 -""" - -pc_date_yearfirst_doc = """ -: boolean - When True, prints and parses dates with the year first, eg 2005/01/20 -""" - - with cf.config_prefix('display'): cf.register_option('encoding', detect_console_encoding(), pc_encoding_doc, validator=cf.is_text) - - # Needed upstream of `_libs` because these are used in tslibs.parsing - cf.register_option('date_dayfirst', False, pc_date_dayfirst_doc, - validator=cf.is_bool) - cf.register_option('date_yearfirst', False, pc_date_yearfirst_doc, - validator=cf.is_bool) From 1314b81608760d57c1a67bb068c024e95b5b2a13 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 25 Mar 2019 06:55:10 -0700 Subject: [PATCH 6/7] move misplaced file --- dates.py => pandas/_config/dates.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dates.py => pandas/_config/dates.py (100%) diff --git a/dates.py b/pandas/_config/dates.py similarity index 100% rename from dates.py rename to pandas/_config/dates.py From f06c21a670f31a88307cead6296be7bbf3aad810 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 25 Mar 2019 06:55:35 -0700 Subject: [PATCH 7/7] isort fixup --- pandas/_config/dates.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_config/dates.py b/pandas/_config/dates.py index a6d630211fedd..85300a308de62 100644 --- a/pandas/_config/dates.py +++ b/pandas/_config/dates.py @@ -3,7 +3,6 @@ """ from pandas._config import config as cf - pc_date_dayfirst_doc = """ : boolean When True, prints and parses dates with the day first, eg 20/01/2005