diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index 78434876b29fb..bf221ea444288 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -1,10 +1,15 @@ """ pandas._config is considered explicitly upstream of everything else in pandas, should have no intra-pandas dependencies. + +importing `dates` and `display` ensures that keys needed by _libs +are initialized. """ -__all__ = ["config", "get_option", "set_option", "reset_option", - "describe_option", "option_context", "options"] +__all__ = ["config", "detect_console_encoding", "get_option", "set_option", + "reset_option", "describe_option", "option_context", "options"] from pandas._config import config +from pandas._config import dates # noqa:F401 from pandas._config.config import ( describe_option, get_option, option_context, options, reset_option, set_option) +from pandas._config.display import detect_console_encoding diff --git a/pandas/_config/dates.py b/pandas/_config/dates.py new file mode 100644 index 0000000000000..85300a308de62 --- /dev/null +++ b/pandas/_config/dates.py @@ -0,0 +1,21 @@ +""" +config for datetime formatting +""" +from pandas._config import config as cf + +pc_date_dayfirst_doc = """ +: boolean + When True, prints and parses dates with the day first, eg 20/01/2005 +""" + +pc_date_yearfirst_doc = """ +: boolean + When True, prints and parses dates with the year first, eg 2005/01/20 +""" + +with cf.config_prefix('display'): + # Needed upstream of `_libs` because these are used in tslibs.parsing + cf.register_option('date_dayfirst', False, pc_date_dayfirst_doc, + validator=cf.is_bool) + cf.register_option('date_yearfirst', False, pc_date_yearfirst_doc, + validator=cf.is_bool) diff --git a/pandas/_config/display.py b/pandas/_config/display.py new file mode 100644 index 0000000000000..7997d12e06aa9 --- /dev/null +++ b/pandas/_config/display.py @@ -0,0 +1,55 @@ +""" +Unopinionated display configuration. 
+""" +import locale +import sys + +from pandas._config import config as cf + +# ----------------------------------------------------------------------------- +# Global formatting options +_initial_defencoding = None + + +def detect_console_encoding(): + """ + Try to find the most capable encoding supported by the console. + slightly modified from the way IPython handles the same issue. + """ + global _initial_defencoding + + encoding = None + try: + encoding = sys.stdout.encoding or sys.stdin.encoding + except (AttributeError, IOError): + pass + + # try again for something better + if not encoding or 'ascii' in encoding.lower(): + try: + encoding = locale.getpreferredencoding() + except Exception: + pass + + # when all else fails. this will usually be "ascii" + if not encoding or 'ascii' in encoding.lower(): + encoding = sys.getdefaultencoding() + + # GH#3360, save the reported defencoding at import time + # MPL backends may change it. Make available for debugging. + if not _initial_defencoding: + _initial_defencoding = sys.getdefaultencoding() + + return encoding + + +pc_encoding_doc = """ +: str/unicode + Defaults to the detected encoding of the console. + Specifies the encoding to be used for strings returned by to_string, + these are generally strings meant to be displayed on the console. 
+""" + +with cf.config_prefix('display'): + cf.register_option('encoding', detect_console_encoding(), pc_encoding_doc, + validator=cf.is_text) diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index a5f348241e4d7..1ca6d073f18c4 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -5,6 +5,10 @@ """ from contextlib import contextmanager import locale +import re +import subprocess + +from pandas._config.config import options @contextmanager @@ -91,3 +95,70 @@ def _valid_locales(locales, normalize): normalizer = lambda x: x.strip() return list(filter(can_set_locale, map(normalizer, locales))) + + +def _default_locale_getter(): + try: + raw_locales = subprocess.check_output(['locale -a'], shell=True) + except subprocess.CalledProcessError as e: + raise type(e)("{exception}, the 'locale -a' command cannot be found " + "on your system".format(exception=e)) + return raw_locales + + +def get_locales(prefix=None, normalize=True, + locale_getter=_default_locale_getter): + """ + Get all the locales that are available on the system. + + Parameters + ---------- + prefix : str + If not ``None`` then return only those locales with the prefix + provided. For example to get all English language locales (those that + start with ``"en"``), pass ``prefix="en"``. + normalize : bool + Call ``locale.normalize`` on the resulting list of available locales. + If ``True``, only locales that can be set without throwing an + ``Exception`` are returned. + locale_getter : callable + The function to use to retrieve the current locales. This should return + a byte string with each locale separated by a newline character. + + Returns + ------- + locales : list of strings + A list of locale strings that can be set with ``locale.setlocale()``. + For example:: + + locale.setlocale(locale.LC_ALL, locale_string) + + On error will return None (no locale available, e.g. 
Windows) + + """ + try: + raw_locales = locale_getter() + except Exception: + return None + + # Bind the result list before the ``try`` so that a TypeError raised by + # ``split`` (a getter returning str, not bytes) cannot leave it unbound + out_locales = [] + try: + # raw_locales is "\n" separated list of locales + # it may contain non-decodable parts, so split + # extract what we can and then rejoin. + raw_locales = raw_locales.split(b'\n') + for x in raw_locales: + out_locales.append(str( + x, encoding=options.display.encoding)) + + except TypeError: + pass + + if prefix is None: + return _valid_locales(out_locales, normalize) + + pattern = re.compile('{prefix}.*'.format(prefix=prefix)) + found = pattern.findall('\n'.join(out_locales)) + return _valid_locales(found, normalize) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 19af321f3d88b..01bab472b06d0 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -14,7 +14,6 @@ is_bool, is_callable, is_instance_factory, is_int, is_one_of_factory, is_text) -from pandas.io.formats.console import detect_console_encoding from pandas.io.formats.terminal import is_terminal # compute @@ -110,16 +109,6 @@ def use_numexpr_cb(key): pandas objects (if it is available). """ -pc_date_dayfirst_doc = """ -: boolean - When True, prints and parses dates with the day first, eg 20/01/2005 -""" - -pc_date_yearfirst_doc = """ -: boolean - When True, prints and parses dates with the year first, eg 2005/01/20 -""" - pc_pprint_nest_depth = """ : int Controls the number of nested levels to process when pretty-printing @@ -131,13 +120,6 @@ def use_numexpr_cb(key): elements in outer levels within groups) """ -pc_encoding_doc = """ -: str/unicode - Defaults to the detected encoding of the console. - Specifies the encoding to be used for strings returned by to_string, - these are generally strings meant to be displayed on the console. 
-""" - float_format_doc = """ : callable The callable should accept a floating point number and return @@ -331,16 +313,10 @@ def table_schema_cb(key): validator=is_text) cf.register_option('notebook_repr_html', True, pc_nb_repr_h_doc, validator=is_bool) - cf.register_option('date_dayfirst', False, pc_date_dayfirst_doc, - validator=is_bool) - cf.register_option('date_yearfirst', False, pc_date_yearfirst_doc, - validator=is_bool) cf.register_option('pprint_nest_depth', 3, pc_pprint_nest_depth, validator=is_int) cf.register_option('multi_sparse', True, pc_multi_sparse_doc, validator=is_bool) - cf.register_option('encoding', detect_console_encoding(), pc_encoding_doc, - validator=is_text) cf.register_option('expand_frame_repr', True, pc_expand_repr_doc) cf.register_option('show_dimensions', 'truncate', pc_show_dimensions_doc, validator=is_one_of_factory([True, False, 'truncate'])) diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index 0a035b52d6d42..c914de387413c 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -2,47 +2,8 @@ Internal module for console introspection """ -import locale -import sys - from pandas.io.formats.terminal import get_terminal_size -# ----------------------------------------------------------------------------- -# Global formatting options -_initial_defencoding = None - - -def detect_console_encoding(): - """ - Try to find the most capable encoding supported by the console. - slightly modified from the way IPython handles the same issue. - """ - global _initial_defencoding - - encoding = None - try: - encoding = sys.stdout.encoding or sys.stdin.encoding - except (AttributeError, IOError): - pass - - # try again for something better - if not encoding or 'ascii' in encoding.lower(): - try: - encoding = locale.getpreferredencoding() - except Exception: - pass - - # when all else fails. 
this will usually be "ascii" - if not encoding or 'ascii' in encoding.lower(): - encoding = sys.getdefaultencoding() - - # GH3360, save the reported defencoding at import time - # MPL backends may change it. Make available for debugging. - if not _initial_defencoding: - _initial_defencoding = sys.getdefaultencoding() - - return encoding - def get_console_size(): """Return console size as tuple = (width, height). diff --git a/pandas/tests/config/test_localization.py b/pandas/tests/config/test_localization.py index fa7b46da4af92..15556948ad9b8 100644 --- a/pandas/tests/config/test_localization.py +++ b/pandas/tests/config/test_localization.py @@ -5,15 +5,11 @@ import pytest -from pandas._config.localization import can_set_locale, set_locale +from pandas._config.localization import can_set_locale, get_locales, set_locale from pandas.compat import is_platform_windows -# TODO: move get_locales into localization, making `tm` import unnecessary. -# This is blocked by the need for core.config to be moved to _config. -import pandas.util.testing as tm - -_all_locales = tm.get_locales() or [] +_all_locales = get_locales() or [] _current_locale = locale.getlocale() # Don't run any of these tests if we are on Windows or have no locales. 
@@ -55,7 +51,7 @@ def test_get_locales_at_least_one(): @_skip_if_only_one_locale def test_get_locales_prefix(): first_locale = _all_locales[0] - assert len(tm.get_locales(prefix=first_locale[:2])) > 0 + assert len(get_locales(prefix=first_locale[:2])) > 0 @_skip_if_only_one_locale diff --git a/pandas/tests/io/formats/test_console.py b/pandas/tests/io/formats/test_console.py index a3e0e195f4864..a633ae670eed5 100644 --- a/pandas/tests/io/formats/test_console.py +++ b/pandas/tests/io/formats/test_console.py @@ -2,7 +2,8 @@ import pytest -from pandas.io.formats.console import detect_console_encoding +from pandas._config import detect_console_encoding + from pandas.io.formats.terminal import _get_terminal_size_tput diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 20c6e2644cda9..5e162298be865 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -8,7 +8,6 @@ import re from shutil import rmtree import string -import subprocess import sys import tempfile import traceback @@ -18,7 +17,7 @@ from numpy.random import rand, randn from pandas._config.localization import ( # noqa:F401 - _valid_locales, can_set_locale, set_locale) + can_set_locale, get_locales, set_locale) from pandas._libs import testing as _testing import pandas.compat as compat @@ -424,77 +423,6 @@ def close(fignum=None): _close(fignum) -# ----------------------------------------------------------------------------- -# locale utilities - - -def _default_locale_getter(): - try: - raw_locales = subprocess.check_output(['locale -a'], shell=True) - except subprocess.CalledProcessError as e: - raise type(e)("{exception}, the 'locale -a' command cannot be found " - "on your system".format(exception=e)) - return raw_locales - - -def get_locales(prefix=None, normalize=True, - locale_getter=_default_locale_getter): - """Get all the locales that are available on the system. 
- - Parameters - ---------- - prefix : str - If not ``None`` then return only those locales with the prefix - provided. For example to get all English language locales (those that - start with ``"en"``), pass ``prefix="en"``. - normalize : bool - Call ``locale.normalize`` on the resulting list of available locales. - If ``True``, only locales that can be set without throwing an - ``Exception`` are returned. - locale_getter : callable - The function to use to retrieve the current locales. This should return - a string with each locale separated by a newline character. - - Returns - ------- - locales : list of strings - A list of locale strings that can be set with ``locale.setlocale()``. - For example:: - - locale.setlocale(locale.LC_ALL, locale_string) - - On error will return None (no locale available, e.g. Windows) - - """ - try: - raw_locales = locale_getter() - except Exception: - return None - - try: - # raw_locales is "\n" separated list of locales - # it may contain non-decodable parts, so split - # extract what we can and then rejoin. - raw_locales = raw_locales.split(b'\n') - out_locales = [] - for x in raw_locales: - if PY3: - out_locales.append(str( - x, encoding=pd.options.display.encoding)) - else: - out_locales.append(str(x)) - - except TypeError: - pass - - if prefix is None: - return _valid_locales(out_locales, normalize) - - pattern = re.compile('{prefix}.*'.format(prefix=prefix)) - found = pattern.findall('\n'.join(out_locales)) - return _valid_locales(found, normalize) - - # ----------------------------------------------------------------------------- # Stdout / stderr decorators