Skip to content

[REF] Move remaining locale functions to _config.localization #25861

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 25, 2019
6 changes: 4 additions & 2 deletions pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
pandas._config is considered explicitly upstream of everything else in pandas,
should have no intra-pandas dependencies.
"""
__all__ = ["config", "get_option", "set_option", "reset_option",
"describe_option", "option_context", "options"]
__all__ = ["config", "detect_console_encoding", "get_option", "set_option",
"reset_option", "describe_option", "option_context", "options"]
from pandas._config import config
from pandas._config.config import (
describe_option, get_option, option_context, options, reset_option,
set_option)
from pandas._config.display import detect_console_encoding
# importing .display causes display.encoding and other keys to be initialized
72 changes: 72 additions & 0 deletions pandas/_config/display.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""
Unopinionated display configuration.
"""
import locale
import sys

from pandas._config import config as cf

# -----------------------------------------------------------------------------
# Global formatting options
_initial_defencoding = None


def detect_console_encoding():
"""
Try to find the most capable encoding supported by the console.
slightly modified from the way IPython handles the same issue.
"""
global _initial_defencoding

encoding = None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn’t you check the global first (if it’s set)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment near the bottom suggests this is kept for debugging purposes related to matplotlib

try:
encoding = sys.stdout.encoding or sys.stdin.encoding
except (AttributeError, IOError):
pass

# try again for something better
if not encoding or 'ascii' in encoding.lower():
try:
encoding = locale.getpreferredencoding()
except Exception:
pass

# when all else fails. this will usually be "ascii"
if not encoding or 'ascii' in encoding.lower():
encoding = sys.getdefaultencoding()

# GH#3360, save the reported defencoding at import time
# MPL backends may change it. Make available for debugging.
if not _initial_defencoding:
_initial_defencoding = sys.getdefaultencoding()

return encoding


pc_encoding_doc = """
: str/unicode
Defaults to the detected encoding of the console.
Specifies the encoding to be used for strings returned by to_string,
these are generally strings meant to be displayed on the console.
"""

pc_date_dayfirst_doc = """
: boolean
When True, prints and parses dates with the day first, eg 20/01/2005
"""

pc_date_yearfirst_doc = """
: boolean
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are pretty orthogonal, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Handling dayfirst and yearfirst finishes breaking the reliance of tslibs on core.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, but they shouldn't be in this file. No problem separating them out, but not here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They're under the display namespace within options. Where would you put this? It could go directly at the bottom of __init__, I guess.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would put them in another file under _config.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done + green

When True, prints and parses dates with the year first, eg 2005/01/20
"""


# Register the options that live under the ``display.`` prefix.
with cf.config_prefix('display'):
    # The default is whatever detect_console_encoding() returns at the
    # moment this statement executes.
    cf.register_option(
        'encoding',
        detect_console_encoding(),
        pc_encoding_doc,
        validator=cf.is_text,
    )

    # Needed upstream of `_libs` because these are used in tslibs.parsing
    cf.register_option(
        'date_dayfirst', False, pc_date_dayfirst_doc, validator=cf.is_bool,
    )
    cf.register_option(
        'date_yearfirst', False, pc_date_yearfirst_doc, validator=cf.is_bool,
    )
69 changes: 69 additions & 0 deletions pandas/_config/localization.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
"""
from contextlib import contextmanager
import locale
import re
import subprocess

from pandas._config.config import options


@contextmanager
Expand Down Expand Up @@ -91,3 +95,68 @@ def _valid_locales(locales, normalize):
normalizer = lambda x: x.strip()

return list(filter(can_set_locale, map(normalizer, locales)))


def _default_locale_getter():
    """
    Return the raw output of the system's ``locale -a`` command.

    Returns
    -------
    bytes
        Whatever ``locale -a`` wrote to stdout, undecoded.

    Raises
    ------
    subprocess.CalledProcessError
        If the shell reports a non-zero exit status, e.g. because the
        ``locale`` command is not available on this system.
    """
    # Let CalledProcessError propagate untouched.  Its constructor requires
    # ``(returncode, cmd)``, so rebuilding it from a single message string
    # raised TypeError instead and hid the real failure from the caller.
    return subprocess.check_output(['locale -a'], shell=True)


def get_locales(prefix=None, normalize=True,
                locale_getter=_default_locale_getter):
    """
    Get all the locales that are available on the system.

    Parameters
    ----------
    prefix : str
        If not ``None`` then return only those locales with the prefix
        provided. For example to get all English language locales (those that
        start with ``"en"``), pass ``prefix="en"``.
    normalize : bool
        Call ``locale.normalize`` on the resulting list of available locales.
        If ``True``, only locales that can be set without throwing an
        ``Exception`` are returned.
    locale_getter : callable
        The function to use to retrieve the current locales. This should
        return the locales separated by newline characters, either as
        ``bytes`` (decoded here with ``options.display.encoding``) or as an
        already decoded ``str``.

    Returns
    -------
    locales : list of strings
        A list of locale strings that can be set with ``locale.setlocale()``.
        For example::

            locale.setlocale(locale.LC_ALL, locale_string)

    On error will return None (no locale available, e.g. Windows)

    """
    try:
        raw_locales = locale_getter()
    except Exception:
        return None

    if isinstance(raw_locales, bytes):
        # raw_locales is a "\n" separated list of locales.  It may contain
        # non-decodable parts, so decode line by line and keep what we can
        # instead of letting one bad entry abort the whole call.
        encoding = options.display.encoding
        out_locales = []
        for raw_line in raw_locales.split(b'\n'):
            try:
                out_locales.append(str(raw_line, encoding=encoding))
            except UnicodeError:
                continue
    elif isinstance(raw_locales, str):
        # Already text: previously this path hit a swallowed TypeError and
        # then crashed on the unbound ``out_locales``.
        out_locales = raw_locales.split('\n')
    else:
        # Neither bytes nor text, so there is nothing to parse.
        return None

    if prefix is None:
        return _valid_locales(out_locales, normalize)

    # NOTE: ``prefix`` is interpolated into the pattern as-is (not escaped),
    # and ``findall`` runs over the joined text, so a match may begin anywhere
    # within a line and extends to the end of that line.
    pattern = re.compile('{prefix}.*'.format(prefix=prefix))
    found = pattern.findall('\n'.join(out_locales))
    return _valid_locales(found, normalize)
24 changes: 0 additions & 24 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
is_bool, is_callable, is_instance_factory, is_int, is_one_of_factory,
is_text)

from pandas.io.formats.console import detect_console_encoding
from pandas.io.formats.terminal import is_terminal

# compute
Expand Down Expand Up @@ -110,16 +109,6 @@ def use_numexpr_cb(key):
pandas objects (if it is available).
"""

pc_date_dayfirst_doc = """
: boolean
When True, prints and parses dates with the day first, eg 20/01/2005
"""

pc_date_yearfirst_doc = """
: boolean
When True, prints and parses dates with the year first, eg 2005/01/20
"""

pc_pprint_nest_depth = """
: int
Controls the number of nested levels to process when pretty-printing
Expand All @@ -131,13 +120,6 @@ def use_numexpr_cb(key):
elements in outer levels within groups)
"""

pc_encoding_doc = """
: str/unicode
Defaults to the detected encoding of the console.
Specifies the encoding to be used for strings returned by to_string,
these are generally strings meant to be displayed on the console.
"""

float_format_doc = """
: callable
The callable should accept a floating point number and return
Expand Down Expand Up @@ -331,16 +313,10 @@ def table_schema_cb(key):
validator=is_text)
cf.register_option('notebook_repr_html', True, pc_nb_repr_h_doc,
validator=is_bool)
cf.register_option('date_dayfirst', False, pc_date_dayfirst_doc,
validator=is_bool)
cf.register_option('date_yearfirst', False, pc_date_yearfirst_doc,
validator=is_bool)
cf.register_option('pprint_nest_depth', 3, pc_pprint_nest_depth,
validator=is_int)
cf.register_option('multi_sparse', True, pc_multi_sparse_doc,
validator=is_bool)
cf.register_option('encoding', detect_console_encoding(), pc_encoding_doc,
validator=is_text)
cf.register_option('expand_frame_repr', True, pc_expand_repr_doc)
cf.register_option('show_dimensions', 'truncate', pc_show_dimensions_doc,
validator=is_one_of_factory([True, False, 'truncate']))
Expand Down
39 changes: 0 additions & 39 deletions pandas/io/formats/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,47 +2,8 @@
Internal module for console introspection
"""

import locale
import sys

from pandas.io.formats.terminal import get_terminal_size

# -----------------------------------------------------------------------------
# Global formatting options

# Default encoding reported by ``sys`` the first time the detector runs.
_initial_defencoding = None


def detect_console_encoding():
    """
    Return the most capable encoding the console appears to support.

    Candidates are tried in order: the encoding of ``sys.stdout`` (or
    ``sys.stdin``), then ``locale.getpreferredencoding()``, then
    ``sys.getdefaultencoding()``.  A candidate is passed over when it is
    missing/empty or its name contains "ascii".  Slightly modified from the
    way IPython handles the same issue.
    """
    global _initial_defencoding

    def _needs_better(candidate):
        # True when there is no candidate yet, or it is some ASCII flavour.
        return not candidate or 'ascii' in candidate.lower()

    try:
        encoding = sys.stdout.encoding or sys.stdin.encoding
    except (AttributeError, IOError):
        encoding = None

    # try again for something better
    if _needs_better(encoding):
        try:
            encoding = locale.getpreferredencoding()
        except Exception:
            pass

    # when all else fails. this will usually be "ascii"
    if _needs_better(encoding):
        encoding = sys.getdefaultencoding()

    # GH3360, save the reported defencoding at import time
    # MPL backends may change it. Make available for debugging.
    if not _initial_defencoding:
        _initial_defencoding = sys.getdefaultencoding()

    return encoding


def get_console_size():
"""Return console size as tuple = (width, height).
Expand Down
10 changes: 3 additions & 7 deletions pandas/tests/config/test_localization.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,11 @@

import pytest

from pandas._config.localization import can_set_locale, set_locale
from pandas._config.localization import can_set_locale, get_locales, set_locale

from pandas.compat import is_platform_windows

# TODO: move get_locales into localization, making `tm` import unnecessary.
# This is blocked by the need for core.config to be moved to _config.
import pandas.util.testing as tm

_all_locales = tm.get_locales() or []
_all_locales = get_locales() or []
_current_locale = locale.getlocale()

# Don't run any of these tests if we are on Windows or have no locales.
Expand Down Expand Up @@ -55,7 +51,7 @@ def test_get_locales_at_least_one():
@_skip_if_only_one_locale
def test_get_locales_prefix():
first_locale = _all_locales[0]
assert len(tm.get_locales(prefix=first_locale[:2])) > 0
assert len(get_locales(prefix=first_locale[:2])) > 0


@_skip_if_only_one_locale
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/io/formats/test_console.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import pytest

from pandas.io.formats.console import detect_console_encoding
from pandas._config import detect_console_encoding

from pandas.io.formats.terminal import _get_terminal_size_tput


Expand Down
Loading