Skip to content

Commit 075eca1

Browse files
authored
DEPR: allow options for using bottleneck/numexpr (pandas-dev#16157)
* DEPR: allow options for using bottleneck/numexpr deprecate pd.computation.expressions.set_use_numexpr() * DEPR: pandas.types.concat.union_categoricals in favor of pandas.api.type.union_categoricals closes pandas-dev#16140
1 parent 669973a commit 075eca1

File tree

17 files changed

+215
-94
lines changed

17 files changed

+215
-94
lines changed

doc/source/basics.rst

+10-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ Accelerated operations
9393
----------------------
9494

9595
pandas has support for accelerating certain types of binary numerical and boolean operations using
96-
the ``numexpr`` library (starting in 0.11.0) and the ``bottleneck`` libraries.
96+
the ``numexpr`` library and the ``bottleneck`` libraries.
9797

9898
These libraries are especially useful when dealing with large data sets, and provide large
9999
speedups. ``numexpr`` uses smart chunking, caching, and multiple cores. ``bottleneck`` is
@@ -114,6 +114,15 @@ Here is a sample (using 100 column x 100,000 row ``DataFrames``):
114114
You are highly encouraged to install both libraries. See the section
115115
:ref:`Recommended Dependencies <install.recommended_dependencies>` for more installation info.
116116

117+
Both libraries are enabled by default; you can control this by setting the options:
118+
119+
.. versionadded:: 0.20.0
120+
121+
.. code-block:: python
122+
123+
pd.set_option('compute.use_bottleneck', False)
124+
pd.set_option('compute.use_numexpr', False)
125+
117126
.. _basics.binop:
118127

119128
Flexible binary operations

doc/source/options.rst

+5-1
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,10 @@ mode.use_inf_as_null False True means treat None, NaN, -IN
425425
INF as null (old way), False means
426426
None and NaN are null, but INF, -INF
427427
are not null (new way).
428+
compute.use_bottleneck True Use the bottleneck library to accelerate
429+
computation if it is installed
430+
compute.use_numexpr True Use the numexpr library to accelerate
431+
computation if it is installed
428432
=================================== ============ ==================================
429433

430434

@@ -538,4 +542,4 @@ Only ``'display.max_rows'`` are serialized and published.
538542
.. ipython:: python
539543
:suppress:
540544
541-
pd.reset_option('display.html.table_schema')
545+
pd.reset_option('display.html.table_schema')

doc/source/whatsnew/v0.20.0.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,7 @@ Other Enhancements
521521
- The ``display.show_dimensions`` option can now also be used to specify
522522
whether the length of a ``Series`` should be shown in its repr (:issue:`7117`).
523523
- ``parallel_coordinates()`` has gained a ``sort_labels`` keyword arg that sorts class labels and the colours assigned to them (:issue:`15908`)
524+
- Options added to turn the use of ``bottleneck`` and ``numexpr`` on or off, see :ref:`here <basics.accelerate>` (:issue:`16157`)
524525

525526

526527
.. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
@@ -1217,7 +1218,7 @@ If indicated, a deprecation warning will be issued if you reference theses modul
12171218

12181219
"pandas.lib", "pandas._libs.lib", "X"
12191220
"pandas.tslib", "pandas._libs.tslib", "X"
1220-
"pandas.computation", "pandas.core.computation", ""
1221+
"pandas.computation", "pandas.core.computation", "X"
12211222
"pandas.msgpack", "pandas.io.msgpack", ""
12221223
"pandas.index", "pandas._libs.index", ""
12231224
"pandas.algos", "pandas._libs.algos", ""

pandas/computation/__init__.py

Whitespace-only changes.

pandas/computation/expressions.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import warnings
2+
3+
4+
def set_use_numexpr(v=True):
    """
    Deprecated shim for toggling numexpr usage.

    Emits a ``FutureWarning`` and then forwards ``v`` to the
    ``compute.use_numexpr`` option.

    Parameters
    ----------
    v : bool, default True
        Value to store in the ``compute.use_numexpr`` option.
    """
    # The replacement API is ``set_option``: ``get_option`` only reads the
    # current value and cannot toggle anything, so point users at the setter.
    warnings.warn("pandas.computation.expressions.set_use_numexpr is "
                  "deprecated and will be removed in a future version.\n"
                  "you can toggle usage of numexpr via "
                  "pandas.set_option('compute.use_numexpr', <bool>)",
                  FutureWarning, stacklevel=2)
    # NOTE(review): imported at call time rather than module level,
    # presumably to avoid a circular import while pandas initialises.
    from pandas import set_option
    set_option('compute.use_numexpr', v)

pandas/core/computation/expressions.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import numpy as np
1111
from pandas.core.common import _values_from_object
1212
from pandas.core.computation import _NUMEXPR_INSTALLED
13+
from pandas.core.config import get_option
1314

1415
if _NUMEXPR_INSTALLED:
1516
import numexpr as ne
@@ -156,7 +157,7 @@ def _where_numexpr(cond, a, b, raise_on_error=False):
156157

157158

158159
# turn myself on
159-
set_use_numexpr(True)
160+
set_use_numexpr(get_option('compute.use_numexpr'))
160161

161162

162163
def _has_bool_dtype(x):

pandas/core/config_init.py

+34-1
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,41 @@
1515
from pandas.core.config import (is_int, is_bool, is_text, is_instance_factory,
1616
is_one_of_factory, get_default_val,
1717
is_callable)
18-
from pandas.io.formats.format import detect_console_encoding
18+
from pandas.io.formats.console import detect_console_encoding
1919

20+
# compute
21+
22+
use_bottleneck_doc = """
23+
: bool
24+
Use the bottleneck library to accelerate if it is installed,
25+
the default is True
26+
Valid values: False,True
27+
"""
28+
29+
30+
def use_bottleneck_cb(key):
    """
    Option callback: read the current value of ``key`` from the config
    and hand it to ``nanops.set_use_bottleneck``.
    """
    from pandas.core import nanops

    enabled = cf.get_option(key)
    nanops.set_use_bottleneck(enabled)
33+
34+
35+
use_numexpr_doc = """
36+
: bool
37+
Use the numexpr library to accelerate computation if it is installed,
38+
the default is True
39+
Valid values: False,True
40+
"""
41+
42+
43+
def use_numexpr_cb(key):
    """
    Option callback: read the current value of ``key`` from the config
    and hand it to ``expressions.set_use_numexpr``.
    """
    from pandas.core.computation import expressions

    enabled = cf.get_option(key)
    expressions.set_use_numexpr(enabled)
46+
47+
48+
with cf.config_prefix('compute'):
49+
cf.register_option('use_bottleneck', True, use_bottleneck_doc,
50+
validator=is_bool, cb=use_bottleneck_cb)
51+
cf.register_option('use_numexpr', True, use_numexpr_doc,
52+
validator=is_bool, cb=use_numexpr_cb)
2053
#
2154
# options from the "display" namespace
2255

pandas/core/frame.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@
9191
import pandas.core.nanops as nanops
9292
import pandas.core.ops as ops
9393
import pandas.io.formats.format as fmt
94+
import pandas.io.formats.console as console
9495
from pandas.io.formats.printing import pprint_thing
9596
import pandas.plotting._core as gfx
9697

@@ -513,7 +514,7 @@ def _repr_fits_horizontal_(self, ignore_width=False):
513514
GH3541, GH3573
514515
"""
515516

516-
width, height = fmt.get_console_size()
517+
width, height = console.get_console_size()
517518
max_columns = get_option("display.max_columns")
518519
nb_columns = len(self.columns)
519520

@@ -577,7 +578,7 @@ def __unicode__(self):
577578
max_cols = get_option("display.max_columns")
578579
show_dimensions = get_option("display.show_dimensions")
579580
if get_option("display.expand_frame_repr"):
580-
width, _ = fmt.get_console_size()
581+
width, _ = console.get_console_size()
581582
else:
582583
width = None
583584
self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols,

pandas/core/indexes/base.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -837,7 +837,8 @@ def _format_data(self):
837837
"""
838838
Return the formatted data as a unicode string
839839
"""
840-
from pandas.io.formats.format import get_console_size, _get_adjustment
840+
from pandas.io.formats.console import get_console_size
841+
from pandas.io.formats.format import _get_adjustment
841842
display_width, _ = get_console_size()
842843
if display_width is None:
843844
display_width = get_option('display.width') or 80

pandas/core/nanops.py

+20-8
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,8 @@
11
import itertools
22
import functools
3-
import numpy as np
43
import operator
54

6-
try:
7-
import bottleneck as bn
8-
_USE_BOTTLENECK = True
9-
except ImportError: # pragma: no cover
10-
_USE_BOTTLENECK = False
11-
5+
import numpy as np
126
from pandas import compat
137
from pandas._libs import tslib, algos, lib
148
from pandas.core.dtypes.common import (
@@ -23,9 +17,27 @@
2317
is_int_or_datetime_dtype, is_any_int_dtype)
2418
from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask
2519
from pandas.core.dtypes.missing import isnull, notnull
26-
20+
from pandas.core.config import get_option
2721
from pandas.core.common import _values_from_object
2822

23+
try:
24+
import bottleneck as bn
25+
_BOTTLENECK_INSTALLED = True
26+
except ImportError: # pragma: no cover
27+
_BOTTLENECK_INSTALLED = False
28+
29+
_USE_BOTTLENECK = False
30+
31+
32+
def set_use_bottleneck(v=True):
    """
    Set the module-level ``_USE_BOTTLENECK`` flag to ``v``.

    The flag is left untouched when bottleneck could not be imported
    (``_BOTTLENECK_INSTALLED`` is false).
    """
    global _USE_BOTTLENECK
    if not _BOTTLENECK_INSTALLED:
        return
    _USE_BOTTLENECK = v
37+
38+
39+
set_use_bottleneck(get_option('compute.use_bottleneck'))
40+
2941

3042
class disallow(object):
3143

pandas/io/formats/console.py

+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
"""
2+
Internal module for console introspection
3+
"""
4+
5+
import sys
6+
import locale
7+
from pandas.util.terminal import get_terminal_size
8+
9+
# -----------------------------------------------------------------------------
10+
# Global formatting options
11+
_initial_defencoding = None


def detect_console_encoding():
    """
    Return the most capable encoding the console appears to support.

    Candidates are tried in order: the encoding of ``sys.stdout`` (or
    ``sys.stdin``), then the locale's preferred encoding, and finally the
    interpreter default. A candidate that is empty or mentions "ascii" is
    passed over in favour of the next one. Slightly modified from the way
    IPython handles the same issue.
    """
    global _initial_defencoding

    def _needs_better(name):
        # missing, or an ascii flavour: keep looking
        return not name or 'ascii' in name.lower()

    try:
        detected = sys.stdout.encoding or sys.stdin.encoding
    except AttributeError:
        # a stream is missing or has no ``encoding`` attribute
        detected = None

    # try again for something better
    if _needs_better(detected):
        try:
            detected = locale.getpreferredencoding()
        except Exception:
            # best effort only; fall through to the interpreter default
            pass

    # when all else fails. this will usually be "ascii"
    if _needs_better(detected):
        detected = sys.getdefaultencoding()

    # GH3360, save the reported defencoding at import time
    # MPL backends may change it. Make available for debugging.
    if not _initial_defencoding:
        _initial_defencoding = sys.getdefaultencoding()

    return detected
44+
45+
46+
def get_console_size():
    """Return console size as tuple = (width, height).

    Explicitly configured ``display.width`` / ``display.height`` values win;
    otherwise the detected terminal dimensions are used.

    Returns (None,None) in non-interactive session.
    """
    from pandas import get_option
    from pandas.core import common as com

    configured_width = get_option('display.width')
    # 'display.height' is deprecated, hence the silent lookup.
    configured_height = get_option('display.height', silent=True)

    # Three situations to tell apart:
    #   * non-interactive script: terminal size should be disregarded
    #   * interactive non-shell frontend (ipnb/ipqtconsole): terminal size
    #     cannot be detected
    #   * interactive shell terminal: terminal size can be detected
    #
    # In addition, width/height have default values, but setting them to
    # 'None' signals that auto-detection should be used -- which only
    # happens in the interactive shell-terminal case.
    if not com.in_interactive_session():
        detected_width = detected_height = None
    elif com.in_ipython_frontend():
        # sane defaults for interactive non-shell terminal
        # match default for width,height in config_init
        from pandas.core.config import get_default_val
        detected_width = get_default_val('display.width')
        detected_height = get_default_val('display.height')
    else:
        # pure terminal
        detected_width, detected_height = get_terminal_size()

    # Note if the user sets width/height to None (auto-detection)
    # and we're in a script (non-interactive), this will return (None,None)
    # and the caller needs to deal with it.
    return (configured_width or detected_width,
            configured_height or detected_height)

pandas/io/formats/format.py

-77
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from distutils.version import LooseVersion
99
# pylint: disable=W0141
1010

11-
import sys
1211
from textwrap import dedent
1312

1413
from pandas.core.dtypes.missing import isnull, notnull
@@ -2290,82 +2289,6 @@ def _has_names(index):
22902289
return index.name is not None
22912290

22922291

2293-
# -----------------------------------------------------------------------------
2294-
# Global formatting options
2295-
_initial_defencoding = None
2296-
2297-
2298-
def detect_console_encoding():
2299-
"""
2300-
Try to find the most capable encoding supported by the console.
2301-
slighly modified from the way IPython handles the same issue.
2302-
"""
2303-
import locale
2304-
global _initial_defencoding
2305-
2306-
encoding = None
2307-
try:
2308-
encoding = sys.stdout.encoding or sys.stdin.encoding
2309-
except AttributeError:
2310-
pass
2311-
2312-
# try again for something better
2313-
if not encoding or 'ascii' in encoding.lower():
2314-
try:
2315-
encoding = locale.getpreferredencoding()
2316-
except Exception:
2317-
pass
2318-
2319-
# when all else fails. this will usually be "ascii"
2320-
if not encoding or 'ascii' in encoding.lower():
2321-
encoding = sys.getdefaultencoding()
2322-
2323-
# GH3360, save the reported defencoding at import time
2324-
# MPL backends may change it. Make available for debugging.
2325-
if not _initial_defencoding:
2326-
_initial_defencoding = sys.getdefaultencoding()
2327-
2328-
return encoding
2329-
2330-
2331-
def get_console_size():
2332-
"""Return console size as tuple = (width, height).
2333-
2334-
Returns (None,None) in non-interactive session.
2335-
"""
2336-
display_width = get_option('display.width')
2337-
# deprecated.
2338-
display_height = get_option('display.height', silent=True)
2339-
2340-
# Consider
2341-
# interactive shell terminal, can detect term size
2342-
# interactive non-shell terminal (ipnb/ipqtconsole), cannot detect term
2343-
# size non-interactive script, should disregard term size
2344-
2345-
# in addition
2346-
# width,height have default values, but setting to 'None' signals
2347-
# should use Auto-Detection, But only in interactive shell-terminal.
2348-
# Simple. yeah.
2349-
2350-
if com.in_interactive_session():
2351-
if com.in_ipython_frontend():
2352-
# sane defaults for interactive non-shell terminal
2353-
# match default for width,height in config_init
2354-
from pandas.core.config import get_default_val
2355-
terminal_width = get_default_val('display.width')
2356-
terminal_height = get_default_val('display.height')
2357-
else:
2358-
# pure terminal
2359-
terminal_width, terminal_height = get_terminal_size()
2360-
else:
2361-
terminal_width, terminal_height = None, None
2362-
2363-
# Note if the User sets width/Height to None (auto-detection)
2364-
# and we're in a script (non-inter), this will return (None,None)
2365-
# caller needs to deal.
2366-
return (display_width or terminal_width, display_height or terminal_height)
2367-
2368-
23692292
class EngFormatter(object):
23702293
"""
23712294
Formats float values according to engineering format.

0 commit comments

Comments
 (0)