Skip to content

Commit 1e200af

Browse files
Update pandas version to 0.24
* pandas release notes: http://pandas.pydata.org/pandas-docs/stable/whatsnew/v0.24.0.html
* Update imports to match changes in pandas
* Add functionality for list of functions on `axis=1` for `apply`
* Remove `pd.match` from API
* Small regression in pandas requires regression in Modin
  * pandas-dev/pandas#25101 reports this issue
  * pandas-dev/pandas#25102 resolves this issue
* TODO: Expose `pandas.Array` once it is properly tested
1 parent 226c705 commit 1e200af

File tree

3 files changed

+33
-27
lines changed

3 files changed

+33
-27
lines changed

modin/data_management/query_compiler/pandas_query_compiler.py

+22-9
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
is_datetime_or_timedelta_dtype,
1414
is_bool_dtype,
1515
)
16-
from pandas.core.index import _ensure_index
16+
from pandas.core.index import ensure_index
1717
from pandas.core.base import DataError
1818

1919
from modin.engines.base.block_partitions import BaseBlockPartitions
@@ -97,7 +97,7 @@ def pandas_index_extraction(df, axis):
9797
return index_obj[new_indices] if compute_diff else new_indices
9898

9999
def _validate_set_axis(self, new_labels, old_labels):
100-
new_labels = _ensure_index(new_labels)
100+
new_labels = ensure_index(new_labels)
101101
old_len = len(old_labels)
102102
new_len = len(new_labels)
103103
if old_len != new_len:
@@ -118,14 +118,14 @@ def _get_columns(self):
118118

119119
def _set_index(self, new_index):
120120
if self._index_cache is None:
121-
self._index_cache = _ensure_index(new_index)
121+
self._index_cache = ensure_index(new_index)
122122
else:
123123
new_index = self._validate_set_axis(new_index, self._index_cache)
124124
self._index_cache = new_index
125125

126126
def _set_columns(self, new_columns):
127127
if self._columns_cache is None:
128-
self._columns_cache = _ensure_index(new_columns)
128+
self._columns_cache = ensure_index(new_columns)
129129
else:
130130
new_columns = self._validate_set_axis(new_columns, self._columns_cache)
131131
self._columns_cache = new_columns
@@ -1388,7 +1388,9 @@ def _process_all_any(self, func, **kwargs):
13881388

13891389
if bool_only:
13901390
if axis == 0 and not axis_none and len(not_bool_col) == len(self.columns):
1391-
return pandas.Series(dtype=bool)
1391+
# TODO add this line back once pandas-dev/pandas#25101 is resolved
1392+
# return pandas.Series(dtype=bool)
1393+
pass
13921394
if len(not_bool_col) == len(self.columns):
13931395
query_compiler = self
13941396
else:
@@ -2492,11 +2494,22 @@ def _list_like_func(self, func, axis, *args, **kwargs):
24922494
Returns:
24932495
A new PandasQueryCompiler.
24942496
"""
2495-
func_prepared = self._prepare_method(lambda df: df.apply(func, *args, **kwargs))
2497+
func_prepared = self._prepare_method(
2498+
lambda df: df.apply(func, axis, *args, **kwargs)
2499+
)
24962500
new_data = self._map_across_full_axis(axis, func_prepared)
2497-
# When the function is list-like, the function names become the index
2498-
new_index = [f if isinstance(f, string_types) else f.__name__ for f in func]
2499-
return self.__constructor__(new_data, new_index, self.columns)
2501+
# When the function is list-like, the function names become the index/columns
2502+
new_index = (
2503+
[f if isinstance(f, string_types) else f.__name__ for f in func]
2504+
if axis == 0
2505+
else self.index
2506+
)
2507+
new_columns = (
2508+
[f if isinstance(f, string_types) else f.__name__ for f in func]
2509+
if axis == 1
2510+
else self.columns
2511+
)
2512+
return self.__constructor__(new_data, new_index, new_columns)
25002513

25012514
def _callable_func(self, func, axis, *args, **kwargs):
25022515
"""Apply callable functions across given axis.

modin/pandas/__init__.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
factorize,
1515
test,
1616
qcut,
17-
match,
1817
Panel,
1918
date_range,
2019
period_range,
@@ -64,7 +63,7 @@
6463
from .plotting import Plotting as plotting
6564
from .. import __execution_engine__ as execution_engine
6665

67-
__pandas_version__ = "0.23.4"
66+
__pandas_version__ = "0.24.0"
6867

6968
if pandas.__version__ != __pandas_version__:
7069
raise ImportError(

modin/pandas/dataframe.py

+10-16
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,17 @@
55
import pandas
66
from pandas.api.types import is_scalar
77
from pandas.compat import to_str, string_types, numpy as numpy_compat, cPickle as pkl
8-
import pandas.core.common as com
8+
from pandas.core.common import count_not_none, _pipe, apply_if_callable, is_bool_indexer
99
from pandas.core.dtypes.common import (
10-
_get_dtype_from_object,
10+
infer_dtype_from_object,
1111
is_list_like,
1212
is_numeric_dtype,
1313
is_datetime_or_timedelta_dtype,
1414
is_dtype_equal,
1515
is_object_dtype,
1616
is_integer_dtype,
1717
)
18-
from pandas.core.index import _ensure_index_from_sequences
18+
from pandas.core.index import ensure_index_from_sequences
1919
from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable
2020
from pandas.util._validators import validate_bool_kwarg
2121

@@ -769,13 +769,7 @@ def apply(
769769
FutureWarning,
770770
stacklevel=2,
771771
)
772-
elif is_list_like(func):
773-
if axis == 1:
774-
raise TypeError(
775-
"(\"'list' object is not callable\", "
776-
"'occurred at index {0}'".format(self.index[0])
777-
)
778-
elif not callable(func):
772+
elif not callable(func) and not is_list_like(func):
779773
return
780774

781775
query_compiler = self._query_compiler.apply(func, axis, *args, **kwds)
@@ -1512,7 +1506,7 @@ def filter(self, items=None, like=None, regex=None, axis=None):
15121506
Returns:
15131507
A new DataFrame with the filter applied.
15141508
"""
1515-
nkw = com._count_not_none(items, like, regex)
1509+
nkw = count_not_none(items, like, regex)
15161510
if nkw > 1:
15171511
raise TypeError(
15181512
"Keyword arguments `items`, `like`, or `regex` "
@@ -2553,7 +2547,7 @@ def pipe(self, func, *args, **kwargs):
25532547
Returns:
25542548
object: the return type of ``func``.
25552549
"""
2556-
return com._pipe(self, func, *args, **kwargs)
2550+
return _pipe(self, func, *args, **kwargs)
25572551

25582552
def pivot(self, index=None, columns=None, values=None):
25592553
return self._default_to_pandas(
@@ -3465,7 +3459,7 @@ def select_dtypes(self, include=None, exclude=None):
34653459
exclude = []
34663460

34673461
sel = tuple(map(set, (include, exclude)))
3468-
include, exclude = map(lambda x: set(map(_get_dtype_from_object, x)), sel)
3462+
include, exclude = map(lambda x: set(map(infer_dtype_from_object, x)), sel)
34693463
include_these = pandas.Series(not bool(include), index=self.columns)
34703464
exclude_these = pandas.Series(not bool(exclude), index=self.columns)
34713465

@@ -3595,7 +3589,7 @@ def set_index(
35953589
if drop:
35963590
to_remove.append(col)
35973591
arrays.append(level)
3598-
index = _ensure_index_from_sequences(arrays, names)
3592+
index = ensure_index_from_sequences(arrays, names)
35993593

36003594
if verify_integrity and not index.is_unique:
36013595
duplicates = index.get_duplicates()
@@ -4500,7 +4494,7 @@ def __getitem__(self, key):
45004494
Returns:
45014495
A Pandas Series representing the value for the column.
45024496
"""
4503-
key = com._apply_if_callable(key, self)
4497+
key = apply_if_callable(key, self)
45044498
# Shortcut if key is an actual column
45054499
is_mi_columns = isinstance(self.columns, pandas.MultiIndex)
45064500
try:
@@ -4529,7 +4523,7 @@ def _getitem_column(self, key):
45294523
)
45304524

45314525
def _getitem_array(self, key):
4532-
if com.is_bool_indexer(key):
4526+
if is_bool_indexer(key):
45334527
if isinstance(key, pandas.Series) and not key.index.equals(self.index):
45344528
warnings.warn(
45354529
"Boolean Series key will be reindexed to match DataFrame index.",

0 commit comments

Comments
 (0)