Update pandas version to 0.24

devin-petersohn · devin-petersohn · commit 1e200af7f860 · 2019-02-19T17:55:37.000-08:00
* pandas release notes: http://pandas.pydata.org/pandas-docs/stable/whatsnew/v0.24.0.html
* Update imports to match changes in pandas
* Add functionality for list of functions on `axis=1` for `apply`
* Remove `pd.match` from API
* Small regression in pandas requires regression in Modin
  * pandas-dev/pandas#25101 reports this issue
  * pandas-dev/pandas#25102 resolves this issue
* TODO: Expose `pandas.Array` once we properly test it
diff --git a/modin/data_management/query_compiler/pandas_query_compiler.py b/modin/data_management/query_compiler/pandas_query_compiler.py
@@ -13,7 +13,7 @@
     is_datetime_or_timedelta_dtype,
     is_bool_dtype,
 )
-from pandas.core.index import _ensure_index
+from pandas.core.index import ensure_index
 from pandas.core.base import DataError
 
 from modin.engines.base.block_partitions import BaseBlockPartitions
@@ -97,7 +97,7 @@ def pandas_index_extraction(df, axis):
         return index_obj[new_indices] if compute_diff else new_indices
 
     def _validate_set_axis(self, new_labels, old_labels):
-        new_labels = _ensure_index(new_labels)
+        new_labels = ensure_index(new_labels)
         old_len = len(old_labels)
         new_len = len(new_labels)
         if old_len != new_len:
@@ -118,14 +118,14 @@ def _get_columns(self):
 
     def _set_index(self, new_index):
         if self._index_cache is None:
-            self._index_cache = _ensure_index(new_index)
+            self._index_cache = ensure_index(new_index)
         else:
             new_index = self._validate_set_axis(new_index, self._index_cache)
             self._index_cache = new_index
 
     def _set_columns(self, new_columns):
         if self._columns_cache is None:
-            self._columns_cache = _ensure_index(new_columns)
+            self._columns_cache = ensure_index(new_columns)
         else:
             new_columns = self._validate_set_axis(new_columns, self._columns_cache)
             self._columns_cache = new_columns
@@ -1388,7 +1388,9 @@ def _process_all_any(self, func, **kwargs):
 
         if bool_only:
             if axis == 0 and not axis_none and len(not_bool_col) == len(self.columns):
-                return pandas.Series(dtype=bool)
+                # TODO add this line back once pandas-dev/pandas#25101 is resolved
+                # return pandas.Series(dtype=bool)
+                pass
             if len(not_bool_col) == len(self.columns):
                 query_compiler = self
             else:
@@ -2492,11 +2494,22 @@ def _list_like_func(self, func, axis, *args, **kwargs):
         Returns:
             A new PandasQueryCompiler.
         """
-        func_prepared = self._prepare_method(lambda df: df.apply(func, *args, **kwargs))
+        func_prepared = self._prepare_method(
+            lambda df: df.apply(func, axis, *args, **kwargs)
+        )
         new_data = self._map_across_full_axis(axis, func_prepared)
-        # When the function is list-like, the function names become the index
-        new_index = [f if isinstance(f, string_types) else f.__name__ for f in func]
-        return self.__constructor__(new_data, new_index, self.columns)
+        # When the function is list-like, the function names become the index/columns
+        new_index = (
+            [f if isinstance(f, string_types) else f.__name__ for f in func]
+            if axis == 0
+            else self.index
+        )
+        new_columns = (
+            [f if isinstance(f, string_types) else f.__name__ for f in func]
+            if axis == 1
+            else self.columns
+        )
+        return self.__constructor__(new_data, new_index, new_columns)
 
     def _callable_func(self, func, axis, *args, **kwargs):
         """Apply callable functions across given axis.
diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py
@@ -14,7 +14,6 @@
     factorize,
     test,
     qcut,
-    match,
     Panel,
     date_range,
     period_range,
@@ -64,7 +63,7 @@
 from .plotting import Plotting as plotting
 from .. import __execution_engine__ as execution_engine
 
-__pandas_version__ = "0.23.4"
+__pandas_version__ = "0.24.0"
 
 if pandas.__version__ != __pandas_version__:
     raise ImportError(
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
@@ -5,17 +5,17 @@
 import pandas
 from pandas.api.types import is_scalar
 from pandas.compat import to_str, string_types, numpy as numpy_compat, cPickle as pkl
-import pandas.core.common as com
+from pandas.core.common import count_not_none, _pipe, apply_if_callable, is_bool_indexer
 from pandas.core.dtypes.common import (
-    _get_dtype_from_object,
+    infer_dtype_from_object,
     is_list_like,
     is_numeric_dtype,
     is_datetime_or_timedelta_dtype,
     is_dtype_equal,
     is_object_dtype,
     is_integer_dtype,
 )
-from pandas.core.index import _ensure_index_from_sequences
+from pandas.core.index import ensure_index_from_sequences
 from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable
 from pandas.util._validators import validate_bool_kwarg
 
@@ -769,13 +769,7 @@ def apply(
                     FutureWarning,
                     stacklevel=2,
                 )
-        elif is_list_like(func):
-            if axis == 1:
-                raise TypeError(
-                    "(\"'list' object is not callable\", "
-                    "'occurred at index {0}'".format(self.index[0])
-                )
-        elif not callable(func):
+        elif not callable(func) and not is_list_like(func):
             return
 
         query_compiler = self._query_compiler.apply(func, axis, *args, **kwds)
@@ -1512,7 +1506,7 @@ def filter(self, items=None, like=None, regex=None, axis=None):
         Returns:
             A new DataFrame with the filter applied.
         """
-        nkw = com._count_not_none(items, like, regex)
+        nkw = count_not_none(items, like, regex)
         if nkw > 1:
             raise TypeError(
                 "Keyword arguments `items`, `like`, or `regex` "
@@ -2553,7 +2547,7 @@ def pipe(self, func, *args, **kwargs):
         Returns:
             object: the return type of ``func``.
         """
-        return com._pipe(self, func, *args, **kwargs)
+        return _pipe(self, func, *args, **kwargs)
 
     def pivot(self, index=None, columns=None, values=None):
         return self._default_to_pandas(
@@ -3465,7 +3459,7 @@ def select_dtypes(self, include=None, exclude=None):
             exclude = []
 
         sel = tuple(map(set, (include, exclude)))
-        include, exclude = map(lambda x: set(map(_get_dtype_from_object, x)), sel)
+        include, exclude = map(lambda x: set(map(infer_dtype_from_object, x)), sel)
         include_these = pandas.Series(not bool(include), index=self.columns)
         exclude_these = pandas.Series(not bool(exclude), index=self.columns)
 
@@ -3595,7 +3589,7 @@ def set_index(
                 if drop:
                     to_remove.append(col)
             arrays.append(level)
-        index = _ensure_index_from_sequences(arrays, names)
+        index = ensure_index_from_sequences(arrays, names)
 
         if verify_integrity and not index.is_unique:
             duplicates = index.get_duplicates()
@@ -4500,7 +4494,7 @@ def __getitem__(self, key):
         Returns:
             A Pandas Series representing the value for the column.
         """
-        key = com._apply_if_callable(key, self)
+        key = apply_if_callable(key, self)
         # Shortcut if key is an actual column
         is_mi_columns = isinstance(self.columns, pandas.MultiIndex)
         try:
@@ -4529,7 +4523,7 @@ def _getitem_column(self, key):
         )
 
     def _getitem_array(self, key):
-        if com.is_bool_indexer(key):
+        if is_bool_indexer(key):
             if isinstance(key, pandas.Series) and not key.index.equals(self.index):
                 warnings.warn(
                     "Boolean Series key will be reindexed to match DataFrame index.",