Skip to content

Commit 5b861c2

Browse files
Update pandas version to 0.24 (#451)
* Update pandas version to 0.24
* pandas release notes: http://pandas.pydata.org/pandas-docs/stable/whatsnew/v0.24.0.html
* Update imports to match changes in pandas
* Add functionality for list of functions on `axis=1` for `apply`
* Remove `pd.match` from API
* Small regression in pandas requires regression in Modin
* pandas-dev/pandas#25101 reports this issue
* pandas-dev/pandas#25102 resolves this issue
* TODO: Expose `pandas.Array` once we properly test
* Finishing regression update in `all`/`any`
* Update to pandas 0.24 in setup.py and requirements.txt
* Bump to 0.24.1
* Update API and add a test for the API
* Add test for API, update API
* Update API test and finalize compatibility updates
* Revert bug
* Cleanup and add tests
* Fix bug in test
* Lint
* Lint
* Remove print
* Fix transform tests and bug in transform
* Add list test for test_rename
* Fix transform bug
1 parent 226c705 commit 5b861c2

File tree

11 files changed

+493
-196
lines changed

11 files changed

+493
-196
lines changed

.coveragerc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,6 @@ exclude_lines =
2020
pragma: no cover
2121
# Don't complain if tests don't hit defensive assertion code:
2222
raise AssertionError
23-
raise NotImplementedError
23+
raise NotImplementedError
24+
raise ImportError
25+
assert

.travis.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,15 @@ matrix:
4040
- black --check modin/
4141
- flake8 .
4242

43+
- os: linux
44+
dist: trusty
45+
env:
46+
- PYTHON=3.6
47+
- API_COMPAT=1
48+
script:
49+
- export PATH="$HOME/miniconda/bin:$PATH"
50+
- python -m pytest modin/pandas/test/test_api.py
51+
4352
install:
4453
- ./.travis/install-dependencies.sh
4554

modin/data_management/query_compiler/pandas_query_compiler.py

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
is_datetime_or_timedelta_dtype,
1414
is_bool_dtype,
1515
)
16-
from pandas.core.index import _ensure_index
16+
from pandas.core.index import ensure_index
1717
from pandas.core.base import DataError
1818

1919
from modin.engines.base.block_partitions import BaseBlockPartitions
@@ -97,7 +97,7 @@ def pandas_index_extraction(df, axis):
9797
return index_obj[new_indices] if compute_diff else new_indices
9898

9999
def _validate_set_axis(self, new_labels, old_labels):
100-
new_labels = _ensure_index(new_labels)
100+
new_labels = ensure_index(new_labels)
101101
old_len = len(old_labels)
102102
new_len = len(new_labels)
103103
if old_len != new_len:
@@ -118,14 +118,14 @@ def _get_columns(self):
118118

119119
def _set_index(self, new_index):
120120
if self._index_cache is None:
121-
self._index_cache = _ensure_index(new_index)
121+
self._index_cache = ensure_index(new_index)
122122
else:
123123
new_index = self._validate_set_axis(new_index, self._index_cache)
124124
self._index_cache = new_index
125125

126126
def _set_columns(self, new_columns):
127127
if self._columns_cache is None:
128-
self._columns_cache = _ensure_index(new_columns)
128+
self._columns_cache = ensure_index(new_columns)
129129
else:
130130
new_columns = self._validate_set_axis(new_columns, self._columns_cache)
131131
self._columns_cache = new_columns
@@ -1388,11 +1388,16 @@ def _process_all_any(self, func, **kwargs):
13881388

13891389
if bool_only:
13901390
if axis == 0 and not axis_none and len(not_bool_col) == len(self.columns):
1391-
return pandas.Series(dtype=bool)
1392-
if len(not_bool_col) == len(self.columns):
1393-
query_compiler = self
1394-
else:
1395-
query_compiler = self.drop(columns=not_bool_col)
1391+
# TODO add this line back once pandas-dev/pandas#25101 is resolved
1392+
# return pandas.Series(dtype=bool)
1393+
pass
1394+
# See note above about pandas-dev/pandas#25101
1395+
# TODO remove this when pandas 0.24.2 is released.
1396+
query_compiler = self
1397+
# if len(not_bool_col) == len(self.columns):
1398+
# query_compiler = self
1399+
# else:
1400+
# query_compiler = self.drop(columns=not_bool_col)
13961401
else:
13971402
if (
13981403
bool_only is False
@@ -2492,11 +2497,22 @@ def _list_like_func(self, func, axis, *args, **kwargs):
24922497
Returns:
24932498
A new PandasQueryCompiler.
24942499
"""
2495-
func_prepared = self._prepare_method(lambda df: df.apply(func, *args, **kwargs))
2500+
func_prepared = self._prepare_method(
2501+
lambda df: df.apply(func, axis, *args, **kwargs)
2502+
)
24962503
new_data = self._map_across_full_axis(axis, func_prepared)
2497-
# When the function is list-like, the function names become the index
2498-
new_index = [f if isinstance(f, string_types) else f.__name__ for f in func]
2499-
return self.__constructor__(new_data, new_index, self.columns)
2504+
# When the function is list-like, the function names become the index/columns
2505+
new_index = (
2506+
[f if isinstance(f, string_types) else f.__name__ for f in func]
2507+
if axis == 0
2508+
else self.index
2509+
)
2510+
new_columns = (
2511+
[f if isinstance(f, string_types) else f.__name__ for f in func]
2512+
if axis == 1
2513+
else self.columns
2514+
)
2515+
return self.__constructor__(new_data, new_index, new_columns)
25002516

25012517
def _callable_func(self, func, axis, *args, **kwargs):
25022518
"""Apply callable functions across given axis.

modin/engines/base/io.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,7 @@ def to_sql(
435435
index_label=None,
436436
chunksize=None,
437437
dtype=None,
438+
method=None,
438439
):
439440
ErrorMessage.default_to_pandas("`to_sql`")
440441
df = qc.to_pandas()
@@ -447,4 +448,5 @@ def to_sql(
447448
index_label=index_label,
448449
chunksize=chunksize,
449450
dtype=dtype,
451+
method=method,
450452
)

modin/pandas/__init__.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
factorize,
1515
test,
1616
qcut,
17-
match,
1817
Panel,
1918
date_range,
2019
period_range,
@@ -64,7 +63,7 @@
6463
from .plotting import Plotting as plotting
6564
from .. import __execution_engine__ as execution_engine
6665

67-
__pandas_version__ = "0.23.4"
66+
__pandas_version__ = "0.24.1"
6867

6968
if pandas.__version__ != __pandas_version__:
7069
raise ImportError(
@@ -131,7 +130,7 @@ def initialize_ray():
131130
if execution_engine == "Ray":
132131
initialize_ray()
133132
num_cpus = ray.global_state.cluster_resources()["CPU"]
134-
elif execution_engine == "Dask":
133+
elif execution_engine == "Dask": # pragma: no cover
135134
from distributed.client import _get_global_client
136135

137136
if threading.current_thread().name == "MainThread":
@@ -174,7 +173,6 @@ def initialize_ray():
174173
"factorize",
175174
"test",
176175
"qcut",
177-
"match",
178176
"to_datetime",
179177
"get_dummies",
180178
"isna",

0 commit comments

Comments
 (0)