Commit 785a0a0

Merge pull request #155 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 173f5e1 + acacff3 commit 785a0a0

25 files changed (+358, -292 lines)

ci/run_tests.sh (+5)

@@ -29,3 +29,8 @@ fi
 
 echo $PYTEST_CMD
 sh -c "$PYTEST_CMD"
+
+PYTEST_AM_CMD="PANDAS_DATA_MANAGER=array pytest -m \"$PATTERN and arraymanager\" -n $PYTEST_WORKERS --dist=loadfile -s --strict-markers --durations=30 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas"
+
+echo $PYTEST_AM_CMD
+sh -c "$PYTEST_AM_CMD"

doc/source/development/contributing.rst (+5, -1)

@@ -325,7 +325,11 @@ Creating a Python environment (pip)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 If you aren't using conda for your development environment, follow these instructions.
-You'll need to have at least Python 3.6.1 installed on your system.
+You'll need to have at least Python 3.7.0 installed on your system. If your Python version
+is 3.8.0 (or later), you might need to update your ``setuptools`` to version 42.0.0 (or later)
+in your development environment before installing the build dependencies::
+
+    pip install --upgrade setuptools
 
 **Unix**/**macOS with virtualenv**

doc/source/whatsnew/v1.2.4.rst (+1)

@@ -18,6 +18,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.sum` when ``min_count`` greater than the :class:`DataFrame` shape was passed resulted in a ``ValueError`` (:issue:`39738`)
 - Fixed regression in :meth:`DataFrame.to_json` raising ``AttributeError`` when run on PyPy (:issue:`39837`)
 - Fixed regression in :meth:`DataFrame.where` not returning a copy in the case of an all True condition (:issue:`39595`)
+- Fixed regression in :meth:`DataFrame.replace` raising ``IndexError`` when ``regex`` was a multi-key dictionary (:issue:`39338`)
 -
 
 .. ---------------------------------------------------------------------------
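
For context, a rough illustration of the call shape that new entry describes; the frame, patterns, and replacement values below are made up for the example and are not part of the diff:

    import pandas as pd

    df = pd.DataFrame({"A": ["foo", "bar"], "B": ["baz", "qux"]})

    # Multi-key regex dictionary: each regex pattern maps to its replacement value.
    # Calls of this shape raised IndexError in earlier 1.2.x releases (GH 39338).
    result = df.replace(regex={r"^fo.*$": "new", r"^ba.*$": "other"})
    print(result)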

pandas/_libs/groupby.pyx (+33, -63)

@@ -1249,26 +1249,30 @@ def group_min(groupby_t[:, ::1] out,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_cummin(groupby_t[:, ::1] out,
-                 ndarray[groupby_t, ndim=2] values,
-                 const int64_t[:] labels,
-                 int ngroups,
-                 bint is_datetimelike):
+def group_cummin_max(groupby_t[:, ::1] out,
+                     ndarray[groupby_t, ndim=2] values,
+                     const int64_t[:] labels,
+                     int ngroups,
+                     bint is_datetimelike,
+                     bint compute_max):
     """
-    Cumulative minimum of columns of `values`, in row groups `labels`.
+    Cumulative minimum/maximum of columns of `values`, in row groups `labels`.
 
     Parameters
     ----------
     out : array
-        Array to store cummin in.
+        Array to store cummin/max in.
     values : array
-        Values to take cummin of.
+        Values to take cummin/max of.
     labels : int64 array
         Labels to group by.
     ngroups : int
         Number of groups, larger than all entries of `labels`.
     is_datetimelike : bool
         True if `values` contains datetime-like entries.
+    compute_max : bool
+        True if cumulative maximum should be computed, False
+        if cumulative minimum should be computed
 
     Notes
     -----

@@ -1283,11 +1287,11 @@ def group_cummin(groupby_t[:, ::1] out,
     N, K = (<object>values).shape
     accum = np.empty((ngroups, K), dtype=np.asarray(values).dtype)
     if groupby_t is int64_t:
-        accum[:] = _int64_max
+        accum[:] = -_int64_max if compute_max else _int64_max
     elif groupby_t is uint64_t:
-        accum[:] = np.iinfo(np.uint64).max
+        accum[:] = 0 if compute_max else np.iinfo(np.uint64).max
     else:
-        accum[:] = np.inf
+        accum[:] = -np.inf if compute_max else np.inf
 
     with nogil:
         for i in range(N):

@@ -1302,66 +1306,32 @@ def group_cummin(groupby_t[:, ::1] out,
                     out[i, j] = val
                 else:
                     mval = accum[lab, j]
-                    if val < mval:
-                        accum[lab, j] = mval = val
+                    if compute_max:
+                        if val > mval:
+                            accum[lab, j] = mval = val
+                    else:
+                        if val < mval:
+                            accum[lab, j] = mval = val
                     out[i, j] = mval
 
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_cummax(groupby_t[:, ::1] out,
+def group_cummin(groupby_t[:, ::1] out,
                  ndarray[groupby_t, ndim=2] values,
                  const int64_t[:] labels,
                  int ngroups,
                  bint is_datetimelike):
-    """
-    Cumulative maximum of columns of `values`, in row groups `labels`.
+    """See group_cummin_max.__doc__"""
+    group_cummin_max(out, values, labels, ngroups, is_datetimelike, compute_max=False)
 
-    Parameters
-    ----------
-    out : array
-        Array to store cummax in.
-    values : array
-        Values to take cummax of.
-    labels : int64 array
-        Labels to group by.
-    ngroups : int
-        Number of groups, larger than all entries of `labels`.
-    is_datetimelike : bool
-        True if `values` contains datetime-like entries.
 
-    Notes
-    -----
-    This method modifies the `out` parameter, rather than returning an object.
-    """
-    cdef:
-        Py_ssize_t i, j, N, K, size
-        groupby_t val, mval
-        ndarray[groupby_t, ndim=2] accum
-        int64_t lab
-
-    N, K = (<object>values).shape
-    accum = np.empty((ngroups, K), dtype=np.asarray(values).dtype)
-    if groupby_t is int64_t:
-        accum[:] = -_int64_max
-    elif groupby_t is uint64_t:
-        accum[:] = 0
-    else:
-        accum[:] = -np.inf
-
-    with nogil:
-        for i in range(N):
-            lab = labels[i]
-
-            if lab < 0:
-                continue
-            for j in range(K):
-                val = values[i, j]
-
-                if _treat_as_na(val, is_datetimelike):
-                    out[i, j] = val
-                else:
-                    mval = accum[lab, j]
-                    if val > mval:
-                        accum[lab, j] = mval = val
-                    out[i, j] = mval
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def group_cummax(groupby_t[:, ::1] out,
+                 ndarray[groupby_t, ndim=2] values,
+                 const int64_t[:] labels,
+                 int ngroups,
+                 bint is_datetimelike):
+    """See group_cummin_max.__doc__"""
+    group_cummin_max(out, values, labels, ngroups, is_datetimelike, compute_max=True)
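
For orientation, a minimal pandas-level sketch of the group-wise cumulative min/max behaviour that the merged group_cummin_max kernel backs; the toy frame is illustrative only and not part of the diff:

    import pandas as pd

    df = pd.DataFrame({"key": ["a", "a", "b", "a", "b"],
                       "val": [3, 1, 4, 2, 0]})

    # Running minimum and maximum within each group, in row order.
    print(df.groupby("key")["val"].cummin().tolist())  # [3, 1, 4, 1, 0]
    print(df.groupby("key")["val"].cummax().tolist())  # [3, 3, 4, 3, 4]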

pandas/_libs/join.pyx (+31, -31)

@@ -231,7 +231,7 @@ cdef ndarray[intp_t] _get_result_indexer(
     return res
 
 
-def ffill_indexer(const intp_t[:] indexer):
+def ffill_indexer(const intp_t[:] indexer) -> np.ndarray:
     cdef:
         Py_ssize_t i, n = len(indexer)
         ndarray[intp_t] result

@@ -275,15 +275,15 @@ ctypedef fused join_t:
 def left_join_indexer_unique(ndarray[join_t] left, ndarray[join_t] right):
     cdef:
         Py_ssize_t i, j, nleft, nright
-        ndarray[int64_t] indexer
+        ndarray[intp_t] indexer
         join_t lval, rval
 
     i = 0
     j = 0
     nleft = len(left)
     nright = len(right)
 
-    indexer = np.empty(nleft, dtype=np.int64)
+    indexer = np.empty(nleft, dtype=np.intp)
     while True:
         if i == nleft:
             break

@@ -324,7 +324,7 @@ def left_join_indexer(ndarray[join_t] left, ndarray[join_t] right):
     cdef:
         Py_ssize_t i, j, k, nright, nleft, count
         join_t lval, rval
-        ndarray[int64_t] lindexer, rindexer
+        ndarray[intp_t] lindexer, rindexer
         ndarray[join_t] result
 
     nleft = len(left)

@@ -366,8 +366,8 @@ def left_join_indexer(ndarray[join_t] left, ndarray[join_t] right):
 
     # do it again now that result size is known
 
-    lindexer = np.empty(count, dtype=np.int64)
-    rindexer = np.empty(count, dtype=np.int64)
+    lindexer = np.empty(count, dtype=np.intp)
+    rindexer = np.empty(count, dtype=np.intp)
     result = np.empty(count, dtype=left.dtype)
 
     i = 0

@@ -427,7 +427,7 @@ def inner_join_indexer(ndarray[join_t] left, ndarray[join_t] right):
     cdef:
         Py_ssize_t i, j, k, nright, nleft, count
         join_t lval, rval
-        ndarray[int64_t] lindexer, rindexer
+        ndarray[intp_t] lindexer, rindexer
         ndarray[join_t] result
 
     nleft = len(left)

@@ -468,8 +468,8 @@ def inner_join_indexer(ndarray[join_t] left, ndarray[join_t] right):
 
     # do it again now that result size is known
 
-    lindexer = np.empty(count, dtype=np.int64)
-    rindexer = np.empty(count, dtype=np.int64)
+    lindexer = np.empty(count, dtype=np.intp)
+    rindexer = np.empty(count, dtype=np.intp)
     result = np.empty(count, dtype=left.dtype)
 
     i = 0

@@ -517,7 +517,7 @@ def outer_join_indexer(ndarray[join_t] left, ndarray[join_t] right):
     cdef:
         Py_ssize_t i, j, nright, nleft, count
         join_t lval, rval
-        ndarray[int64_t] lindexer, rindexer
+        ndarray[intp_t] lindexer, rindexer
         ndarray[join_t] result
 
     nleft = len(left)

@@ -564,8 +564,8 @@ def outer_join_indexer(ndarray[join_t] left, ndarray[join_t] right):
             count += 1
             j += 1
 
-    lindexer = np.empty(count, dtype=np.int64)
-    rindexer = np.empty(count, dtype=np.int64)
+    lindexer = np.empty(count, dtype=np.intp)
+    rindexer = np.empty(count, dtype=np.intp)
     result = np.empty(count, dtype=left.dtype)
 
     # do it again, but populate the indexers / result

@@ -673,12 +673,12 @@ def asof_join_backward_on_X_by_Y(asof_t[:] left_values,
                                  asof_t[:] right_values,
                                  by_t[:] left_by_values,
                                  by_t[:] right_by_values,
-                                 bint allow_exact_matches=1,
+                                 bint allow_exact_matches=True,
                                  tolerance=None):
 
     cdef:
         Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
-        ndarray[int64_t] left_indexer, right_indexer
+        ndarray[intp_t] left_indexer, right_indexer
         bint has_tolerance = False
         asof_t tolerance_ = 0
         asof_t diff = 0

@@ -693,8 +693,8 @@ def asof_join_backward_on_X_by_Y(asof_t[:] left_values,
     left_size = len(left_values)
     right_size = len(right_values)
 
-    left_indexer = np.empty(left_size, dtype=np.int64)
-    right_indexer = np.empty(left_size, dtype=np.int64)
+    left_indexer = np.empty(left_size, dtype=np.intp)
+    right_indexer = np.empty(left_size, dtype=np.intp)
 
     if by_t is object:
         hash_table = PyObjectHashTable(right_size)

@@ -747,7 +747,7 @@ def asof_join_forward_on_X_by_Y(asof_t[:] left_values,
 
     cdef:
         Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
-        ndarray[int64_t] left_indexer, right_indexer
+        ndarray[intp_t] left_indexer, right_indexer
         bint has_tolerance = False
         asof_t tolerance_ = 0
         asof_t diff = 0

@@ -762,8 +762,8 @@ def asof_join_forward_on_X_by_Y(asof_t[:] left_values,
     left_size = len(left_values)
     right_size = len(right_values)
 
-    left_indexer = np.empty(left_size, dtype=np.int64)
-    right_indexer = np.empty(left_size, dtype=np.int64)
+    left_indexer = np.empty(left_size, dtype=np.intp)
+    right_indexer = np.empty(left_size, dtype=np.intp)
 
     if by_t is object:
         hash_table = PyObjectHashTable(right_size)

@@ -816,14 +816,14 @@ def asof_join_nearest_on_X_by_Y(asof_t[:] left_values,
 
     cdef:
         Py_ssize_t left_size, right_size, i
-        ndarray[int64_t] left_indexer, right_indexer, bli, bri, fli, fri
+        ndarray[intp_t] left_indexer, right_indexer, bli, bri, fli, fri
         asof_t bdiff, fdiff
 
     left_size = len(left_values)
     right_size = len(right_values)
 
-    left_indexer = np.empty(left_size, dtype=np.int64)
-    right_indexer = np.empty(left_size, dtype=np.int64)
+    left_indexer = np.empty(left_size, dtype=np.intp)
+    right_indexer = np.empty(left_size, dtype=np.intp)
 
     # search both forward and backward
     bli, bri = asof_join_backward_on_X_by_Y(

@@ -867,7 +867,7 @@ def asof_join_backward(asof_t[:] left_values,
 
     cdef:
         Py_ssize_t left_pos, right_pos, left_size, right_size
-        ndarray[int64_t] left_indexer, right_indexer
+        ndarray[intp_t] left_indexer, right_indexer
         bint has_tolerance = False
         asof_t tolerance_ = 0
         asof_t diff = 0

@@ -880,8 +880,8 @@ def asof_join_backward(asof_t[:] left_values,
     left_size = len(left_values)
     right_size = len(right_values)
 
-    left_indexer = np.empty(left_size, dtype=np.int64)
-    right_indexer = np.empty(left_size, dtype=np.int64)
+    left_indexer = np.empty(left_size, dtype=np.intp)
+    right_indexer = np.empty(left_size, dtype=np.intp)
 
     right_pos = 0
     for left_pos in range(left_size):

@@ -920,7 +920,7 @@ def asof_join_forward(asof_t[:] left_values,
 
     cdef:
         Py_ssize_t left_pos, right_pos, left_size, right_size
-        ndarray[int64_t] left_indexer, right_indexer
+        ndarray[intp_t] left_indexer, right_indexer
         bint has_tolerance = False
         asof_t tolerance_ = 0
         asof_t diff = 0

@@ -933,8 +933,8 @@ def asof_join_forward(asof_t[:] left_values,
     left_size = len(left_values)
    right_size = len(right_values)
 
-    left_indexer = np.empty(left_size, dtype=np.int64)
-    right_indexer = np.empty(left_size, dtype=np.int64)
+    left_indexer = np.empty(left_size, dtype=np.intp)
+    right_indexer = np.empty(left_size, dtype=np.intp)
 
     right_pos = right_size - 1
     for left_pos in range(left_size - 1, -1, -1):

@@ -974,14 +974,14 @@ def asof_join_nearest(asof_t[:] left_values,
 
     cdef:
         Py_ssize_t left_size, right_size, i
-        ndarray[int64_t] left_indexer, right_indexer, bli, bri, fli, fri
+        ndarray[intp_t] left_indexer, right_indexer, bli, bri, fli, fri
         asof_t bdiff, fdiff
 
     left_size = len(left_values)
     right_size = len(right_values)
 
-    left_indexer = np.empty(left_size, dtype=np.int64)
-    right_indexer = np.empty(left_size, dtype=np.int64)
+    left_indexer = np.empty(left_size, dtype=np.intp)
+    right_indexer = np.empty(left_size, dtype=np.intp)
 
     # search both forward and backward
     bli, bri = asof_join_backward(left_values, right_values,
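
As a side note on the int64 -> intp switch above, a small sketch of what np.intp is; the printed values depend on the platform and the comments assume a typical 64-bit build:

    import numpy as np

    # np.intp is the signed integer type whose width matches the platform pointer
    # size; it is the dtype NumPy itself expects for take()/fancy-indexing indexers.
    indexer = np.empty(3, dtype=np.intp)
    print(indexer.dtype)                    # int64 on a 64-bit build
    print(np.dtype(np.intp).itemsize * 8)   # 64 (or 32 on a 32-bit build)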
