Skip to content

Commit 6547e9c

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into issues2
2 parents 8a927b0 + 2b51c96 commit 6547e9c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+745
-597
lines changed

doc/source/whatsnew/v0.24.0.txt

+4-5
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ New features
1616
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1717

1818
A ``Series`` based on an ``ExtensionArray`` now supports arithmetic and comparison
19-
operators. (:issue:`19577`). There are two approaches for providing operator support for an ``ExtensionArray``:
19+
operators (:issue:`19577`). There are two approaches for providing operator support for an ``ExtensionArray``:
2020

2121
1. Define each of the operators on your ``ExtensionArray`` subclass.
2222
2. Use an operator implementation from pandas that depends on operators that are already defined
@@ -110,7 +110,7 @@ Other Enhancements
110110
<https://pandas-gbq.readthedocs.io/en/latest/changelog.html#changelog-0-5-0>`__.
111111
(:issue:`21627`)
112112
- New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of an HDF5 file (:issue:`10932`)
113-
- :func:`read_html` copies cell data across ``colspan``s and ``rowspan``s, and it treats all-``th`` table rows as headers if ``header`` kwarg is not given and there is no ``thead`` (:issue:`17054`)
113+
- :func:`read_html` copies cell data across ``colspan`` and ``rowspan``, and it treats all-``th`` table rows as headers if ``header`` kwarg is not given and there is no ``thead`` (:issue:`17054`)
114114
- :meth:`Series.nlargest`, :meth:`Series.nsmallest`, :meth:`DataFrame.nlargest`, and :meth:`DataFrame.nsmallest` now accept the value ``"all"`` for the ``keep`` argument. This keeps all ties for the nth largest/smallest value (:issue:`16818`)
115115
- :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`)
116116
- :func:`~DataFrame.to_csv` and :func:`~DataFrame.to_json` now support ``compression='infer'`` to infer compression based on filename (:issue:`15008`)
@@ -389,7 +389,7 @@ Timezones
389389
- Bug in :class:`Timestamp` when passing different string date formats with a timezone offset would produce different timezone offsets (:issue:`12064`)
390390
- Bug when comparing a tz-naive :class:`Timestamp` to a tz-aware :class:`DatetimeIndex` which would coerce the :class:`DatetimeIndex` to tz-naive (:issue:`12601`)
391391
- Bug in :meth:`Series.truncate` with a tz-aware :class:`DatetimeIndex` which would cause a core dump (:issue:`9243`)
392-
- Bug in :class:`Series` constructor which would coerce tz-aware and tz-naive :class:`Timestamp`s to tz-aware (:issue:`13051`)
392+
- Bug in :class:`Series` constructor which would coerce tz-aware and tz-naive :class:`Timestamp` to tz-aware (:issue:`13051`)
393393
- Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`)
394394
- Bug in :class:`DatetimeIndex` where constructing with an integer and tz would not localize correctly (:issue:`12619`)
395395
- Fixed bug where :meth:`DataFrame.describe` and :meth:`Series.describe` on tz-aware datetimes did not show ``first`` and ``last`` results (:issue:`21328`)
@@ -433,8 +433,7 @@ Indexing
433433
- ``DataFrame.__getitem__`` now accepts dictionaries and dictionary keys as list-likes of labels, consistently with ``Series.__getitem__`` (:issue:`21294`)
434434
- Fixed ``DataFrame[np.nan]`` when columns are non-unique (:issue:`21428`)
435435
- Bug when indexing :class:`DatetimeIndex` with nanosecond resolution dates and timezones (:issue:`11679`)
436-
437-
-
436+
- Bug where indexing with a NumPy array containing negative values would mutate the indexer (:issue:`21867`)
438437

439438
Missing
440439
^^^^^^^

pandas/_libs/groupby.pyx

+6-7
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,20 @@
11
# -*- coding: utf-8 -*-
22
# cython: profile=False
33

4-
cimport numpy as cnp
5-
import numpy as np
6-
74
cimport cython
5+
from cython cimport Py_ssize_t
86

9-
cnp.import_array()
7+
from libc.stdlib cimport malloc, free
108

9+
import numpy as np
1110
from numpy cimport (ndarray,
1211
double_t,
1312
int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
1413
uint32_t, uint64_t, float32_t, float64_t)
1514

16-
from libc.stdlib cimport malloc, free
1715

1816
from util cimport numeric, get_nat
17+
1918
from algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, TIEBREAK_MIN,
2019
TIEBREAK_MAX, TIEBREAK_FIRST, TIEBREAK_DENSE)
2120
from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers
@@ -74,8 +73,8 @@ cdef inline float64_t kth_smallest_c(float64_t* a,
7473
double_t x, t
7574

7675
l = 0
77-
m = n -1
78-
while (l<m):
76+
m = n - 1
77+
while l < m:
7978
x = a[k]
8079
i = l
8180
j = m

pandas/_libs/index.pyx

+4-6
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,12 @@ from cpython.slice cimport PySlice_Check
99
import numpy as np
1010
cimport numpy as cnp
1111
from numpy cimport (ndarray, float64_t, int32_t,
12-
int64_t, uint8_t, uint64_t, intp_t)
12+
int64_t, uint8_t, uint64_t, intp_t,
13+
# Note: NPY_DATETIME, NPY_TIMEDELTA are only available
14+
# for cimport in cython>=0.27.3
15+
NPY_DATETIME, NPY_TIMEDELTA)
1316
cnp.import_array()
1417

15-
cdef extern from "numpy/arrayobject.h":
16-
# These can be cimported directly from numpy in cython>=0.27.3
17-
cdef enum NPY_TYPES:
18-
NPY_DATETIME
19-
NPY_TIMEDELTA
2018

2119
cimport util
2220

pandas/_libs/interval.pyx

+12-7
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,22 @@
1-
cimport numpy as cnp
2-
import numpy as np
1+
# -*- coding: utf-8 -*-
2+
import numbers
3+
4+
from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE,
5+
PyObject_RichCompare)
36

4-
cimport util
57
cimport cython
6-
import cython
8+
from cython cimport Py_ssize_t
9+
10+
import numpy as np
711
from numpy cimport ndarray
12+
13+
14+
cimport util
15+
816
from tslibs import Timestamp
917
from tslibs.timezones cimport tz_compare
1018

11-
from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE,
12-
PyObject_RichCompare)
1319

14-
import numbers
1520
_VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither'])
1621

1722

pandas/_libs/lib.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -753,4 +753,4 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys,
753753
return result
754754

755755

756-
include "inference.pyx"
756+
include "src/inference.pyx"

pandas/_libs/src/datetime/np_datetime.c

+18-18
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
2828
#define PyInt_AsLong PyLong_AsLong
2929
#endif
3030

31-
const pandas_datetimestruct _NS_MIN_DTS = {
31+
const npy_datetimestruct _NS_MIN_DTS = {
3232
1677, 9, 21, 0, 12, 43, 145225, 0, 0};
33-
const pandas_datetimestruct _NS_MAX_DTS = {
33+
const npy_datetimestruct _NS_MAX_DTS = {
3434
2262, 4, 11, 23, 47, 16, 854775, 807000, 0};
3535

3636

@@ -62,7 +62,7 @@ int dayofweek(int y, int m, int d) {
6262
* Adjusts a datetimestruct based on a minutes offset. Assumes
6363
* the current values are valid.
6464
*/
65-
void add_minutes_to_datetimestruct(pandas_datetimestruct *dts, int minutes) {
65+
void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) {
6666
int isleap;
6767

6868
/* MINUTES */
@@ -111,7 +111,7 @@ void add_minutes_to_datetimestruct(pandas_datetimestruct *dts, int minutes) {
111111
/*
112112
* Calculates the days offset from the 1970 epoch.
113113
*/
114-
npy_int64 get_datetimestruct_days(const pandas_datetimestruct *dts) {
114+
npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) {
115115
int i, month;
116116
npy_int64 year, days = 0;
117117
const int *month_lengths;
@@ -211,7 +211,7 @@ static npy_int64 days_to_yearsdays(npy_int64 *days_) {
211211
* Adjusts a datetimestruct based on a seconds offset. Assumes
212212
* the current values are valid.
213213
*/
214-
NPY_NO_EXPORT void add_seconds_to_datetimestruct(pandas_datetimestruct *dts,
214+
NPY_NO_EXPORT void add_seconds_to_datetimestruct(npy_datetimestruct *dts,
215215
int seconds) {
216216
int minutes;
217217

@@ -236,7 +236,7 @@ NPY_NO_EXPORT void add_seconds_to_datetimestruct(pandas_datetimestruct *dts,
236236
* offset from 1970.
237237
*/
238238
static void set_datetimestruct_days(npy_int64 days,
239-
pandas_datetimestruct *dts) {
239+
npy_datetimestruct *dts) {
240240
const int *month_lengths;
241241
int i;
242242

@@ -255,10 +255,10 @@ static void set_datetimestruct_days(npy_int64 days,
255255
}
256256

257257
/*
258-
* Compares two pandas_datetimestruct objects chronologically
258+
* Compares two npy_datetimestruct objects chronologically
259259
*/
260-
int cmp_pandas_datetimestruct(const pandas_datetimestruct *a,
261-
const pandas_datetimestruct *b) {
260+
int cmp_npy_datetimestruct(const npy_datetimestruct *a,
261+
const npy_datetimestruct *b) {
262262
if (a->year > b->year) {
263263
return 1;
264264
} else if (a->year < b->year) {
@@ -319,7 +319,7 @@ int cmp_pandas_datetimestruct(const pandas_datetimestruct *a,
319319
/*
320320
*
321321
* Tests for and converts a Python datetime.datetime or datetime.date
322-
* object into a NumPy pandas_datetimestruct. Uses tzinfo (if present)
322+
* object into a NumPy npy_datetimestruct. Uses tzinfo (if present)
323323
* to convert to UTC time.
324324
*
325325
* While the C API has PyDate_* and PyDateTime_* functions, the following
@@ -331,12 +331,12 @@ int cmp_pandas_datetimestruct(const pandas_datetimestruct *a,
331331
* if obj doesn't have the needed date or datetime attributes.
332332
*/
333333
int convert_pydatetime_to_datetimestruct(PyObject *obj,
334-
pandas_datetimestruct *out) {
334+
npy_datetimestruct *out) {
335335
PyObject *tmp;
336336
int isleap;
337337

338338
/* Initialize the output to all zeros */
339-
memset(out, 0, sizeof(pandas_datetimestruct));
339+
memset(out, 0, sizeof(npy_datetimestruct));
340340
out->month = 1;
341341
out->day = 1;
342342

@@ -512,16 +512,16 @@ int convert_pydatetime_to_datetimestruct(PyObject *obj,
512512
return -1;
513513
}
514514

515-
npy_datetime pandas_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr,
516-
pandas_datetimestruct *d) {
515+
npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr,
516+
npy_datetimestruct *d) {
517517
npy_datetime result = NPY_DATETIME_NAT;
518518

519519
convert_datetimestruct_to_datetime(fr, d, &result);
520520
return result;
521521
}
522522

523523
void pandas_datetime_to_datetimestruct(npy_datetime val, NPY_DATETIMEUNIT fr,
524-
pandas_datetimestruct *result) {
524+
npy_datetimestruct *result) {
525525
convert_datetime_to_datetimestruct(fr, val, result);
526526
}
527527

@@ -539,7 +539,7 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta val,
539539
* Returns 0 on success, -1 on failure.
540540
*/
541541
int convert_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
542-
const pandas_datetimestruct *dts,
542+
const npy_datetimestruct *dts,
543543
npy_datetime *out) {
544544
npy_datetime ret;
545545

@@ -643,11 +643,11 @@ int convert_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
643643
*/
644644
int convert_datetime_to_datetimestruct(NPY_DATETIMEUNIT base,
645645
npy_datetime dt,
646-
pandas_datetimestruct *out) {
646+
npy_datetimestruct *out) {
647647
npy_int64 perday;
648648

649649
/* Initialize the output to all zeros */
650-
memset(out, 0, sizeof(pandas_datetimestruct));
650+
memset(out, 0, sizeof(npy_datetimestruct));
651651
out->year = 1970;
652652
out->month = 1;
653653
out->day = 1;

pandas/_libs/src/datetime/np_datetime.h

+12-17
Original file line numberDiff line numberDiff line change
@@ -19,30 +19,25 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
1919

2020
#include <numpy/ndarraytypes.h>
2121

22-
typedef struct {
23-
npy_int64 year;
24-
npy_int32 month, day, hour, min, sec, us, ps, as;
25-
} pandas_datetimestruct;
26-
2722
typedef struct {
2823
npy_int64 days;
2924
npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds;
3025
} pandas_timedeltastruct;
3126

32-
extern const pandas_datetimestruct _NS_MIN_DTS;
33-
extern const pandas_datetimestruct _NS_MAX_DTS;
27+
extern const npy_datetimestruct _NS_MIN_DTS;
28+
extern const npy_datetimestruct _NS_MAX_DTS;
3429

3530
// stuff pandas needs
3631
// ----------------------------------------------------------------------------
3732

3833
int convert_pydatetime_to_datetimestruct(PyObject *obj,
39-
pandas_datetimestruct *out);
34+
npy_datetimestruct *out);
4035

41-
npy_datetime pandas_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr,
42-
pandas_datetimestruct *d);
36+
npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr,
37+
npy_datetimestruct *d);
4338

4439
void pandas_datetime_to_datetimestruct(npy_datetime val, NPY_DATETIMEUNIT fr,
45-
pandas_datetimestruct *result);
40+
npy_datetimestruct *result);
4641

4742
void pandas_timedelta_to_timedeltastruct(npy_timedelta val,
4843
NPY_DATETIMEUNIT fr,
@@ -61,27 +56,27 @@ int is_leapyear(npy_int64 year);
6156
* Calculates the days offset from the 1970 epoch.
6257
*/
6358
npy_int64
64-
get_datetimestruct_days(const pandas_datetimestruct *dts);
59+
get_datetimestruct_days(const npy_datetimestruct *dts);
6560

6661

6762
/*
68-
* Compares two pandas_datetimestruct objects chronologically
63+
* Compares two npy_datetimestruct objects chronologically
6964
*/
70-
int cmp_pandas_datetimestruct(const pandas_datetimestruct *a,
71-
const pandas_datetimestruct *b);
65+
int cmp_npy_datetimestruct(const npy_datetimestruct *a,
66+
const npy_datetimestruct *b);
7267

7368

7469
/*
7570
* Adjusts a datetimestruct based on a minutes offset. Assumes
7671
* the current values are valid.
7772
*/
7873
void
79-
add_minutes_to_datetimestruct(pandas_datetimestruct *dts, int minutes);
74+
add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes);
8075

8176

8277
int
8378
convert_datetime_to_datetimestruct(NPY_DATETIMEUNIT base,
8479
npy_datetime dt,
85-
pandas_datetimestruct *out);
80+
npy_datetimestruct *out);
8681

8782
#endif // PANDAS__LIBS_SRC_DATETIME_NP_DATETIME_H_

pandas/_libs/src/datetime/np_datetime_strings.c

+4-4
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ This file implements string parsing and creation for NumPy datetime.
6363
* Returns 0 on success, -1 on failure.
6464
*/
6565
int parse_iso_8601_datetime(char *str, int len,
66-
pandas_datetimestruct *out,
66+
npy_datetimestruct *out,
6767
int *out_local, int *out_tzoffset) {
6868
int year_leap = 0;
6969
int i, numdigits;
@@ -86,7 +86,7 @@ int parse_iso_8601_datetime(char *str, int len,
8686
int hour_was_2_digits = 0;
8787

8888
/* Initialize the output to all zeros */
89-
memset(out, 0, sizeof(pandas_datetimestruct));
89+
memset(out, 0, sizeof(npy_datetimestruct));
9090
out->month = 1;
9191
out->day = 1;
9292

@@ -567,7 +567,7 @@ int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) {
567567

568568

569569
/*
570-
* Converts an pandas_datetimestruct to an (almost) ISO 8601
570+
* Converts an npy_datetimestruct to an (almost) ISO 8601
571571
* NULL-terminated string using timezone Z (UTC). If the string fits in
572572
* the space exactly, it leaves out the NULL terminator and returns success.
573573
*
@@ -580,7 +580,7 @@ int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) {
580580
* Returns 0 on success, -1 on failure (for example if the output
581581
* string was too short).
582582
*/
583-
int make_iso_8601_datetime(pandas_datetimestruct *dts, char *outstr, int outlen,
583+
int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
584584
NPY_DATETIMEUNIT base) {
585585
char *substr = outstr, sublen = outlen;
586586
int tmplen;

pandas/_libs/src/datetime/np_datetime_strings.h

+5-5
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ This file implements string parsing and creation for NumPy datetime.
5151
*/
5252
int
5353
parse_iso_8601_datetime(char *str, int len,
54-
pandas_datetimestruct *out,
55-
int *out_local,
56-
int *out_tzoffset);
54+
npy_datetimestruct *out,
55+
int *out_local,
56+
int *out_tzoffset);
5757

5858
/*
5959
* Provides a string length to use for converting datetime
@@ -63,7 +63,7 @@ int
6363
get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base);
6464

6565
/*
66-
* Converts an pandas_datetimestruct to an (almost) ISO 8601
66+
* Converts an npy_datetimestruct to an (almost) ISO 8601
6767
* NULL-terminated string using timezone Z (UTC).
6868
*
6969
* 'base' restricts the output to that unit. Set 'base' to
@@ -73,7 +73,7 @@ get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base);
7373
* string was too short).
7474
*/
7575
int
76-
make_iso_8601_datetime(pandas_datetimestruct *dts, char *outstr, int outlen,
76+
make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
7777
NPY_DATETIMEUNIT base);
7878

7979
#endif // PANDAS__LIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_

0 commit comments

Comments
 (0)