Skip to content

Commit 0429a72

Browse files
chean.wei.khor
chean.wei.khor
authored and
chean.wei.khor
committed
Merge branch 'main' of https://github.com/weikhor/pandas into enh-consistency-interval-range
2 parents 3a9fd9e + afec0e9 commit 0429a72

38 files changed

+249
-230
lines changed

.github/workflows/posix.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -162,7 +162,7 @@ jobs:
162162
shell: bash
163163
run: |
164164
# TODO: re-enable cov, its slowing the tests down though
165-
pip install Cython numpy python-dateutil pytz pytest>=6.0 pytest-xdist>=1.31.0 hypothesis>=5.5.3
165+
pip install Cython numpy python-dateutil pytz pytest>=6.0 pytest-xdist>=1.31.0 pytest-asyncio hypothesis>=5.5.3
166166
if: ${{ env.IS_PYPY == 'true' }}
167167

168168
- name: Build Pandas

asv_bench/benchmarks/categoricals.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -187,7 +187,7 @@ def time_remove_categories(self):
187187
class Rank:
188188
def setup(self):
189189
N = 10**5
190-
ncats = 100
190+
ncats = 15
191191

192192
self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
193193
self.s_str_cat = pd.Series(self.s_str, dtype="category")

doc/source/whatsnew/v1.4.2.rst

+1
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ Fixed regressions
1919
- Fixed memory performance regression in :meth:`Series.fillna` when called on a :class:`DataFrame` column with ``inplace=True`` (:issue:`46149`)
2020
- Provided an alternative solution for passing custom Excel formats in :meth:`.Styler.to_excel`, which was a regression based on stricter CSS validation. Examples available in the documentation for :meth:`.Styler.format` (:issue:`46152`)
2121
- Fixed regression in :meth:`DataFrame.replace` when a replacement value was also a target for replacement (:issue:`46306`)
22+
- Fixed regression in :meth:`DataFrame.replace` when the replacement value was explicitly ``None`` when passed in a dictionary to ``to_replace`` (:issue:`45601`, :issue:`45836`)
2223
- Fixed regression when setting values with :meth:`DataFrame.loc` losing :class:`MultiIndex` names if :class:`DataFrame` was empty before (:issue:`46317`)
2324
-
2425

doc/source/whatsnew/v1.5.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -504,6 +504,7 @@ Groupby/resample/rolling
504504
- Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`)
505505
- Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`)
506506
- Bug in :meth:`GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`)
507+
- Bug in :meth:`GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`)
507508
-
508509

509510
Reshaping

pandas/_libs/hashing.pyx

+1-1
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ def hash_object_array(
5353
"""
5454
cdef:
5555
Py_ssize_t i, n
56-
uint64_t[:] result
56+
uint64_t[::1] result
5757
bytes data, k
5858
uint8_t *kb
5959
uint64_t *lens

pandas/_libs/hashtable_class_helper.pxi.in

+8-8
Original file line number | Diff line number | Diff line change
@@ -503,7 +503,7 @@ cdef class {{name}}HashTable(HashTable):
503503
int ret = 0
504504
{{c_type}} val
505505
khiter_t k
506-
intp_t[:] locs = np.empty(n, dtype=np.intp)
506+
intp_t[::1] locs = np.empty(n, dtype=np.intp)
507507

508508
with nogil:
509509
for i in range(n):
@@ -561,7 +561,7 @@ cdef class {{name}}HashTable(HashTable):
561561
"""
562562
cdef:
563563
Py_ssize_t i, idx, count = count_prior, n = len(values)
564-
intp_t[:] labels
564+
intp_t[::1] labels
565565
int ret = 0
566566
{{c_type}} val, na_value2
567567
khiter_t k
@@ -710,7 +710,7 @@ cdef class {{name}}HashTable(HashTable):
710710
# tuple[np.ndarray[np.intp], np.ndarray[{{dtype}}]]
711711
cdef:
712712
Py_ssize_t i, n = len(values)
713-
intp_t[:] labels
713+
intp_t[::1] labels
714714
Py_ssize_t idx, count = 0
715715
int ret = 0
716716
{{c_type}} val
@@ -848,7 +848,7 @@ cdef class StringHashTable(HashTable):
848848
object val
849849
const char *v
850850
khiter_t k
851-
intp_t[:] locs = np.empty(n, dtype=np.intp)
851+
intp_t[::1] locs = np.empty(n, dtype=np.intp)
852852

853853
# these by-definition *must* be strings
854854
vecs = <const char **>malloc(n * sizeof(char *))
@@ -946,8 +946,8 @@ cdef class StringHashTable(HashTable):
946946
"""
947947
cdef:
948948
Py_ssize_t i, idx, count = count_prior, n = len(values)
949-
intp_t[:] labels
950-
int64_t[:] uindexer
949+
intp_t[::1] labels
950+
int64_t[::1] uindexer
951951
int ret = 0
952952
object val
953953
const char *v
@@ -1168,7 +1168,7 @@ cdef class PyObjectHashTable(HashTable):
11681168
int ret = 0
11691169
object val
11701170
khiter_t k
1171-
intp_t[:] locs = np.empty(n, dtype=np.intp)
1171+
intp_t[::1] locs = np.empty(n, dtype=np.intp)
11721172

11731173
for i in range(n):
11741174
val = values[i]
@@ -1223,7 +1223,7 @@ cdef class PyObjectHashTable(HashTable):
12231223
"""
12241224
cdef:
12251225
Py_ssize_t i, idx, count = count_prior, n = len(values)
1226-
intp_t[:] labels
1226+
intp_t[::1] labels
12271227
int ret = 0
12281228
object val
12291229
khiter_t k

pandas/_libs/hashtable_func_helper.pxi.in

+4-2
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,9 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
8585
{{endif}}
8686

8787
# collect counts in the order corresponding to result_keys:
88-
cdef int64_t[:] result_counts = np.empty(table.size, dtype=np.int64)
88+
cdef:
89+
int64_t[::1] result_counts = np.empty(table.size, dtype=np.int64)
90+
8991
for i in range(table.size):
9092
{{if dtype == 'object'}}
9193
k = kh_get_{{ttype}}(table, result_keys.data[i])
@@ -366,7 +368,7 @@ def mode(ndarray[htfunc_t] values, bint dropna):
366368
ndarray[htfunc_t] keys
367369
ndarray[htfunc_t] modes
368370

369-
int64_t[:] counts
371+
int64_t[::1] counts
370372
int64_t count, max_count = -1
371373
Py_ssize_t nkeys, k, j = 0
372374

pandas/_libs/interval.pyx

+2-2
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,11 @@ from operator import (
55
)
66

77
from cpython.datetime cimport (
8-
PyDateTime_IMPORT,
98
PyDelta_Check,
9+
import_datetime,
1010
)
1111

12-
PyDateTime_IMPORT
12+
import_datetime()
1313

1414
from cpython.object cimport (
1515
Py_EQ,

pandas/_libs/lib.pyx

+4-4
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,9 @@ from cython import Py_ssize_t
99
from cpython.datetime cimport (
1010
PyDate_Check,
1111
PyDateTime_Check,
12-
PyDateTime_IMPORT,
1312
PyDelta_Check,
1413
PyTime_Check,
14+
import_datetime,
1515
)
1616
from cpython.iterator cimport PyIter_Check
1717
from cpython.number cimport PyNumber_Check
@@ -27,7 +27,7 @@ from cpython.tuple cimport (
2727
)
2828
from cython cimport floating
2929

30-
PyDateTime_IMPORT
30+
import_datetime()
3131

3232
import numpy as np
3333

@@ -2470,8 +2470,8 @@ def maybe_convert_objects(ndarray[object] objects,
24702470
ndarray[int64_t] ints
24712471
ndarray[uint64_t] uints
24722472
ndarray[uint8_t] bools
2473-
int64_t[:] idatetimes
2474-
int64_t[:] itimedeltas
2473+
int64_t[::1] idatetimes
2474+
int64_t[::1] itimedeltas
24752475
Seen seen = Seen()
24762476
object val
24772477
float64_t fval, fnan = np.nan

pandas/_libs/ops.pyx

+2-2
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,7 @@ def scalar_binop(object[:] values, object val, object op) -> ndarray:
194194
"""
195195
cdef:
196196
Py_ssize_t i, n = len(values)
197-
object[:] result
197+
object[::1] result
198198
object x
199199

200200
result = np.empty(n, dtype=object)
@@ -231,7 +231,7 @@ def vec_binop(object[:] left, object[:] right, object op) -> ndarray:
231231
"""
232232
cdef:
233233
Py_ssize_t i, n = len(left)
234-
object[:] result
234+
object[::1] result
235235

236236
if n != <Py_ssize_t>len(right):
237237
raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}')

pandas/_libs/parsers.pyx

+1-1
Original file line number | Diff line number | Diff line change
@@ -1457,7 +1457,7 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
14571457
const char *word = NULL
14581458

14591459
int64_t NA = -1
1460-
int64_t[:] codes
1460+
int64_t[::1] codes
14611461
int64_t current_category = 0
14621462

14631463
char *errors = "strict"

pandas/_libs/tslib.pyx

+2-3
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,13 @@ import cython
55
from cpython.datetime cimport (
66
PyDate_Check,
77
PyDateTime_Check,
8-
PyDateTime_IMPORT,
98
datetime,
9+
import_datetime,
1010
tzinfo,
1111
)
1212

1313
# import datetime C API
14-
PyDateTime_IMPORT
14+
import_datetime()
1515

1616

1717
cimport numpy as cnp
@@ -63,7 +63,6 @@ from pandas._libs.tslibs.timestamps cimport _Timestamp
6363
from pandas._libs.tslibs.timestamps import Timestamp
6464

6565
# Note: this is the only non-tslibs intra-pandas dependency here
66-
6766
from pandas._libs.missing cimport checknull_with_nat_and_na
6867
from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single
6968

pandas/_libs/tslibs/conversion.pyx

+5-5
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,13 @@ import pytz
1818
from cpython.datetime cimport (
1919
PyDate_Check,
2020
PyDateTime_Check,
21-
PyDateTime_IMPORT,
2221
datetime,
22+
import_datetime,
2323
time,
2424
tzinfo,
2525
)
2626

27-
PyDateTime_IMPORT
27+
import_datetime()
2828

2929
from pandas._libs.tslibs.base cimport ABCTimestamp
3030
from pandas._libs.tslibs.np_datetime cimport (
@@ -71,7 +71,7 @@ from pandas._libs.tslibs.nattype cimport (
7171
)
7272
from pandas._libs.tslibs.tzconversion cimport (
7373
bisect_right_i8,
74-
tz_convert_utc_to_tzlocal,
74+
localize_tzinfo_api,
7575
tz_localize_to_utc_single,
7676
)
7777

@@ -556,7 +556,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
556556
if is_utc(tz):
557557
pass
558558
elif is_tzlocal(tz):
559-
tz_convert_utc_to_tzlocal(obj.value, tz, &obj.fold)
559+
localize_tzinfo_api(obj.value, tz, &obj.fold)
560560
else:
561561
trans, deltas, typ = get_dst_info(tz)
562562

@@ -725,7 +725,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
725725
elif obj.value == NPY_NAT:
726726
pass
727727
elif is_tzlocal(tz):
728-
local_val = tz_convert_utc_to_tzlocal(obj.value, tz, &obj.fold)
728+
local_val = obj.value + localize_tzinfo_api(obj.value, tz, &obj.fold)
729729
dt64_to_dtstruct(local_val, &obj.dts)
730730
else:
731731
# Adjust datetime64 timestamp, recompute datetimestruct

pandas/_libs/tslibs/dtypes.pyx

+1-1
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,7 @@ class FreqGroup(Enum):
165165
FR_MS = c_FreqGroup.FR_MS
166166
FR_US = c_FreqGroup.FR_US
167167
FR_NS = c_FreqGroup.FR_NS
168-
FR_UND = -c_FreqGroup.FR_UND # undefined
168+
FR_UND = c_FreqGroup.FR_UND # undefined
169169

170170
@staticmethod
171171
def from_period_dtype_code(code: int) -> "FreqGroup":

pandas/_libs/tslibs/fields.pyi

+2-2
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ def get_date_field(
2222
field: str,
2323
) -> npt.NDArray[np.int32]: ...
2424
def get_timedelta_field(
25-
tdindex: np.ndarray, # const int64_t[:]
25+
tdindex: npt.NDArray[np.int64], # const int64_t[:]
2626
field: str,
2727
) -> npt.NDArray[np.int32]: ...
2828
def isleapyear_arr(
@@ -31,7 +31,7 @@ def isleapyear_arr(
3131
def build_isocalendar_sarray(
3232
dtindex: npt.NDArray[np.int64], # const int64_t[:]
3333
) -> np.ndarray: ...
34-
def get_locale_names(name_type: str, locale: str | None = ...): ...
34+
def _get_locale_names(name_type: str, locale: str | None = ...): ...
3535

3636
class RoundTo:
3737
@property

pandas/_libs/tslibs/fields.pyx

+5-5
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,7 @@ def get_date_name_field(const int64_t[:] dtindex, str field, object locale=None)
152152
if locale is None:
153153
names = np.array(DAYS_FULL, dtype=np.object_)
154154
else:
155-
names = np.array(get_locale_names('f_weekday', locale),
155+
names = np.array(_get_locale_names('f_weekday', locale),
156156
dtype=np.object_)
157157
for i in range(count):
158158
if dtindex[i] == NPY_NAT:
@@ -167,7 +167,7 @@ def get_date_name_field(const int64_t[:] dtindex, str field, object locale=None)
167167
if locale is None:
168168
names = np.array(MONTHS_FULL, dtype=np.object_)
169169
else:
170-
names = np.array(get_locale_names('f_month', locale),
170+
names = np.array(_get_locale_names('f_month', locale),
171171
dtype=np.object_)
172172
for i in range(count):
173173
if dtindex[i] == NPY_NAT:
@@ -574,7 +574,7 @@ def build_isocalendar_sarray(const int64_t[:] dtindex):
574574
return out
575575

576576

577-
def get_locale_names(name_type: str, locale: object = None):
577+
def _get_locale_names(name_type: str, locale: object = None):
578578
"""
579579
Returns an array of localized day or month names.
580580
@@ -650,7 +650,7 @@ class RoundTo:
650650
return 4
651651

652652

653-
cdef inline ndarray[int64_t] _floor_int64(int64_t[:] values, int64_t unit):
653+
cdef inline ndarray[int64_t] _floor_int64(const int64_t[:] values, int64_t unit):
654654
cdef:
655655
Py_ssize_t i, n = len(values)
656656
ndarray[int64_t] result = np.empty(n, dtype="i8")
@@ -668,7 +668,7 @@ cdef inline ndarray[int64_t] _floor_int64(int64_t[:] values, int64_t unit):
668668
return result
669669

670670

671-
cdef inline ndarray[int64_t] _ceil_int64(int64_t[:] values, int64_t unit):
671+
cdef inline ndarray[int64_t] _ceil_int64(const int64_t[:] values, int64_t unit):
672672
cdef:
673673
Py_ssize_t i, n = len(values)
674674
ndarray[int64_t] result = np.empty(n, dtype="i8")

pandas/_libs/tslibs/nattype.pxd

-1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@ from numpy cimport int64_t
44

55
cdef int64_t NPY_NAT
66

7-
cdef bint _nat_scalar_rules[6]
87
cdef set c_nat_strings
98

109
cdef class _NaT(datetime):

0 commit comments

Comments
 (0)