Skip to content

Commit 049b551

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into sty-private
2 parents 9841801 + 3ca6d8f commit 049b551

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+274
-208
lines changed

doc/source/whatsnew/v1.1.2.rst

+4-1
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@ Fixed regressions
1717
- Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`)
1818
- Fix regression in updating a column inplace (e.g. using ``df['col'].fillna(.., inplace=True)``) (:issue:`35731`)
1919
- Performance regression for :meth:`RangeIndex.format` (:issue:`35712`)
20+
- Fix regression where the cache was left invalid after an indexing operation; this can manifest as a subsequent setting operation that does not update the data (:issue:`35521`)
2021
- Regression in :meth:`DataFrame.replace` where a ``TypeError`` would be raised when attempting to replace elements of type :class:`Interval` (:issue:`35931`)
22+
- Fix regression in pickle roundtrip of the ``closed`` attribute of :class:`IntervalIndex` (:issue:`35658`)
23+
- Fixed regression in :meth:`DataFrameGroupBy.agg` where a ``ValueError: buffer source array is read-only`` would be raised when the underlying array is read-only (:issue:`36014`)
2124
-
2225

23-
2426
.. ---------------------------------------------------------------------------
2527
2628
.. _whatsnew_112.bug_fixes:
@@ -32,6 +34,7 @@ Bug fixes
3234
- Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`)
3335
- Bug in :meth:`DatetimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`)
3436
- Bug in :meth:`Float64Index.__contains__` incorrectly raising ``TypeError`` instead of returning ``False`` (:issue:`35788`)
37+
- Bug in :meth:`Series.dt.isocalendar` and :meth:`DatetimeIndex.isocalendar` that returned incorrect year for certain dates (:issue:`36032`)
3538
- Bug in :class:`DataFrame` indexing returning an incorrect :class:`Series` in some cases when the series has been altered and a cache not invalidated (:issue:`33675`)
3639

3740
.. ---------------------------------------------------------------------------

pandas/_libs/groupby.pyx

+18-14
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ def group_cumprod_float64(float64_t[:, :] out,
229229
@cython.boundscheck(False)
230230
@cython.wraparound(False)
231231
def group_cumsum(numeric[:, :] out,
232-
numeric[:, :] values,
232+
ndarray[numeric, ndim=2] values,
233233
const int64_t[:] labels,
234234
int ngroups,
235235
is_datetimelike,
@@ -472,7 +472,7 @@ ctypedef fused complexfloating_t:
472472
@cython.boundscheck(False)
473473
def _group_add(complexfloating_t[:, :] out,
474474
int64_t[:] counts,
475-
complexfloating_t[:, :] values,
475+
ndarray[complexfloating_t, ndim=2] values,
476476
const int64_t[:] labels,
477477
Py_ssize_t min_count=0):
478478
"""
@@ -483,8 +483,9 @@ def _group_add(complexfloating_t[:, :] out,
483483
complexfloating_t val, count
484484
complexfloating_t[:, :] sumx
485485
int64_t[:, :] nobs
486+
Py_ssize_t len_values = len(values), len_labels = len(labels)
486487

487-
if len(values) != len(labels):
488+
if len_values != len_labels:
488489
raise ValueError("len(index) != len(labels)")
489490

490491
nobs = np.zeros((<object>out).shape, dtype=np.int64)
@@ -530,7 +531,7 @@ group_add_complex128 = _group_add['double complex']
530531
@cython.boundscheck(False)
531532
def _group_prod(floating[:, :] out,
532533
int64_t[:] counts,
533-
floating[:, :] values,
534+
ndarray[floating, ndim=2] values,
534535
const int64_t[:] labels,
535536
Py_ssize_t min_count=0):
536537
"""
@@ -541,8 +542,9 @@ def _group_prod(floating[:, :] out,
541542
floating val, count
542543
floating[:, :] prodx
543544
int64_t[:, :] nobs
545+
Py_ssize_t len_values = len(values), len_labels = len(labels)
544546

545-
if not len(values) == len(labels):
547+
if len_values != len_labels:
546548
raise ValueError("len(index) != len(labels)")
547549

548550
nobs = np.zeros((<object>out).shape, dtype=np.int64)
@@ -582,7 +584,7 @@ group_prod_float64 = _group_prod['double']
582584
@cython.cdivision(True)
583585
def _group_var(floating[:, :] out,
584586
int64_t[:] counts,
585-
floating[:, :] values,
587+
ndarray[floating, ndim=2] values,
586588
const int64_t[:] labels,
587589
Py_ssize_t min_count=-1,
588590
int64_t ddof=1):
@@ -591,10 +593,11 @@ def _group_var(floating[:, :] out,
591593
floating val, ct, oldmean
592594
floating[:, :] mean
593595
int64_t[:, :] nobs
596+
Py_ssize_t len_values = len(values), len_labels = len(labels)
594597

595598
assert min_count == -1, "'min_count' only used in add and prod"
596599

597-
if not len(values) == len(labels):
600+
if len_values != len_labels:
598601
raise ValueError("len(index) != len(labels)")
599602

600603
nobs = np.zeros((<object>out).shape, dtype=np.int64)
@@ -639,18 +642,19 @@ group_var_float64 = _group_var['double']
639642
@cython.boundscheck(False)
640643
def _group_mean(floating[:, :] out,
641644
int64_t[:] counts,
642-
floating[:, :] values,
645+
ndarray[floating, ndim=2] values,
643646
const int64_t[:] labels,
644647
Py_ssize_t min_count=-1):
645648
cdef:
646649
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
647650
floating val, count
648651
floating[:, :] sumx
649652
int64_t[:, :] nobs
653+
Py_ssize_t len_values = len(values), len_labels = len(labels)
650654

651655
assert min_count == -1, "'min_count' only used in add and prod"
652656

653-
if not len(values) == len(labels):
657+
if len_values != len_labels:
654658
raise ValueError("len(index) != len(labels)")
655659

656660
nobs = np.zeros((<object>out).shape, dtype=np.int64)
@@ -689,7 +693,7 @@ group_mean_float64 = _group_mean['double']
689693
@cython.boundscheck(False)
690694
def _group_ohlc(floating[:, :] out,
691695
int64_t[:] counts,
692-
floating[:, :] values,
696+
ndarray[floating, ndim=2] values,
693697
const int64_t[:] labels,
694698
Py_ssize_t min_count=-1):
695699
"""
@@ -740,7 +744,7 @@ group_ohlc_float64 = _group_ohlc['double']
740744
@cython.boundscheck(False)
741745
@cython.wraparound(False)
742746
def group_quantile(ndarray[float64_t] out,
743-
numeric[:] values,
747+
ndarray[numeric, ndim=1] values,
744748
ndarray[int64_t] labels,
745749
ndarray[uint8_t] mask,
746750
float64_t q,
@@ -1072,7 +1076,7 @@ def group_nth(rank_t[:, :] out,
10721076
@cython.boundscheck(False)
10731077
@cython.wraparound(False)
10741078
def group_rank(float64_t[:, :] out,
1075-
rank_t[:, :] values,
1079+
ndarray[rank_t, ndim=2] values,
10761080
const int64_t[:] labels,
10771081
int ngroups,
10781082
bint is_datetimelike, object ties_method="average",
@@ -1424,7 +1428,7 @@ def group_min(groupby_t[:, :] out,
14241428
@cython.boundscheck(False)
14251429
@cython.wraparound(False)
14261430
def group_cummin(groupby_t[:, :] out,
1427-
groupby_t[:, :] values,
1431+
ndarray[groupby_t, ndim=2] values,
14281432
const int64_t[:] labels,
14291433
int ngroups,
14301434
bint is_datetimelike):
@@ -1484,7 +1488,7 @@ def group_cummin(groupby_t[:, :] out,
14841488
@cython.boundscheck(False)
14851489
@cython.wraparound(False)
14861490
def group_cummax(groupby_t[:, :] out,
1487-
groupby_t[:, :] values,
1491+
ndarray[groupby_t, ndim=2] values,
14881492
const int64_t[:] labels,
14891493
int ngroups,
14901494
bint is_datetimelike):

pandas/_libs/indexing.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
cdef class _NDFrameIndexerBase:
1+
cdef class NDFrameIndexerBase:
22
"""
33
A base class for _NDFrameIndexer for fast instantiation and attribute access.
44
"""

pandas/_libs/missing.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ from pandas._libs.tslibs.nattype cimport (
1818
from pandas._libs.tslibs.np_datetime cimport get_datetime64_value, get_timedelta64_value
1919

2020
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
21-
from pandas.compat import is_platform_32bit
21+
from pandas.compat import IS64
2222

2323
cdef:
2424
float64_t INF = <float64_t>np.inf
2525
float64_t NEGINF = -INF
2626

2727
int64_t NPY_NAT = util.get_nat()
2828

29-
bint is_32bit = is_platform_32bit()
29+
bint is_32bit = not IS64
3030

3131

3232
cpdef bint checknull(object val):

pandas/_libs/tslibs/ccalendar.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -201,10 +201,10 @@ cpdef iso_calendar_t get_iso_calendar(int year, int month, int day) nogil:
201201
iso_week = 1
202202

203203
iso_year = year
204-
if iso_week == 1 and doy > 7:
204+
if iso_week == 1 and month == 12:
205205
iso_year += 1
206206

207-
elif iso_week >= 52 and doy < 7:
207+
elif iso_week >= 52 and month == 1:
208208
iso_year -= 1
209209

210210
return iso_year, iso_week, dow + 1

pandas/_libs/tslibs/parsing.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -771,7 +771,7 @@ class _timelex:
771771
_DATEUTIL_LEXER_SPLIT = _timelex.split
772772

773773

774-
def _format_is_iso(f) -> bint:
774+
def format_is_iso(f: str) -> bint:
775775
"""
776776
Does format match the iso8601 set that can be handled by the C parser?
777777
Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different
@@ -789,7 +789,7 @@ def _format_is_iso(f) -> bint:
789789
return False
790790

791791

792-
def _guess_datetime_format(
792+
def guess_datetime_format(
793793
dt_str,
794794
bint dayfirst=False,
795795
dt_str_parse=du_parse,

pandas/compat/__init__.py

+1-23
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
* platform checker
99
"""
1010
import platform
11-
import struct
1211
import sys
1312
import warnings
1413

@@ -20,14 +19,6 @@
2019
IS64 = sys.maxsize > 2 ** 32
2120

2221

23-
# ----------------------------------------------------------------------------
24-
# functions largely based / taken from the six module
25-
26-
# Much of the code in this module comes from Benjamin Peterson's six library.
27-
# The license for this library can be found in LICENSES/SIX and the code can be
28-
# found at https://bitbucket.org/gutworth/six
29-
30-
3122
def set_function_name(f: F, name: str, cls) -> F:
3223
"""
3324
Bind the name/qualname attributes of the function.
@@ -38,7 +29,6 @@ def set_function_name(f: F, name: str, cls) -> F:
3829
return f
3930

4031

41-
# https://github.com/pandas-dev/pandas/pull/9123
4232
def is_platform_little_endian() -> bool:
4333
"""
4434
Checking if the running platform is little endian.
@@ -72,7 +62,7 @@ def is_platform_linux() -> bool:
7262
bool
7363
True if the running platform is linux.
7464
"""
75-
return sys.platform == "linux2"
65+
return sys.platform == "linux"
7666

7767

7868
def is_platform_mac() -> bool:
@@ -87,18 +77,6 @@ def is_platform_mac() -> bool:
8777
return sys.platform == "darwin"
8878

8979

90-
def is_platform_32bit() -> bool:
91-
"""
92-
Checking if the running platform is 32-bit.
93-
94-
Returns
95-
-------
96-
bool
97-
True if the running platform is 32-bit.
98-
"""
99-
return struct.calcsize("P") * 8 < 64
100-
101-
10280
def _import_lzma():
10381
"""
10482
Importing the `lzma` module.

pandas/core/arrays/datetimes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -602,9 +602,9 @@ def astype(self, dtype, copy=True):
602602
# Rendering Methods
603603

604604
def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs):
605-
from pandas.io.formats.format import _get_format_datetime64_from_values
605+
from pandas.io.formats.format import get_format_datetime64_from_values
606606

607-
fmt = _get_format_datetime64_from_values(self, date_format)
607+
fmt = get_format_datetime64_from_values(self, date_format)
608608

609609
return tslib.format_array_from_datetime(
610610
self.asi8.ravel(), tz=self.tz, format=fmt, na_rep=na_rep

pandas/core/arrays/timedeltas.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -379,14 +379,14 @@ def median(
379379
# Rendering Methods
380380

381381
def _formatter(self, boxed=False):
382-
from pandas.io.formats.format import _get_format_timedelta64
382+
from pandas.io.formats.format import get_format_timedelta64
383383

384-
return _get_format_timedelta64(self, box=True)
384+
return get_format_timedelta64(self, box=True)
385385

386386
def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs):
387-
from pandas.io.formats.format import _get_format_timedelta64
387+
from pandas.io.formats.format import get_format_timedelta64
388388

389-
formatter = _get_format_timedelta64(self._data, na_rep)
389+
formatter = get_format_timedelta64(self._data, na_rep)
390390
return np.array([formatter(x) for x in self._data.ravel()]).reshape(self.shape)
391391

392392
# ----------------------------------------------------------------

pandas/core/common.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
ABCIndexClass,
3232
ABCSeries,
3333
)
34-
from pandas.core.dtypes.inference import _iterable_not_string
34+
from pandas.core.dtypes.inference import iterable_not_string
3535
from pandas.core.dtypes.missing import isna, isnull, notnull # noqa
3636

3737

@@ -61,7 +61,7 @@ def flatten(l):
6161
flattened : generator
6262
"""
6363
for el in l:
64-
if _iterable_not_string(el):
64+
if iterable_not_string(el):
6565
for s in flatten(el):
6666
yield s
6767
else:

pandas/core/computation/common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from pandas._config import get_option
66

77

8-
def _ensure_decoded(s):
8+
def ensure_decoded(s):
99
"""
1010
If we have bytes, decode them to unicode.
1111
"""

pandas/core/computation/ops.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from pandas.core.dtypes.common import is_list_like, is_scalar
1616

1717
import pandas.core.common as com
18-
from pandas.core.computation.common import _ensure_decoded, result_type_many
18+
from pandas.core.computation.common import ensure_decoded, result_type_many
1919
from pandas.core.computation.scope import _DEFAULT_GLOBALS
2020

2121
from pandas.io.formats.printing import pprint_thing, pprint_thing_encoded
@@ -466,7 +466,7 @@ def stringify(value):
466466
v = rhs.value
467467
if isinstance(v, (int, float)):
468468
v = stringify(v)
469-
v = Timestamp(_ensure_decoded(v))
469+
v = Timestamp(ensure_decoded(v))
470470
if v.tz is not None:
471471
v = v.tz_convert("UTC")
472472
self.rhs.update(v)
@@ -475,7 +475,7 @@ def stringify(value):
475475
v = lhs.value
476476
if isinstance(v, (int, float)):
477477
v = stringify(v)
478-
v = Timestamp(_ensure_decoded(v))
478+
v = Timestamp(ensure_decoded(v))
479479
if v.tz is not None:
480480
v = v.tz_convert("UTC")
481481
self.lhs.update(v)

pandas/core/computation/pytables.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import pandas as pd
1515
import pandas.core.common as com
1616
from pandas.core.computation import expr, ops, scope as _scope
17-
from pandas.core.computation.common import _ensure_decoded
17+
from pandas.core.computation.common import ensure_decoded
1818
from pandas.core.computation.expr import BaseExprVisitor
1919
from pandas.core.computation.ops import UndefinedVariableError, is_term
2020
from pandas.core.construction import extract_array
@@ -189,12 +189,12 @@ def stringify(value):
189189
encoder = pprint_thing
190190
return encoder(value)
191191

192-
kind = _ensure_decoded(self.kind)
193-
meta = _ensure_decoded(self.meta)
192+
kind = ensure_decoded(self.kind)
193+
meta = ensure_decoded(self.meta)
194194
if kind == "datetime64" or kind == "datetime":
195195
if isinstance(v, (int, float)):
196196
v = stringify(v)
197-
v = _ensure_decoded(v)
197+
v = ensure_decoded(v)
198198
v = Timestamp(v)
199199
if v.tz is not None:
200200
v = v.tz_convert("UTC")

pandas/core/dtypes/cast.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -651,7 +651,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj,
651651
If False, scalar belongs to pandas extension types is inferred as
652652
object
653653
"""
654-
dtype = np.dtype(object)
654+
dtype: DtypeObj = np.dtype(object)
655655

656656
# a 1-element ndarray
657657
if isinstance(val, np.ndarray):

0 commit comments

Comments
 (0)