Skip to content

Commit 982472d

Browse files
Merge remote-tracking branch 'upstream/master' into expressions-dtypes-check
2 parents 8d4e0b5 + c13027c commit 982472d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+1130
-500
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ jobs:
169169
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column
170170
171171
pytest pandas/tests/api/
172+
pytest pandas/tests/apply/
172173
pytest pandas/tests/arrays/
173174
pytest pandas/tests/base/
174175
pytest pandas/tests/computation/

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY : develop build clean clean_pyc doc lint-diff black
1+
.PHONY : develop build clean clean_pyc doc lint-diff black test-scripts
22

33
all: develop
44

@@ -25,3 +25,6 @@ doc:
2525
cd doc; \
2626
python make.py clean; \
2727
python make.py html
28+
29+
test-scripts:
30+
pytest scripts

doc/source/whatsnew/v1.3.0.rst

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,41 @@ In pandas 1.3.0, ``df`` continues to share data with ``values``
225225
np.shares_memory(df["A"], values)
226226
227227
228+
.. _whatsnew_130.notable_bug_fixes.setitem_never_inplace:
229+
230+
Never Operate Inplace When Setting ``frame[keys] = values``
231+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
232+
233+
When setting multiple columns using ``frame[keys] = values`` new arrays will
234+
replace pre-existing arrays for these keys, which will *not* be over-written
235+
(:issue:`39510`). As a result, the columns will retain the dtype(s) of ``values``,
236+
never casting to the dtypes of the existing arrays.
237+
238+
.. ipython:: python
239+
240+
df = pd.DataFrame(range(3), columns=["A"], dtype="float64")
241+
df[["A"]] = 5
242+
243+
In the old behavior, ``5`` was cast to ``float64`` and inserted into the existing
244+
array backing ``df``:
245+
246+
*pandas 1.2.x*
247+
248+
.. code-block:: ipython
249+
250+
In [1]: df.dtypes
251+
Out[1]:
252+
A float64
253+
254+
In the new behavior, we get a new array, and retain an integer-dtyped ``5``:
255+
256+
*pandas 1.3.0*
257+
258+
.. ipython:: python
259+
260+
df.dtypes
261+
262+
228263
.. _whatsnew_130.notable_bug_fixes.setitem_with_bool_casting:
229264

230265
Consistent Casting With Setting Into Boolean Series
@@ -300,7 +335,7 @@ Optional libraries below the lowest tested version may still work, but are not c
300335
+=================+=================+=========+
301336
| beautifulsoup4 | 4.6.0 | |
302337
+-----------------+-----------------+---------+
303-
| fastparquet | 0.3.2 | |
338+
| fastparquet | 0.4.0 | X |
304339
+-----------------+-----------------+---------+
305340
| fsspec | 0.7.4 | |
306341
+-----------------+-----------------+---------+
@@ -627,6 +662,7 @@ Other
627662
- Bug in :class:`Styler` where multiple elements in CSS-selectors were not correctly added to ``table_styles`` (:issue:`39942`)
628663
- Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`)
629664
- Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`)
665+
- Bug in :meth:`DataFrame.convert_dtypes` incorrectly raising ``ValueError`` when called on an empty DataFrame (:issue:`40393`)
630666

631667

632668
.. ---------------------------------------------------------------------------

pandas/_libs/index.pyx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import warnings
22

3+
cimport cython
4+
35
import numpy as np
46

57
cimport numpy as cnp
@@ -47,6 +49,7 @@ cdef inline bint is_definitely_invalid_key(object val):
4749
_SIZE_CUTOFF = 1_000_000
4850

4951

52+
@cython.freelist(32)
5053
cdef class IndexEngine:
5154

5255
cdef readonly:

pandas/_libs/missing.pyx

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ cpdef bint checknull(object val):
104104
- np.datetime64 representation of NaT
105105
- np.timedelta64 representation of NaT
106106
- NA
107+
- Decimal("NaN")
107108
108109
Parameters
109110
----------
@@ -143,6 +144,8 @@ cpdef bint checknull_old(object val):
143144
- NaT
144145
- np.datetime64 representation of NaT
145146
- np.timedelta64 representation of NaT
147+
- NA
148+
- Decimal("NaN")
146149
147150
Parameters
148151
----------
@@ -175,6 +178,8 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr):
175178
- NaT
176179
- np.datetime64 representation of NaT
177180
- np.timedelta64 representation of NaT
181+
- NA
182+
- Decimal("NaN")
178183
179184
Parameters
180185
----------
@@ -211,6 +216,7 @@ def isnaobj_old(arr: ndarray) -> ndarray:
211216
- NEGINF
212217
- NaT
213218
- NA
219+
- Decimal("NaN")
214220

215221
Parameters
216222
----------
@@ -249,6 +255,8 @@ def isnaobj2d(arr: ndarray) -> ndarray:
249255
- NaT
250256
- np.datetime64 representation of NaT
251257
- np.timedelta64 representation of NaT
258+
- NA
259+
- Decimal("NaN")
252260

253261
Parameters
254262
----------
@@ -293,6 +301,8 @@ def isnaobj2d_old(arr: ndarray) -> ndarray:
293301
- NaT
294302
- np.datetime64 representation of NaT
295303
- np.timedelta64 representation of NaT
304+
- NA
305+
- Decimal("NaN")
296306

297307
Parameters
298308
----------

pandas/_libs/tslibs/ccalendar.pyi

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
2+
DAYS: list[str]
3+
MONTH_ALIASES: dict[int, str]
4+
MONTH_NUMBERS: dict[str, int]
5+
MONTHS: list[str]
6+
int_to_weekday: dict[int, str]
7+
8+
def get_firstbday(year: int, month: int) -> int: ...
9+
def get_lastbday(year: int, month: int) -> int: ...
10+
def get_day_of_year(year: int, month: int, day: int) -> int: ...
11+
def get_iso_calendar(year: int, month: int, day: int) -> tuple[int, int, int]: ...
12+
def get_week_of_year(year: int, month: int, day: int) -> int: ...
13+
def get_days_in_month(year: int, month: int) -> int: ...

pandas/_libs/tslibs/strptime.pyi

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from typing import Optional
2+
3+
import numpy as np
4+
5+
def array_strptime(
6+
values: np.ndarray, # np.ndarray[object]
7+
fmt: Optional[str],
8+
exact: bool = True,
9+
errors: str = "raise"
10+
) -> tuple[np.ndarray, np.ndarray]: ...
11+
# first ndarray is M8[ns], second is object ndarray of Optional[tzinfo]

pandas/_libs/tslibs/timezones.pyi

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from datetime import (
2+
datetime,
3+
tzinfo,
4+
)
5+
from typing import (
6+
Callable,
7+
Optional,
8+
Union,
9+
)
10+
11+
import numpy as np
12+
13+
# imported from dateutil.tz
14+
dateutil_gettz: Callable[[str], tzinfo]
15+
16+
17+
def tz_standardize(tz: tzinfo) -> tzinfo: ...
18+
19+
def tz_compare(start: Optional[tzinfo], end: Optional[tzinfo]) -> bool: ...
20+
21+
def infer_tzinfo(
22+
start: Optional[datetime], end: Optional[datetime],
23+
) -> Optional[tzinfo]: ...
24+
25+
# ndarrays returned are both int64_t
26+
def get_dst_info(tz: tzinfo) -> tuple[np.ndarray, np.ndarray, str]: ...
27+
28+
def maybe_get_tz(tz: Optional[Union[str, int, np.int64, tzinfo]]) -> Optional[tzinfo]: ...
29+
30+
def get_timezone(tz: tzinfo) -> Union[tzinfo, str]: ...
31+
32+
def is_utc(tz: Optional[tzinfo]) -> bool: ...

pandas/_libs/tslibs/timezones.pyx

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ cdef inline bint treat_tz_as_dateutil(tzinfo tz):
6767
return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx')
6868

6969

70+
# Returns str or tzinfo object
7071
cpdef inline object get_timezone(tzinfo tz):
7172
"""
7273
We need to do several things here:
@@ -80,6 +81,8 @@ cpdef inline object get_timezone(tzinfo tz):
8081
the tz name. It needs to be a string so that we can serialize it with
8182
UJSON/pytables. maybe_get_tz (below) is the inverse of this process.
8283
"""
84+
if tz is None:
85+
raise TypeError("tz argument cannot be None")
8386
if is_utc(tz):
8487
return tz
8588
else:
@@ -364,6 +367,8 @@ cpdef bint tz_compare(tzinfo start, tzinfo end):
364367
elif is_utc(end):
365368
# Ensure we don't treat tzlocal as equal to UTC when running in UTC
366369
return False
370+
elif start is None or end is None:
371+
return start is None and end is None
367372
return get_timezone(start) == get_timezone(end)
368373

369374

pandas/_libs/tslibs/tzconversion.pyi

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from datetime import (
2+
timedelta,
3+
tzinfo,
4+
)
5+
from typing import (
6+
Iterable,
7+
Optional,
8+
Union,
9+
)
10+
11+
import numpy as np
12+
13+
def tz_convert_from_utc(
14+
vals: np.ndarray, # const int64_t[:]
15+
tz: tzinfo,
16+
) -> np.ndarray: ... # np.ndarray[np.int64]
17+
18+
def tz_convert_from_utc_single(val: np.int64, tz: tzinfo) -> np.int64: ...
19+
20+
def tz_localize_to_utc(
21+
vals: np.ndarray, # np.ndarray[np.int64]
22+
tz: Optional[tzinfo],
23+
ambiguous: Optional[Union[str, bool, Iterable[bool]]] = None,
24+
nonexistent: Optional[Union[str, timedelta, np.timedelta64]] = None,
25+
) -> np.ndarray: ... # np.ndarray[np.int64]

pandas/_libs/tslibs/vectorized.pyi

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
"""
2+
For cython types that cannot be represented precisely, closest-available
3+
python equivalents are used, and the precise types kept as adjacent comments.
4+
"""
5+
from datetime import tzinfo
6+
from typing import (
7+
Optional,
8+
Union,
9+
)
10+
11+
import numpy as np
12+
13+
from pandas._libs.tslibs.dtypes import Resolution
14+
from pandas._libs.tslibs.offsets import BaseOffset
15+
16+
def dt64arr_to_periodarr(
17+
stamps: np.ndarray, # const int64_t[:]
18+
freq: int,
19+
tz: Optional[tzinfo],
20+
) -> np.ndarray: ... # np.ndarray[np.int64, ndim=1]
21+
22+
23+
def is_date_array_normalized(
24+
stamps: np.ndarray, # const int64_t[:]
25+
tz: Optional[tzinfo] = None,
26+
) -> bool: ...
27+
28+
29+
def normalize_i8_timestamps(
30+
stamps: np.ndarray, # const int64_t[:]
31+
tz: Optional[tzinfo],
32+
) -> np.ndarray: ... # np.ndarray[np.int64]
33+
34+
35+
def get_resolution(
36+
stamps: np.ndarray, # const int64_t[:]
37+
tz: Optional[tzinfo] = None,
38+
) -> Resolution: ...
39+
40+
41+
def ints_to_pydatetime(
42+
arr: np.ndarray, # const int64_t[:]
43+
tz: Optional[tzinfo] = None,
44+
freq: Optional[Union[str, BaseOffset]] = None,
45+
fold: bool = False,
46+
box: str = "datetime",
47+
) -> np.ndarray: ... # np.ndarray[object]

pandas/_testing/_io.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,7 @@ def dec(f):
9393
is_decorating = not kwargs and len(args) == 1 and callable(args[0])
9494
if is_decorating:
9595
f = args[0]
96-
# error: Incompatible types in assignment (expression has type
97-
# "List[<nothing>]", variable has type "Tuple[Any, ...]")
98-
args = [] # type: ignore[assignment]
96+
args = ()
9997
return dec(f)
10098
else:
10199
return dec

pandas/compat/_optional.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
"bs4": "4.6.0",
1212
"bottleneck": "1.2.1",
1313
"fsspec": "0.7.4",
14-
"fastparquet": "0.3.2",
14+
"fastparquet": "0.4.0",
1515
"gcsfs": "0.6.0",
1616
"lxml.etree": "4.3.0",
1717
"matplotlib": "2.2.3",

pandas/compat/numpy/__init__.py

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -27,41 +27,39 @@
2727
_tz_regex = re.compile("[+-]0000$")
2828

2929

30-
def tz_replacer(s):
31-
if isinstance(s, str):
32-
if s.endswith("Z"):
33-
s = s[:-1]
34-
elif _tz_regex.search(s):
35-
s = s[:-5]
36-
return s
30+
def _tz_replacer(tstring):
31+
if isinstance(tstring, str):
32+
if tstring.endswith("Z"):
33+
tstring = tstring[:-1]
34+
elif _tz_regex.search(tstring):
35+
tstring = tstring[:-5]
36+
return tstring
3737

3838

39-
def np_datetime64_compat(s, *args, **kwargs):
39+
def np_datetime64_compat(tstring: str, unit: str = "ns"):
4040
"""
4141
provide compat for construction of strings to numpy datetime64's with
4242
tz-changes in 1.11 that make '2015-01-01 09:00:00Z' show a deprecation
4343
warning, when need to pass '2015-01-01 09:00:00'
4444
"""
45-
s = tz_replacer(s)
46-
# error: No overload variant of "datetime64" matches argument types "Any",
47-
# "Tuple[Any, ...]", "Dict[str, Any]"
48-
return np.datetime64(s, *args, **kwargs) # type: ignore[call-overload]
45+
tstring = _tz_replacer(tstring)
46+
return np.datetime64(tstring, unit)
4947

5048

51-
def np_array_datetime64_compat(arr, *args, **kwargs):
49+
def np_array_datetime64_compat(arr, dtype="M8[ns]"):
5250
"""
5351
provide compat for construction of an array of strings to a
5452
np.array(..., dtype=np.datetime64(..))
5553
tz-changes in 1.11 that make '2015-01-01 09:00:00Z' show a deprecation
5654
warning, when need to pass '2015-01-01 09:00:00'
5755
"""
58-
# is_list_like
56+
# is_list_like; can't import as it would be circular
5957
if hasattr(arr, "__iter__") and not isinstance(arr, (str, bytes)):
60-
arr = [tz_replacer(s) for s in arr]
58+
arr = [_tz_replacer(s) for s in arr]
6159
else:
62-
arr = tz_replacer(arr)
60+
arr = _tz_replacer(arr)
6361

64-
return np.array(arr, *args, **kwargs)
62+
return np.array(arr, dtype=dtype)
6563

6664

6765
__all__ = [

pandas/compat/pickle_compat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def load_reduce(self):
4949
return
5050
except TypeError:
5151
pass
52-
elif args and issubclass(args[0], BaseOffset):
52+
elif args and isinstance(args[0], type) and issubclass(args[0], BaseOffset):
5353
# TypeError: object.__new__(Day) is not safe, use Day.__new__()
5454
cls = args[0]
5555
stack[-1] = cls.__new__(*args)

0 commit comments

Comments
 (0)