Skip to content

Commit 94c1de0

Browse files
committed
Merge remote-tracking branch 'upstream/master' into move-metadata-to-cfg
2 parents 20d24b5 + a0f6702 commit 94c1de0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+458
-205
lines changed

.pre-commit-config.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,12 @@ repos:
168168
pandas/tests/io/excel/test_writers\.py
169169
|pandas/tests/io/pytables/common\.py
170170
|pandas/tests/io/pytables/test_store\.py$
171+
- id: no-pandas-api-types
172+
name: Check code for instances of pd.api.types
173+
entry: (pd|pandas)\.api\.types\.
174+
language: pygrep
175+
types: [python]
176+
files: ^pandas/tests/
171177
- repo: https://github.com/asottile/yesqa
172178
rev: v1.2.2
173179
hooks:

asv_bench/benchmarks/reshape.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import pandas as pd
77
from pandas import DataFrame, MultiIndex, date_range, melt, wide_to_long
8+
from pandas.api.types import CategoricalDtype
89

910

1011
class Melt:
@@ -196,7 +197,7 @@ def setup(self):
196197
categories = list(string.ascii_letters[:12])
197198
s = pd.Series(
198199
np.random.choice(categories, size=1000000),
199-
dtype=pd.api.types.CategoricalDtype(categories),
200+
dtype=CategoricalDtype(categories),
200201
)
201202
self.s = s
202203

doc/source/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@
164164

165165
# The language for content autogenerated by Sphinx. Refer to documentation
166166
# for a list of supported languages.
167-
# language = None
167+
language = "en"
168168

169169
# There are two options for replacing |today|: either, you set today to some
170170
# non-false value, then it is used:

doc/source/user_guide/timeseries.rst

+1-8
Original file line numberDiff line numberDiff line change
@@ -2605,17 +2605,10 @@ For example, to localize and convert a naive stamp to time zone aware.
26052605
s_naive.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")
26062606
26072607
Time zone information can also be manipulated using the ``astype`` method.
2608-
This method can localize and convert time zone naive timestamps or
2609-
convert time zone aware timestamps.
2608+
This method can convert between different timezone-aware dtypes.
26102609

26112610
.. ipython:: python
26122611
2613-
# localize and convert a naive time zone
2614-
s_naive.astype("datetime64[ns, US/Eastern]")
2615-
2616-
# make an aware tz naive
2617-
s_aware.astype("datetime64[ns]")
2618-
26192612
# convert to a new time zone
26202613
s_aware.astype("datetime64[ns, CET]")
26212614

doc/source/whatsnew/v1.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,7 @@ I/O
746746
- Parse missing values using :func:`read_json` with ``dtype=False`` to ``NaN`` instead of ``None`` (:issue:`28501`)
747747
- :meth:`read_fwf` was inferring compression with ``compression=None`` which was not consistent with the other ``read_*`` functions (:issue:`37909`)
748748
- :meth:`DataFrame.to_html` was ignoring ``formatters`` argument for ``ExtensionDtype`` columns (:issue:`36525`)
749-
- Bumped minimum xarray version to 0.12.3 to avoid reference to the removed ``Panel`` class (:issue:`27101`)
749+
- Bumped minimum xarray version to 0.12.3 to avoid reference to the removed ``Panel`` class (:issue:`27101`, :issue:`37983`)
750750
- :meth:`DataFrame.to_csv` was re-opening file-like handles that also implement ``os.PathLike`` (:issue:`38125`)
751751
- Bug in the conversion of a sliced ``pyarrow.Table`` with missing values to a DataFrame (:issue:`38525`)
752752
- Bug in :func:`read_sql_table` raising a ``sqlalchemy.exc.OperationalError`` when column names contained a percentage sign (:issue:`37517`)

doc/source/whatsnew/v1.3.0.rst

+5
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ Deprecations
195195
- Deprecated :attr:`Rolling.win_type` returning ``"freq"`` (:issue:`38963`)
196196
- Deprecated :attr:`Rolling.is_datetimelike` (:issue:`38963`)
197197
- Deprecated :meth:`core.window.ewm.ExponentialMovingWindow.vol` (:issue:`39220`)
198+
- Using ``.astype`` to convert between ``datetime64[ns]`` dtype and :class:`DatetimeTZDtype` is deprecated and will raise in a future version, use ``obj.tz_localize`` or ``obj.dt.tz_localize`` instead (:issue:`38622`)
198199
-
199200

200201
.. ---------------------------------------------------------------------------
@@ -235,6 +236,7 @@ Datetimelike
235236
- Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`)
236237
- Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`)
237238
- Bug in :class:`Categorical` incorrectly typecasting ``datetime`` object to ``Timestamp`` (:issue:`38878`)
239+
- Bug in :meth:`Timestamp.round`, :meth:`Timestamp.floor`, :meth:`Timestamp.ceil` for values near the implementation bounds of :class:`Timestamp` (:issue:`39244`)
238240
- Bug in :func:`date_range` incorrectly creating :class:`DatetimeIndex` containing ``NaT`` instead of raising ``OutOfBoundsDatetime`` in corner cases (:issue:`24124`)
239241

240242
Timedelta
@@ -274,14 +276,17 @@ Interval
274276
- Bug in :meth:`IntervalIndex.intersection` and :meth:`IntervalIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38653`, :issue:`38741`)
275277
- Bug in :meth:`IntervalIndex.intersection` returning duplicates when at least one of both Indexes has duplicates which are present in the other (:issue:`38743`)
276278
- :meth:`IntervalIndex.union`, :meth:`IntervalIndex.intersection`, :meth:`IntervalIndex.difference`, and :meth:`IntervalIndex.symmetric_difference` now cast to the appropriate dtype instead of raising ``TypeError`` when operating with another :class:`IntervalIndex` with incompatible dtype (:issue:`39267`)
279+
- :meth:`PeriodIndex.union`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference`, :meth:`PeriodIndex.difference` now cast to object dtype instead of raising ``IncompatibleFrequency`` when operating with another :class:`PeriodIndex` with incompatible dtype (:issue:`??`)
277280

278281
Indexing
279282
^^^^^^^^
280283
- Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`)
281284
- Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`)
285+
- Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when setting multiple values to duplicate columns (:issue:`15695`)
282286
- Bug in :meth:`DataFrame.loc`, :meth:`Series.loc`, :meth:`DataFrame.__getitem__` and :meth:`Series.__getitem__` returning incorrect elements for non-monotonic :class:`DatetimeIndex` for string slices (:issue:`33146`)
283287
- Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` with timezone aware indexes raising ``TypeError`` for ``method="ffill"`` and ``method="bfill"`` and specified ``tolerance`` (:issue:`38566`)
284288
- Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` with empty :class:`DataFrame` and specified columns for string indexer and non empty :class:`DataFrame` to set (:issue:`38831`)
289+
- Bug in :meth:`DataFrame.loc.__setitem__` raising ``ValueError`` when expanding unique column for :class:`DataFrame` with duplicate columns (:issue:`38521`)
285290
- Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` with mixed dtypes when setting with a dictionary value (:issue:`38335`)
286291
- Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`)
287292
- Bug in setting ``timedelta64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`)

pandas/_libs/tslibs/period.pyx

+65-56
Original file line numberDiff line numberDiff line change
@@ -1489,6 +1489,60 @@ cdef class PeriodMixin:
14891489
return FR_SEC
14901490
return base
14911491

1492+
@property
1493+
def start_time(self) -> Timestamp:
1494+
"""
1495+
Get the Timestamp for the start of the period.
1496+
1497+
Returns
1498+
-------
1499+
Timestamp
1500+
1501+
See Also
1502+
--------
1503+
Period.end_time : Return the end Timestamp.
1504+
Period.dayofyear : Return the day of year.
1505+
Period.daysinmonth : Return the days in that month.
1506+
Period.dayofweek : Return the day of the week.
1507+
1508+
Examples
1509+
--------
1510+
>>> period = pd.Period('2012-1-1', freq='D')
1511+
>>> period
1512+
Period('2012-01-01', 'D')
1513+
1514+
>>> period.start_time
1515+
Timestamp('2012-01-01 00:00:00')
1516+
1517+
>>> period.end_time
1518+
Timestamp('2012-01-01 23:59:59.999999999')
1519+
"""
1520+
return self.to_timestamp(how="start")
1521+
1522+
@property
1523+
def end_time(self) -> Timestamp:
1524+
return self.to_timestamp(how="end")
1525+
1526+
def _require_matching_freq(self, other, base=False):
1527+
# See also arrays.period.raise_on_incompatible
1528+
if is_offset_object(other):
1529+
other_freq = other
1530+
else:
1531+
other_freq = other.freq
1532+
1533+
if base:
1534+
condition = self.freq.base != other_freq.base
1535+
else:
1536+
condition = self.freq != other_freq
1537+
1538+
if condition:
1539+
msg = DIFFERENT_FREQ.format(
1540+
cls=type(self).__name__,
1541+
own_freq=self.freqstr,
1542+
other_freq=other_freq.freqstr,
1543+
)
1544+
raise IncompatibleFrequency(msg)
1545+
14921546

14931547
cdef class _Period(PeriodMixin):
14941548

@@ -1551,10 +1605,7 @@ cdef class _Period(PeriodMixin):
15511605
return False
15521606
elif op == Py_NE:
15531607
return True
1554-
msg = DIFFERENT_FREQ.format(cls=type(self).__name__,
1555-
own_freq=self.freqstr,
1556-
other_freq=other.freqstr)
1557-
raise IncompatibleFrequency(msg)
1608+
self._require_matching_freq(other)
15581609
return PyObject_RichCompareBool(self.ordinal, other.ordinal, op)
15591610
elif other is NaT:
15601611
return _nat_scalar_rules[op]
@@ -1563,15 +1614,15 @@ cdef class _Period(PeriodMixin):
15631614
def __hash__(self):
15641615
return hash((self.ordinal, self.freqstr))
15651616

1566-
def _add_delta(self, other) -> "Period":
1617+
def _add_timedeltalike_scalar(self, other) -> "Period":
15671618
cdef:
1568-
int64_t nanos, offset_nanos
1619+
int64_t nanos, base_nanos
15691620

15701621
if is_tick_object(self.freq):
15711622
nanos = delta_to_nanoseconds(other)
1572-
offset_nanos = self.freq.base.nanos
1573-
if nanos % offset_nanos == 0:
1574-
ordinal = self.ordinal + (nanos // offset_nanos)
1623+
base_nanos = self.freq.base.nanos
1624+
if nanos % base_nanos == 0:
1625+
ordinal = self.ordinal + (nanos // base_nanos)
15751626
return Period(ordinal=ordinal, freq=self.freq)
15761627
raise IncompatibleFrequency("Input cannot be converted to "
15771628
f"Period(freq={self.freqstr})")
@@ -1581,14 +1632,10 @@ cdef class _Period(PeriodMixin):
15811632
cdef:
15821633
int64_t ordinal
15831634

1584-
if other.base == self.freq.base:
1585-
ordinal = self.ordinal + other.n
1586-
return Period(ordinal=ordinal, freq=self.freq)
1635+
self._require_matching_freq(other, base=True)
15871636

1588-
msg = DIFFERENT_FREQ.format(cls=type(self).__name__,
1589-
own_freq=self.freqstr,
1590-
other_freq=other.freqstr)
1591-
raise IncompatibleFrequency(msg)
1637+
ordinal = self.ordinal + other.n
1638+
return Period(ordinal=ordinal, freq=self.freq)
15921639

15931640
def __add__(self, other):
15941641
if not is_period_object(self):
@@ -1598,7 +1645,7 @@ cdef class _Period(PeriodMixin):
15981645
return other.__add__(self)
15991646

16001647
if is_any_td_scalar(other):
1601-
return self._add_delta(other)
1648+
return self._add_timedeltalike_scalar(other)
16021649
elif is_offset_object(other):
16031650
return self._add_offset(other)
16041651
elif other is NaT:
@@ -1635,11 +1682,7 @@ cdef class _Period(PeriodMixin):
16351682
ordinal = self.ordinal - other * self.freq.n
16361683
return Period(ordinal=ordinal, freq=self.freq)
16371684
elif is_period_object(other):
1638-
if other.freq != self.freq:
1639-
msg = DIFFERENT_FREQ.format(cls=type(self).__name__,
1640-
own_freq=self.freqstr,
1641-
other_freq=other.freqstr)
1642-
raise IncompatibleFrequency(msg)
1685+
self._require_matching_freq(other)
16431686
# GH 23915 - mul by base freq since __add__ is agnostic of n
16441687
return (self.ordinal - other.ordinal) * self.freq.base
16451688
elif other is NaT:
@@ -1677,40 +1720,6 @@ cdef class _Period(PeriodMixin):
16771720

16781721
return Period(ordinal=ordinal, freq=freq)
16791722

1680-
@property
1681-
def start_time(self) -> Timestamp:
1682-
"""
1683-
Get the Timestamp for the start of the period.
1684-
1685-
Returns
1686-
-------
1687-
Timestamp
1688-
1689-
See Also
1690-
--------
1691-
Period.end_time : Return the end Timestamp.
1692-
Period.dayofyear : Return the day of year.
1693-
Period.daysinmonth : Return the days in that month.
1694-
Period.dayofweek : Return the day of the week.
1695-
1696-
Examples
1697-
--------
1698-
>>> period = pd.Period('2012-1-1', freq='D')
1699-
>>> period
1700-
Period('2012-01-01', 'D')
1701-
1702-
>>> period.start_time
1703-
Timestamp('2012-01-01 00:00:00')
1704-
1705-
>>> period.end_time
1706-
Timestamp('2012-01-01 23:59:59.999999999')
1707-
"""
1708-
return self.to_timestamp(how='S')
1709-
1710-
@property
1711-
def end_time(self) -> Timestamp:
1712-
return self.to_timestamp(how="end")
1713-
17141723
def to_timestamp(self, freq=None, how='start', tz=None) -> Timestamp:
17151724
"""
17161725
Return the Timestamp representation of the Period.

pandas/_libs/tslibs/timestamps.pyx

+48-9
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ shadows the python class, where we do any heavy lifting.
88
"""
99
import warnings
1010

11+
cimport cython
12+
1113
import numpy as np
1214

1315
cimport numpy as cnp
@@ -153,32 +155,69 @@ class RoundTo:
153155
return 4
154156

155157

156-
cdef inline _floor_int64(values, unit):
157-
return values - np.remainder(values, unit)
158+
cdef inline ndarray[int64_t] _floor_int64(int64_t[:] values, int64_t unit):
159+
cdef:
160+
Py_ssize_t i, n = len(values)
161+
ndarray[int64_t] result = np.empty(n, dtype="i8")
162+
int64_t res, value
163+
164+
with cython.overflowcheck(True):
165+
for i in range(n):
166+
value = values[i]
167+
if value == NPY_NAT:
168+
res = NPY_NAT
169+
else:
170+
res = value - value % unit
171+
result[i] = res
172+
173+
return result
174+
175+
176+
cdef inline ndarray[int64_t] _ceil_int64(int64_t[:] values, int64_t unit):
177+
cdef:
178+
Py_ssize_t i, n = len(values)
179+
ndarray[int64_t] result = np.empty(n, dtype="i8")
180+
int64_t res, value
158181

159-
cdef inline _ceil_int64(values, unit):
160-
return values + np.remainder(-values, unit)
182+
with cython.overflowcheck(True):
183+
for i in range(n):
184+
value = values[i]
161185

162-
cdef inline _rounddown_int64(values, unit):
186+
if value == NPY_NAT:
187+
res = NPY_NAT
188+
else:
189+
remainder = value % unit
190+
if remainder == 0:
191+
res = value
192+
else:
193+
res = value + (unit - remainder)
194+
195+
result[i] = res
196+
197+
return result
198+
199+
200+
cdef inline ndarray[int64_t] _rounddown_int64(values, int64_t unit):
163201
return _ceil_int64(values - unit//2, unit)
164202

165-
cdef inline _roundup_int64(values, unit):
203+
204+
cdef inline ndarray[int64_t] _roundup_int64(values, int64_t unit):
166205
return _floor_int64(values + unit//2, unit)
167206

168207

169-
def round_nsint64(values, mode, freq):
208+
def round_nsint64(values: np.ndarray, mode: RoundTo, freq) -> np.ndarray:
170209
"""
171210
Applies rounding mode at given frequency
172211

173212
Parameters
174213
----------
175-
values : :obj:`ndarray`
214+
values : np.ndarray[int64_t]
176215
mode : instance of `RoundTo` enumeration
177216
freq : str, obj
178217

179218
Returns
180219
-------
181-
:obj:`ndarray`
220+
np.ndarray[int64_t]
182221
"""
183222

184223
unit = to_offset(freq).nanos

pandas/conftest.py

+18
Original file line numberDiff line numberDiff line change
@@ -711,6 +711,24 @@ def float_frame():
711711
return DataFrame(tm.getSeriesData())
712712

713713

714+
@pytest.fixture
715+
def mixed_type_frame():
716+
"""
717+
Fixture for DataFrame of float/int/string columns with RangeIndex
718+
Columns are ['a', 'b', 'c', 'float32', 'int32'].
719+
"""
720+
return DataFrame(
721+
{
722+
"a": 1.0,
723+
"b": 2,
724+
"c": "foo",
725+
"float32": np.array([1.0] * 10, dtype="float32"),
726+
"int32": np.array([1] * 10, dtype="int32"),
727+
},
728+
index=np.arange(10),
729+
)
730+
731+
714732
# ----------------------------------------------------------------
715733
# Scalars
716734
# ----------------------------------------------------------------

0 commit comments

Comments
 (0)