Skip to content

Commit 917c0e1

Browse files
authored
Merge branch 'master' into clean-_use_dynamic_x
2 parents f0d6800 + 2428cdd commit 917c0e1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+553
-355
lines changed

asv_bench/benchmarks/indexing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,9 @@ def time_boolean_rows_boolean(self):
158158
class DataFrameNumericIndexing:
159159
def setup(self):
160160
self.idx_dupe = np.array(range(30)) * 99
161-
self.df = DataFrame(np.random.randn(10000, 5))
161+
self.df = DataFrame(np.random.randn(100000, 5))
162162
self.df_dup = concat([self.df, 2 * self.df, 3 * self.df])
163-
self.bool_indexer = [True] * 5000 + [False] * 5000
163+
self.bool_indexer = [True] * 50000 + [False] * 50000
164164

165165
def time_iloc_dups(self):
166166
self.df_dup.iloc[self.idx_dupe]

doc/source/whatsnew/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Version 1.0
2424
.. toctree::
2525
:maxdepth: 2
2626

27+
v1.0.5
2728
v1.0.4
2829
v1.0.3
2930
v1.0.2

doc/source/whatsnew/v1.0.4.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,4 @@ Bug fixes
4545
Contributors
4646
~~~~~~~~~~~~
4747

48-
.. contributors:: v1.0.3..v1.0.4|HEAD
48+
.. contributors:: v1.0.3..v1.0.4

doc/source/whatsnew/v1.0.5.rst

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
2+
.. _whatsnew_105:
3+
4+
What's new in 1.0.5 (June XX, 2020)
5+
-----------------------------------
6+
7+
These are the changes in pandas 1.0.5. See :ref:`release` for a full changelog
8+
including other versions of pandas.
9+
10+
{{ header }}
11+
12+
.. ---------------------------------------------------------------------------
13+
14+
.. _whatsnew_105.regressions:
15+
16+
Fixed regressions
17+
~~~~~~~~~~~~~~~~~
18+
-
19+
-
20+
21+
.. _whatsnew_105.bug_fixes:
22+
23+
Bug fixes
24+
~~~~~~~~~
25+
-
26+
-
27+
28+
Contributors
29+
~~~~~~~~~~~~
30+
31+
.. contributors:: v1.0.4..v1.0.5|HEAD

doc/source/whatsnew/v1.1.0.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,8 @@ Backwards incompatible API changes
394394
- :meth:`Series.to_timestamp` now raises a ``TypeError`` if the axis is not a :class:`PeriodIndex`. Previously an ``AttributeError`` was raised (:issue:`33327`)
395395
- :meth:`Series.to_period` now raises a ``TypeError`` if the axis is not a :class:`DatetimeIndex`. Previously an ``AttributeError`` was raised (:issue:`33327`)
396396
- :func:`pandas.api.dtypes.is_string_dtype` no longer incorrectly identifies categorical series as string.
397+
- :func:`read_excel` no longer takes ``**kwds`` arguments. This means that passing in keyword ``chunksize`` now raises a ``TypeError``
398+
(previously raised a ``NotImplementedError``), while passing in keyword ``encoding`` now raises a ``TypeError`` (:issue:`34464`)
397399

398400
``MultiIndex.get_indexer`` interprets `method` argument differently
399401
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -727,6 +729,7 @@ Performance improvements
727729
- Performance improvement in arithmetic operations between two :class:`DataFrame` objects (:issue:`32779`)
728730
- Performance improvement in :class:`pandas.core.groupby.RollingGroupby` (:issue:`34052`)
729731
- Performance improvement in arithmetic operations (sub, add, mul, div) for MultiIndex (:issue:`34297`)
732+
- Performance improvement in ``DataFrame[bool_indexer]`` when ``bool_indexer`` is a list (:issue:`33924`)
730733

731734
.. ---------------------------------------------------------------------------
732735
@@ -759,6 +762,7 @@ Datetimelike
759762
- Bug in :meth:`DatetimeIndex.to_period` not inferring the frequency when called with no arguments (:issue:`33358`)
760763
- Bug in :meth:`DatetimeIndex.tz_localize` incorrectly retaining ``freq`` in some cases where the original freq is no longer valid (:issue:`30511`)
761764
- Bug in :meth:`DatetimeIndex.intersection` losing ``freq`` and timezone in some cases (:issue:`33604`)
765+
- Bug in :meth:`DatetimeIndex.get_indexer` where incorrect output would be returned for mixed datetime-like targets (:issue:`33741`)
762766
- Bug in :class:`DatetimeIndex` addition and subtraction with some types of :class:`DateOffset` objects incorrectly retaining an invalid ``freq`` attribute (:issue:`33779`)
763767
- Bug in :class:`DatetimeIndex` where setting the ``freq`` attribute on an index could silently change the ``freq`` attribute on another index viewing the same data (:issue:`33552`)
764768
- :meth:`DataFrame.min`/:meth:`DataFrame.max` not returning consistent result with :meth:`Series.min`/:meth:`Series.max` when called on objects initialized with empty :func:`pd.to_datetime`
@@ -963,6 +967,7 @@ Sparse
963967
- Creating a :class:`SparseArray` from timezone-aware dtype will issue a warning before dropping timezone information, instead of doing so silently (:issue:`32501`)
964968
- Bug in :meth:`arrays.SparseArray.from_spmatrix` wrongly read scipy sparse matrix (:issue:`31991`)
965969
- Bug in :meth:`Series.sum` with ``SparseArray`` raises ``TypeError`` (:issue:`25777`)
970+
- Bug where a :class:`DataFrame` containing a :class:`SparseArray` was filled with ``NaN`` when indexed by a list-like (:issue:`27781`, :issue:`29563`)
966971
- The repr of :class:`SparseDtype` now includes the repr of its ``fill_value`` attribute. Previously it used ``fill_value``'s string representation (:issue:`34352`)
967972

968973
ExtensionArray
@@ -994,6 +999,7 @@ Other
994999
- Bug in :meth:`DataFrame.plot.scatter` caused an error when plotting variable marker sizes (:issue:`32904`)
9951000
- :class:`IntegerArray` now implements the ``sum`` operation (:issue:`33172`)
9961001
- Bug in :class:`Tick` comparisons raising ``TypeError`` when comparing against timedelta-like objects (:issue:`34088`)
1002+
- Bug in :class:`Tick` multiplication raising ``TypeError`` when multiplying by a float (:issue:`34486`)
9971003

9981004
.. ---------------------------------------------------------------------------
9991005

pandas/_libs/lib.pyx

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1380,8 +1380,10 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
13801380
return "mixed-integer"
13811381

13821382
elif PyDateTime_Check(val):
1383-
if is_datetime_array(values):
1383+
if is_datetime_array(values, skipna=skipna):
13841384
return "datetime"
1385+
elif is_date_array(values, skipna=skipna):
1386+
return "date"
13851387

13861388
elif PyDate_Check(val):
13871389
if is_date_array(values, skipna=skipna):
@@ -1752,10 +1754,10 @@ cdef class DatetimeValidator(TemporalValidator):
17521754
return is_null_datetime64(value)
17531755

17541756

1755-
cpdef bint is_datetime_array(ndarray values):
1757+
cpdef bint is_datetime_array(ndarray values, bint skipna=True):
17561758
cdef:
17571759
DatetimeValidator validator = DatetimeValidator(len(values),
1758-
skipna=True)
1760+
skipna=skipna)
17591761
return validator.validate(values)
17601762

17611763

pandas/_libs/tslibs/conversion.pxd

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
from cpython.datetime cimport datetime
1+
from cpython.datetime cimport datetime, tzinfo
22

3-
from numpy cimport int64_t, int32_t
3+
from numpy cimport int64_t, int32_t, ndarray
44

55
from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct
66

@@ -24,3 +24,5 @@ cdef int64_t get_datetime64_nanos(object val) except? -1
2424

2525
cpdef datetime localize_pydatetime(datetime dt, object tz)
2626
cdef int64_t cast_from_unit(object ts, str unit) except? -1
27+
28+
cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo tz)

pandas/_libs/tslibs/conversion.pyx

Lines changed: 12 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -763,7 +763,7 @@ cpdef inline datetime localize_pydatetime(datetime dt, object tz):
763763

764764
@cython.wraparound(False)
765765
@cython.boundscheck(False)
766-
def normalize_i8_timestamps(int64_t[:] stamps, object tz):
766+
cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo tz):
767767
"""
768768
Normalize each of the (nanosecond) timezone aware timestamps in the given
769769
array by rounding down to the beginning of the day (i.e. midnight).
@@ -774,31 +774,6 @@ def normalize_i8_timestamps(int64_t[:] stamps, object tz):
774774
stamps : int64 ndarray
775775
tz : tzinfo or None
776776
777-
Returns
778-
-------
779-
result : int64 ndarray of converted of normalized nanosecond timestamps
780-
"""
781-
cdef:
782-
int64_t[:] result
783-
784-
result = _normalize_local(stamps, tz)
785-
786-
return result.base # .base to access underlying np.ndarray
787-
788-
789-
@cython.wraparound(False)
790-
@cython.boundscheck(False)
791-
cdef int64_t[:] _normalize_local(const int64_t[:] stamps, tzinfo tz):
792-
"""
793-
Normalize each of the (nanosecond) timestamps in the given array by
794-
rounding down to the beginning of the day (i.e. midnight) for the
795-
given timezone `tz`.
796-
797-
Parameters
798-
----------
799-
stamps : int64 ndarray
800-
tz : tzinfo
801-
802777
Returns
803778
-------
804779
result : int64 ndarray of normalized nanosecond timestamps
@@ -813,7 +788,16 @@ cdef int64_t[:] _normalize_local(const int64_t[:] stamps, tzinfo tz):
813788
npy_datetimestruct dts
814789
int64_t delta, local_val
815790

816-
if is_tzlocal(tz):
791+
if tz is None or is_utc(tz):
792+
with nogil:
793+
for i in range(n):
794+
if stamps[i] == NPY_NAT:
795+
result[i] = NPY_NAT
796+
continue
797+
local_val = stamps[i]
798+
dt64_to_dtstruct(local_val, &dts)
799+
result[i] = _normalized_stamp(&dts)
800+
elif is_tzlocal(tz):
817801
for i in range(n):
818802
if stamps[i] == NPY_NAT:
819803
result[i] = NPY_NAT
@@ -843,7 +827,7 @@ cdef int64_t[:] _normalize_local(const int64_t[:] stamps, tzinfo tz):
843827
dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts)
844828
result[i] = _normalized_stamp(&dts)
845829

846-
return result
830+
return result.base # `.base` to access underlying ndarray
847831

848832

849833
cdef inline int64_t _normalized_stamp(npy_datetimestruct *dts) nogil:

pandas/_libs/tslibs/offsets.pyx

Lines changed: 60 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,11 @@ cnp.import_array()
2525
from pandas._libs.properties import cache_readonly
2626

2727
from pandas._libs.tslibs cimport util
28-
from pandas._libs.tslibs.util cimport is_integer_object, is_datetime64_object
28+
from pandas._libs.tslibs.util cimport (
29+
is_integer_object,
30+
is_datetime64_object,
31+
is_float_object,
32+
)
2933

3034
from pandas._libs.tslibs.base cimport ABCTimestamp
3135

@@ -743,6 +747,25 @@ cdef class Tick(SingleConstructorOffset):
743747
"Tick offset with `normalize=True` are not allowed."
744748
)
745749

750+
# FIXME: Without making this cpdef, we get AttributeError when calling
751+
# from __mul__
752+
cpdef Tick _next_higher_resolution(Tick self):
753+
if type(self) is Day:
754+
return Hour(self.n * 24)
755+
if type(self) is Hour:
756+
return Minute(self.n * 60)
757+
if type(self) is Minute:
758+
return Second(self.n * 60)
759+
if type(self) is Second:
760+
return Milli(self.n * 1000)
761+
if type(self) is Milli:
762+
return Micro(self.n * 1000)
763+
if type(self) is Micro:
764+
return Nano(self.n * 1000)
765+
raise NotImplementedError(type(self))
766+
767+
# --------------------------------------------------------------------
768+
746769
def _repr_attrs(self) -> str:
747770
# Since cdef classes have no __dict__, we need to override
748771
return ""
@@ -791,6 +814,21 @@ cdef class Tick(SingleConstructorOffset):
791814
def __gt__(self, other):
792815
return self.delta.__gt__(other)
793816

817+
def __mul__(self, other):
818+
if not isinstance(self, Tick):
819+
# cython semantics, this is __rmul__
820+
return other.__mul__(self)
821+
if is_float_object(other):
822+
n = other * self.n
823+
# If the new `n` is an integer, we can represent it using the
824+
# same Tick subclass as self, otherwise we need to move up
825+
# to a higher-resolution subclass
826+
if np.isclose(n % 1, 0):
827+
return type(self)(int(n))
828+
new_self = self._next_higher_resolution()
829+
return new_self * other
830+
return BaseOffset.__mul__(self, other)
831+
794832
def __truediv__(self, other):
795833
if not isinstance(self, Tick):
796834
# cython semantics mean the args are sometimes swapped
@@ -3563,6 +3601,9 @@ cpdef to_offset(freq):
35633601
>>> to_offset(Hour())
35643602
<Hour>
35653603
"""
3604+
# TODO: avoid runtime imports
3605+
from pandas._libs.tslibs.timedeltas import Timedelta
3606+
35663607
if freq is None:
35673608
return None
35683609

@@ -3589,7 +3630,9 @@ cpdef to_offset(freq):
35893630
if split[-1] != "" and not split[-1].isspace():
35903631
# the last element must be blank
35913632
raise ValueError("last element must be blank")
3592-
for sep, stride, name in zip(split[0::4], split[1::4], split[2::4]):
3633+
3634+
tups = zip(split[0::4], split[1::4], split[2::4])
3635+
for n, (sep, stride, name) in enumerate(tups):
35933636
if sep != "" and not sep.isspace():
35943637
raise ValueError("separator must be spaces")
35953638
prefix = _lite_rule_alias.get(name) or name
@@ -3598,15 +3641,22 @@ cpdef to_offset(freq):
35983641
if not stride:
35993642
stride = 1
36003643

3601-
from .resolution import Resolution # TODO: avoid runtime import
3644+
if prefix in {"D", "H", "T", "S", "L", "U", "N"}:
3645+
# For these prefixes, we have something like "3H" or
3646+
# "2.5T", so we can construct a Timedelta with the
3647+
# matching unit and get our offset from delta_to_tick
3648+
td = Timedelta(1, unit=prefix)
3649+
off = delta_to_tick(td)
3650+
offset = off * float(stride)
3651+
if n != 0:
3652+
# If n==0, then stride_sign is already incorporated
3653+
# into the offset
3654+
offset *= stride_sign
3655+
else:
3656+
stride = int(stride)
3657+
offset = _get_offset(name)
3658+
offset = offset * int(np.fabs(stride) * stride_sign)
36023659

3603-
if prefix in Resolution.reso_str_bump_map:
3604-
stride, name = Resolution.get_stride_from_decimal(
3605-
float(stride), prefix
3606-
)
3607-
stride = int(stride)
3608-
offset = _get_offset(name)
3609-
offset = offset * int(np.fabs(stride) * stride_sign)
36103660
if delta is None:
36113661
delta = offset
36123662
else:

pandas/_libs/tslibs/period.pyx

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1702,10 +1702,7 @@ cdef class _Period:
17021702

17031703
@property
17041704
def end_time(self) -> Timestamp:
1705-
# freq.n can't be negative or 0
1706-
# ordinal = (self + self.freq.n).start_time.value - 1
1707-
ordinal = (self + self.freq).start_time.value - 1
1708-
return Timestamp(ordinal)
1705+
return self.to_timestamp(how="end")
17091706

17101707
def to_timestamp(self, freq=None, how='start', tz=None) -> Timestamp:
17111708
"""
@@ -1727,18 +1724,22 @@ cdef class _Period:
17271724
-------
17281725
Timestamp
17291726
"""
1730-
if freq is not None:
1731-
freq = self._maybe_convert_freq(freq)
17321727
how = validate_end_alias(how)
17331728

17341729
end = how == 'E'
17351730
if end:
1731+
if freq == "B" or self.freq == "B":
1732+
# roll forward to ensure we land on B date
1733+
adjust = Timedelta(1, "D") - Timedelta(1, "ns")
1734+
return self.to_timestamp(how="start") + adjust
17361735
endpoint = (self + self.freq).to_timestamp(how='start')
17371736
return endpoint - Timedelta(1, 'ns')
17381737

17391738
if freq is None:
17401739
base, mult = get_freq_code(self.freq)
17411740
freq = get_to_timestamp_base(base)
1741+
else:
1742+
freq = self._maybe_convert_freq(freq)
17421743

17431744
base, mult = get_freq_code(freq)
17441745
val = self.asfreq(freq, how)

0 commit comments

Comments
 (0)