Skip to content

Commit 9b80ed7

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into is_scalar
Merging master of pandas-dev/pandas
2 parents 4e426de + deb7b4d commit 9b80ed7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+839
-544
lines changed

.pep8speaks.yml

-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ pycodestyle:
1313
- W503, # line break before binary operator
1414
- W504, # line break after binary operator
1515
- E402, # module level import not at top of file
16-
- E722, # do not use bare except
1716
- E731, # do not assign a lambda expression, use a def
1817
- C406, # Unnecessary list literal - rewrite as a dict literal.
1918
- C408, # Unnecessary dict call - rewrite as a literal.

asv_bench/benchmarks/timeseries.py

+31-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from datetime import timedelta
22

3+
import dateutil
34
import numpy as np
45
from pandas import to_datetime, date_range, Series, DataFrame, period_range
56
from pandas.tseries.frequencies import infer_freq
@@ -57,7 +58,10 @@ def time_to_pydatetime(self, index_type):
5758

5859
class TzLocalize(object):
5960

60-
def setup(self):
61+
params = [None, 'US/Eastern', 'UTC', dateutil.tz.tzutc()]
62+
param_names = 'tz'
63+
64+
def setup(self, tz):
6165
dst_rng = date_range(start='10/29/2000 1:00:00',
6266
end='10/29/2000 1:59:59', freq='S')
6367
self.index = date_range(start='10/29/2000',
@@ -68,8 +72,8 @@ def setup(self):
6872
end='10/29/2000 3:00:00',
6973
freq='S'))
7074

71-
def time_infer_dst(self):
72-
self.index.tz_localize('US/Eastern', ambiguous='infer')
75+
def time_infer_dst(self, tz):
76+
self.index.tz_localize(tz, ambiguous='infer')
7377

7478

7579
class ResetIndex(object):
@@ -377,15 +381,35 @@ def time_dup_string_tzoffset_dates(self, cache):
377381

378382
class DatetimeAccessor(object):
379383

380-
def setup(self):
384+
params = [None, 'US/Eastern', 'UTC', dateutil.tz.tzutc()]
385+
param_names = 'tz'
386+
387+
def setup(self, tz):
381388
N = 100000
382-
self.series = Series(date_range(start='1/1/2000', periods=N, freq='T'))
389+
self.series = Series(
390+
date_range(start='1/1/2000', periods=N, freq='T', tz=tz)
391+
)
383392

384-
def time_dt_accessor(self):
393+
def time_dt_accessor(self, tz):
385394
self.series.dt
386395

387-
def time_dt_accessor_normalize(self):
396+
def time_dt_accessor_normalize(self, tz):
388397
self.series.dt.normalize()
389398

399+
def time_dt_accessor_month_name(self, tz):
400+
self.series.dt.month_name()
401+
402+
def time_dt_accessor_day_name(self, tz):
403+
self.series.dt.day_name()
404+
405+
def time_dt_accessor_time(self, tz):
406+
self.series.dt.time
407+
408+
def time_dt_accessor_date(self, tz):
409+
self.series.dt.date
410+
411+
def time_dt_accessor_year(self, tz):
412+
self.series.dt.year
413+
390414

391415
from .pandas_vb_common import setup # noqa: F401

asv_bench/benchmarks/timestamp.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from pandas import Timestamp
44
import pytz
5+
import dateutil
56

67

78
class TimestampConstruction(object):
@@ -29,7 +30,8 @@ def time_fromtimestamp(self):
2930

3031

3132
class TimestampProperties(object):
32-
_tzs = [None, pytz.timezone('Europe/Amsterdam')]
33+
_tzs = [None, pytz.timezone('Europe/Amsterdam'), pytz.UTC,
34+
dateutil.tz.tzutc()]
3335
_freqs = [None, 'B']
3436
params = [_tzs, _freqs]
3537
param_names = ['tz', 'freq']
@@ -87,7 +89,8 @@ def time_microsecond(self, tz, freq):
8789

8890

8991
class TimestampOps(object):
90-
params = [None, 'US/Eastern', 'UTC']
92+
params = [None, 'US/Eastern', pytz.UTC,
93+
dateutil.tz.tzutc()]
9194
param_names = ['tz']
9295

9396
def setup(self, tz):
@@ -105,6 +108,14 @@ def time_to_pydatetime(self, tz):
105108
def time_normalize(self, tz):
106109
self.ts.normalize()
107110

111+
def time_tz_convert(self, tz):
112+
if self.ts.tz is not None:
113+
self.ts.tz_convert(tz)
114+
115+
def time_tz_localize(self, tz):
116+
if self.ts.tz is None:
117+
self.ts.tz_localize(tz)
118+
108119

109120
class TimestampAcrossDst(object):
110121
def setup(self):

ci/deps/azure-27-compat.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ dependencies:
1616
- pytz=2013b
1717
- scipy=0.18.1
1818
- sqlalchemy=0.7.8
19-
- xlrd=0.9.2
19+
- xlrd=1.0.0
2020
- xlsxwriter=0.5.2
2121
- xlwt=0.7.5
2222
# universal

ci/deps/travis-27-locale.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ dependencies:
1616
- pytz=2013b
1717
- scipy
1818
- sqlalchemy=0.8.1
19-
- xlrd=0.9.2
19+
- xlrd=1.0.0
2020
- xlsxwriter=0.5.2
2121
- xlwt=0.7.5
2222
# universal

ci/deps/travis-27.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ dependencies:
3535
- scipy
3636
- sqlalchemy=0.9.6
3737
- xarray=0.9.6
38-
- xlrd=0.9.2
38+
- xlrd=1.0.0
3939
- xlsxwriter=0.5.2
4040
- xlwt=0.7.5
4141
# universal

doc/source/conf.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -586,7 +586,7 @@ def linkcode_resolve(domain, info):
586586
for part in fullname.split('.'):
587587
try:
588588
obj = getattr(obj, part)
589-
except:
589+
except AttributeError:
590590
return None
591591

592592
try:
@@ -595,14 +595,14 @@ def linkcode_resolve(domain, info):
595595
fn = inspect.getsourcefile(inspect.unwrap(obj))
596596
else:
597597
fn = inspect.getsourcefile(obj)
598-
except:
598+
except TypeError:
599599
fn = None
600600
if not fn:
601601
return None
602602

603603
try:
604604
source, lineno = inspect.getsourcelines(obj)
605-
except:
605+
except OSError:
606606
lineno = None
607607

608608
if lineno:

doc/source/install.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ Optional Dependencies
269269
* `matplotlib <http://matplotlib.org/>`__: for plotting, Version 2.0.0 or higher.
270270
* For Excel I/O:
271271

272-
* `xlrd/xlwt <http://www.python-excel.org/>`__: Excel reading (xlrd) and writing (xlwt)
272+
* `xlrd/xlwt <http://www.python-excel.org/>`__: Excel reading (xlrd), version 1.0.0 or higher required, and writing (xlwt)
273273
* `openpyxl <https://openpyxl.readthedocs.io/en/stable/>`__: openpyxl version 2.4.0
274274
for writing .xlsx files (xlrd >= 1.0.0)
275275
* `XlsxWriter <https://pypi.org/project/XlsxWriter>`__: Alternative Excel writer

doc/source/whatsnew/v0.24.0.rst

+7-1
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ Other Enhancements
291291
- :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`)
292292
- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object.
293293
- :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed string columns to Stata strl format (:issue:`23633`)
294+
- :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained an ``axis`` parameter (:issue:`8839`)
294295

295296
.. _whatsnew_0240.api_breaking:
296297

@@ -306,7 +307,7 @@ Backwards incompatible API changes
306307
Dependencies have increased minimum versions
307308
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
308309

309-
We have updated our minimum supported versions of dependencies (:issue:`21242`, `18742`).
310+
We have updated our minimum supported versions of dependencies (:issue:`21242`, :issue:`18742`, :issue:`23774`).
310311
If installed, we now require:
311312

312313
+-----------------+-----------------+----------+
@@ -330,6 +331,8 @@ If installed, we now require:
330331
+-----------------+-----------------+----------+
331332
| scipy | 0.18.1 | |
332333
+-----------------+-----------------+----------+
334+
| xlrd | 1.0.0 | |
335+
+-----------------+-----------------+----------+
333336

334337
Additionally we no longer depend on `feather-format` for feather based storage
335338
and replaced it with references to `pyarrow` (:issue:`21639` and :issue:`23053`).
@@ -1144,6 +1147,7 @@ Performance Improvements
11441147
- Improved performance of :func:`PeriodIndex.unique` (:issue:`23083`)
11451148
- Improved performance of :func:`pd.concat` for `Series` objects (:issue:`23404`)
11461149
- Improved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone naive or UTC datetimes (:issue:`23634`)
1150+
- Improved performance of :meth:`DatetimeIndex.tz_localize` and various ``DatetimeIndex`` attributes with dateutil UTC timezone (:issue:`23772`)
11471151

11481152

11491153
.. _whatsnew_0240.docs:
@@ -1381,8 +1385,10 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
13811385
- Bug in :func:`DataFrame.to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`)
13821386
- Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`).
13831387
- Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`)
1388+
- Bug in :func:`read_csv()` in which memory leaks occurred in the C engine when parsing ``NaN`` values due to insufficient cleanup on completion or error (:issue:`21353`)
13841389
- Bug in :func:`read_csv()` in which incorrect error messages were being raised when ``skipfooter`` was passed in along with ``nrows``, ``iterator``, or ``chunksize`` (:issue:`23711`)
13851390
- Bug in :meth:`read_csv()` in which :class:`MultiIndex` index names were being improperly handled in the cases when they were not provided (:issue:`23484`)
1391+
- Bug in :meth:`read_csv()` in which unnecessary warnings were being raised when the dialect's values conflicted with the default arguments (:issue:`23761`)
13861392
- Bug in :meth:`read_html()` in which the error message was not displaying the valid flavors when an invalid one was provided (:issue:`23549`)
13871393
- Bug in :meth:`read_excel()` in which extraneous header names were extracted, even though none were specified (:issue:`11733`)
13881394
- Bug in :meth:`read_excel()` in which ``index_col=None`` was not being respected and parsing index columns anyway (:issue:`20480`)

pandas/_libs/algos_rank_helper.pxi.in

+1-1
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
126126

127127
sorted_data = values.take(_as)
128128
sorted_mask = mask.take(_as)
129-
_indices = np.diff(sorted_mask).nonzero()[0]
129+
_indices = np.diff(sorted_mask.astype(int)).nonzero()[0]
130130
non_na_idx = _indices[0] if len(_indices) > 0 else -1
131131
argsorted = _as.astype('i8')
132132

pandas/_libs/index.pyx

+2
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ cdef class IndexEngine:
113113
if not self.is_unique:
114114
return self._get_loc_duplicates(val)
115115
values = self._get_index_values()
116+
117+
self._check_type(val)
116118
loc = _bin_search(values, val) # .searchsorted(val, side='left')
117119
if loc >= len(values):
118120
raise KeyError(val)

pandas/_libs/index_class_helper.pxi.in

+2
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ cdef class {{name}}Engine(IndexEngine):
5151
raise KeyError(val)
5252
elif util.is_float_object(val):
5353
raise KeyError(val)
54+
elif not util.is_integer_object(val):
55+
raise KeyError(val)
5456
{{endif}}
5557

5658
{{if name != 'Object'}}

pandas/_libs/parsers.pyx

+28-18
Original file line numberDiff line numberDiff line change
@@ -1070,18 +1070,6 @@ cdef class TextReader:
10701070

10711071
conv = self._get_converter(i, name)
10721072

1073-
# XXX
1074-
na_flist = set()
1075-
if self.na_filter:
1076-
na_list, na_flist = self._get_na_list(i, name)
1077-
if na_list is None:
1078-
na_filter = 0
1079-
else:
1080-
na_filter = 1
1081-
na_hashset = kset_from_list(na_list)
1082-
else:
1083-
na_filter = 0
1084-
10851073
col_dtype = None
10861074
if self.dtype is not None:
10871075
if isinstance(self.dtype, dict):
@@ -1106,13 +1094,34 @@ cdef class TextReader:
11061094
self.c_encoding)
11071095
continue
11081096

1109-
# Should return as the desired dtype (inferred or specified)
1110-
col_res, na_count = self._convert_tokens(
1111-
i, start, end, name, na_filter, na_hashset,
1112-
na_flist, col_dtype)
1097+
# Collect the list of NaN values associated with the column.
1098+
# If we aren't supposed to do that, or none are collected,
1099+
# we set `na_filter` to `0` (`1` otherwise).
1100+
na_flist = set()
1101+
1102+
if self.na_filter:
1103+
na_list, na_flist = self._get_na_list(i, name)
1104+
if na_list is None:
1105+
na_filter = 0
1106+
else:
1107+
na_filter = 1
1108+
na_hashset = kset_from_list(na_list)
1109+
else:
1110+
na_filter = 0
11131111

1114-
if na_filter:
1115-
self._free_na_set(na_hashset)
1112+
# Attempt to parse tokens and infer dtype of the column.
1113+
# Should return as the desired dtype (inferred or specified).
1114+
try:
1115+
col_res, na_count = self._convert_tokens(
1116+
i, start, end, name, na_filter, na_hashset,
1117+
na_flist, col_dtype)
1118+
finally:
1119+
# gh-21353
1120+
#
1121+
# Cleanup the NaN hash that we generated
1122+
# to avoid memory leaks.
1123+
if na_filter:
1124+
self._free_na_set(na_hashset)
11161125

11171126
if upcast_na and na_count > 0:
11181127
col_res = _maybe_upcast(col_res)
@@ -2059,6 +2068,7 @@ cdef kh_str_t* kset_from_list(list values) except NULL:
20592068

20602069
# None creeps in sometimes, which isn't possible here
20612070
if not isinstance(val, bytes):
2071+
kh_destroy_str(table)
20622072
raise ValueError('Must be all encoded bytes')
20632073

20642074
k = kh_put_str(table, PyBytes_AsString(val), &ret)

pandas/_libs/tslibs/conversion.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -882,7 +882,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
882882
bint shift = False, fill_nonexist = False
883883

884884
# Vectorized version of DstTzInfo.localize
885-
if tz == UTC or tz is None:
885+
if is_utc(tz) or tz is None:
886886
return vals
887887

888888
result = np.empty(n, dtype=np.int64)

pandas/core/arrays/datetimelike.py

+8
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,10 @@ def __add__(self, other):
727727
else: # pragma: no cover
728728
return NotImplemented
729729

730+
if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
731+
from pandas.core.arrays import TimedeltaArrayMixin
732+
# TODO: infer freq?
733+
return TimedeltaArrayMixin(result)
730734
return result
731735

732736
cls.__add__ = __add__
@@ -791,6 +795,10 @@ def __sub__(self, other):
791795
else: # pragma: no cover
792796
return NotImplemented
793797

798+
if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
799+
from pandas.core.arrays import TimedeltaArrayMixin
800+
# TODO: infer freq?
801+
return TimedeltaArrayMixin(result)
794802
return result
795803

796804
cls.__sub__ = __sub__

0 commit comments

Comments
 (0)