pandas-dev
diff --git a/‎.pep8speaks.yml
Lines changed: 0 additions & 1 deletion b/‎.pep8speaks.yml
Lines changed: 0 additions & 1 deletion
diff --git a/‎asv_bench/benchmarks/timeseries.py
Lines changed: 31 additions & 7 deletions b/‎asv_bench/benchmarks/timeseries.py
Lines changed: 31 additions & 7 deletions
diff --git a/‎asv_bench/benchmarks/timestamp.py
Lines changed: 13 additions & 2 deletions b/‎asv_bench/benchmarks/timestamp.py
Lines changed: 13 additions & 2 deletions
diff --git a/‎ci/deps/azure-27-compat.yaml
Lines changed: 1 addition & 1 deletion b/‎ci/deps/azure-27-compat.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/‎ci/deps/travis-27-locale.yaml
Lines changed: 1 addition & 1 deletion b/‎ci/deps/travis-27-locale.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/‎ci/deps/travis-27.yaml
Lines changed: 1 addition & 1 deletion b/‎ci/deps/travis-27.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/conf.py
Lines changed: 3 additions & 3 deletions b/‎doc/source/conf.py
Lines changed: 3 additions & 3 deletions
diff --git a/‎doc/source/install.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/install.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/whatsnew/v0.24.0.rst
Lines changed: 7 additions & 1 deletion b/‎doc/source/whatsnew/v0.24.0.rst
Lines changed: 7 additions & 1 deletion
diff --git a/‎pandas/_libs/algos_rank_helper.pxi.in
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/algos_rank_helper.pxi.in
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/_libs/index.pyx
Lines changed: 2 additions & 0 deletions b/‎pandas/_libs/index.pyx
Lines changed: 2 additions & 0 deletions
diff --git a/‎pandas/_libs/index_class_helper.pxi.in
Lines changed: 2 additions & 0 deletions b/‎pandas/_libs/index_class_helper.pxi.in
Lines changed: 2 additions & 0 deletions
diff --git a/‎pandas/_libs/parsers.pyx
Lines changed: 28 additions & 18 deletions b/‎pandas/_libs/parsers.pyx
Lines changed: 28 additions & 18 deletions
diff --git a/‎pandas/_libs/tslibs/conversion.pyx
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/tslibs/conversion.pyx
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/core/arrays/datetimelike.py
Lines changed: 8 additions & 0 deletions b/‎pandas/core/arrays/datetimelike.py
Lines changed: 8 additions & 0 deletions
@@ -13,7 +13,6 @@ pycodestyle:
         - W503,  # line break before binary operator
         - W504,  # line break after binary operator
         - E402,  # module level import not at top of file
-        - E722,  # do not use bare except
         - E731,  # do not assign a lambda expression, use a def
         - C406,  # Unnecessary list literal - rewrite as a dict literal.
         - C408,  # Unnecessary dict call - rewrite as a literal.
 
@@ -1,5 +1,6 @@
 from datetime import timedelta
 
+import dateutil
 import numpy as np
 from pandas import to_datetime, date_range, Series, DataFrame, period_range
 from pandas.tseries.frequencies import infer_freq
@@ -57,7 +58,10 @@ def time_to_pydatetime(self, index_type):
 
 class TzLocalize(object):
 
-    def setup(self):
+    params = [None, 'US/Eastern', 'UTC', dateutil.tz.tzutc()]
+    param_names = 'tz'
+
+    def setup(self, tz):
         dst_rng = date_range(start='10/29/2000 1:00:00',
                              end='10/29/2000 1:59:59', freq='S')
         self.index = date_range(start='10/29/2000',
@@ -68,8 +72,8 @@ def setup(self):
                                                   end='10/29/2000 3:00:00',
                                                   freq='S'))
 
-    def time_infer_dst(self):
-        self.index.tz_localize('US/Eastern', ambiguous='infer')
+    def time_infer_dst(self, tz):
+        self.index.tz_localize(tz, ambiguous='infer')
 
 
 class ResetIndex(object):
@@ -377,15 +381,35 @@ def time_dup_string_tzoffset_dates(self, cache):
 
 class DatetimeAccessor(object):
 
-    def setup(self):
+    params = [None, 'US/Eastern', 'UTC', dateutil.tz.tzutc()]
+    param_names = 'tz'
+
+    def setup(self, tz):
         N = 100000
-        self.series = Series(date_range(start='1/1/2000', periods=N, freq='T'))
+        self.series = Series(
+            date_range(start='1/1/2000', periods=N, freq='T', tz=tz)
+        )
 
-    def time_dt_accessor(self):
+    def time_dt_accessor(self, tz):
         self.series.dt
 
-    def time_dt_accessor_normalize(self):
+    def time_dt_accessor_normalize(self, tz):
         self.series.dt.normalize()
 
+    def time_dt_accessor_month_name(self, tz):
+        self.series.dt.month_name()
+
+    def time_dt_accessor_day_name(self, tz):
+        self.series.dt.day_name()
+
+    def time_dt_accessor_time(self, tz):
+        self.series.dt.time
+
+    def time_dt_accessor_date(self, tz):
+        self.series.dt.date
+
+    def time_dt_accessor_year(self, tz):
+        self.series.dt.year
+
 
 from .pandas_vb_common import setup  # noqa: F401
@@ -2,6 +2,7 @@
 
 from pandas import Timestamp
 import pytz
+import dateutil
 
 
 class TimestampConstruction(object):
@@ -29,7 +30,8 @@ def time_fromtimestamp(self):
 
 
 class TimestampProperties(object):
-    _tzs = [None, pytz.timezone('Europe/Amsterdam')]
+    _tzs = [None, pytz.timezone('Europe/Amsterdam'), pytz.UTC,
+            dateutil.tz.tzutc()]
     _freqs = [None, 'B']
     params = [_tzs, _freqs]
     param_names = ['tz', 'freq']
@@ -87,7 +89,8 @@ def time_microsecond(self, tz, freq):
 
 
 class TimestampOps(object):
-    params = [None, 'US/Eastern', 'UTC']
+    params = [None, 'US/Eastern', pytz.UTC,
+              dateutil.tz.tzutc()]
     param_names = ['tz']
 
     def setup(self, tz):
@@ -105,6 +108,14 @@ def time_to_pydatetime(self, tz):
     def time_normalize(self, tz):
         self.ts.normalize()
 
+    def time_tz_convert(self, tz):
+        if self.ts.tz is not None:
+            self.ts.tz_convert(tz)
+
+    def time_tz_localize(self, tz):
+        if self.ts.tz is None:
+            self.ts.tz_localize(tz)
+
 
 class TimestampAcrossDst(object):
     def setup(self):
 
@@ -16,7 +16,7 @@ dependencies:
   - pytz=2013b
   - scipy=0.18.1
   - sqlalchemy=0.7.8
-  - xlrd=0.9.2
+  - xlrd=1.0.0
   - xlsxwriter=0.5.2
   - xlwt=0.7.5
   # universal
 
@@ -16,7 +16,7 @@ dependencies:
   - pytz=2013b
   - scipy
   - sqlalchemy=0.8.1
-  - xlrd=0.9.2
+  - xlrd=1.0.0
   - xlsxwriter=0.5.2
   - xlwt=0.7.5
   # universal
 
@@ -35,7 +35,7 @@ dependencies:
   - scipy
   - sqlalchemy=0.9.6
   - xarray=0.9.6
-  - xlrd=0.9.2
+  - xlrd=1.0.0
   - xlsxwriter=0.5.2
   - xlwt=0.7.5
   # universal
 
@@ -586,7 +586,7 @@ def linkcode_resolve(domain, info):
     for part in fullname.split('.'):
         try:
             obj = getattr(obj, part)
-        except:
+        except AttributeError:
             return None
 
     try:
@@ -595,14 +595,14 @@ def linkcode_resolve(domain, info):
             fn = inspect.getsourcefile(inspect.unwrap(obj))
         else:
             fn = inspect.getsourcefile(obj)
-    except:
+    except TypeError:
         fn = None
     if not fn:
         return None
 
     try:
         source, lineno = inspect.getsourcelines(obj)
-    except:
+    except OSError:
         lineno = None
 
     if lineno:
 
@@ -269,7 +269,7 @@ Optional Dependencies
 * `matplotlib <http://matplotlib.org/>`__: for plotting, Version 2.0.0 or higher.
 * For Excel I/O:
 
-    * `xlrd/xlwt <http://www.python-excel.org/>`__: Excel reading (xlrd) and writing (xlwt)
+    * `xlrd/xlwt <http://www.python-excel.org/>`__: Excel reading (xlrd), version 1.0.0 or higher required, and writing (xlwt)
     * `openpyxl <https://openpyxl.readthedocs.io/en/stable/>`__: openpyxl version 2.4.0
       for writing .xlsx files (xlrd >= 0.9.0)
     * `XlsxWriter <https://pypi.org/project/XlsxWriter>`__: Alternative Excel writer
 
@@ -291,6 +291,7 @@ Other Enhancements
 - :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`)
 - :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object.
 - :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed string columns to Stata strl format (:issue:`23633`)
+- :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained an ``axis`` parameter (:issue:`8839`)
 
 .. _whatsnew_0240.api_breaking:
 
@@ -306,7 +307,7 @@ Backwards incompatible API changes
 Dependencies have increased minimum versions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-We have updated our minimum supported versions of dependencies (:issue:`21242`, `18742`).
+We have updated our minimum supported versions of dependencies (:issue:`21242`, :issue:`18742`, :issue:`23774`).
 If installed, we now require:
 
 +-----------------+-----------------+----------+
@@ -330,6 +331,8 @@ If installed, we now require:
 +-----------------+-----------------+----------+
 | scipy           | 0.18.1          |          |
 +-----------------+-----------------+----------+
+| xlrd            | 1.0.0           |          |
++-----------------+-----------------+----------+
 
 Additionally we no longer depend on `feather-format` for feather based storage
 and replaced it with references to `pyarrow` (:issue:`21639` and :issue:`23053`).
@@ -1144,6 +1147,7 @@ Performance Improvements
 - Improved performance of :func:`PeriodIndex.unique` (:issue:`23083`)
 - Improved performance of :func:`pd.concat` for `Series` objects (:issue:`23404`)
 - Improved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone naive or UTC datetimes (:issue:`23634`)
+- Improved performance of :meth:`DatetimeIndex.tz_localize` and various ``DatetimeIndex`` attributes with dateutil UTC timezone (:issue:`23772`)
 
 
 .. _whatsnew_0240.docs:
@@ -1381,8 +1385,10 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
 - Bug in :func:`DataFrame.to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`)
 - Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`).
 - Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`)
+- Bug in :func:`read_csv()` in which memory leaks occurred in the C engine when parsing ``NaN`` values due to insufficient cleanup on completion or error (:issue:`21353`)
 - Bug in :func:`read_csv()` in which incorrect error messages were being raised when ``skipfooter`` was passed in along with ``nrows``, ``iterator``, or ``chunksize`` (:issue:`23711`)
 - Bug in :meth:`read_csv()` in which :class:`MultiIndex` index names were being improperly handled in the cases when they were not provided (:issue:`23484`)
+- Bug in :meth:`read_csv()` in which unnecessary warnings were being raised when the dialect's values conflicted with the default arguments (:issue:`23761`)
 - Bug in :meth:`read_html()` in which the error message was not displaying the valid flavors when an invalid one was provided (:issue:`23549`)
 - Bug in :meth:`read_excel()` in which extraneous header names were extracted, even though none were specified (:issue:`11733`)
 - Bug in :meth:`read_excel()` in which ``index_col=None`` was not being respected and parsing index columns anyway (:issue:`20480`)
 
@@ -126,7 +126,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
 
     sorted_data = values.take(_as)
     sorted_mask = mask.take(_as)
-    _indices = np.diff(sorted_mask).nonzero()[0]
+    _indices = np.diff(sorted_mask.astype(int)).nonzero()[0]
     non_na_idx = _indices[0] if len(_indices) > 0 else -1
     argsorted = _as.astype('i8')
 
 
@@ -113,6 +113,8 @@ cdef class IndexEngine:
             if not self.is_unique:
                 return self._get_loc_duplicates(val)
             values = self._get_index_values()
+
+            self._check_type(val)
             loc = _bin_search(values, val)  # .searchsorted(val, side='left')
             if loc >= len(values):
                 raise KeyError(val)
 
@@ -51,6 +51,8 @@ cdef class {{name}}Engine(IndexEngine):
             raise KeyError(val)
         elif util.is_float_object(val):
             raise KeyError(val)
+        elif not util.is_integer_object(val):
+            raise KeyError(val)
     {{endif}}
 
     {{if name != 'Object'}}
 
@@ -1070,18 +1070,6 @@ cdef class TextReader:
 
             conv = self._get_converter(i, name)
 
-            # XXX
-            na_flist = set()
-            if self.na_filter:
-                na_list, na_flist = self._get_na_list(i, name)
-                if na_list is None:
-                    na_filter = 0
-                else:
-                    na_filter = 1
-                    na_hashset = kset_from_list(na_list)
-            else:
-                na_filter = 0
-
             col_dtype = None
             if self.dtype is not None:
                 if isinstance(self.dtype, dict):
@@ -1106,13 +1094,34 @@ cdef class TextReader:
                                               self.c_encoding)
                 continue
 
-            # Should return as the desired dtype (inferred or specified)
-            col_res, na_count = self._convert_tokens(
-                i, start, end, name, na_filter, na_hashset,
-                na_flist, col_dtype)
+            # Collect the list of NaN values associated with the column.
+            # If we aren't supposed to do that, or none are collected,
+            # we set `na_filter` to `0` (`1` otherwise).
+            na_flist = set()
+
+            if self.na_filter:
+                na_list, na_flist = self._get_na_list(i, name)
+                if na_list is None:
+                    na_filter = 0
+                else:
+                    na_filter = 1
+                    na_hashset = kset_from_list(na_list)
+            else:
+                na_filter = 0
 
-            if na_filter:
-                self._free_na_set(na_hashset)
+            # Attempt to parse tokens and infer dtype of the column.
+            # Should return as the desired dtype (inferred or specified).
+            try:
+                col_res, na_count = self._convert_tokens(
+                    i, start, end, name, na_filter, na_hashset,
+                    na_flist, col_dtype)
+            finally:
+                # gh-21353
+                #
+                # Cleanup the NaN hash that we generated
+                # to avoid memory leaks.
+                if na_filter:
+                    self._free_na_set(na_hashset)
 
             if upcast_na and na_count > 0:
                 col_res = _maybe_upcast(col_res)
@@ -2059,6 +2068,7 @@ cdef kh_str_t* kset_from_list(list values) except NULL:
 
         # None creeps in sometimes, which isn't possible here
         if not isinstance(val, bytes):
+            kh_destroy_str(table)
             raise ValueError('Must be all encoded bytes')
 
         k = kh_put_str(table, PyBytes_AsString(val), &ret)
 
@@ -882,7 +882,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
         bint shift = False, fill_nonexist = False
 
     # Vectorized version of DstTzInfo.localize
-    if tz == UTC or tz is None:
+    if is_utc(tz) or tz is None:
         return vals
 
     result = np.empty(n, dtype=np.int64)
 
@@ -727,6 +727,10 @@ def __add__(self, other):
             else:  # pragma: no cover
                 return NotImplemented
 
+            if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
+                from pandas.core.arrays import TimedeltaArrayMixin
+                # TODO: infer freq?
+                return TimedeltaArrayMixin(result)
             return result
 
         cls.__add__ = __add__
@@ -791,6 +795,10 @@ def __sub__(self, other):
             else:  # pragma: no cover
                 return NotImplemented
 
+            if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
+                from pandas.core.arrays import TimedeltaArrayMixin
+                # TODO: infer freq?
+                return TimedeltaArrayMixin(result)
             return result
 
         cls.__sub__ = __sub__