TomAugspurger
diff --git a/‎.pep8speaks.yml
-1 b/‎.pep8speaks.yml
-1
diff --git a/‎asv_bench/benchmarks/timeseries.py
+31-7 b/‎asv_bench/benchmarks/timeseries.py
+31-7
diff --git a/‎asv_bench/benchmarks/timestamp.py
+16-2 b/‎asv_bench/benchmarks/timestamp.py
+16-2
diff --git a/‎doc/source/conf.py
+3-3 b/‎doc/source/conf.py
+3-3
diff --git a/‎doc/source/install.rst
+3-1 b/‎doc/source/install.rst
+3-1
diff --git a/‎doc/source/whatsnew/v0.24.0.rst
+31-10 b/‎doc/source/whatsnew/v0.24.0.rst
+31-10
diff --git a/‎pandas/_libs/lib.pyx
+2-2 b/‎pandas/_libs/lib.pyx
+2-2
diff --git a/‎pandas/_libs/parsers.pyx
+28-18 b/‎pandas/_libs/parsers.pyx
+28-18
diff --git a/‎pandas/_libs/tslibs/ccalendar.pyx
+3 b/‎pandas/_libs/tslibs/ccalendar.pyx
+3
@@ -13,7 +13,6 @@ pycodestyle:
         - W503,  # line break before binary operator
         - W504,  # line break after binary operator
         - E402,  # module level import not at top of file
-        - E722,  # do not use bare except
         - E731,  # do not assign a lambda expression, use a def
         - C406,  # Unnecessary list literal - rewrite as a dict literal.
         - C408,  # Unnecessary dict call - rewrite as a literal.
 
@@ -1,5 +1,6 @@
 from datetime import timedelta
 
+import dateutil
 import numpy as np
 from pandas import to_datetime, date_range, Series, DataFrame, period_range
 from pandas.tseries.frequencies import infer_freq
@@ -57,7 +58,10 @@ def time_to_pydatetime(self, index_type):
 
 class TzLocalize(object):
 
-    def setup(self):
+    params = [None, 'US/Eastern', 'UTC', dateutil.tz.tzutc()]
+    param_names = 'tz'
+
+    def setup(self, tz):
         dst_rng = date_range(start='10/29/2000 1:00:00',
                              end='10/29/2000 1:59:59', freq='S')
         self.index = date_range(start='10/29/2000',
@@ -68,8 +72,8 @@ def setup(self):
                                                   end='10/29/2000 3:00:00',
                                                   freq='S'))
 
-    def time_infer_dst(self):
-        self.index.tz_localize('US/Eastern', ambiguous='infer')
+    def time_infer_dst(self, tz):
+        self.index.tz_localize(tz, ambiguous='infer')
 
 
 class ResetIndex(object):
@@ -377,15 +381,35 @@ def time_dup_string_tzoffset_dates(self, cache):
 
 class DatetimeAccessor(object):
 
-    def setup(self):
+    params = [None, 'US/Eastern', 'UTC', dateutil.tz.tzutc()]
+    param_names = 'tz'
+
+    def setup(self, tz):
         N = 100000
-        self.series = Series(date_range(start='1/1/2000', periods=N, freq='T'))
+        self.series = Series(
+            date_range(start='1/1/2000', periods=N, freq='T', tz=tz)
+        )
 
-    def time_dt_accessor(self):
+    def time_dt_accessor(self, tz):
         self.series.dt
 
-    def time_dt_accessor_normalize(self):
+    def time_dt_accessor_normalize(self, tz):
         self.series.dt.normalize()
 
+    def time_dt_accessor_month_name(self, tz):
+        self.series.dt.month_name()
+
+    def time_dt_accessor_day_name(self, tz):
+        self.series.dt.day_name()
+
+    def time_dt_accessor_time(self, tz):
+        self.series.dt.time
+
+    def time_dt_accessor_date(self, tz):
+        self.series.dt.date
+
+    def time_dt_accessor_year(self, tz):
+        self.series.dt.year
+
 
 from .pandas_vb_common import setup  # noqa: F401
@@ -2,6 +2,7 @@
 
 from pandas import Timestamp
 import pytz
+import dateutil
 
 
 class TimestampConstruction(object):
@@ -29,7 +30,8 @@ def time_fromtimestamp(self):
 
 
 class TimestampProperties(object):
-    _tzs = [None, pytz.timezone('Europe/Amsterdam')]
+    _tzs = [None, pytz.timezone('Europe/Amsterdam'), pytz.UTC,
+            dateutil.tz.tzutc()]
     _freqs = [None, 'B']
     params = [_tzs, _freqs]
     param_names = ['tz', 'freq']
@@ -87,7 +89,8 @@ def time_microsecond(self, tz, freq):
 
 
 class TimestampOps(object):
-    params = [None, 'US/Eastern']
+    params = [None, 'US/Eastern', pytz.UTC,
+              dateutil.tz.tzutc()]
     param_names = ['tz']
 
     def setup(self, tz):
@@ -102,6 +105,17 @@ def time_replace_None(self, tz):
     def time_to_pydatetime(self, tz):
         self.ts.to_pydatetime()
 
+    def time_normalize(self, tz):
+        self.ts.normalize()
+
+    def time_tz_convert(self, tz):
+        if self.ts.tz is not None:
+            self.ts.tz_convert(tz)
+
+    def time_tz_localize(self, tz):
+        if self.ts.tz is None:
+            self.ts.tz_localize(tz)
+
 
 class TimestampAcrossDst(object):
     def setup(self):
 
@@ -586,7 +586,7 @@ def linkcode_resolve(domain, info):
     for part in fullname.split('.'):
         try:
             obj = getattr(obj, part)
-        except:
+        except AttributeError:
             return None
 
     try:
@@ -595,14 +595,14 @@ def linkcode_resolve(domain, info):
             fn = inspect.getsourcefile(inspect.unwrap(obj))
         else:
             fn = inspect.getsourcefile(obj)
-    except:
+    except TypeError:
         fn = None
     if not fn:
         return None
 
     try:
         source, lineno = inspect.getsourcelines(obj)
-    except:
+    except OSError:
         lineno = None
 
     if lineno:
 
@@ -286,7 +286,9 @@ Optional Dependencies
   `xsel <http://www.vergenet.net/~conrad/software/xsel/>`__, or
   `xclip <https://github.com/astrand/xclip/>`__: necessary to use
   :func:`~pandas.read_clipboard`. Most package managers on Linux distributions will have ``xclip`` and/or ``xsel`` immediately available for installation.
-* `pandas-gbq <https://pandas-gbq.readthedocs.io/en/latest/install.html#dependencies>`__: for Google BigQuery I/O.
+* `pandas-gbq
+  <https://pandas-gbq.readthedocs.io/en/latest/install.html#dependencies>`__:
+  for Google BigQuery I/O. (pandas-gbq >= 0.8.0)
 
 
 * `Backports.lzma <https://pypi.org/project/backports.lzma/>`__: Only for Python 2, for writing to and/or reading from an xz compressed DataFrame in CSV; Python 3 support is built into the standard library.
 
@@ -24,7 +24,8 @@ New features
   the user to override the engine's default behavior to include or omit the
   dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
 - :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`)
-
+- :func:`DataFrame.to_string` now accepts ``decimal`` as an argument, allowing
+  the user to specify which decimal separator should be used in the output. (:issue:`23614`)
 
 .. _whatsnew_0240.enhancements.extension_array_operators:
 
@@ -259,9 +260,12 @@ Other Enhancements
 - :meth:`Series.droplevel` and :meth:`DataFrame.droplevel` are now implemented (:issue:`20342`)
 - Added support for reading from/writing to Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`, :issue:`23094`)
 - :func:`to_gbq` and :func:`read_gbq` signature and documentation updated to
-  reflect changes from the `Pandas-GBQ library version 0.6.0
-  <https://pandas-gbq.readthedocs.io/en/latest/changelog.html#changelog-0-6-0>`__.
-  (:issue:`21627`, :issue:`22557`)
+  reflect changes from the `Pandas-GBQ library version 0.8.0
+  <https://pandas-gbq.readthedocs.io/en/latest/changelog.html#changelog-0-8-0>`__.
+  Adds a ``credentials`` argument, which enables the use of any kind of
+  `google-auth credentials
+  <https://google-auth.readthedocs.io/en/latest/>`__. (:issue:`21627`,
+  :issue:`22557`, :issue:`23662`)
 - New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of an HDF5 file (:issue:`10932`)
 - :func:`read_html` copies cell data across ``colspan`` and ``rowspan``, and it treats all-``th`` table rows as headers if ``header`` kwarg is not given and there is no ``thead`` (:issue:`17054`)
 - :meth:`Series.nlargest`, :meth:`Series.nsmallest`, :meth:`DataFrame.nlargest`, and :meth:`DataFrame.nsmallest` now accept the value ``"all"`` for the ``keep`` argument. This keeps all ties for the nth largest/smallest value (:issue:`16818`)
@@ -287,6 +291,7 @@ Other Enhancements
 - :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`)
 - :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object.
 - :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed string columns to Stata strl format (:issue:`23633`)
+- :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained an ``axis`` parameter (:issue:`8839`)
 
 .. _whatsnew_0240.api_breaking:
 
@@ -312,17 +317,19 @@ If installed, we now require:
 +-----------------+-----------------+----------+
 | bottleneck      | 1.2.0           |          |
 +-----------------+-----------------+----------+
+| fastparquet     | 0.1.2           |          |
++-----------------+-----------------+----------+
 | matplotlib      | 2.0.0           |          |
 +-----------------+-----------------+----------+
 | numexpr         | 2.6.1           |          |
 +-----------------+-----------------+----------+
-| pytables        | 3.4.2           |          |
-+-----------------+-----------------+----------+
-| scipy           | 0.18.1          |          |
+| pandas-gbq      | 0.8.0           |          |
 +-----------------+-----------------+----------+
 | pyarrow         | 0.7.0           |          |
 +-----------------+-----------------+----------+
-| fastparquet     | 0.1.2           |          |
+| pytables        | 3.4.2           |          |
++-----------------+-----------------+----------+
+| scipy           | 0.18.1          |          |
 +-----------------+-----------------+----------+
 
 Additionally we no longer depend on `feather-format` for feather based storage
@@ -1002,7 +1009,10 @@ Other API Changes
 - Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
 - :class:`DateOffset` attribute `_cacheable` and method `_should_cache` have been removed (:issue:`23118`)
 - Comparing :class:`Timedelta` to be less or greater than unknown types now raises a ``TypeError`` instead of returning ``False`` (:issue:`20829`)
+- :meth:`Categorical.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23466`).
+- :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather than a ``ValueError``, if a searched for key is not found in its categories (:issue:`23466`).
 - :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`).
+- The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other. (:issue:`23614`)
 
 .. _whatsnew_0240.deprecations:
 
@@ -1029,6 +1039,9 @@ Deprecations
 - :meth:`ExtensionArray._formatting_values` is deprecated. Use `ExtensionArray._formatter` instead. (:issue:`23601`)
 - :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`)
 - Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`)
+- The ``keep_tz=False`` option (the default) of the ``keep_tz`` keyword of
+  :meth:`DatetimeIndex.to_series` is deprecated (:issue:`17832`).
+- Timezone converting a tz-aware ``datetime.datetime`` or :class:`Timestamp` with :class:`Timestamp` and the ``tz`` argument is now deprecated. Instead, use :meth:`Timestamp.tz_convert` (:issue:`23579`)
 
 .. _whatsnew_0240.deprecations.datetimelike_int_ops:
 
@@ -1132,6 +1145,8 @@ Performance Improvements
 - Improved performance of :func:`IndexEngine.get_indexer_non_unique` for sorted, non-unique indexes (:issue:`9466`)
 - Improved performance of :func:`PeriodIndex.unique` (:issue:`23083`)
 - Improved performance of :func:`pd.concat` for `Series` objects (:issue:`23404`)
+- Improved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone naive or UTC datetimes (:issue:`23634`)
+- Improved performance of :meth:`DatetimeIndex.tz_localize` and various ``DatetimeIndex`` attributes with dateutil UTC timezone (:issue:`23772`)
 
 
 .. _whatsnew_0240.docs:
@@ -1262,8 +1277,8 @@ Numeric
 Strings
 ^^^^^^^
 
--
--
+- Bug in :meth:`Index.str.partition` was not nan-safe (:issue:`23558`).
+- Bug in :meth:`Index.str.split` was not nan-safe (:issue:`23677`).
 -
 
 Interval
@@ -1360,6 +1375,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
 - :func:`read_csv()` and :func:`read_table()` will throw ``UnicodeError`` and not coredump on badly encoded strings (:issue:`22748`)
 - :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`)
 - Bug in :func:`read_csv()` in which memory management was prematurely optimized for the C engine when the data was being read in chunks (:issue:`23509`)
+- Bug in :func:`read_csv()` in which unnamed columns were being improperly identified when extracting a multi-index (:issue:`23687`)
 - :func:`read_sas()` will parse numbers in sas7bdat-files that have width less than 8 bytes correctly. (:issue:`21616`)
 - :func:`read_sas()` will correctly parse sas7bdat files with many columns (:issue:`22628`)
 - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`)
@@ -1369,8 +1385,12 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
 - Bug in :func:`DataFrame.to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`)
 - Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`).
 - Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`)
+- Bug in :func:`read_csv()` in which memory leaks occurred in the C engine when parsing ``NaN`` values due to insufficient cleanup on completion or error (:issue:`21353`)
+- Bug in :func:`read_csv()` in which incorrect error messages were being raised when ``skipfooter`` was passed in along with ``nrows``, ``iterator``, or ``chunksize`` (:issue:`23711`)
 - Bug in :meth:`read_csv()` in which :class:`MultiIndex` index names were being improperly handled in the cases when they were not provided (:issue:`23484`)
+- Bug in :meth:`read_csv()` in which unnecessary warnings were being raised when the dialect's values conflicted with the default arguments (:issue:`23761`)
 - Bug in :meth:`read_html()` in which the error message was not displaying the valid flavors when an invalid one was provided (:issue:`23549`)
+- Bug in :meth:`read_excel()` in which extraneous header names were extracted, even though none were specified (:issue:`11733`)
 - Bug in :meth:`read_excel()` in which ``index_col=None`` was not being respected and parsing index columns anyway (:issue:`20480`)
 - Bug in :meth:`read_excel()` in which ``usecols`` was not being validated for proper column names when passed in as a string (:issue:`20480`)
 
@@ -1434,6 +1454,7 @@ Sparse
 - Bug in ``DataFrame.groupby`` not including ``fill_value`` in the groups for non-NA ``fill_value`` when grouping by a sparse column (:issue:`5078`)
 - Bug in unary inversion operator (``~``) on a ``SparseSeries`` with boolean values. The performance of this has also been improved (:issue:`22835`)
 - Bug in :meth:`SparseArray.unique` not returning the unique values (:issue:`19595`)
+- Bug in :meth:`SparseArray.nonzero` and :meth:`SparseDataFrame.dropna` returning shifted/incorrect results (:issue:`21172`)
 
 Build Changes
 ^^^^^^^^^^^^^
 
@@ -2273,7 +2273,7 @@ def to_object_array_tuples(rows: list):
 
     k = 0
     for i in range(n):
-        tmp = len(rows[i])
+        tmp = 1 if checknull(rows[i]) else len(rows[i])
         if tmp > k:
             k = tmp
 
@@ -2287,7 +2287,7 @@ def to_object_array_tuples(rows: list):
     except Exception:
         # upcast any subclasses to tuple
         for i in range(n):
-            row = tuple(rows[i])
+            row = (rows[i],) if checknull(rows[i]) else tuple(rows[i])
             for j in range(len(row)):
                 result[i, j] = row[j]
 
 
@@ -1070,18 +1070,6 @@ cdef class TextReader:
 
             conv = self._get_converter(i, name)
 
-            # XXX
-            na_flist = set()
-            if self.na_filter:
-                na_list, na_flist = self._get_na_list(i, name)
-                if na_list is None:
-                    na_filter = 0
-                else:
-                    na_filter = 1
-                    na_hashset = kset_from_list(na_list)
-            else:
-                na_filter = 0
-
             col_dtype = None
             if self.dtype is not None:
                 if isinstance(self.dtype, dict):
@@ -1106,13 +1094,34 @@ cdef class TextReader:
                                               self.c_encoding)
                 continue
 
-            # Should return as the desired dtype (inferred or specified)
-            col_res, na_count = self._convert_tokens(
-                i, start, end, name, na_filter, na_hashset,
-                na_flist, col_dtype)
+            # Collect the list of NaN values associated with the column.
+            # If we aren't supposed to do that, or none are collected,
+            # we set `na_filter` to `0` (`1` otherwise).
+            na_flist = set()
+
+            if self.na_filter:
+                na_list, na_flist = self._get_na_list(i, name)
+                if na_list is None:
+                    na_filter = 0
+                else:
+                    na_filter = 1
+                    na_hashset = kset_from_list(na_list)
+            else:
+                na_filter = 0
 
-            if na_filter:
-                self._free_na_set(na_hashset)
+            # Attempt to parse tokens and infer dtype of the column.
+            # Should return as the desired dtype (inferred or specified).
+            try:
+                col_res, na_count = self._convert_tokens(
+                    i, start, end, name, na_filter, na_hashset,
+                    na_flist, col_dtype)
+            finally:
+                # gh-21353
+                #
+                # Cleanup the NaN hash that we generated
+                # to avoid memory leaks.
+                if na_filter:
+                    self._free_na_set(na_hashset)
 
             if upcast_na and na_count > 0:
                 col_res = _maybe_upcast(col_res)
@@ -2059,6 +2068,7 @@ cdef kh_str_t* kset_from_list(list values) except NULL:
 
         # None creeps in sometimes, which isn't possible here
         if not isinstance(val, bytes):
+            kh_destroy_str(table)
             raise ValueError('Must be all encoded bytes')
 
         k = kh_put_str(table, PyBytes_AsString(val), &ret)
 
@@ -49,6 +49,9 @@ DAYS_FULL = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
 int_to_weekday = {num: name for num, name in enumerate(DAYS)}
 weekday_to_int = {int_to_weekday[key]: key for key in int_to_weekday}
 
+DAY_SECONDS = 86400
+HOUR_SECONDS = 3600
+
 # ----------------------------------------------------------------------