Skip to content

Commit 19a6c00

Browse files
committed
Merge master
2 parents 7c56581 + 612c244 commit 19a6c00

File tree

132 files changed

+1693
-1114
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

132 files changed

+1693
-1114
lines changed

.github/CONTRIBUTING.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
Whether you are a novice or experienced software developer, all contributions and suggestions are welcome!
44

5-
Our main contributing guide can be found [in this repo](https://github.com/pandas-dev/pandas/blob/master/doc/source/development/contributing.rst) or [on the website](https://pandas-docs.github.io/pandas-docs-travis/contributing.html). If you do not want to read it in its entirety, we will summarize the main ways in which you can contribute and point to relevant sections of that document for further information.
5+
Our main contributing guide can be found [in this repo](https://github.com/pandas-dev/pandas/blob/master/doc/source/development/contributing.rst) or [on the website](https://pandas-docs.github.io/pandas-docs-travis/development/contributing.html). If you do not want to read it in its entirety, we will summarize the main ways in which you can contribute and point to relevant sections of that document for further information.
66

77
## Getting Started
88

asv_bench/benchmarks/io/csv.py

+29
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,35 @@ def time_read_csv(self, infer_datetime_format, format):
9696
infer_datetime_format=infer_datetime_format)
9797

9898

99+
class ReadCSVConcatDatetime(StringIORewind):
100+
101+
iso8601 = '%Y-%m-%d %H:%M:%S'
102+
103+
def setup(self):
104+
rng = date_range('1/1/2000', periods=50000, freq='S')
105+
self.StringIO_input = StringIO('\n'.join(
106+
rng.strftime(self.iso8601).tolist()))
107+
108+
def time_read_csv(self):
109+
read_csv(self.data(self.StringIO_input),
110+
header=None, names=['foo'], parse_dates=['foo'],
111+
infer_datetime_format=False)
112+
113+
114+
class ReadCSVConcatDatetimeBadDateValue(StringIORewind):
115+
116+
params = (['nan', '0', ''],)
117+
param_names = ['bad_date_value']
118+
119+
def setup(self, bad_date_value):
120+
self.StringIO_input = StringIO(('%s,\n' % bad_date_value) * 50000)
121+
122+
def time_read_csv(self, bad_date_value):
123+
read_csv(self.data(self.StringIO_input),
124+
header=None, names=['foo', 'bar'], parse_dates=['foo'],
125+
infer_datetime_format=False)
126+
127+
99128
class ReadCSVSkipRows(BaseIO):
100129

101130
fname = '__test__.csv'

asv_bench/benchmarks/io/parsers.py

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import numpy as np
2+
3+
from pandas._libs.tslibs.parsing import (
4+
_concat_date_cols, _does_string_look_like_datetime)
5+
6+
7+
class DoesStringLookLikeDatetime(object):
8+
9+
params = (['2Q2005', '0.0', '10000'],)
10+
param_names = ['value']
11+
12+
def setup(self, value):
13+
self.objects = [value] * 1000000
14+
15+
def time_check_datetimes(self, value):
16+
for obj in self.objects:
17+
_does_string_look_like_datetime(obj)
18+
19+
20+
class ConcatDateCols(object):
21+
22+
params = ([1234567890, 'AAAA'], [1, 2])
23+
param_names = ['value', 'dim']
24+
25+
def setup(self, value, dim):
26+
count_elem = 10000
27+
if dim == 1:
28+
self.object = (np.array([value] * count_elem),)
29+
if dim == 2:
30+
self.object = (np.array([value] * count_elem),
31+
np.array([value] * count_elem))
32+
33+
def time_check_concat(self, value, dim):
34+
_concat_date_cols(self.object)

ci/code_checks.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,8 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
148148
invgrep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas
149149
RET=$(($RET + $?)) ; echo $MSG "DONE"
150150

151-
MSG='Check for python2 new-style classes' ; echo $MSG
152-
invgrep -R --include="*.py" --include="*.pyx" -E "class\s\S*\(object\):" pandas scripts
151+
MSG='Check for python2 new-style classes and for empty parentheses' ; echo $MSG
152+
invgrep -R --include="*.py" --include="*.pyx" -E "class\s\S*\((object)?\):" pandas scripts
153153
RET=$(($RET + $?)) ; echo $MSG "DONE"
154154

155155
MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo $MSG

ci/deps/azure-windows-37.yaml

-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
name: pandas-dev
22
channels:
33
- defaults
4-
- conda-forge
54
dependencies:
65
- beautifulsoup4
76
- bottleneck
8-
- gcsfs
97
- html5lib
108
- jinja2
119
- lxml

ci/incremental/setup_conda_environment.cmd

+2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ conda remove --all -q -y -n pandas-dev
1616
conda env create --file=ci\deps\azure-windows-%CONDA_PY%.yaml
1717

1818
call activate pandas-dev
19+
@rem gh-26345: we need to separate this out so that Azure doesn't complain
20+
conda install -c conda-forge gcsfs
1921
conda list
2022

2123
if %errorlevel% neq 0 exit /b %errorlevel%

ci/run_with_env.cmd

-95
This file was deleted.

doc/source/reference/frame.rst

+23
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,29 @@ specific plotting methods of the form ``DataFrame.plot.<kind>``.
311311
DataFrame.boxplot
312312
DataFrame.hist
313313

314+
315+
.. _api.frame.sparse:
316+
317+
Sparse Accessor
318+
~~~~~~~~~~~~~~~
319+
320+
Sparse-dtype specific methods and attributes are provided under the
321+
``DataFrame.sparse`` accessor.
322+
323+
.. autosummary::
324+
:toctree: api/
325+
:template: autosummary/accessor_attribute.rst
326+
327+
DataFrame.sparse.density
328+
329+
.. autosummary::
330+
:toctree: api/
331+
332+
DataFrame.sparse.from_spmatrix
333+
DataFrame.sparse.to_coo
334+
DataFrame.sparse.to_dense
335+
336+
314337
Serialization / IO / Conversion
315338
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
316339
.. autosummary::

doc/source/user_guide/enhancingperf.rst

+13-6
Original file line numberDiff line numberDiff line change
@@ -234,14 +234,18 @@ the rows, applying our ``integrate_f_typed``, and putting this in the zeros arra
234234

235235
.. code-block:: ipython
236236
237-
In [4]: %timeit apply_integrate_f(df['a'].values, df['b'].values, df['N'].values)
237+
In [4]: %timeit apply_integrate_f(df['a'].to_numpy(),
238+
df['b'].to_numpy(),
239+
df['N'].to_numpy())
238240
1000 loops, best of 3: 1.25 ms per loop
239241
240242
We've gotten another big improvement. Let's check again where the time is spent:
241243

242244
.. ipython:: python
243245
244-
%prun -l 4 apply_integrate_f(df['a'].values, df['b'].values, df['N'].values)
246+
%prun -l 4 apply_integrate_f(df['a'].to_numpy(),
247+
df['b'].to_numpy(),
248+
df['N'].to_numpy())
245249
246250
As one might expect, the majority of the time is now spent in ``apply_integrate_f``,
247251
so if we wanted to make any more efficiency gains we must continue to concentrate our
@@ -286,7 +290,9 @@ advanced Cython techniques:
286290

287291
.. code-block:: ipython
288292
289-
In [4]: %timeit apply_integrate_f_wrap(df['a'].values, df['b'].values, df['N'].values)
293+
In [4]: %timeit apply_integrate_f_wrap(df['a'].to_numpy(),
294+
df['b'].to_numpy(),
295+
df['N'].to_numpy())
290296
1000 loops, best of 3: 987 us per loop
291297
292298
Even faster, with the caveat that a bug in our Cython code (an off-by-one error,
@@ -349,8 +355,9 @@ take the plain Python code from above and annotate with the ``@jit`` decorator.
349355
350356
351357
def compute_numba(df):
352-
result = apply_integrate_f_numba(df['a'].values, df['b'].values,
353-
df['N'].values)
358+
result = apply_integrate_f_numba(df['a'].to_numpy(),
359+
df['b'].to_numpy(),
360+
df['N'].to_numpy())
354361
return pd.Series(result, index=df.index, name='result')
355362
356363
Note that we directly pass NumPy arrays to the Numba function. ``compute_numba`` is just a wrapper that provides a
@@ -394,7 +401,7 @@ Consider the following toy example of doubling each observation:
394401
1000 loops, best of 3: 233 us per loop
395402
396403
# Custom function with numba
397-
In [7]: %timeit (df['col1_doubled'] = double_every_value_withnumba(df.a.values)
404+
In [7]: %timeit (df['col1_doubled'] = double_every_value_withnumba(df.a.to_numpy())
398405
1000 loops, best of 3: 145 us per loop
399406
400407
Caveats

doc/source/whatsnew/v0.25.0.rst

+8-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Other Enhancements
3535
- :meth:`RangeIndex.union` now supports the ``sort`` argument. If ``sort=False`` an unsorted ``Int64Index`` is always returned. ``sort=None`` is the default and returns a monotonically increasing ``RangeIndex`` if possible or a sorted ``Int64Index`` if not (:issue:`24471`)
3636
- :meth:`TimedeltaIndex.intersection` now also supports the ``sort`` keyword (:issue:`24471`)
3737
- :meth:`DataFrame.rename` now supports the ``errors`` argument to raise errors when attempting to rename nonexistent keys (:issue:`13473`)
38+
- Added :ref:`api.frame.sparse` for working with a ``DataFrame`` whose values are sparse (:issue:`25681`)
3839
- :class:`RangeIndex` has gained :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop`, and :attr:`~RangeIndex.step` attributes (:issue:`25710`)
3940
- :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`)
4041
- :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`)
@@ -252,6 +253,8 @@ Performance Improvements
252253
- Improved performance of :meth:`read_csv` by much faster parsing of ``MM/YYYY`` and ``DD/MM/YYYY`` datetime formats (:issue:`25922`)
253254
- Improved performance of nanops for dtypes that cannot store NaNs. Speedup is particularly prominent for :meth:`Series.all` and :meth:`Series.any` (:issue:`25070`)
254255
- Improved performance of :meth:`Series.map` for dictionary mappers on categorical series by mapping the categories instead of mapping all values (:issue:`23785`)
256+
- Improved performance of :meth:`read_csv` by concatenating date columns faster, without extra conversion to string for integer/float zero
257+
and float ``NaN``; and by faster checking of whether a string could be a date (:issue:`25754`)
255258

256259
.. _whatsnew_0250.bug_fixes:
257260

@@ -276,6 +279,7 @@ Datetimelike
276279
- Improved :class:`Timestamp` type checking in various datetime functions to prevent exceptions when using a subclassed ``datetime`` (:issue:`25851`)
277280
- Bug in :class:`Series` and :class:`DataFrame` repr where ``np.datetime64('NaT')`` and ``np.timedelta64('NaT')`` with ``dtype=object`` would be represented as ``NaN`` (:issue:`25445`)
278281
- Bug in :func:`to_datetime` which does not replace the invalid argument with ``NaT`` when error is set to coerce (:issue:`26122`)
282+
- Bug in adding :class:`DateOffset` with nonzero month to :class:`DatetimeIndex` would raise ``ValueError`` (:issue:`26258`)
279283

280284
Timedelta
281285
^^^^^^^^^
@@ -305,8 +309,9 @@ Numeric
305309
- Bug in :meth:`Series.divmod` and :meth:`Series.rdivmod` which would raise an (incorrect) ``ValueError`` rather than return a pair of :class:`Series` objects as result (:issue:`25557`)
306310
- Raises a helpful exception when a non-numeric index is sent to :meth:`interpolate` with methods which require numeric index. (:issue:`21662`)
307311
- Bug in :meth:`~pandas.eval` when comparing floats with scalar operators, for example: ``x < -0.1`` (:issue:`25928`)
312+
- Fixed bug where casting all-boolean array to integer extension array failed (:issue:`25211`)
313+
-
308314
-
309-
310315

311316
Conversion
312317
^^^^^^^^^^
@@ -415,6 +420,7 @@ Reshaping
415420
- Bug in :func:`pivot_table` where columns with ``NaN`` values are dropped even if ``dropna`` argument is ``False``, when the ``aggfunc`` argument contains a ``list`` (:issue:`22159`)
416421
- Bug in :func:`concat` where the resulting ``freq`` of two :class:`DatetimeIndex` with the same ``freq`` would be dropped (:issue:`3232`).
417422
- Bug in :func:`merge` where merging with equivalent Categorical dtypes was raising an error (:issue:`22501`)
423+
- Bug in :class:`DataFrame` instantiating with a ``range`` (e.g. ``pd.DataFrame(range(3))``) raised an error (:issue:`26342`).
418424
- Bug in :class:`DataFrame` constructor when passing non-empty tuples would cause a segmentation fault (:issue:`25691`)
419425
- Bug in :func:`pandas.cut` where large bins could incorrectly raise an error due to an integer overflow (:issue:`26045`)
420426
- Bug in :func:`DataFrame.sort_index` where an error is thrown when a multi-indexed DataFrame is sorted on all levels with the initial level sorted last (:issue:`26053`)
@@ -433,6 +439,7 @@ Other
433439

434440
- Removed unused C functions from vendored UltraJSON implementation (:issue:`26198`)
435441
- Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`).
442+
- Allow :class:`Index` and :class:`RangeIndex` to be passed to numpy ``min`` and ``max`` functions.
436443

437444

438445
.. _whatsnew_0.250.contributors:

mypy.ini

-18
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,6 @@ follow_imports=silent
55
[mypy-pandas.conftest,pandas.tests.*]
66
ignore_errors=True
77

8-
[mypy-pandas.core.api]
9-
ignore_errors=True
10-
11-
[mypy-pandas.core.base]
12-
ignore_errors=True
13-
14-
[mypy-pandas.core.computation.expr]
15-
ignore_errors=True
16-
17-
[mypy-pandas.core.computation.ops]
18-
ignore_errors=True
19-
20-
[mypy-pandas.core.computation.pytables]
21-
ignore_errors=True
22-
23-
[mypy-pandas.core.indexes.base]
24-
ignore_errors=True
25-
268
[mypy-pandas.core.indexes.datetimes]
279
ignore_errors=True
2810

pandas/_libs/lib.pyx

+5-8
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@ import warnings
88
import cython
99
from cython import Py_ssize_t
1010

11-
from cpython cimport (Py_INCREF, PyTuple_SET_ITEM,
12-
PyTuple_New,
13-
Py_EQ,
14-
PyObject_RichCompareBool)
11+
from cpython cimport (Py_INCREF, PyTuple_SET_ITEM, PyTuple_New, PyObject_Str,
12+
Py_EQ, Py_SIZE, PyObject_RichCompareBool,
13+
PyUnicode_Join, PyList_New)
1514

1615
from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
1716
PyTime_Check, PyDelta_Check,
@@ -23,10 +22,8 @@ cimport numpy as cnp
2322
from numpy cimport (ndarray, PyArray_GETITEM,
2423
PyArray_ITER_DATA, PyArray_ITER_NEXT, PyArray_IterNew,
2524
flatiter, NPY_OBJECT,
26-
int64_t,
27-
float32_t, float64_t,
28-
uint8_t, uint64_t,
29-
complex128_t)
25+
int64_t, float32_t, float64_t,
26+
uint8_t, uint64_t, complex128_t)
3027
cnp.import_array()
3128

3229
cdef extern from "numpy/arrayobject.h":

0 commit comments

Comments
 (0)