diff --git a/appveyor.yml b/appveyor.yml
index 65e62f887554e..a1f8886f6d068 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -74,12 +74,18 @@ install:
   # create our env
   - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest>=3.1.0 pytest-xdist
   - cmd: activate pandas
+  - cmd: pip install moto
   - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run
   - cmd: echo "installing requirements from %REQ%"
   - cmd: conda install -n pandas --file=%REQ%
   - cmd: conda list -n pandas
   - cmd: echo "installing requirements from %REQ% - done"
+  # add some pip only reqs to the env
+  - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.pip
+  - cmd: echo "installing requirements from %REQ%"
+  - cmd: pip install -Ur %REQ%
+
   # build em using the local source checkout in the correct windows env
   - cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace'
diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index 6432ccfb19efe..d90c994b3d194 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -67,6 +67,9 @@ def time_value_counts_dropna(self):
     def time_rendering(self):
         str(self.sel)
 
+    def time_set_categories(self):
+        self.ts.cat.set_categories(self.ts.cat.categories[::2])
+
 
 class Categoricals3(object):
     goal_time = 0.2
diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py
index f9837191a7bae..df3c2bf3e4b46 100644
--- a/asv_bench/benchmarks/period.py
+++ b/asv_bench/benchmarks/period.py
@@ -2,6 +2,35 @@
 from pandas import Series, Period, PeriodIndex, date_range
 
 
+class PeriodProperties(object):
+    def setup(self):
+        self.per = Period('2012-06-01', freq='M')
+
+    def time_year(self):
+        self.per.year
+
+    def time_month(self):
+        self.per.month
+
+    def time_quarter(self):
+        self.per.quarter
+
+    def time_day(self):
+        self.per.day
+
+    def time_hour(self):
+        self.per.hour
+
+    def time_minute(self):
+        self.per.minute
+
+    def time_second(self):
+        self.per.second
+
+    def time_leap_year(self):
+        self.per.is_leap_year
+
+
 class Constructor(object):
     goal_time = 0.2
 
@@ -49,6 +78,65 @@ def time_value_counts_pindex(self):
         self.i.value_counts()
 
 
+class Properties(object):
+    def setup(self):
+        self.per = Period('2017-09-06 08:28', freq='min')
+
+    def time_year(self):
+        self.per.year
+
+    def time_month(self):
+        self.per.month
+
+    def time_day(self):
+        self.per.day
+
+    def time_hour(self):
+        self.per.hour
+
+    def time_minute(self):
+        self.per.minute
+
+    def time_second(self):
+        self.per.second
+
+    def time_is_leap_year(self):
+        self.per.is_leap_year
+
+    def time_quarter(self):
+        self.per.quarter
+
+    def time_qyear(self):
+        self.per.qyear
+
+    def time_week(self):
+        self.per.week
+
+    def time_daysinmonth(self):
+        self.per.daysinmonth
+
+    def time_dayofweek(self):
+        self.per.dayofweek
+
+    def time_dayofyear(self):
+        self.per.dayofyear
+
+    def time_start_time(self):
+        self.per.start_time
+
+    def time_end_time(self):
+        self.per.end_time
+
+    def time_to_timestamp(self):
+        self.per.to_timestamp()
+
+    def time_now(self):
+        self.per.now()
+
+    def time_asfreq(self):
+        self.per.asfreq('A')
+
+
 class period_standard_indexing(object):
     goal_time = 0.2
diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/timestamp.py
new file mode 100644
index 0000000000000..066479b22739a
--- /dev/null
+++ b/asv_bench/benchmarks/timestamp.py
@@ -0,0 +1,60 @@
+from .pandas_vb_common import *
+from pandas import to_timedelta, Timestamp
+
+
+class TimestampProperties(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.ts = Timestamp('2017-08-25 08:16:14')
+
+    def time_tz(self):
+        self.ts.tz
+
+    def time_offset(self):
+        self.ts.offset
+
+    def time_dayofweek(self):
+        self.ts.dayofweek
+
+    def time_weekday_name(self):
+        self.ts.weekday_name
+
+    def time_dayofyear(self):
+        self.ts.dayofyear
+
+    def time_week(self):
+        self.ts.week
+
+    def time_quarter(self):
+        self.ts.quarter
+
+    def time_days_in_month(self):
+        self.ts.days_in_month
+
+    def time_freqstr(self):
+        self.ts.freqstr
+
+    def time_is_month_start(self):
+        self.ts.is_month_start
+
+    def time_is_month_end(self):
+        self.ts.is_month_end
+
+    def time_is_quarter_start(self):
+        self.ts.is_quarter_start
+
+    def time_is_quarter_end(self):
+        self.ts.is_quarter_end
+
+    def time_is_year_start(self):
+        self.ts.is_year_start
+
+    def time_is_year_end(self):
+        self.ts.is_year_end
+
+    def time_is_leap_year(self):
+        self.ts.is_leap_year
+
+    def time_microsecond(self):
+        self.ts.microsecond
diff --git a/ci/install_circle.sh b/ci/install_circle.sh
index 29ca69970104b..fd79f907625e9 100755
--- a/ci/install_circle.sh
+++ b/ci/install_circle.sh
@@ -67,6 +67,7 @@ time conda create -n pandas -q --file=${REQ_BUILD} || exit 1
 time conda install -n pandas pytest>=3.1.0 || exit 1
 
 source activate pandas
+time pip install moto || exit 1
 
 # build but don't install
 echo "[build em]"
diff --git a/ci/install_travis.sh b/ci/install_travis.sh
index d26689f2e6b4b..b85263daa1eac 100755
--- a/ci/install_travis.sh
+++ b/ci/install_travis.sh
@@ -104,7 +104,7 @@ if [ -e ${REQ} ]; then
 fi
 
 time conda install -n pandas pytest>=3.1.0
-time pip install pytest-xdist
+time pip install pytest-xdist moto
 
 if [ "$LINT" ]; then
     conda install flake8
diff --git a/ci/requirements-2.7_WIN.pip b/ci/requirements-2.7_WIN.pip
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/ci/requirements-3.5.sh b/ci/requirements-3.5.sh
index 33db9c28c78a9..d694ad3679ac1 100644
--- a/ci/requirements-3.5.sh
+++ b/ci/requirements-3.5.sh
@@ -8,4 +8,4 @@ echo "install 35"
 conda remove -n pandas python-dateutil --force
 pip install python-dateutil
 
-conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1
+conda install -n pandas -c conda-forge feather-format pyarrow=0.5.0
diff --git a/ci/requirements-3.6_NUMPY_DEV.pip b/ci/requirements-3.6_NUMPY_DEV.pip
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/ci/requirements-3.6_WIN.pip b/ci/requirements-3.6_WIN.pip
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt
index c7190c506ba18..dbc4f6cbd6509 100644
--- a/ci/requirements_dev.txt
+++ b/ci/requirements_dev.txt
@@ -5,3 +5,4 @@ cython
 pytest>=3.1.0
 pytest-cov
 flake8
+moto
diff --git a/doc/source/10min.rst b/doc/source/10min.rst
index def49a641a0ff..ef6b2d6ef2c90 100644
--- a/doc/source/10min.rst
+++ b/doc/source/10min.rst
@@ -655,7 +655,7 @@ the quarter end:
 Categoricals
 ------------
 
-Since version 0.15, pandas can include categorical data in a ``DataFrame``. For full docs, see the
+pandas can include categorical data in a ``DataFrame``. For full docs, see the
 :ref:`categorical introduction ` and the :ref:`API documentation `.
 
 .. ipython:: python
diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
index 711c3e9a95d05..3bda8c7eacb61 100644
--- a/doc/source/advanced.rst
+++ b/doc/source/advanced.rst
@@ -26,12 +26,6 @@ See the :ref:`Indexing and Selecting Data ` for general indexing docum
    should be avoided. See :ref:`Returning a View versus Copy `
 
-.. warning::
-
-   In 0.15.0 ``Index`` has internally been refactored to no longer sub-class ``ndarray``
-   but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This should be
-   a transparent change with only very limited API implications (See the :ref:`Internal Refactoring `)
-
 See the :ref:`cookbook` for some advanced strategies
 
 .. _advanced.hierarchical:
@@ -270,9 +264,6 @@ Passing a list of labels or tuples works similar to reindexing:
 Using slicers
 ~~~~~~~~~~~~~
 
-.. versionadded:: 0.14.0
-
-In 0.14.0 we added a new way to slice multi-indexed objects.
 You can slice a multi-index by providing multiple indexers.
 
 You can provide any of the selectors as if you are indexing by label, see :ref:`Selection by Label `,
@@ -384,7 +375,7 @@ selecting data at a particular level of a MultiIndex easier.
 
 .. ipython:: python
 
-   # using the slicers (new in 0.14.0)
+   # using the slicers
    df.loc[(slice(None),'one'),:]
 
 You can also select on the columns with :meth:`~pandas.MultiIndex.xs`, by
 providing the axis argument
@@ -397,7 +388,7 @@ providing the axis argument
 
 .. ipython:: python
 
-   # using the slicers (new in 0.14.0)
+   # using the slicers
    df.loc[:,(slice(None),'one')]
 
 :meth:`~pandas.MultiIndex.xs` also allows selection with multiple keys
@@ -408,11 +399,9 @@ providing the axis argument
 
 .. ipython:: python
 
-   # using the slicers (new in 0.14.0)
+   # using the slicers
    df.loc[:,('bar','one')]
 
-.. versionadded:: 0.13.0
-
 You can pass ``drop_level=False`` to :meth:`~pandas.MultiIndex.xs`
 to retain the level that was selected
 
@@ -636,19 +625,16 @@ Index Types
 
 We have discussed ``MultiIndex`` in the previous sections pretty extensively. ``DatetimeIndex`` and ``PeriodIndex``
 are shown :ref:`here `. ``TimedeltaIndex`` are :ref:`here `.
 
-In the following sub-sections we will highlite some other index types.
+In the following sub-sections we will highlight some other index types.
 
 .. _indexing.categoricalindex:
 
 CategoricalIndex
 ~~~~~~~~~~~~~~~~
 
-.. versionadded:: 0.16.1
-
-We introduce a ``CategoricalIndex``, a new type of index object that is useful for supporting
-indexing with duplicates. This is a container around a ``Categorical`` (introduced in v0.15.0)
-and allows efficient indexing and storage of an index with a large number of duplicated elements. Prior to 0.16.1,
-setting the index of a ``DataFrame/Series`` with a ``category`` dtype would convert this to regular object-based ``Index``.
+``CategoricalIndex`` is a type of index that is useful for supporting
+indexing with duplicates. This is a container around a ``Categorical``
+and allows efficient indexing and storage of an index with a large number of duplicated elements.
 
 .. ipython:: python
 
    df.dtypes
   df.B.cat.categories
 
-Setting the index, will create create a ``CategoricalIndex``
+Setting the index will create a ``CategoricalIndex``
 
 .. ipython:: python
 
@@ -695,7 +681,7 @@ Groupby operations on the index will preserve the index nature as well
 
 Reindexing operations will return a resulting index based on the type
 of the passed indexer, meaning that passing a list will return a plain-old-``Index``; indexing with
 a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the categories
-of the PASSED ``Categorical`` dtype. This allows one to arbitrarly index these even with
+of the PASSED ``Categorical`` dtype. This allows one to arbitrarily index these even with
 values NOT in the categories, similarly to how you can reindex ANY pandas index.
.. ipython :: python
 
@@ -736,23 +722,13 @@ Int64Index and RangeIndex
 
 Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``NDFrame`` objects.
 
 ``RangeIndex`` is a sub-class of ``Int64Index`` added in version 0.18.0, now providing the default
 index for all ``NDFrame`` objects.
 
-``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analagous to python `range types `__.
+``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analogous to python `range types `__.
 
 .. _indexing.float64index:
 
 Float64Index
 ~~~~~~~~~~~~
 
-.. note::
-
-   As of 0.14.0, ``Float64Index`` is backed by a native ``float64`` dtype
-   array. Prior to 0.14.0, ``Float64Index`` was backed by an ``object`` dtype
-   array. Using a ``float64`` dtype in the backend speeds up arithmetic
-   operations by about 30x and boolean indexing operations on the
-   ``Float64Index`` itself are about 2x as fast.
-
-.. versionadded:: 0.13.0
-
 By default a ``Float64Index`` will be automatically created when passing floating, or mixed-integer-floating values in index creation.
 This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the same.
 
@@ -987,7 +963,7 @@ index can be somewhat complicated. For example, the following does not work:
 
     s.loc['c':'e'+1]
 
 A very common use case is to limit a time series to start and end at two
-specific dates. To enable this, we made the design design to make label-based
+specific dates. To enable this, we made the design choice to make label-based
 slicing include both endpoints:
 
 .. ipython:: python
diff --git a/doc/source/api.rst b/doc/source/api.rst
index 12e6c7ad7f630..4e02f7b11f466 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -218,10 +218,19 @@ Top-level dealing with datetimelike
    to_timedelta
    date_range
    bdate_range
+   cdate_range
    period_range
    timedelta_range
    infer_freq
 
+Top-level dealing with intervals
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autosummary::
+   :toctree: generated/
+
+   interval_range
+
 Top-level evaluation
 ~~~~~~~~~~~~~~~~~~~~
 
@@ -1282,7 +1291,7 @@ Index
 -----
 
 **Many of these methods or variants thereof are available on the objects
-that contain an index (Series/Dataframe) and those should most likely be
+that contain an index (Series/DataFrame) and those should most likely be
 used before calling these methods directly.**
 
 .. autosummary::
@@ -1599,6 +1608,198 @@ Conversion
    TimedeltaIndex.floor
    TimedeltaIndex.ceil
 
+.. currentmodule:: pandas
+
+Scalars
+-------
+
+Period
+~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Period
+
+Attributes
+~~~~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Period.day
+   Period.dayofweek
+   Period.dayofyear
+   Period.days_in_month
+   Period.daysinmonth
+   Period.end_time
+   Period.freq
+   Period.freqstr
+   Period.hour
+   Period.is_leap_year
+   Period.minute
+   Period.month
+   Period.now
+   Period.ordinal
+   Period.quarter
+   Period.qyear
+   Period.second
+   Period.start_time
+   Period.strftime
+   Period.week
+   Period.weekday
+   Period.weekofyear
+   Period.year
+
+Methods
+~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Period.asfreq
+   Period.strftime
+   Period.to_timestamp
+
+Timestamp
+~~~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Timestamp
+
+Properties
+~~~~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Timestamp.asm8
+   Timestamp.day
+   Timestamp.dayofweek
+   Timestamp.dayofyear
+   Timestamp.days_in_month
+   Timestamp.daysinmonth
+   Timestamp.hour
+   Timestamp.is_leap_year
+   Timestamp.is_month_end
+   Timestamp.is_month_start
+   Timestamp.is_quarter_end
+   Timestamp.is_quarter_start
+   Timestamp.is_year_end
+   Timestamp.is_year_start
+   Timestamp.max
+   Timestamp.microsecond
+   Timestamp.min
+   Timestamp.month
+   Timestamp.nanosecond
+   Timestamp.quarter
+   Timestamp.resolution
+   Timestamp.second
+   Timestamp.tz
+   Timestamp.tzinfo
+   Timestamp.value
+   Timestamp.weekday_name
+   Timestamp.weekofyear
+   Timestamp.year
+
+Methods
+~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Timestamp.astimezone
+   Timestamp.ceil
+   Timestamp.combine
+   Timestamp.ctime
+   Timestamp.date
+   Timestamp.dst
+   Timestamp.floor
+   Timestamp.freq
+   Timestamp.freqstr
+   Timestamp.fromordinal
+   Timestamp.fromtimestamp
+   Timestamp.isocalendar
+   Timestamp.isoformat
+   Timestamp.isoweekday
+   Timestamp.normalize
+   Timestamp.now
+   Timestamp.replace
+   Timestamp.round
+   Timestamp.strftime
+   Timestamp.strptime
+   Timestamp.time
+   Timestamp.timetuple
+   Timestamp.timetz
+   Timestamp.to_datetime64
+   Timestamp.to_julian_date
+   Timestamp.to_period
+   Timestamp.to_pydatetime
+   Timestamp.today
+   Timestamp.toordinal
+   Timestamp.tz_convert
+   Timestamp.tz_localize
+   Timestamp.tzname
+   Timestamp.utcfromtimestamp
+   Timestamp.utcnow
+   Timestamp.utcoffset
+   Timestamp.utctimetuple
+   Timestamp.weekday
+
+Interval
+~~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Interval
+
+Properties
+~~~~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Interval.closed
+   Interval.closed_left
+   Interval.closed_right
+   Interval.left
+   Interval.mid
+   Interval.open_left
+   Interval.open_right
+   Interval.right
+
+Timedelta
+~~~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Timedelta
+
+Properties
+~~~~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Timedelta.asm8
+   Timedelta.components
+   Timedelta.days
+   Timedelta.freq
+   Timedelta.max
+   Timedelta.microseconds
+   Timedelta.min
+   Timedelta.nanoseconds
+   Timedelta.resolution
+   Timedelta.seconds
+   Timedelta.value
+
+Methods
+~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Timedelta.ceil
+   Timedelta.floor
+   Timedelta.isoformat
+   Timedelta.round
+   Timedelta.to_pytimedelta
+   Timedelta.to_timedelta64
+   Timedelta.total_seconds
+
 Window
 ------
 .. currentmodule:: pandas.core.window
@@ -1870,6 +2071,7 @@ Style Application
 
    Styler.apply
    Styler.applymap
+   Styler.where
    Styler.format
    Styler.set_precision
    Styler.set_table_styles
diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index fe20a7eb2b786..0990d2bd15ee6 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -251,8 +251,8 @@ replace NaN with some other value using ``fillna`` if you wish).
 Flexible Comparisons
 ~~~~~~~~~~~~~~~~~~~~
 
-Starting in v0.8, pandas introduced binary comparison methods eq, ne, lt, gt,
-le, and ge to Series and DataFrame whose behavior is analogous to the binary
+Series and DataFrame have the binary comparison methods ``eq``, ``ne``, ``lt``, ``gt``,
+``le``, and ``ge`` whose behavior is analogous to the binary
 arithmetic operations described above:
 
 .. ipython:: python
@@ -347,7 +347,7 @@ That is because NaNs do not compare as equals:
 
     np.nan == np.nan
 
-So, as of v0.13.1, NDFrames (such as Series, DataFrames, and Panels)
+So, NDFrames (such as Series, DataFrames, and Panels)
 have an :meth:`~DataFrame.equals` method for testing equality, with NaNs in
 corresponding locations treated as equal.
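A minimal sketch of the NaN-aware equality behavior described in the ``basics.rst`` hunk above; the frames here are invented for illustration:

.. code-block:: python

   import numpy as np
   import pandas as pd

   df1 = pd.DataFrame({'a': [1.0, np.nan]})
   df2 = pd.DataFrame({'a': [1.0, np.nan]})

   (df1 == df2).all().all()  # False: elementwise, NaN != NaN
   df1.equals(df2)           # True: aligned NaNs are treated as equal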
@@ -719,8 +719,6 @@ on an entire ``DataFrame`` or ``Series``, row- or column-wise, or elementwise.
 Tablewise Function Application
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. versionadded:: 0.16.2
-
 ``DataFrames`` and ``Series`` can of course just be passed into functions.
 However, if the function needs to be called in a chain, consider using the :meth:`~DataFrame.pipe` method.
 Compare the following
@@ -925,7 +923,7 @@ Passing a named function will yield that name for the row:
 Aggregating with a dict
 +++++++++++++++++++++++
 
-Passing a dictionary of column names to a scalar or a list of scalars, to ``DataFame.agg``
+Passing a dictionary of column names to a scalar or a list of scalars, to ``DataFrame.agg``
 allows you to customize which functions are applied to which columns. Note that the results
 are not in any particular order, you can use an ``OrderedDict`` instead to guarantee ordering.
@@ -1104,10 +1102,6 @@ Applying with a ``Panel`` will pass a ``Series`` to the applied function. If the
 function returns a ``Series``, the result of the application will be a ``Panel``. If the applied function
 reduces to a scalar, the result of the application will be a ``DataFrame``.
 
-.. note::
-
-   Prior to 0.13.1 ``apply`` on a ``Panel`` would only work on ``ufuncs`` (e.g. ``np.sum/np.max``).
-
 .. ipython:: python
 
    import pandas.util.testing as tm
@@ -1800,8 +1794,6 @@ Series has the :meth:`~Series.searchsorted` method, which works similar to
 smallest / largest values
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. versionadded:: 0.14.0
-
 ``Series`` has the :meth:`~Series.nsmallest` and :meth:`~Series.nlargest` methods which return the
 smallest or largest :math:`n` values. For a large ``Series`` this can be much
 faster than sorting the entire Series and calling ``head(n)`` on the result.
@@ -1866,8 +1858,10 @@ dtypes
 ------
 
 The main types stored in pandas objects are ``float``, ``int``, ``bool``,
-``datetime64[ns]`` and ``datetime64[ns, tz]`` (in >= 0.17.0), ``timedelta[ns]``, ``category`` (in >= 0.15.0), and ``object``. In addition these dtypes
-have item sizes, e.g. ``int64`` and ``int32``. See :ref:`Series with TZ ` for more detail on ``datetime64[ns, tz]`` dtypes.
+``datetime64[ns]`` and ``datetime64[ns, tz]`` (in >= 0.17.0), ``timedelta[ns]``,
+``category`` and ``object``. In addition these dtypes have item sizes, e.g.
+``int64`` and ``int32``. See :ref:`Series with TZ `
+for more detail on ``datetime64[ns, tz]`` dtypes.
 
 A convenient :attr:`~DataFrame.dtypes` attribute for DataFrames returns a Series
 with the data type of each column.
@@ -1908,7 +1902,7 @@ each type in a ``DataFrame``:
 
     dft.get_dtype_counts()
 
-Numeric dtypes will propagate and can coexist in DataFrames (starting in v0.11.0).
+Numeric dtypes will propagate and can coexist in DataFrames.
 If a dtype is passed (either directly via the ``dtype`` keyword, a passed ``ndarray``,
 or a passed ``Series``), then it will be preserved in DataFrame operations. Furthermore,
 different numeric dtypes will **NOT** be combined. The following example will give you a taste.
@@ -2137,7 +2131,7 @@ gotchas
 ~~~~~~~
 
 Performing selection operations on ``integer`` type data can easily upcast the data to ``floating``.
-The dtype of the input data will be preserved in cases where ``nans`` are not introduced (starting in 0.11.0)
+The dtype of the input data will be preserved in cases where ``nans`` are not introduced.
 See also :ref:`Support for integer NA `
 
 .. ipython:: python
@@ -2168,8 +2162,6 @@ Selecting columns based on ``dtype``
 
 .. _basics.selectdtypes:
 
-.. versionadded:: 0.14.1
-
 The :meth:`~DataFrame.select_dtypes` method implements subsetting of columns
 based on their ``dtype``.
diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst
index 02d7920bc4a84..8835c4a1533d0 100644
--- a/doc/source/categorical.rst
+++ b/doc/source/categorical.rst
@@ -16,13 +16,6 @@
 Categorical Data
 ****************
 
-.. versionadded:: 0.15
-
-.. note::
-    While there was `pandas.Categorical` in earlier versions, the ability to use
-    categorical data in `Series` and `DataFrame` is new.
-
-
 This is an introduction to pandas categorical data type, including a short comparison
 with R's ``factor``.
 
@@ -295,10 +288,6 @@ Sorting and Order
 
 .. _categorical.sort:
 
-.. warning::
-
-   The default for construction has changed in v0.16.0 to ``ordered=False``, from the prior implicit ``ordered=True``
-
 If categorical data is ordered (``s.cat.ordered == True``), then the order of the categories has a
 meaning and certain operations are possible. If the categorical is unordered, ``.min()/.max()`` will raise a `TypeError`.
 
@@ -803,13 +792,11 @@ Following table summarizes the results of ``Categoricals`` related concatenation
 Getting Data In/Out
 -------------------
 
-.. versionadded:: 0.15.2
+You can write data that contains ``category`` dtypes to a ``HDFStore``.
+See :ref:`here ` for an example and caveats.
 
-Writing data (`Series`, `Frames`) to a HDF store that contains a ``category`` dtype was implemented
-in 0.15.2. See :ref:`here ` for an example and caveats.
-
-Writing data to and reading data from *Stata* format files was implemented in
-0.15.2. See :ref:`here ` for an example and caveats.
+It is also possible to write data to and read data from *Stata* format files.
+See :ref:`here ` for an example and caveats.
 
 Writing to a CSV file will convert the data, effectively removing any information about the
 categorical (categories and ordering). So if you read back the CSV file you have to convert the
@@ -928,32 +915,6 @@ an ``object`` dtype is a constant times the length of the data.
 
     s.astype('category').nbytes
 
-Old style constructor usage
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-In earlier versions than pandas 0.15, a `Categorical` could be constructed by passing in precomputed
-`codes` (called then `labels`) instead of values with categories. The `codes` were interpreted as
-pointers to the categories with `-1` as `NaN`. This type of constructor usage is replaced by
-the special constructor :func:`Categorical.from_codes`.
-
-Unfortunately, in some special cases, using code which assumes the old style constructor usage
-will work with the current pandas version, resulting in subtle bugs:
-
-.. code-block:: python
-
-    >>> cat = pd.Categorical([1,2], [1,2,3])
-    >>> # old version
-    >>> cat.get_values()
-    array([2, 3], dtype=int64)
-    >>> # new version
-    >>> cat.get_values()
-    array([1, 2], dtype=int64)
-
-.. warning::
-    If you used `Categoricals` with older versions of pandas, please audit your code before
-    upgrading and change your code to use the :func:`~pandas.Categorical.from_codes`
-    constructor.
-
 `Categorical` is not a `numpy` array
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -982,8 +943,7 @@ Dtype comparisons work:
     dtype == np.str_
     np.str_ == dtype
 
-To check if a Series contains Categorical data, with pandas 0.16 or later, use
-``hasattr(s, 'cat')``:
+To check if a Series contains Categorical data, use ``hasattr(s, 'cat')``:
 
 .. ipython:: python
 
@@ -1023,13 +983,13 @@ basic type) and applying along columns will also convert to object.
 Categorical Index
 ~~~~~~~~~~~~~~~~~
 
-.. versionadded:: 0.16.1
-
-A new ``CategoricalIndex`` index type is introduced in version 0.16.1. See the
-:ref:`advanced indexing docs ` for a more detailed
+``CategoricalIndex`` is a type of index that is useful for supporting
+indexing with duplicates. This is a container around a ``Categorical``
+and allows efficient indexing and storage of an index with a large number of duplicated elements.
+See the :ref:`advanced indexing docs ` for a more detailed
 explanation.
 
-Setting the index, will create create a ``CategoricalIndex``
+Setting the index will create a ``CategoricalIndex``
 
 .. ipython:: python
 
@@ -1041,10 +1001,6 @@ Setting the index, will create create a ``CategoricalIndex``
     # This now sorts by the categories order
     df.sort_index()
 
-In previous versions (<0.16.1) there is no index of type ``category``, so
-setting the index to categorical column will convert the categorical data to a
-"normal" dtype first and therefore remove any custom ordering of the categories.
-
 Side Effects
 ~~~~~~~~~~~~
diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst
index 194e022e34c7c..eb97aeeb7e696 100644
--- a/doc/source/comparison_with_r.rst
+++ b/doc/source/comparison_with_r.rst
@@ -247,8 +247,6 @@ For more details and examples see :ref:`the reshaping documentation
 |subset|_
 ~~~~~~~~~~
 
-.. versionadded:: 0.13
-
 The :meth:`~pandas.DataFrame.query` method is similar to the base R ``subset``
 function. In R you might want to get the rows of a ``data.frame`` where one
 column's values are less than another column's values:
@@ -277,8 +275,6 @@ For more details and examples see :ref:`the query documentation
 |with|_
 ~~~~~~~~
 
-.. versionadded:: 0.13
-
 An expression using a data.frame called ``df`` in R with the columns ``a`` and
 ``b`` would be evaluated using ``with`` like so:
 
@@ -509,8 +505,6 @@ For more details and examples see :ref:`the reshaping documentation
 |factor|_
 ~~~~~~~~~
 
-.. versionadded:: 0.15
-
 pandas has a data type for categorical data.
 
 .. code-block:: r
diff --git a/doc/source/computation.rst b/doc/source/computation.rst
index 76a030d355e33..14cfdbc364837 100644
--- a/doc/source/computation.rst
+++ b/doc/source/computation.rst
@@ -654,7 +654,7 @@ aggregation with, outputting a DataFrame:
 
     r['A'].agg([np.sum, np.mean, np.std])
 
-On a widowed DataFrame, you can pass a list of functions to apply to each
+On a windowed DataFrame, you can pass a list of functions to apply to each
 column, which produces an aggregated result with a hierarchical index:
 
 .. ipython:: python
@@ -924,15 +924,12 @@ EWM has a ``min_periods`` argument, which has the same meaning it does for all
 the ``.expanding`` and ``.rolling`` methods: no output values will be set until
 at least ``min_periods`` non-null values are encountered in the (expanding)
 window.
-(This is a change from versions prior to 0.15.0, in which the ``min_periods``
-argument affected only the ``min_periods`` consecutive entries starting at the
-first non-null value.)
 
-EWM also has an ``ignore_na`` argument, which deterines how
+EWM also has an ``ignore_na`` argument, which determines how
 intermediate null values affect the calculation of the weights.
 When ``ignore_na=False`` (the default), weights are calculated based on absolute
 positions, so that intermediate null values affect the result.
-When ``ignore_na=True`` (which reproduces the behavior in versions prior to 0.15.0),
+When ``ignore_na=True``,
 weights are calculated by ignoring intermediate null values.
 For example, assuming ``adjust=True``, if ``ignore_na=False``, the weighted
 average of ``3, NaN, 5`` would be calculated as
diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst
index 32e7a616fe856..5bb3ba75fe51b 100644
--- a/doc/source/cookbook.rst
+++ b/doc/source/cookbook.rst
@@ -256,12 +256,6 @@ Panels
 
     pf = pd.Panel({'df1':df1,'df2':df2,'df3':df3});pf
 
-    #Assignment using Transpose  (pandas < 0.15)
-    pf = pf.transpose(2,0,1)
-    pf['E'] = pd.DataFrame(data, rng, cols)
-    pf = pf.transpose(1,2,0);pf
-
-    #Direct assignment (pandas > 0.15)
     pf.loc[:,:,'F'] = pd.DataFrame(data, rng, cols);pf
 
 `Mask a panel by using np.where and then reconstructing the panel with the new masked values
@@ -818,7 +812,7 @@ The :ref:`Concat ` docs. The :ref:`Join ` d
    df1 = pd.DataFrame(np.random.randn(6, 3), index=rng, columns=['A', 'B', 'C'])
    df2 = df1.copy()
 
-ignore_index is needed in pandas < v0.13, and depending on df construction
+Depending on df construction, ``ignore_index`` may be needed
 
 .. ipython:: python
diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst
index 3c6572229802d..ec0a1c7a00bf7 100644
--- a/doc/source/dsintro.rst
+++ b/doc/source/dsintro.rst
@@ -73,7 +73,7 @@ index is passed, one will be created having values ``[0, ..., len(data) - 1]``.
 
 .. note::
 
-    Starting in v0.8.0, pandas supports non-unique index values. If an operation
+    pandas supports non-unique index values. If an operation
     that does not support duplicate index values is attempted, an exception
     will be raised at that time. The reason for being lazy is nearly all performance-based
     (there are many instances in computations, like parts of GroupBy, where the index
@@ -453,8 +453,6 @@ available to insert at a particular location in the columns:
 Assigning New Columns in Method Chains
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. versionadded:: 0.16.0
-
 Inspired by `dplyr's
 `__
 ``mutate`` verb, DataFrame has an :meth:`~pandas.DataFrame.assign`
@@ -698,7 +696,7 @@ DataFrame in tabular form, though it won't always fit the console width:
 
    print(baseball.iloc[-20:, :12].to_string())
 
-New since 0.10.0, wide DataFrames will now be printed across multiple rows by
+Wide DataFrames will be printed across multiple rows by
 default:
 
 .. ipython:: python
@@ -845,19 +843,16 @@ DataFrame objects with mixed-type columns, all of the data will get upcasted to
 
 .. note::
 
-    Unfortunately Panel, being less commonly used than Series and DataFrame,
+    Panel, being less commonly used than Series and DataFrame,
     has been slightly neglected feature-wise. A number of methods and options
-    available in DataFrame are not available in Panel. This will get worked
-    on, of course, in future releases. And faster if you join me in working on
-    the codebase.
+    available in DataFrame are not available in Panel.
 
 .. _dsintro.to_panel:
 
 From DataFrame using ``to_panel`` method
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-This method was introduced in v0.7 to replace ``LongPanel.to_long``, and converts
-a DataFrame with a two-level index to a Panel.
+``to_panel`` converts a DataFrame with a two-level index to a Panel.
 
 .. ipython:: python
    :okwarning:
diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst
index 685a8690a53d5..264bd1de1fc77 100644
--- a/doc/source/enhancingperf.rst
+++ b/doc/source/enhancingperf.rst
@@ -213,17 +213,18 @@ the rows, applying our ``integrate_f_typed``, and putting this in the zeros arra
 
 .. warning::
 
-   In 0.13.0 since ``Series`` has internaly been refactored to no longer sub-class ``ndarray``
-   but instead subclass ``NDFrame``, you can **not pass** a ``Series`` directly as a ``ndarray`` typed parameter
-   to a cython function. Instead pass the actual ``ndarray`` using the ``.values`` attribute of the Series.
+   You can **not pass** a ``Series`` directly as an ``ndarray`` typed parameter
+   to a cython function. Instead pass the actual ``ndarray`` using the
+   ``.values`` attribute of the Series. The reason is that the cython
+   definition is specific to an ndarray and not the passed Series.
 
-   Prior to 0.13.0
+   So, do not do this:
 
    .. code-block:: python
 
      apply_integrate_f(df['a'], df['b'], df['N'])
 
-   Use ``.values`` to get the underlying ``ndarray``
+   But rather, use ``.values`` to get the underlying ``ndarray``
 
   .. code-block:: python
 
@@ -399,10 +400,8 @@ Read more in the `numba docs `__.
 
 .. _enhancingperf.eval:
 
-Expression Evaluation via :func:`~pandas.eval` (Experimental)
--------------------------------------------------------------
-
-.. versionadded:: 0.13
+Expression Evaluation via :func:`~pandas.eval`
+-----------------------------------------------
 
 The top-level function :func:`pandas.eval` implements expression evaluation of
 :class:`~pandas.Series` and :class:`~pandas.DataFrame` objects.
@@ -539,10 +538,8 @@ Now let's do the same thing but with comparisons:
    of type ``bool`` or ``np.bool_``. Again, you should perform these kinds of
    operations in plain Python.
 
-The ``DataFrame.eval`` method (Experimental)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. versionadded:: 0.13
+The ``DataFrame.eval`` method
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 In addition to the top level :func:`pandas.eval` function you can also
 evaluate an expression in the "context" of a :class:`~pandas.DataFrame`.
@@ -646,19 +643,6 @@ whether the query modifies the original frame.
 Local Variables
 ~~~~~~~~~~~~~~~
 
-In pandas version 0.14 the local variable API has changed. In pandas 0.13.x,
-you could refer to local variables the same way you would in standard Python.
-For example,
-
-.. code-block:: python
-
-   df = pd.DataFrame(np.random.randn(5, 2), columns=['a', 'b'])
-   newcol = np.random.randn(len(df))
-   df.eval('b + newcol')
-
-   UndefinedVariableError: name 'newcol' is not defined
-
-As you can see from the exception generated, this syntax is no longer allowed.
 You must *explicitly reference* any local variable that you want to use in an
 expression by placing the ``@`` character in front of the name. For example,
diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst
index a3062b4086673..9e6f98923fca6 100644
--- a/doc/source/gotchas.rst
+++ b/doc/source/gotchas.rst
@@ -22,8 +22,8 @@ Frequently Asked Questions (FAQ)
 DataFrame memory usage
 ----------------------
 
-As of pandas version 0.15.0, the memory usage of a dataframe (including
-the index) is shown when accessing the ``info`` method of a dataframe. A
+The memory usage of a dataframe (including the index)
+is shown when accessing the ``info`` method of a dataframe. A
 configuration option, ``display.memory_usage`` (see :ref:`options`),
 specifies if the dataframe's memory usage will be displayed when
 invoking the ``df.info()`` method.
diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst
index 937d682d238b3..e9a7d8dd0a46e 100644
--- a/doc/source/groupby.rst
+++ b/doc/source/groupby.rst
@@ -140,7 +140,7 @@ columns:
 
    In [5]: grouped = df.groupby(get_letter_type, axis=1)
 
-Starting with 0.8, pandas Index objects now support duplicate values. If a
+pandas Index objects support duplicate values. If a
 non-unique index is used as the group key in a groupby operation, all values
 for the same index value will be considered to be in one group and thus the
 output of aggregation functions will only contain unique index values:
@@ -288,8 +288,6 @@ chosen level:
 
    s.sum(level='second')
 
-.. versionadded:: 0.6
-
 Grouping with multiple levels is supported.
 
 .. ipython:: python
@@ -563,7 +561,7 @@ must be either implemented on GroupBy or available via :ref:`dispatching
 
 .. note::
 
-    If you pass a dict to ``aggregate``, the ordering of the output colums is
+    If you pass a dict to ``aggregate``, the ordering of the output columns is
     non-deterministic. If you want to be sure the output columns will be in a specific
     order, you can use an ``OrderedDict``.  Compare the output of the following two commands:
@@ -768,8 +766,6 @@ missing values with the ``ffill()`` method.
 Filtration
 ----------
 
-.. versionadded:: 0.12
-
 The ``filter`` method returns a subset of the original object. Suppose we
 want to take only elements that belong to groups with a group sum greater
 than 2.
@@ -860,8 +856,6 @@ In this example, we chopped the collection of time series into yearly chunks
 then independently called :ref:`fillna ` on the
 groups.
 
-.. versionadded:: 0.14.1
-
 The ``nlargest`` and ``nsmallest`` methods work on ``Series`` style groupbys:
 
 .. ipython:: python
@@ -1050,19 +1044,6 @@ Just like for a DataFrame or Series you can call head and tail on a groupby:
 
 This shows the first or last n rows from each group.
 
-.. warning::
-
-   Before 0.14.0 this was implemented with a fall-through apply,
-   so the result would incorrectly respect the as_index flag:
-
-   .. code-block:: python
-
-      >>> g.head(1):  # was equivalent to g.apply(lambda x: x.head(1))
-         A  B
-       A
-       1 0  1  2
-       5 2  5  6
-
 .. _groupby.nth:
 
 Taking the nth row of each group
@@ -1115,8 +1096,6 @@ You can also select multiple rows from each group by specifying multiple nth val
 Enumerate group items
 ~~~~~~~~~~~~~~~~~~~~~
 
-.. versionadded:: 0.13.0
-
 To see the order in which each row appears within its group, use the
 ``cumcount`` method:
@@ -1232,7 +1211,7 @@ Groupby by Indexer to 'resample' data
 
 Resampling produces new hypothetical samples (resamples) from already existing observed data or from a model that generates data. These new samples are similar to the pre-existing samples.
 
-In order to resample to work on indices that are non-datetimelike , the following procedure can be utilized.
+In order for resampling to work on indices that are non-datetimelike, the following procedure can be utilized.
 
 In the following examples, **df.index // 5** returns a binary array which is used to determine what gets selected for the groupby operation.
diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
index 53a259ad6eb15..edbc4e6d7fd22 100644
--- a/doc/source/indexing.rst
+++ b/doc/source/indexing.rst
@@ -47,12 +47,6 @@ advanced indexing.
    should be avoided. See :ref:`Returning a View versus Copy `
 
-.. warning::
-
-   In 0.15.0 ``Index`` has internally been refactored to no longer subclass ``ndarray``
-   but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This should be
-   a transparent change with only very limited API implications (See the :ref:`Internal Refactoring `)
-
 .. warning::
 
    Indexing on an integer-based Index with floats has been clarified in 0.18.0, for a summary of the changes, see :ref:`here `.
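A short sketch of the indexer-based "resampling" covered in the ``groupby.rst`` hunk above; grouping by ``df.index // 5`` bins every five consecutive rows (the data here is made up):

.. code-block:: python

   import numpy as np
   import pandas as pd

   df = pd.DataFrame({'value': np.arange(10)})
   # rows 0-4 fall into group 0, rows 5-9 into group 1
   df.groupby(df.index // 5).sum()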
@@ -66,8 +60,6 @@ See the :ref:`cookbook` for some advanced strategies
 Different Choices for Indexing
 ------------------------------
 
-.. versionadded:: 0.11.0
-
 Object selection has had a number of user-requested additions in order to
 support more explicit location based indexing. Pandas now supports three types
 of multi-axis indexing.
@@ -250,8 +242,6 @@ as an attribute:
    - In any of these cases, standard indexing will still work, e.g. ``s['1']``, ``s['min']``, and ``s['index']`` will
      access the corresponding element or column.
 
-   - The ``Series/Panel`` accesses are available starting in 0.13.0.
-
    If you are using the IPython environment, you may also use tab-completion to
    see these accessible attributes.
 
@@ -279,21 +269,6 @@ new column. In 0.21.0 and later, this will raise a ``UserWarning``:
    1  2.0
    2  3.0
 
-Similarly, it is possible to create a column with a name which collides with one of Pandas's
-built-in methods or attributes, which can cause confusion later when attempting to access
-that column as an attribute. This behavior now warns:
-
-.. code-block:: ipython
-
-    In[4]: df['sum'] = [5., 7., 9.]
-    UserWarning: Column name 'sum' collides with a built-in method, which will cause unexpected attribute behavior
-    In[5]: df.sum
-    Out[5]:
-    
-
 Slicing ranges
 --------------
 
@@ -531,7 +506,6 @@ Out of range slice indexes are handled gracefully just as in Python/Numpy.
 .. ipython:: python
 
    # these are allowed in python/numpy.
-   # Only works in Pandas starting from v0.14.0.
    x = list('abcdef')
   x
    x[4:10]
    x[8:10]
    s = pd.Series(x)
    s.iloc[4:10]
    s.iloc[8:10]
 
-.. note::
-
-   Prior to v0.14.0, ``iloc`` would not accept out of bounds indexers for
-   slices, e.g. a value that exceeds the length of the object being indexed.
-
-
-Note that this could result in an empty axis (e.g. an empty DataFrame being
-returned)
+Note that using slices that go out of bounds can result in
+an empty axis (e.g. an empty DataFrame being returned)
 
 .. ipython:: python
 
@@ -671,7 +639,6 @@ For getting *multiple* indexers, using ``.get_indexer``
 Selecting Random Samples
 ------------------------
 
-.. versionadded::0.16.1
 
 A random selection of rows or columns from a Series, DataFrame, or Panel can be made
 with the :meth:`~DataFrame.sample` method. The method will sample rows by default, and accepts a
 specific number of rows/columns to return, or a fraction of rows.
@@ -747,9 +714,7 @@ Finally, one can also set a seed for ``sample``'s random number generator using
 Setting With Enlargement
 ------------------------
 
-.. versionadded:: 0.13
-
-The ``.loc/[]`` operations can perform enlargement when setting a non-existant key for that axis.
+The ``.loc/[]`` operations can perform enlargement when setting a non-existent key for that axis.
 
 In the ``Series`` case this is effectively an appending operation
 
@@ -1022,8 +987,6 @@ partial setting via ``.loc`` (but on the contents rather than the axis labels)
    df2[ df2[1:4] > 0 ] = 3
    df2
 
-.. versionadded:: 0.13
-
 Where can also accept ``axis`` and ``level`` parameters to align the input when
 performing the ``where``.
 
@@ -1066,8 +1029,6 @@ as condition and ``other`` argument.
 The :meth:`~pandas.DataFrame.query` Method (Experimental)
 ---------------------------------------------------------
 
-.. versionadded:: 0.13
-
 :class:`~pandas.DataFrame` objects have a :meth:`~pandas.DataFrame.query`
 method that allows selection using an expression.
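To illustrate the setting-with-enlargement behavior in the ``indexing.rst`` hunk above, a minimal sketch (values invented):

.. code-block:: python

   import pandas as pd

   s = pd.Series([1, 2, 3])
   s.loc[5] = 5.0  # assigning to a non-existent label enlarges the Series
   s               # index is now [0, 1, 2, 5]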
@@ -1508,8 +1469,6 @@ The name, if set, will be shown in the console display:
 Setting metadata
 ~~~~~~~~~~~~~~~~
 
-.. versionadded:: 0.13.0
-
 Indexes are "mostly immutable", but it is possible to set and change their
 metadata, like the index ``name`` (or, for ``MultiIndex``, ``levels`` and
 ``labels``).
@@ -1529,8 +1488,6 @@ See :ref:`Advanced Indexing ` for usage of MultiIndexes.
   ind.name = "bob"
   ind
 
-.. versionadded:: 0.15.0
-
 ``set_names``, ``set_levels``, and ``set_labels`` also take an optional
 ``level`` argument
 
@@ -1546,11 +1503,6 @@ Set operations on Index objects
 
 .. _indexing.set_ops:
 
-.. warning::
-
-   In 0.15.0. the set operations ``+`` and ``-`` were deprecated in order to provide these for numeric type operations on certain
-   index types. ``+`` can be replace by ``.union()`` or ``|``, and ``-`` by ``.difference()``.
-
 The two main operations are ``union (|)``, ``intersection (&)``
 These can be directly called as instance methods or used via overloaded
 operators. Difference is provided via the ``.difference()`` method.
@@ -1792,7 +1744,7 @@ Evaluation order matters
 
 Furthermore, in chained expressions, the order may determine whether a copy is returned or not.
 If an expression will set values on a copy of a slice, then a ``SettingWithCopy``
-exception will be raised (this raise/warn behavior is new starting in 0.13.0)
+warning will be issued.
 
 You can control the action of a chained assignment via the option ``mode.chained_assignment``,
 which can take the values ``['raise','warn',None]``, where showing a warning is the default.
diff --git a/doc/source/install.rst b/doc/source/install.rst
index f92c43839ee31..c805f84d0faaa 100644
--- a/doc/source/install.rst
+++ b/doc/source/install.rst
@@ -18,7 +18,7 @@ Instructions for installing from source,
 Python version support
 ----------------------
 
-Officially Python 2.7, 3.4, 3.5, and 3.6
+Officially Python 2.7, 3.5, and 3.6.
 
 Installing pandas
 -----------------
@@ -107,7 +107,7 @@ following command::
 
 To install a specific pandas version::
 
    conda install pandas=0.20.3
 
 To install other packages, IPython for example::
 
@@ -183,21 +183,17 @@ installed), make sure you have `pytest
 
     >>> import pandas as pd
     >>> pd.test()
-    Running unit tests for pandas
-    pandas version 0.18.0
-    numpy version 1.10.2
-    pandas is installed in pandas
-    Python version 2.7.11 |Continuum Analytics, Inc.|
-    (default, Dec  6 2015, 18:57:58) [GCC 4.2.1 (Apple Inc. build 5577)]
-    nose version 1.3.7
+    running: pytest --skip-slow --skip-network C:\Users\TP\Anaconda3\envs\py36\lib\site-packages\pandas
+    ============================= test session starts =============================
+    platform win32 -- Python 3.6.2, pytest-3.2.1, py-1.4.34, pluggy-0.4.0
+    rootdir: C:\Users\TP\Documents\Python\pandasdev\pandas, inifile: setup.cfg
+    collected 12145 items / 3 skipped
+
+    ..................................................................S......
+    ........S................................................................
+    .........................................................................
 
-    ----------------------------------------------------------------------
-    Ran 9252 tests in 368.339s
-
-    OK (SKIP=117)
+    ==================== 12130 passed, 12 skipped in 368.339 seconds =====================
 
 Dependencies
 ------------
diff --git a/doc/source/io.rst b/doc/source/io.rst
index e338407361705..fcf7f6029197b 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -364,7 +364,7 @@ warn_bad_lines : boolean, default ``True``
 Specifying column data types
 ''''''''''''''''''''''''''''
 
-Starting with v0.10, you can indicate the data type for the whole DataFrame or
+You can indicate the data type for the whole DataFrame or
 individual columns:
 
 .. ipython:: python
@@ -592,8 +592,7 @@ Ignoring line comments and empty lines
 ++++++++++++++++++++++++++++++++++++++
 
 If the ``comment`` parameter is specified, then completely commented lines will
-be ignored. By default, completely blank lines will be ignored as well. Both of
-these are API changes introduced in version 0.15.
+be ignored. By default, completely blank lines will be ignored as well.
 
 .. ipython:: python
 
@@ -1310,8 +1309,6 @@ column widths for contiguous columns:
 The parser will take care of extra white spaces around the columns
 so it's ok to have extra separation between the columns in the file.
 
-.. versionadded:: 0.13.0
-
 By default, ``read_fwf`` will try to infer the file's ``colspecs`` by using the
 first 100 rows of the file.  It can do it only in cases when the columns are
 aligned and correctly separated by the provided ``delimiter`` (default delimiter
@@ -1407,8 +1404,7 @@ Reading columns with a ``MultiIndex``
 
 By specifying list of row locations for the ``header`` argument, you
 can read in a ``MultiIndex`` for the columns. Specifying non-consecutive
-rows will skip the intervening rows. In order to have the pre-0.13 behavior
-of tupleizing columns, specify ``tupleize_cols=True``.
+rows will skip the intervening rows.
 
 .. ipython:: python
 
@@ -1418,7 +1414,7 @@ of tupleizing columns, specify ``tupleize_cols=True``.
    print(open('mi.csv').read())
    pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1])
 
-Starting in 0.13.0, ``read_csv`` will be able to interpret a more common format
+``read_csv`` is also able to interpret a more common format
 of multi-columns indices.
 
 .. ipython:: python
@@ -2012,8 +2008,6 @@ The speedup is less noticeable for smaller datasets:
 Normalization
 '''''''''''''
 
-.. versionadded:: 0.13.0
-
 pandas provides a utility function to take a dict or list of dicts and *normalize* this
 semi-structured data into a flat table.
 
@@ -2198,8 +2192,6 @@ Reading HTML Content
    We **highly encourage** you to read the :ref:`HTML Table Parsing gotchas `
    below regarding the issues surrounding the BeautifulSoup4/html5lib/lxml parsers.
 
-.. versionadded:: 0.12.0
-
 The top-level :func:`~pandas.io.html.read_html` function can accept an HTML
 string/file/URL and will parse HTML tables into list of pandas DataFrames.
 Let's look at a few examples.
@@ -2653,10 +2645,6 @@ of sheet names can simply be passed to ``read_excel`` with no loss in performanc
 
     # equivalent using the read_excel function
     data = read_excel('path_to_file.xls', ['Sheet1', 'Sheet2'], index_col=None, na_values=['NA'])
 
-.. versionadded:: 0.12
-
-``ExcelFile`` has been moved to the top level namespace.
-
 .. versionadded:: 0.17
 
 ``read_excel`` can take an ``ExcelFile`` object as input
@@ -2712,13 +2700,8 @@ Using a list to get multiple sheets:
 
    # Returns the 1st and 4th sheet, as a dictionary of DataFrames.
    read_excel('path_to_file.xls',sheet_name=['Sheet1',3])
 
-.. versionadded:: 0.16
-
 ``read_excel`` can read more than one sheet, by setting ``sheet_name`` to either
 a list of sheet names, a list of sheet positions, or ``None`` to read all sheets.
-
-.. versionadded:: 0.13
-
 Sheets can be specified by sheet index or sheet name, using an integer or string,
 respectively.
 
@@ -2866,9 +2849,9 @@ Files with a ``.xls`` extension will be written using ``xlwt`` and those with a
 ``.xlsx`` extension will be written using ``xlsxwriter`` (if available) or
 ``openpyxl``.
 
-The DataFrame will be written in a way that tries to mimic the REPL output. One
-difference from 0.12.0 is that the ``index_label`` will be placed in the second
-row instead of the first. You can get the previous behaviour by setting the
+The DataFrame will be written in a way that tries to mimic the REPL output.
+The ``index_label`` will be placed in the second
+row instead of the first. You can place it in the first row by setting the
 ``merge_cells`` option in ``to_excel()`` to ``False``:
 
 .. code-block:: python
@@ -2945,8 +2928,6 @@ Added support for Openpyxl >= 2.2
 Excel writer engines
 ''''''''''''''''''''
 
-.. versionadded:: 0.13
-
 ``pandas`` chooses an Excel writer via two methods:
 
 1. the ``engine`` keyword argument
@@ -3074,14 +3055,19 @@ any pickled pandas object (or any other pickled object) from file:
 
    Loading pickled data received from untrusted sources can be unsafe.
 
-   See: http://docs.python.org/2.7/library/pickle.html
+   See: https://docs.python.org/3.6/library/pickle.html
 
 .. warning::
 
-   Several internal refactorings, 0.13 (:ref:`Series Refactoring `), and 0.15 (:ref:`Index Refactoring `),
-   preserve compatibility with pickles created prior to these versions. However, these must
-   be read with ``pd.read_pickle``, rather than the default python ``pickle.load``.
-   See `this question `__
+   Several internal refactorings have been done while still preserving
+   compatibility with pickles created with older versions of pandas. However,
+   for such cases, pickled DataFrames, Series, etc., must be read with
+   ``pd.read_pickle``, rather than ``pickle.load``.
+
+   See `here `__
+   and `here `__
+   for some examples of compatibility-breaking changes. See
+   `this question `__
   for a detailed explanation.
 
 .. _io.pickle.compression:
 
 Compressed pickle files
 '''''''''''''''''''''''
 
 .. versionadded:: 0.20.0
 
-:func:`read_pickle`, :meth:`DataFame.to_pickle` and :meth:`Series.to_pickle` can read
+:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` can read
 and write compressed pickle files. The compression types of ``gzip``, ``bz2``, ``xz`` are supported for reading and writing.
 ``zip`` file supports read only and must contain only one data file
 to be read in.
 
@@ -3150,9 +3136,7 @@ The default is to 'infer
 msgpack
 -------
 
-.. versionadded:: 0.13.0
-
-Starting in 0.13.0, pandas is supporting the ``msgpack`` format for
+pandas supports the ``msgpack`` format for
 object serialization. This is a lightweight portable binary format, similar
 to binary JSON, that is highly space efficient, and provides good performance
 both on the writing (serialization), and reading (deserialization).
@@ -3254,11 +3238,10 @@ for some advanced strategies
 
 .. warning::
 
-   As of version 0.15.0, pandas requires ``PyTables`` >= 3.0.0. Stores written with prior versions of pandas / ``PyTables`` >= 2.3 are fully compatible (this was the previous minimum ``PyTables`` required version).
-
-.. warning::
-
-   There is a ``PyTables`` indexing bug which may appear when querying stores using an index. If you see a subset of results being returned, upgrade to ``PyTables`` >= 3.2. Stores created previously will need to be rewritten using the updated version.
+   pandas requires ``PyTables`` >= 3.0.0.
+   There is an indexing bug in ``PyTables`` < 3.2 which may appear when querying stores using an index.
+   If you see a subset of results being returned, upgrade to ``PyTables`` >= 3.2.
+   Stores created previously will need to be rewritten using the updated version.
 
 .. warning::
 
@@ -3346,7 +3329,7 @@ Read/Write API
 ''''''''''''''
 
 ``HDFStore`` supports a top-level API using ``read_hdf`` for reading and ``to_hdf`` for writing,
-similar to how ``read_csv`` and ``to_csv`` work. (new in 0.11.0)
+similar to how ``read_csv`` and ``to_csv`` work.
 
 .. ipython:: python
 
@@ -3424,10 +3407,6 @@ This is also true for the major axis of a ``Panel``:
 Fixed Format
 ''''''''''''
 
-.. note::
-
-    This was prior to 0.13.0 the ``Storer`` format.
-
 The examples above show storing using ``put``, which write the HDF5 to ``PyTables`` in a fixed
 array format, called the ``fixed`` format. These types of stores are **not** appendable
 once written (though you can simply remove them and rewrite). Nor are they **queryable**; they must be
@@ -3460,8 +3439,6 @@ other sessions.  In addition, delete & query type operations are supported. This
 format is specified by ``format='table'`` or ``format='t'``
 to ``append`` or ``put`` or ``to_hdf``
 
-.. versionadded:: 0.13
-
 This format can be set as an option as well ``pd.set_option('io.hdf.default_format','table')`` to
 enable ``put/append/to_hdf`` to by default store in the ``table`` format.
 
@@ -3765,9 +3742,7 @@ space. These are in terms of the total number of rows in a table.
 Using timedelta64[ns]
 +++++++++++++++++++++
 
-.. versionadded:: 0.13
-
-Beginning in 0.13.0, you can store and query using the ``timedelta64[ns]`` type. Terms can be
+You can store and query using the ``timedelta64[ns]`` type. Terms can be
 specified in the format: ``()``, where float may be signed (and fractional), and unit can be
 ``D,s,ms,us,ns`` for the timedelta. Here's an example:
 
@@ -3791,7 +3766,7 @@ indexed dimension as the ``where``.
 
 .. note::
 
-   Indexes are automagically created (starting ``0.10.1``) on the indexables
+   Indexes are automagically created on the indexables
    and any data columns you specify. This behavior can be turned off by passing
   ``index=False`` to ``append``.
 
@@ -3878,7 +3853,7 @@ create a new table!)
 Iterator
 ++++++++
 
-Starting in ``0.11.0``, you can pass, ``iterator=True`` or ``chunksize=number_in_a_chunk``
+You can pass ``iterator=True`` or ``chunksize=number_in_a_chunk``
 to ``select`` and ``select_as_multiple`` to return an iterator on the results.
 The default is 50,000 rows returned in a chunk.
 
@@ -3889,8 +3864,6 @@ The default is 50,000 rows returned in a chunk.
 
 .. note::
 
-   .. versionadded:: 0.12.0
-
   You can also use the iterator with ``read_hdf`` which will open, then
    automatically close the store when finished iterating.
 
@@ -3986,8 +3959,8 @@ of rows in an object.
 Multiple Table Queries
 ++++++++++++++++++++++
 
-New in 0.10.1 are the methods ``append_to_multiple`` and
-``select_as_multiple``, that can perform appending/selecting from
+The methods ``append_to_multiple`` and
+``select_as_multiple`` can perform appending/selecting from
 multiple tables at once. The idea is to have one table (call
 it the selector table) that you index most/all of the columns, and perform your
 queries.
The other table(s) are data tables with an index matching the @@ -4233,10 +4206,8 @@ object : ``strings`` ``np.nan`` Categorical Data ++++++++++++++++ -.. versionadded:: 0.15.2 - -Writing data to a ``HDFStore`` that contains a ``category`` dtype was implemented -in 0.15.2. Queries work the same as if it was an object array. However, the ``category`` dtyped data is +You can write data that contains ``category`` dtypes to a ``HDFStore``. +Queries work the same as if it was an object array. However, the ``category`` dtyped data is stored in a more efficient manner. .. ipython:: python @@ -4251,21 +4222,6 @@ stored in a more efficient manner. result result.dtypes -.. warning:: - - The format of the ``Categorical`` is readable by prior versions of pandas (< 0.15.2), but will retrieve - the data as an integer based column (e.g. the ``codes``). However, the ``categories`` *can* be retrieved - but require the user to select them manually using the explicit meta path. - - The data is stored like so: - - .. ipython:: python - - cstore - - # to get the categories - cstore.select('dfcat/meta/A/meta') - .. ipython:: python :suppress: :okexcept: @@ -4291,7 +4247,7 @@ Pass ``min_itemsize`` on the first table creation to a-priori specify the minimu ``min_itemsize`` can be an integer, or a dict mapping a column name to an integer. You can pass ``values`` as a key to allow all *indexables* or *data_columns* to have this min_itemsize. -Starting in 0.11.0, passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. +Passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. .. note:: @@ -4419,44 +4375,6 @@ Now you can import the ``DataFrame`` into R: starting point if you have stored multiple ``DataFrame`` objects to a single HDF5 file. -Backwards Compatibility -''''''''''''''''''''''' - -0.10.1 of ``HDFStore`` can read tables created in a prior version of pandas, -however query terms using the -prior (undocumented) methodology are unsupported. ``HDFStore`` will -issue a warning if you try to use a legacy-format file. You must -read in the entire file and write it out using the new format, using the -method ``copy`` to take advantage of the updates. The group attribute -``pandas_version`` contains the version information. ``copy`` takes a -number of options, please see the docstring. - - -.. ipython:: python - :suppress: - - import os - legacy_file_path = os.path.abspath('source/_static/legacy_0.10.h5') - -.. ipython:: python - :okwarning: - - # a legacy store - legacy_store = pd.HDFStore(legacy_file_path,'r') - legacy_store - - # copy (and return the new handle) - new_store = legacy_store.copy('store_new.h5') - new_store - new_store.close() - -.. ipython:: python - :suppress: - - legacy_store.close() - import os - os.remove('store_new.h5') - Performance ''''''''''' @@ -4597,8 +4515,7 @@ See the documentation for `pyarrow `__ and 'd': np.arange(4.0, 7.0, dtype='float64'), 'e': [True, False, True], 'f': pd.date_range('20130101', periods=3), - 'g': pd.date_range('20130101', periods=3, tz='US/Eastern'), - 'h': pd.date_range('20130101', periods=3, freq='ns')}) + 'g': pd.date_range('20130101', periods=3, tz='US/Eastern')}) df df.dtypes @@ -4641,8 +4558,6 @@ included in Python's standard library by default. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. -.. 
@@ -4809,8 +4724,6 @@ You can check if a table exists using :func:`~pandas.io.sql.has_table` Schema support '''''''''''''' -.. versionadded:: 0.15.0 - Reading from and writing to different schema's is supported through the ``schema`` keyword in the :func:`~pandas.read_sql_table` and :func:`~pandas.DataFrame.to_sql` functions. Note however that this depends on the database flavor (sqlite does not @@ -4975,8 +4888,6 @@ Full documentation can be found `here `__ Stata Format ------------ -.. versionadded:: 0.12.0 - .. _io.stata_writer: Writing to Stata format @@ -5040,8 +4951,6 @@ be used to read the file incrementally. pd.read_stata('stata.dta') -.. versionadded:: 0.16.0 - Specifying a ``chunksize`` yields a :class:`~pandas.io.stata.StataReader` instance that can be used to read ``chunksize`` lines from the file at a time. The ``StataReader`` @@ -5099,8 +5008,6 @@ values will have ``object`` data type. Categorical Data ++++++++++++++++ -.. versionadded:: 0.15.2 - ``Categorical`` data can be exported to *Stata* data files as value labeled data. The exported data consists of the underlying category codes as integer data values and the categories as value labels. *Stata* does not have an explicit equivalent diff --git a/doc/source/merging.rst b/doc/source/merging.rst index d956f1ca54e6b..72787ea97a782 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -1053,8 +1053,6 @@ As you can see, this drops any rows where there was no match. Joining a single Index to a Multi-index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14.0 - You can join a singly-indexed ``DataFrame`` with a level of a multi-indexed ``DataFrame``. The level will match on the name of the index of the singly-indexed frame against a level name of the multi-indexed frame. @@ -1331,7 +1329,7 @@ By default we are taking the asof of the quotes. on='time', by='ticker') -We only asof within ``2ms`` betwen the quote time and the trade time. +We only asof within ``2ms`` between the quote time and the trade time. .. ipython:: python @@ -1340,8 +1338,8 @@ We only asof within ``2ms`` betwen the quote time and the trade time. by='ticker', tolerance=pd.Timedelta('2ms')) -We only asof within ``10ms`` betwen the quote time and the trade time and we exclude exact matches on time. -Note that though we exclude the exact matches (of the quotes), prior quotes DO propogate to that point +We only asof within ``10ms`` between the quote time and the trade time and we exclude exact matches on time. +Note that though we exclude the exact matches (of the quotes), prior quotes DO propagate to that point in time. .. ipython:: python diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index d54288baa389b..b33b5c304853a 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -67,9 +67,8 @@ arise and we wish to also consider that "missing" or "not available" or "NA". .. note:: - Prior to version v0.10.0 ``inf`` and ``-inf`` were also - considered to be "NA" in computations. This is no longer the case by - default; use the ``mode.use_inf_as_na`` option to recover it. + If you want to consider ``inf`` and ``-inf`` to be "NA" in computations, + you can set ``pandas.options.mode.use_inf_as_na = True``. .. _missing.isna:
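The rewritten note can be demonstrated with a short sketch; using ``option_context`` to apply the option temporarily (rather than setting it globally) is a choice of this example:

.. code-block:: python

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, np.inf, -np.inf])
    s.isna()  # inf values are not NA by default

    with pd.option_context('mode.use_inf_as_na', True):
        s.isna()  # now inf and -inf are treated as NA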
@@ -264,8 +263,6 @@ and ``bfill()`` is equivalent to ``fillna(method='bfill')`` Filling with a PandasObject ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.12 - You can also fillna using a dict or Series that is alignable. The labels of the dict or index of the Series must match the columns of the frame you wish to fill. The use case of this is to fill a DataFrame with the mean of that column. @@ -281,8 +278,6 @@ use case of this is to fill a DataFrame with the mean of that column. dff.fillna(dff.mean()) dff.fillna(dff.mean()['B':'C']) -.. versionadded:: 0.13 - Same result as above, but is aligning the 'fill' value which is a Series in this case. @@ -321,16 +316,11 @@ examined :ref:`in the API `. Interpolation ~~~~~~~~~~~~~ -.. versionadded:: 0.13.0 - - :meth:`~pandas.DataFrame.interpolate`, and :meth:`~pandas.Series.interpolate` have - revamped interpolation methods and functionality. - .. versionadded:: 0.17.0 The ``limit_direction`` keyword argument was added. -Both Series and Dataframe objects have an ``interpolate`` method that, by default, +Both Series and DataFrame objects have an ``interpolate`` method that, by default, performs linear interpolation at missing datapoints. .. ipython:: python @@ -485,8 +475,8 @@ respectively: Replacing Generic Values ~~~~~~~~~~~~~~~~~~~~~~~~ -Often times we want to replace arbitrary values with other values. New in v0.8 -is the ``replace`` method in Series/DataFrame that provides an efficient yet +Oftentimes we want to replace arbitrary values with other values. The +``replace`` method in Series/DataFrame provides an efficient yet flexible way to perform such replacements. For a Series, you can replace a single value or a list of values by another diff --git a/doc/source/options.rst b/doc/source/options.rst index 51d02bc89692a..f042e4d3f5120 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -306,16 +306,16 @@ display.float_format None The callable should accept a fl See core.format.EngFormatter for an example. display.large_repr truncate For DataFrames exceeding max_rows/max_cols, the repr (and HTML repr) can show - a truncated table (the default from 0.13), + a truncated table (the default), or switch to the view from df.info() (the behaviour in earlier versions of pandas). allowable settings, ['truncate', 'info'] display.latex.repr False Whether to produce a latex DataFrame representation for jupyter frontends that support it. -display.latex.escape True Escapes special caracters in Dataframes, when +display.latex.escape True Escapes special characters in DataFrames, when using the to_latex method. -display.latex.longtable False Specifies if the to_latex method of a Dataframe +display.latex.longtable False Specifies if the to_latex method of a DataFrame uses the longtable format. display.latex.multicolumn True Combines columns when using a MultiIndex display.latex.multicolumn_format 'l' Alignment of multicolumn labels diff --git a/doc/source/remote_data.rst b/doc/source/remote_data.rst index 7980133582125..9af66058a7aaa 100644 --- a/doc/source/remote_data.rst +++ b/doc/source/remote_data.rst @@ -11,14 +11,13 @@ Remote Data Access DataReader ---------- -The sub-package ``pandas.io.data`` is removed in favor of a separately -installable `pandas-datareader package +The sub-package ``pandas.io.data`` was deprecated in v0.17 and removed in +`v0.19 `__. + Instead, a separately installable `pandas-datareader package `_ has been created. This will allow the data -modules to be independently updated to your pandas installation. The API for -``pandas-datareader v0.1.1`` is the same as in ``pandas v0.16.1``. -(:issue:`8961`) +modules to be updated independently of your pandas installation. - You should replace the imports of the following: + For code written against pandas < 0.19, replace the imports as follows: .. code-block:: python
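    # The body of this code-block is truncated by the diff hunk above;
    # this is a sketch of the import swap the surrounding prose describes,
    # so treat it as illustrative rather than the patch's own content.
    #
    # before (pandas < 0.19):
    #   from pandas.io import data, wb
    # after:
    from pandas_datareader import data, wb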
diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 3dce73b302c7c..1209c4a8d6be8 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -156,7 +156,7 @@ the level numbers: stacked.unstack('second') Notice that the ``stack`` and ``unstack`` methods implicitly sort the index -levels involved. Hence a call to ``stack`` and then ``unstack``, or viceversa, +levels involved. Hence a call to ``stack`` and then ``unstack``, or vice versa, will result in a **sorted** copy of the original DataFrame or Series: .. ipython:: python @@ -569,8 +569,6 @@ This function is often used along with discretization functions like ``cut``: See also :func:`Series.str.get_dummies `. -.. versionadded:: 0.15.0 - :func:`get_dummies` also accepts a DataFrame. By default all categorical variables (categorical in the statistical sense, those with `object` or `categorical` dtype) are encoded as dummy variables. @@ -675,4 +673,4 @@ handling of NaN: you can use ``df["cat_col"] = pd.Categorical(df["col"])`` or ``df["cat_col"] = df["col"].astype("category")``. For full docs on :class:`~pandas.Categorical`, see the :ref:`Categorical introduction ` and the - :ref:`API documentation `. This feature was introduced in version 0.15. + :ref:`API documentation `. diff --git a/doc/source/sparse.rst b/doc/source/sparse.rst index b4884cf1c4141..89efa7b4be3ee 100644 --- a/doc/source/sparse.rst +++ b/doc/source/sparse.rst @@ -132,7 +132,7 @@ dtype, ``fill_value`` default changes: s.to_sparse() You can change the dtype using ``.astype()``, the result is also sparse. Note that -``.astype()`` also affects to the ``fill_value`` to keep its dense represantation. +``.astype()`` also affects the ``fill_value`` to keep its dense representation. .. ipython:: python @@ -216,8 +216,6 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you SparseSeries ~~~~~~~~~~~~ -.. versionadded:: 0.16.0 - A :meth:`SparseSeries.to_coo` method is implemented for transforming a ``SparseSeries`` indexed by a ``MultiIndex`` to a ``scipy.sparse.coo_matrix``. The method requires a ``MultiIndex`` with two or more levels. diff --git a/doc/source/style.ipynb b/doc/source/style.ipynb index c250787785e14..1d6ce163cf977 100644 --- a/doc/source/style.ipynb +++ b/doc/source/style.ipynb @@ -169,7 +169,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Notice the similarity with the standard `df.applymap`, which operates on DataFrames elementwise. We want you to be able to resuse your existing knowledge of how to interact with DataFrames.\n", + "Notice the similarity with the standard `df.applymap`, which operates on DataFrames elementwise. We want you to be able to reuse your existing knowledge of how to interact with DataFrames.\n", "\n", "Notice also that our function returned a string containing the CSS attribute and value, separated by a colon just like in a `