From e38ba8d9f77eefd6c9aeaff209e59fa5d7fc7bcb Mon Sep 17 00:00:00 2001 From: shubhamsm Date: Fri, 25 Sep 2020 14:44:15 +0530 Subject: [PATCH 01/17] Replace single with double backticks --- .pre-commit-config.yaml | 31 +------ doc/source/development/contributing.rst | 20 ++--- .../development/contributing_docstring.rst | 60 +++++++------- doc/source/development/extending.rst | 2 +- doc/source/ecosystem.rst | 2 +- .../comparison/comparison_with_sql.rst | 6 +- doc/source/getting_started/install.rst | 2 +- .../06_calculate_statistics.rst | 2 +- .../intro_tutorials/08_combine_dataframes.rst | 6 +- doc/source/getting_started/overview.rst | 2 +- doc/source/reference/panel.rst | 2 +- doc/source/reference/plotting.rst | 2 +- doc/source/user_guide/10min.rst | 6 +- doc/source/user_guide/basics.rst | 16 ++-- doc/source/user_guide/categorical.rst | 68 +++++++-------- doc/source/user_guide/cookbook.rst | 2 +- doc/source/user_guide/enhancingperf.rst | 6 +- doc/source/user_guide/groupby.rst | 4 +- doc/source/user_guide/io.rst | 82 +++++++++---------- doc/source/user_guide/merging.rst | 8 +- doc/source/user_guide/missing_data.rst | 2 +- doc/source/user_guide/options.rst | 14 ++-- doc/source/user_guide/reshaping.rst | 4 +- doc/source/user_guide/scale.rst | 4 +- doc/source/user_guide/text.rst | 2 +- doc/source/user_guide/timeseries.rst | 4 +- doc/source/user_guide/visualization.rst | 8 +- doc/source/whatsnew/v0.10.1.rst | 8 +- doc/source/whatsnew/v0.11.0.rst | 8 +- doc/source/whatsnew/v0.13.0.rst | 8 +- doc/source/whatsnew/v0.13.1.rst | 4 +- doc/source/whatsnew/v0.14.0.rst | 30 +++---- doc/source/whatsnew/v0.14.1.rst | 8 +- doc/source/whatsnew/v0.15.0.rst | 8 +- doc/source/whatsnew/v0.15.1.rst | 4 +- doc/source/whatsnew/v0.15.2.rst | 6 +- doc/source/whatsnew/v0.16.0.rst | 4 +- doc/source/whatsnew/v0.16.1.rst | 10 +-- doc/source/whatsnew/v0.16.2.rst | 2 +- doc/source/whatsnew/v0.17.0.rst | 8 +- doc/source/whatsnew/v0.18.0.rst | 2 +- doc/source/whatsnew/v0.19.1.rst | 2 +- 
doc/source/whatsnew/v0.23.0.rst | 22 ++--- doc/source/whatsnew/v0.23.1.rst | 6 +- doc/source/whatsnew/v0.24.0.rst | 20 ++--- doc/source/whatsnew/v0.24.1.rst | 4 +- doc/source/whatsnew/v0.25.0.rst | 6 +- doc/source/whatsnew/v0.25.1.rst | 14 ++-- doc/source/whatsnew/v0.6.0.rst | 2 +- doc/source/whatsnew/v0.6.1.rst | 6 +- doc/source/whatsnew/v0.7.0.rst | 4 +- doc/source/whatsnew/v0.8.0.rst | 8 +- doc/source/whatsnew/v0.9.0.rst | 2 +- doc/source/whatsnew/v0.9.1.rst | 26 +++--- doc/source/whatsnew/v1.0.0.rst | 32 ++++---- doc/source/whatsnew/v1.1.0.rst | 20 ++--- 56 files changed, 311 insertions(+), 340 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d01956bb79e11..f4b23e362ec1d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,33 +35,4 @@ repos: hooks: - id: rst-backticks # these exclusions should be removed and the files fixed - exclude: (?x)( - text\.rst| - timeseries\.rst| - visualization\.rst| - missing_data\.rst| - options\.rst| - reshaping\.rst| - scale\.rst| - merging\.rst| - cookbook\.rst| - enhancingperf\.rst| - groupby\.rst| - io\.rst| - overview\.rst| - panel\.rst| - plotting\.rst| - 10min\.rst| - basics\.rst| - categorical\.rst| - contributing\.rst| - contributing_docstring\.rst| - extending\.rst| - ecosystem\.rst| - comparison_with_sql\.rst| - install\.rst| - calculate_statistics\.rst| - combine_dataframes\.rst| - v0\.| - v1\.0\.| - v1\.1\.[012]) + \ No newline at end of file diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst index bb13fbed09677..d6955c5d4b8d2 100644 --- a/doc/source/development/contributing.rst +++ b/doc/source/development/contributing.rst @@ -31,13 +31,13 @@ comment letting others know they are working on an issue. While this is ok, you check each issue individually, and it's not possible to find the unassigned ones. For this reason, we implemented a workaround consisting of adding a comment with the exact -text `take`. 
When you do it, a GitHub action will automatically assign you the issue +text ``take``. When you do it, a GitHub action will automatically assign you the issue (this will take seconds, and may require refreshing the page to see it). By doing this, it's possible to filter the list of issues and find only the unassigned ones. So, a good way to find an issue to start contributing to pandas is to check the list of `unassigned good first issues `_ -and assign yourself one you like by writing a comment with the exact text `take`. +and assign yourself one you like by writing a comment with the exact text ``take``. If for whatever reason you are not able to continue working with the issue, please try to unassign it, so other people know it's available again. You can check the list of @@ -133,7 +133,7 @@ want to clone your fork to your machine:: cd pandas-yourname git remote add upstream https://github.com/pandas-dev/pandas.git -This creates the directory `pandas-yourname` and connects your repository to +This creates the directory ``pandas-yourname`` and connects your repository to the upstream (main project) *pandas* repository. Note that performing a shallow clone (with ``--depth==N``, for some ``N`` greater @@ -155,12 +155,12 @@ Using a Docker container Instead of manually setting up a development environment, you can use `Docker `_ to automatically create the environment with just several -commands. Pandas provides a `DockerFile` in the root directory to build a Docker image +commands. Pandas provides a ``DockerFile`` in the root directory to build a Docker image with a full pandas development environment. **Docker Commands** -Pass your GitHub username in the `DockerFile` to use your own fork:: +Pass your GitHub username in the ``DockerFile`` to use your own fork:: # Build the image pandas-yourname-env docker build --tag pandas-yourname-env . 
@@ -172,7 +172,7 @@ Even easier, you can integrate Docker with the following IDEs: **Visual Studio Code** You can use the DockerFile to launch a remote session with Visual Studio Code, -a popular free IDE, using the `.devcontainer.json` file. +a popular free IDE, using the ``.devcontainer.json`` file. See https://code.visualstudio.com/docs/remote/containers for details. **PyCharm (Professional)** @@ -782,7 +782,7 @@ As part of :ref:`Continuous Integration ` checks we run:: isort --check-only pandas -to check that imports are correctly formatted as per the `setup.cfg`. +to check that imports are correctly formatted as per the ``setup.cfg``. If you see output like the below in :ref:`Continuous Integration ` checks: @@ -979,7 +979,7 @@ For example, quite a few functions in pandas accept a ``dtype`` argument. This c def as_type(dtype: Dtype) -> ...: ... -This module will ultimately house types for repeatedly used concepts like "path-like", "array-like", "numeric", etc... and can also hold aliases for commonly appearing parameters like `axis`. Development of this module is active so be sure to refer to the source for the most up to date list of available types. +This module will ultimately house types for repeatedly used concepts like "path-like", "array-like", "numeric", etc... and can also hold aliases for commonly appearing parameters like ``axis``. Development of this module is active so be sure to refer to the source for the most up to date list of available types. Validating type hints ~~~~~~~~~~~~~~~~~~~~~ @@ -1302,7 +1302,7 @@ Or with one of the following constructs:: Using `pytest-xdist `_, one can speed up local testing on multicore machines. To use this feature, you will -need to install `pytest-xdist` via:: +need to install ``pytest-xdist`` via:: pip install pytest-xdist @@ -1465,7 +1465,7 @@ The following defines how a commit message should be structured. Please referen relevant GitHub issues in your commit message using GH1234 or #1234. 
Either style is fine, but the former is generally preferred: -* a subject line with `< 80` chars. +* a subject line with ``< 80`` chars. * One blank line. * Optionally, a commit message body. diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst index 33f30e1d97512..fdba12c404990 100644 --- a/doc/source/development/contributing_docstring.rst +++ b/doc/source/development/contributing_docstring.rst @@ -25,7 +25,7 @@ The next example gives an idea of what a docstring looks like: """ Add up two integer numbers. - This function simply wraps the `+` operator, and does not + This function simply wraps the ``+`` operator, and does not do anything interesting, except for illustrating what the docstring of a very simple function looks like. @@ -39,7 +39,7 @@ The next example gives an idea of what a docstring looks like: Returns ------- int - The sum of `num1` and `num2`. + The sum of ``num1`` and ``num2``. See Also -------- @@ -126,9 +126,9 @@ backticks. The following are considered inline code: def add_values(arr): """ - Add the values in `arr`. + Add the values in ``arr``. - This is equivalent to Python `sum` of :meth:`pandas.Series.sum`. + This is equivalent to Python ``sum`` of :meth:`pandas.Series.sum`. Some sections are omitted here for simplicity. """ @@ -144,13 +144,13 @@ backticks. The following are considered inline code: With several mistakes in the docstring. - It has a blank like after the signature `def func():`. + It has a blank like after the signature ``def func():``. The text 'Some function' should go in the line after the opening quotes of the docstring, not in the same line. There is a blank line between the docstring and the first line - of code `foo = 1`. + of code ``foo = 1``. The closing quotes should be in the next line, not in this one.""" @@ -269,11 +269,11 @@ after, and not between the line with the word "Parameters" and the one with the hyphens. 
After the title, each parameter in the signature must be documented, including -`*args` and `**kwargs`, but not `self`. +``*args`` and ``**kwargs``, but not ``self``. The parameters are defined by their name, followed by a space, a colon, another space, and the type (or types). Note that the space between the name and the -colon is important. Types are not defined for `*args` and `**kwargs`, but must +colon is important. Types are not defined for ``*args`` and ``**kwargs``, but must be defined for all other parameters. After the parameter definition, it is required to have a line with the parameter description, which is indented, and can have multiple lines. The description must start with a capital letter, and @@ -285,13 +285,13 @@ comma at the end of the type. The exact form of the type in this case will be argument means, which can be added after a comma "int, default -1, meaning all cpus". -In cases where the default value is `None`, meaning that the value will not be +In cases where the default value is ``None``, meaning that the value will not be used. Instead of "str, default None", it is preferred to write "str, optional". -When `None` is a value being used, we will keep the form "str, default None". -For example, in `df.to_csv(compression=None)`, `None` is not a value being used, +When ``None`` is a value being used, we will keep the form "str, default None". +For example, in ``df.to_csv(compression=None)``, ``None`` is not a value being used, but means that compression is optional, and no compression is being used if not -provided. In this case we will use `str, optional`. Only in cases like -`func(value=None)` and `None` is being used in the same way as `0` or `foo` +provided. In this case we will use ``str, optional``. Only in cases like +``func(value=None)`` and ``None`` is being used in the same way as ``0`` or ``foo`` would be used, then we will specify "str, int or None, default None". 
**Good:** @@ -331,13 +331,13 @@ would be used, then we will specify "str, int or None, default None". specified kind. Note the blank line between the parameters title and the first - parameter. Also, note that after the name of the parameter `kind` + parameter. Also, note that after the name of the parameter ``kind`` and before the colon, a space is missing. Also, note that the parameter descriptions do not start with a capital letter, and do not finish with a dot. - Finally, the `**kwargs` parameter is missing. + Finally, the ``**kwargs`` parameter is missing. Parameters ---------- @@ -361,9 +361,9 @@ boolean, etc): * str * bool -For complex types, define the subtypes. For `dict` and `tuple`, as more than +For complex types, define the subtypes. For ``dict`` and ``tuple``, as more than one type is present, we use the brackets to help read the type (curly brackets -for `dict` and normal brackets for `tuple`): +for ``dict`` and normal brackets for ``tuple``): * list of int * dict of {str : int} @@ -512,8 +512,8 @@ This section is used to let users know about pandas functionality related to the one being documented. In rare cases, if no related methods or functions can be found at all, this section can be skipped. -An obvious example would be the `head()` and `tail()` methods. As `tail()` does -the equivalent as `head()` but at the end of the `Series` or `DataFrame` +An obvious example would be the ``head()`` and ``tail()`` methods. As ``tail()`` does +the equivalent as ``head()`` but at the end of the ``Series`` or ``DataFrame`` instead of at the beginning, it is good to let the users know about it. To give an intuition on what can be considered related, here there are some @@ -608,8 +608,8 @@ Examples in docstrings, besides illustrating the usage of the function or method, must be valid Python code, that returns the given output in a deterministic way, and that can be copied and run by users. -Examples are presented as a session in the Python terminal. 
`>>>` is used to -present code. `...` is used for code continuing from the previous line. +Examples are presented as a session in the Python terminal. ``>>>`` is used to +present code. ``...`` is used for code continuing from the previous line. Output is presented immediately after the last line of code generating the output (no blank lines in between). Comments describing the examples can be added with blank lines before and after them. @@ -664,7 +664,7 @@ A simple example could be: 4 Falcon dtype: object - With the `n` parameter, we can change the number of returned rows: + With the ``n`` parameter, we can change the number of returned rows: >>> s.head(n=3) 0 Ant @@ -742,7 +742,7 @@ positional arguments ``head(3)``. def fillna(self, value): """ - Replace missing values by `value`. + Replace missing values by ``value``. Examples -------- @@ -771,7 +771,7 @@ positional arguments ``head(3)``. def contains(self, pattern, case_sensitive=True, na=numpy.nan): """ - Return whether each value contains `pattern`. + Return whether each value contains ``pattern``. In this case, we are illustrating how to use sections, even if the example is simple enough and does not require them. @@ -788,8 +788,8 @@ positional arguments ``head(3)``. **Case sensitivity** - With `case_sensitive` set to `False` we can match `a` with both - `a` and `A`: + With ``case_sensitive`` set to ``False`` we can match ``a`` with both + ``a`` and ``A``: >>> s.contains(pattern='a', case_sensitive=False) 0 True @@ -800,7 +800,7 @@ positional arguments ``head(3)``. **Missing values** - We can fill missing values in the output using the `na` parameter: + We can fill missing values in the output using the ``na`` parameter: >>> s.contains(pattern='a', na=False) 0 False @@ -824,9 +824,9 @@ positional arguments ``head(3)``. Try to use meaningful data, when it makes the example easier to understand. - Try to avoid positional arguments like in `df.method(1)`. 
They + Try to avoid positional arguments like in ``df.method(1)``. They can be all right if previously defined with a meaningful name, - like in `present_value(interest_rate)`, but avoid them otherwise. + like in ``present_value(interest_rate)``, but avoid them otherwise. When presenting the behavior with different parameters, do not place all the calls one next to the other. Instead, add a short sentence @@ -914,7 +914,7 @@ plot will be generated automatically when building the documentation. class Series: def plot(self): """ - Generate a plot with the `Series` data. + Generate a plot with the ``Series`` data. Examples -------- diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 46c2cbbe39b34..c708ebb361ed1 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -61,7 +61,7 @@ This can be a convenient way to extend pandas objects without subclassing them. If you write a custom accessor, make a pull request adding it to our :ref:`ecosystem` page. -We highly recommend validating the data in your accessor's `__init__`. +We highly recommend validating the data in your accessor's ``__init__``. In our ``GeoAccessor``, we validate that the data contains the expected columns, raising an ``AttributeError`` when the validation fails. For a ``Series`` accessor, you should validate the ``dtype`` if the accessor diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index 624c0551de607..ed6ce7e9759b6 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -436,7 +436,7 @@ arrays can be stored inside pandas' Series and DataFrame. `Pint-Pandas`_ ~~~~~~~~~~~~~~ -`Pint-Pandas ` provides an extension type for +``Pint-Pandas `` provides an extension type for storing numeric arrays with units. These arrays can be stored inside pandas' Series and DataFrame. Operations between Series and DataFrame columns which use pint's extension array are then units aware. 
diff --git a/doc/source/getting_started/comparison/comparison_with_sql.rst b/doc/source/getting_started/comparison/comparison_with_sql.rst index aa7218c3e4fad..04f97a27cde39 100644 --- a/doc/source/getting_started/comparison/comparison_with_sql.rst +++ b/doc/source/getting_started/comparison/comparison_with_sql.rst @@ -19,7 +19,7 @@ As is customary, we import pandas and NumPy as follows: import numpy as np Most of the examples will utilize the ``tips`` dataset found within pandas tests. We'll read -the data into a DataFrame called `tips` and assume we have a database table of the same name and +the data into a DataFrame called ``tips`` and assume we have a database table of the same name and structure. .. ipython:: python @@ -429,7 +429,7 @@ Top n rows per group .query('rn < 3') .sort_values(['day', 'rn'])) -the same using `rank(method='first')` function +the same using ``rank(method='first')`` function .. ipython:: python @@ -453,7 +453,7 @@ the same using `rank(method='first')` function Let's find tips with (rank < 3) per gender group for (tips < 2). Notice that when using ``rank(method='min')`` function -`rnk_min` remains the same for the same `tip` +``rnk_min`` remains the same for the same ``tip`` (as Oracle's RANK() function) .. ipython:: python diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 2196c908ecf37..78bd76bbd230f 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -179,7 +179,7 @@ In Linux/Mac you can run ``which python`` on your terminal and it will tell you using. If it's something like "/usr/bin/python", you're using the Python from the system, which is not recommended. It is highly recommended to use ``conda``, for quick installation and for package and dependency updates. -You can find simple installation instructions for pandas in this document: `installation instructions `. 
+You can find simple installation instructions for pandas in this document: ``installation instructions ``. Installing from source ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst index c7363b94146ac..bd85160d2622a 100644 --- a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst +++ b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst @@ -197,7 +197,7 @@ on the grouped data as well: :align: center .. note:: - The `Pclass` column contains numerical data but actually + The ``Pclass`` column contains numerical data but actually represents 3 categories (or factors) with respectively the labels ‘1’, ‘2’ and ‘3’. Calculating statistics on these does not make much sense. Therefore, pandas provides a ``Categorical`` data type to handle this diff --git a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst index 600a75b156ac4..d6da9a0aa4f22 100644 --- a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst +++ b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst @@ -123,9 +123,9 @@ concatenated tables to verify the operation: .. ipython:: python - print('Shape of the `air_quality_pm25` table: ', air_quality_pm25.shape) - print('Shape of the `air_quality_no2` table: ', air_quality_no2.shape) - print('Shape of the resulting `air_quality` table: ', air_quality.shape) + print('Shape of the ``air_quality_pm25`` table: ', air_quality_pm25.shape) + print('Shape of the ``air_quality_no2`` table: ', air_quality_no2.shape) + print('Shape of the resulting ``air_quality`` table: ', air_quality.shape) Hence, the resulting table has 3178 = 1110 + 2068 rows. 
diff --git a/doc/source/getting_started/overview.rst b/doc/source/getting_started/overview.rst index 032ba73a7293d..57d87d4ec8a91 100644 --- a/doc/source/getting_started/overview.rst +++ b/doc/source/getting_started/overview.rst @@ -40,7 +40,7 @@ Here are just a few of the things that pandas does well: higher dimensional objects - Automatic and explicit **data alignment**: objects can be explicitly aligned to a set of labels, or the user can simply ignore the labels and - let `Series`, `DataFrame`, etc. automatically align the data for you in + let ``Series``, ``DataFrame``, etc. automatically align the data for you in computations - Powerful, flexible **group by** functionality to perform split-apply-combine operations on data sets, for both aggregating and diff --git a/doc/source/reference/panel.rst b/doc/source/reference/panel.rst index 94bfe87fe39f0..713265292db33 100644 --- a/doc/source/reference/panel.rst +++ b/doc/source/reference/panel.rst @@ -7,4 +7,4 @@ Panel ===== .. currentmodule:: pandas -`Panel` was removed in 0.25.0. For prior documentation, see the `0.24 documentation `_ +``Panel`` was removed in 0.25.0. For prior documentation, see the `0.24 documentation `_ diff --git a/doc/source/reference/plotting.rst b/doc/source/reference/plotting.rst index 95657dfa5fde5..632b39a1fa858 100644 --- a/doc/source/reference/plotting.rst +++ b/doc/source/reference/plotting.rst @@ -7,7 +7,7 @@ Plotting ======== .. currentmodule:: pandas.plotting -The following functions are contained in the `pandas.plotting` module. +The following functions are contained in the ``pandas.plotting`` module. .. autosummary:: :toctree: api/ diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst index 93c50fff40305..7f14e17560231 100644 --- a/doc/source/user_guide/10min.rst +++ b/doc/source/user_guide/10min.rst @@ -431,10 +431,10 @@ See more at :ref:`Histogramming and Discretization `. 
String Methods ~~~~~~~~~~~~~~ -Series is equipped with a set of string processing methods in the `str` +Series is equipped with a set of string processing methods in the ``str`` attribute that make it easy to operate on each element of the array, as in the -code snippet below. Note that pattern-matching in `str` generally uses `regular -expressions `__ by default (and in +code snippet below. Note that pattern-matching in ``str`` generally uses `regular +expressions `__ by default (and in some cases always uses them). See more at :ref:`Vectorized String Methods `. diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index 6b13319061ea4..4038f706b9338 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -1459,7 +1459,7 @@ for altering the ``Series.name`` attribute. .. versionadded:: 0.24.0 The methods :meth:`DataFrame.rename_axis` and :meth:`Series.rename_axis` -allow specific names of a `MultiIndex` to be changed (as opposed to the +allow specific names of a ``MultiIndex`` to be changed (as opposed to the labels). .. ipython:: python @@ -1592,7 +1592,7 @@ index value along with a Series containing the data in each row: row All values in ``row``, returned as a Series, are now upcasted - to floats, also the original integer value in column `x`: + to floats, also the original integer value in column ``x``: .. ipython:: python @@ -1787,8 +1787,8 @@ used to sort a pandas object by its index levels. .. versionadded:: 1.1.0 Sorting by index also supports a ``key`` parameter that takes a callable -function to apply to the index being sorted. For `MultiIndex` objects, -the key is applied per-level to the levels specified by `level`. +function to apply to the index being sorted. For ``MultiIndex`` objects, +the key is applied per-level to the levels specified by ``level``. .. 
ipython:: python @@ -1812,8 +1812,8 @@ For information on key sorting by value, see :ref:`value sorting By values ~~~~~~~~~ -The :meth:`Series.sort_values` method is used to sort a `Series` by its values. The -:meth:`DataFrame.sort_values` method is used to sort a `DataFrame` by its column or row values. +The :meth:`Series.sort_values` method is used to sort a ``Series`` by its values. The +:meth:`DataFrame.sort_values` method is used to sort a ``DataFrame`` by its column or row values. The optional ``by`` parameter to :meth:`DataFrame.sort_values` may used to specify one or more columns to use to determine the sorted order. @@ -1855,8 +1855,8 @@ to apply to the values being sorted. s1.sort_values() s1.sort_values(key=lambda x: x.str.lower()) -`key` will be given the :class:`Series` of values and should return a ``Series`` -or array of the same shape with the transformed values. For `DataFrame` objects, +``key`` will be given the :class:`Series` of values and should return a ``Series`` +or array of the same shape with the transformed values. For ``DataFrame`` objects, the key is applied per column, so the key should still expect a Series and return a Series, e.g. diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index b7475ae7bb132..f3f0f1ca652df 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -9,9 +9,9 @@ Categorical data This is an introduction to pandas categorical data type, including a short comparison with R's ``factor``. -`Categoricals` are a pandas data type corresponding to categorical variables in +``Categoricals`` are a pandas data type corresponding to categorical variables in statistics. A categorical variable takes on a limited, and usually fixed, -number of possible values (`categories`; `levels` in R). Examples are gender, +number of possible values (``categories``; ``levels`` in R). 
Examples are gender, social class, blood type, country affiliation, observation time or rating via Likert scales. @@ -19,10 +19,10 @@ In contrast to statistical categorical variables, categorical data might have an 'strongly agree' vs 'agree' or 'first observation' vs. 'second observation'), but numerical operations (additions, divisions, ...) are not possible. -All values of categorical data are either in `categories` or `np.nan`. Order is defined by -the order of `categories`, not lexical order of the values. Internally, the data structure -consists of a `categories` array and an integer array of `codes` which point to the real value in -the `categories` array. +All values of categorical data are either in ``categories`` or ``np.nan``. Order is defined by +the order of ``categories``, not lexical order of the values. Internally, the data structure +consists of a ``categories`` array and an integer array of ``codes`` which point to the real value in +the ``categories`` array. The categorical data type is useful in the following cases: @@ -196,13 +196,13 @@ To get back to the original ``Series`` or NumPy array, use .. note:: - In contrast to R's `factor` function, categorical data is not converting input values to + In contrast to R's ``factor`` function, categorical data is not converting input values to strings; categories will end up the same data type as the original values. .. note:: - In contrast to R's `factor` function, there is currently no way to assign/change labels at - creation time. Use `categories` to change the categories after creation time. + In contrast to R's ``factor`` function, there is currently no way to assign/change labels at + creation time. Use ``categories`` to change the categories after creation time. .. _categorical.categoricaldtype: @@ -228,7 +228,7 @@ by default. CategoricalDtype() A :class:`~pandas.api.types.CategoricalDtype` can be used in any place pandas -expects a `dtype`. 
For example :func:`pandas.read_csv`, +expects a ``dtype``. For example :func:`pandas.read_csv`, :func:`pandas.DataFrame.astype`, or in the ``Series`` constructor. .. note:: @@ -288,7 +288,7 @@ output to a ``Series`` or ``DataFrame`` of type ``string``. Working with categories ----------------------- -Categorical data has a `categories` and a `ordered` property, which list their +Categorical data has a ``categories`` and a ``ordered`` property, which list their possible values and whether the ordering matters or not. These properties are exposed as ``s.cat.categories`` and ``s.cat.ordered``. If you don't manually specify categories and ordering, they are inferred from the passed arguments. @@ -353,14 +353,14 @@ Renaming categories is done by assigning new values to the .. note:: - In contrast to R's `factor`, categorical data can have categories of other types than string. + In contrast to R's ``factor``, categorical data can have categories of other types than string. .. note:: Be aware that assigning new categories is an inplace operation, while most other operations - under ``Series.cat`` per default return a new ``Series`` of dtype `category`. + under ``Series.cat`` per default return a new ``Series`` of dtype ``category``. -Categories must be unique or a `ValueError` is raised: +Categories must be unique or a ``ValueError`` is raised: .. ipython:: python @@ -369,7 +369,7 @@ Categories must be unique or a `ValueError` is raised: except ValueError as e: print("ValueError:", str(e)) -Categories must also not be ``NaN`` or a `ValueError` is raised: +Categories must also not be ``NaN`` or a ``ValueError`` is raised: .. ipython:: python @@ -535,7 +535,7 @@ Comparing categorical data with other objects is possible in three cases: * Comparing equality (``==`` and ``!=``) to a list-like object (list, Series, array, ...) of the same length as the categorical data. 
* All comparisons (``==``, ``!=``, ``>``, ``>=``, ``<``, and ``<=``) of categorical data to - another categorical Series, when ``ordered==True`` and the `categories` are the same. + another categorical Series, when ``ordered==True`` and the ``categories`` are the same. * All comparisons of a categorical data to a scalar. All other comparisons, especially "non-equality" comparisons of two categoricals with different @@ -657,7 +657,7 @@ Data munging The optimized pandas data access methods ``.loc``, ``.iloc``, ``.at``, and ``.iat``, work as normal. The only difference is the return type (for getting) and -that only values already in `categories` can be assigned. +that only values already in ``categories`` can be assigned. Getting ~~~~~~~ @@ -695,8 +695,8 @@ of length "1". df.at["h", "cats"] # returns a string .. note:: - The is in contrast to R's `factor` function, where ``factor(c(1,2,3))[1]`` - returns a single value `factor`. + The is in contrast to R's ``factor`` function, where ``factor(c(1,2,3))[1]`` + returns a single value ``factor``. To get a single value ``Series`` of type ``category``, you pass in a list with a single value: @@ -732,7 +732,7 @@ an appropriate type: That means, that the returned values from methods and properties on the accessors of a ``Series`` and the returned values from methods and properties on the accessors of this -``Series`` transformed to one of type `category` will be equal: +``Series`` transformed to one of type ``category`` will be equal: .. ipython:: python @@ -753,7 +753,7 @@ Setting ~~~~~~~ Setting values in a categorical column (or ``Series``) works as long as the -value is included in the `categories`: +value is included in the ``categories``: .. 
ipython:: python @@ -770,7 +770,7 @@ value is included in the `categories`: except ValueError as e: print("ValueError:", str(e)) -Setting values by assigning categorical data will also check that the `categories` match: +Setting values by assigning categorical data will also check that the ``categories`` match: .. ipython:: python @@ -941,7 +941,7 @@ See :ref:`here ` for an example and caveats. Writing to a CSV file will convert the data, effectively removing any information about the categorical (categories and ordering). So if you read back the CSV file you have to convert the -relevant columns back to `category` and assign the right categories and categories ordering. +relevant columns back to ``category`` and assign the right categories and categories ordering. .. ipython:: python @@ -970,7 +970,7 @@ The same holds for writing to a SQL database with ``to_sql``. Missing data ------------ -pandas primarily uses the value `np.nan` to represent missing data. It is by +pandas primarily uses the value ``np.nan`` to represent missing data. It is by default not included in computations. See the :ref:`Missing Data section `. @@ -998,20 +998,20 @@ Methods for working with missing data, e.g. :meth:`~Series.isna`, :meth:`~Series pd.isna(s) s.fillna("a") -Differences to R's `factor` +Differences to R's ``factor`` --------------------------- The following differences to R's factor functions can be observed: -* R's `levels` are named `categories`. -* R's `levels` are always of type string, while `categories` in pandas can be of any dtype. +* R's ``levels`` are named ``categories``. +* R's ``levels`` are always of type string, while ``categories`` in pandas can be of any dtype. * It's not possible to specify labels at creation time. Use ``s.cat.rename_categories(new_labels)`` afterwards. 
-* In contrast to R's `factor` function, using categorical data as the sole input to create a +* In contrast to R's ``factor`` function, using categorical data as the sole input to create a new categorical series will *not* remove unused categories but create a new categorical series which is equal to the passed in one! -* R allows for missing values to be included in its `levels` (pandas' `categories`). Pandas - does not allow `NaN` categories, but missing values can still be in the `values`. +* R allows for missing values to be included in its ``levels`` (pandas' ``categories``). Pandas + does not allow ``NaN`` categories, but missing values can still be in the ``values``. Gotchas @@ -1053,13 +1053,13 @@ an ``object`` dtype is a constant times the length of the data. s.astype('category').nbytes -`Categorical` is not a `numpy` array +``Categorical`` is not a ``numpy`` array ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Currently, categorical data and the underlying ``Categorical`` is implemented as a Python object and not as a low-level NumPy array dtype. This leads to some problems. -NumPy itself doesn't know about the new `dtype`: +NumPy itself doesn't know about the new ``dtype``: .. ipython:: python @@ -1088,7 +1088,7 @@ To check if a Series contains Categorical data, use ``hasattr(s, 'cat')``: hasattr(pd.Series(['a'], dtype='category'), 'cat') hasattr(pd.Series(['a']), 'cat') -Using NumPy functions on a ``Series`` of type ``category`` should not work as `Categoricals` +Using NumPy functions on a ``Series`` of type ``category`` should not work as ``Categoricals`` are not numeric data (even in the case that ``.categories`` is numeric). .. 
ipython:: python @@ -1107,7 +1107,7 @@ dtype in apply ~~~~~~~~~~~~~~ Pandas currently does not preserve the dtype in apply functions: If you apply along rows you get -a `Series` of ``object`` `dtype` (same as getting a row -> getting one element will return a +a ``Series`` of ``object`` ``dtype`` (same as getting a row -> getting one element will return a basic type) and applying along columns will also convert to object. ``NaN`` values are unaffected. You can use ``fillna`` to handle missing values before applying a function. diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 7542e1dc7df6f..e33e85d3d2224 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -1270,7 +1270,7 @@ Often it's useful to obtain the lower (or upper) triangular form of a correlatio corr_mat.where(mask) -The `method` argument within `DataFrame.corr` can accept a callable in addition to the named correlation types. Here we compute the `distance correlation `__ matrix for a `DataFrame` object. +The ``method`` argument within ``DataFrame.corr`` can accept a callable in addition to the named correlation types. Here we compute the `distance correlation `__ matrix for a ``DataFrame`` object. .. ipython:: python diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst index 9e101c1a20371..ce9db0a5279c3 100644 --- a/doc/source/user_guide/enhancingperf.rst +++ b/doc/source/user_guide/enhancingperf.rst @@ -488,9 +488,9 @@ These operations are supported by :func:`pandas.eval`: * Attribute access, e.g., ``df.a`` * Subscript expressions, e.g., ``df[0]`` * Simple variable evaluation, e.g., ``pd.eval('df')`` (this is not very useful) -* Math functions: `sin`, `cos`, `exp`, `log`, `expm1`, `log1p`, - `sqrt`, `sinh`, `cosh`, `tanh`, `arcsin`, `arccos`, `arctan`, `arccosh`, - `arcsinh`, `arctanh`, `abs`, `arctan2` and `log10`.
+* Math functions: ``sin``, ``cos``, ``exp``, ``log``, ``expm1``, ``log1p``, + ``sqrt``, ``sinh``, ``cosh``, ``tanh``, ``arcsin``, ``arccos``, ``arctan``, ``arccosh``, + ``arcsinh``, ``arctanh``, ``abs``, ``arctan2`` and ``log10``. This Python syntax is **not** allowed: diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index f745dab00bab8..52342de98de79 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -216,10 +216,10 @@ in case you want to include ``NA`` values in group keys, you could pass ``dropna .. ipython:: python - # Default `dropna` is set to True, which will exclude NaNs in keys + # Default ``dropna`` is set to True, which will exclude NaNs in keys df_dropna.groupby(by=["b"], dropna=True).sum() - # In order to allow NaN in keys, set `dropna` to False + # In order to allow NaN in keys, set ``dropna`` to False df_dropna.groupby(by=["b"], dropna=False).sum() The default setting of ``dropna`` argument is ``True`` which means ``NA`` are not included in group keys. diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index a0b16e5fe5d1c..80fd54518ada1 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -117,9 +117,9 @@ index_col : int, str, sequence of int / str, or False, default ``None`` usecols : list-like or callable, default ``None`` Return a subset of the columns. If list-like, all elements must either be positional (i.e. integer indices into the document columns) or strings - that correspond to column names provided either by the user in `names` or + that correspond to column names provided either by the user in ``names`` or inferred from the document header row(s). For example, a valid list-like - `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``. + ``usecols`` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``. Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. 
To instantiate a DataFrame from ``data`` with element order preserved use @@ -157,7 +157,7 @@ General parsing configuration dtype : Type name or dict of column -> type, default ``None`` Data type for data or columns. E.g. ``{'a': np.float64, 'b': np.int32}`` - (unsupported with ``engine='python'``). Use `str` or `object` together + (unsupported with ``engine='python'``). Use ``str`` or ``object`` together with suitable ``na_values`` settings to preserve and not interpret dtype. engine : {``'c'``, ``'python'``} @@ -215,19 +215,19 @@ na_values : scalar, str, list-like, or dict, default ``None`` keep_default_na : boolean, default ``True`` Whether or not to include the default NaN values when parsing the data. - Depending on whether `na_values` is passed in, the behavior is as follows: + Depending on whether ``na_values`` is passed in, the behavior is as follows: - * If `keep_default_na` is ``True``, and `na_values` are specified, `na_values` + * If ``keep_default_na`` is ``True``, and ``na_values`` are specified, ``na_values`` is appended to the default NaN values used for parsing. - * If `keep_default_na` is ``True``, and `na_values` are not specified, only + * If ``keep_default_na`` is ``True``, and ``na_values`` are not specified, only the default NaN values are used for parsing. - * If `keep_default_na` is ``False``, and `na_values` are specified, only - the NaN values specified `na_values` are used for parsing. - * If `keep_default_na` is ``False``, and `na_values` are not specified, no + * If ``keep_default_na`` is ``False``, and ``na_values`` are specified, only + the NaN values specified ``na_values`` are used for parsing. + * If ``keep_default_na`` is ``False``, and ``na_values`` are not specified, no strings will be parsed as NaN. - Note that if `na_filter` is passed in as ``False``, the `keep_default_na` and - `na_values` parameters will be ignored. 
+ Note that if ``na_filter`` is passed in as ``False``, the ``keep_default_na`` and + ``na_values`` parameters will be ignored. na_filter : boolean, default ``True`` Detect missing value markers (empty strings and the value of na_values). In data without any NAs, passing ``na_filter=False`` can improve the performance @@ -276,11 +276,11 @@ Iteration +++++++++ iterator : boolean, default ``False`` - Return `TextFileReader` object for iteration or getting chunks with + Return ``TextFileReader`` object for iteration or getting chunks with ``get_chunk()``. chunksize : int, default ``None`` - Return `TextFileReader` object for iteration. See :ref:`iterating and chunking - ` below. + Return ``TextFileReader`` object for iteration. See :ref:`iterating and chunking + ` below. Quoting, compression, and file format +++++++++++++++++++++++++++++++++++++ @@ -299,7 +299,7 @@ compression : {``'infer'``, ``'gzip'``, ``'bz2'``, ``'zip'``, ``'xz'``, ``None`` .. versionchanged:: 0.24.0 'infer' option added and set to default. .. versionchanged:: 1.1.0 dict option extended to support ``gzip`` and ``bz2``. - .. versionchanged:: 1.2.0 Previous versions forwarded dict entries for 'gzip' to `gzip.open`. + .. versionchanged:: 1.2.0 Previous versions forwarded dict entries for 'gzip' to ``gzip.open``. thousands : str, default ``None`` Thousands separator. decimal : str, default ``'.'`` @@ -327,17 +327,17 @@ comment : str, default ``None`` Indicates remainder of line should not be parsed. If found at the beginning of a line, the line will be ignored altogether. This parameter must be a single character. Like empty lines (as long as ``skip_blank_lines=True``), fully - commented lines are ignored by the parameter `header` but not by `skiprows`. + commented lines are ignored by the parameter ``header`` but not by ``skiprows``. For example, if ``comment='#'``, parsing '#empty\\na,b,c\\n1,2,3' with - `header=0` will result in 'a,b,c' being treated as the header.
+ ``header=0`` will result in 'a,b,c' being treated as the header. encoding : str, default ``None`` Encoding to use for UTF when reading/writing (e.g. ``'utf-8'``). `List of Python standard encodings `_. dialect : str or :class:`python:csv.Dialect` instance, default ``None`` If provided, this parameter will override values (default or not) for the - following parameters: `delimiter`, `doublequote`, `escapechar`, - `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to + following parameters: ``delimiter``, ``doublequote``, ``escapechar``, + ``skipinitialspace``, ``quotechar``, and ``quoting``. If it is necessary to override values, a ParserWarning will be issued. See :class:`python:csv.Dialect` documentation for more details. @@ -436,7 +436,7 @@ worth trying. mixed_df['col_1'].apply(type).value_counts() mixed_df['col_1'].dtype - will result with `mixed_df` containing an ``int`` dtype for certain chunks + will result with ``mixed_df`` containing an ``int`` dtype for certain chunks of the column, and ``str`` for others due to the mixed dtypes from the data that was read in. It is important to note that the overall column will be marked with a ``dtype`` of ``object``, which is used for columns with mixed dtypes. @@ -896,7 +896,7 @@ You can also use a dict to specify custom name columns: df It is important to remember that if multiple text columns are to be parsed into -a single date column, then a new column is prepended to the data. The `index_col` +a single date column, then a new column is prepended to the data. The ``index_col`` specification is based off of this new set of columns rather than the original data columns: @@ -937,7 +937,7 @@ Pandas will try to call the ``date_parser`` function in three different ways. If an exception is raised, the next one is tried: 1. ``date_parser`` is first called with one or more arrays as arguments, - as defined using `parse_dates` (e.g., ``date_parser(['2013', '2013'], ['1', '2'])``). 
+ as defined using ``parse_dates`` (e.g., ``date_parser(['2013', '2013'], ['1', '2'])``). 2. If #1 fails, ``date_parser`` is called with all the columns concatenated row-wise into a single array (e.g., ``date_parser(['2013 1', '2013 2'])``). @@ -1369,7 +1369,7 @@ Files with fixed width columns While :func:`read_csv` reads delimited data, the :func:`read_fwf` function works with data files that have known and fixed column widths. The function parameters -to ``read_fwf`` are largely the same as `read_csv` with two extra parameters, and +to ``read_fwf`` are largely the same as ``read_csv`` with two extra parameters, and a different usage of the ``delimiter`` parameter: * ``colspecs``: A list of pairs (tuples) giving the extents of the @@ -1402,7 +1402,7 @@ Consider a typical fixed-width data file: print(open('bar.csv').read()) In order to parse this file into a ``DataFrame``, we simply need to supply the -column specifications to the `read_fwf` function along with the file name: +column specifications to the ``read_fwf`` function along with the file name: .. ipython:: python @@ -1718,7 +1718,7 @@ The ``Series`` and ``DataFrame`` objects have an instance method ``to_csv`` whic allows storing the contents of the object as a comma-separated-values file. The function takes a number of arguments. Only the first is required. -* ``path_or_buf``: A string path to the file to write or a file object. If a file object it must be opened with `newline=''` +* ``path_or_buf``: A string path to the file to write or a file object. If a file object it must be opened with ``newline=''`` * ``sep`` : Field delimiter for the output file (default ",") * ``na_rep``: A string representation of a missing value (default '') * ``float_format``: Format string for floating point numbers @@ -1726,13 +1726,13 @@ function takes a number of arguments. Only the first is required. 
* ``header``: Whether to write out the column names (default True) * ``index``: whether to write row (index) names (default True) * ``index_label``: Column label(s) for index column(s) if desired. If None - (default), and `header` and `index` are True, then the index names are + (default), and ``header`` and ``index`` are True, then the index names are used. (A sequence should be given if the ``DataFrame`` uses MultiIndex). * ``mode`` : Python write mode, default 'w' * ``encoding``: a string representing the encoding to use if the contents are non-ASCII, for Python versions prior to 3 -* ``line_terminator``: Character sequence denoting line end (default `os.linesep`) -* ``quoting``: Set quoting rules as in csv module (default csv.QUOTE_MINIMAL). Note that if you have set a `float_format` then floats are converted to strings and csv.QUOTE_NONNUMERIC will treat them as non-numeric +* ``line_terminator``: Character sequence denoting line end (default ``os.linesep``) +* ``quoting``: Set quoting rules as in csv module (default csv.QUOTE_MINIMAL). Note that if you have set a ``float_format`` then floats are converted to strings and csv.QUOTE_NONNUMERIC will treat them as non-numeric * ``quotechar``: Character used to quote fields (default '"') * ``doublequote``: Control quoting of ``quotechar`` in fields (default True) * ``escapechar``: Character used to escape ``sep`` and ``quotechar`` when @@ -1885,7 +1885,7 @@ preservation of metadata including but not limited to dtypes and index names. Any orient option that encodes to a JSON object will not preserve the ordering of index and column labels during round-trip serialization. If you wish to preserve - label ordering use the `split` option as it uses ordered containers. + label ordering use the ``split`` option as it uses ordered containers. 
Date handling +++++++++++++ @@ -2240,7 +2240,7 @@ For line-delimited json files, pandas can also return an iterator which reads in df df.to_json(orient='records', lines=True) - # reader is an iterator that returns `chunksize` lines each iteration + # reader is an iterator that returns ``chunksize`` lines each iteration reader = pd.read_json(StringIO(jsonl), lines=True, chunksize=1) reader for chunk in reader: @@ -3092,7 +3092,7 @@ Dtype specifications ++++++++++++++++++++ As an alternative to converters, the type for an entire column can -be specified using the `dtype` keyword, which takes a dictionary +be specified using the ``dtype`` keyword, which takes a dictionary mapping column names to types. To interpret data with no type inference, use the type ``str`` or ``object``. @@ -3748,8 +3748,8 @@ Passing ``min_itemsize={`values`: size}`` as a parameter to append will set a larger minimum for the string columns. Storing ``floats, strings, ints, bools, datetime64`` are currently supported. For string columns, passing ``nan_rep = 'nan'`` to append will change the default -nan representation on disk (which converts to/from `np.nan`), this -defaults to `nan`. +nan representation on disk (which converts to/from ``np.nan``), this +defaults to ``nan``. .. ipython:: python @@ -4045,7 +4045,7 @@ Query via data columns ++++++++++++++++++++++ You can designate (and index) certain columns that you want to be able -to perform queries (other than the `indexable` columns, which you can +to perform queries (other than the ``indexable`` columns, which you can always query). For instance say you want to perform this common operation, on-disk, and return just the frame that matches this query. You can specify ``data_columns = True`` to force all columns to @@ -4076,7 +4076,7 @@ be ``data_columns``. store.root.df_dc.table There is some performance degradation by making lots of columns into -`data columns`, so it is up to the user to designate these. 
In addition, +``data columns``, so it is up to the user to designate these. In addition, you cannot change data columns (nor indexables) after the first append/put operation (Of course you can simply read in the data and create a new table!). @@ -4203,7 +4203,7 @@ having a very wide table, but enables more efficient queries. The ``append_to_multiple`` method splits a given single DataFrame into multiple tables according to ``d``, a dictionary that maps the -table names to a list of 'columns' you want in that table. If `None` +table names to a list of 'columns' you want in that table. If ``None`` is used in place of a list, that table will have the remaining unspecified columns of the given DataFrame. The argument ``selector`` defines which table is the selector table (which you can make queries from). @@ -4843,8 +4843,8 @@ Parquet supports partitioning of data based on the values of one or more columns df.to_parquet(path='test', engine='pyarrow', partition_cols=['a'], compression=None) -The `path` specifies the parent directory to which data will be saved. -The `partition_cols` are the column names by which the dataset will be partitioned. +The ``path`` specifies the parent directory to which data will be saved. +The ``partition_cols`` are the column names by which the dataset will be partitioned. Columns are partitioned in the order they are given. The partition splits are determined by the unique values in the partition columns. The above example creates a partitioned dataset that may look like: @@ -5495,7 +5495,7 @@ SAS formats ----------- The top-level function :func:`read_sas` can read (but not write) SAS -`xport` (.XPT) and (since *v0.18.0*) `SAS7BDAT` (.sas7bdat) format files. +``xport`` (.XPT) and (since *v0.18.0*) ``SAS7BDAT`` (.sas7bdat) format files. SAS files only contain two value types: ASCII text and floating point values (usually 8 bytes but sometimes truncated). For xport files, @@ -5543,7 +5543,7 @@ SPSS formats .. 
versionadded:: 0.25.0 The top-level function :func:`read_spss` can read (but not write) SPSS -`sav` (.sav) and `zsav` (.zsav) format files. +``sav`` (.sav) and ``zsav`` (.zsav) format files. SPSS files contain column names. By default the whole file is read, categorical columns are converted into ``pd.Categorical``, @@ -5566,7 +5566,7 @@ avoid converting categorical columns into ``pd.Categorical``: df = pd.read_spss('spss_data.sav', usecols=['foo', 'bar'], convert_categoricals=False) -More information about the `sav` and `zsav` file format is available here_. +More information about the ``sav`` and ``zsav`` file format is available here_. .. _here: https://www.ibm.com/support/knowledgecenter/en/SSLVMB_22.0.0/com.ibm.spss.statistics.help/spss/base/savedatatypes.htm diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index bc8fc5a7e4f4e..aee56a2565310 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -77,7 +77,7 @@ some configurable handling of "what to do with the other axes": levels=None, names=None, verify_integrity=False, copy=True) * ``objs`` : a sequence or mapping of Series or DataFrame objects. If a - dict is passed, the sorted keys will be used as the `keys` argument, unless + dict is passed, the sorted keys will be used as the ``keys`` argument, unless it is passed, in which case the values will be selected (see below). Any None objects will be dropped silently unless they are all None in which case a ValueError will be raised. @@ -1234,7 +1234,7 @@ resetting indexes. DataFrame. .. note:: - When DataFrames are merged using only some of the levels of a `MultiIndex`, + When DataFrames are merged using only some of the levels of a ``MultiIndex``, the extra levels will be dropped from the resulting merge. In order to preserve those levels, use ``reset_index`` on those level names to move those levels to columns prior to doing the merge. 
@@ -1487,7 +1487,7 @@ compare two DataFrame or Series, respectively, and summarize their differences. This feature was added in :ref:`V1.1.0 `. -For example, you might want to compare two `DataFrame` and stack their differences +For example, you might want to compare two ``DataFrame`` and stack their differences side by side. .. ipython:: python @@ -1523,7 +1523,7 @@ If you wish, you may choose to stack the differences on rows. df.compare(df2, align_axis=0) -If you wish to keep all original rows and columns, set `keep_shape` argument +If you wish to keep all original rows and columns, set ``keep_shape`` argument to ``True``. .. ipython:: python diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index 06a7c6e33768e..9294897686d46 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -251,7 +251,7 @@ can propagate non-NA values forward or backward: **Limit the amount of filling** If we only want consecutive gaps filled up to a certain number of data points, -we can use the `limit` keyword: +we can use the ``limit`` keyword: .. ipython:: python :suppress: diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst index 398336960e769..563fc941294d1 100644 --- a/doc/source/user_guide/options.rst +++ b/doc/source/user_guide/options.rst @@ -109,7 +109,7 @@ It's also possible to reset multiple options at once (using a regex): ``option_context`` context manager has been exposed through the top-level API, allowing you to execute code with given option values. Option values -are restored automatically when you exit the `with` block: +are restored automatically when you exit the ``with`` block: .. ipython:: python @@ -306,10 +306,10 @@ display.encoding UTF-8 Defaults to the detected en meant to be displayed on the console. 
display.expand_frame_repr True Whether to print out the full DataFrame repr for wide DataFrames across - multiple lines, `max_columns` is + multiple lines, ``max_columns`` is still respected, but the output will wrap-around across multiple "pages" - if its width exceeds `display.width`. + if its width exceeds ``display.width``. display.float_format None The callable should accept a floating point number and return a string with the desired format of the number. @@ -371,11 +371,11 @@ display.max_rows 60 This sets the maximum numbe fully or just a truncated or summary repr. 'None' value means unlimited. display.min_rows 10 The numbers of rows to show in a truncated - repr (when `max_rows` is exceeded). Ignored - when `max_rows` is set to None or 0. When set - to None, follows the value of `max_rows`. + repr (when ``max_rows`` is exceeded). Ignored + when ``max_rows`` is set to None or 0. When set + to None, follows the value of ``max_rows``. display.max_seq_items 100 when pretty-printing a long sequence, - no more then `max_seq_items` will + no more then ``max_seq_items`` will be printed. If items are omitted, they will be denoted by the addition of "..." to the resulting string. diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index 1b90aeb00cf9c..e6797512ce3cf 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -609,8 +609,8 @@ This function is often used along with discretization functions like ``cut``: See also :func:`Series.str.get_dummies `. :func:`get_dummies` also accepts a ``DataFrame``. By default all categorical -variables (categorical in the statistical sense, those with `object` or -`categorical` dtype) are encoded as dummy variables. +variables (categorical in the statistical sense, those with ``object`` or +``categorical`` dtype) are encoded as dummy variables. .. 
ipython:: python diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index cddc3cb2600fd..e1dce5819568f 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -214,7 +214,7 @@ work for arbitrary-sized datasets. for path in files: # Only one dataframe is in memory at a time... df = pd.read_parquet(path) - # ... plus a small Series `counts`, which is updated. + # ... plus a small Series ``counts``, which is updated. counts = counts.add(df['name'].value_counts(), fill_value=0) counts.astype(int) @@ -349,7 +349,7 @@ Now we can do things like fast random access with ``.loc``. ddf.loc['2002-01-01 12:01':'2002-01-01 12:05'].compute() -Dask knows to just look in the 3rd partition for selecting values in `2002`. It +Dask knows to just look in the 3rd partition for selecting values in ``2002``. It doesn't need to look at any other data. Many workflows involve a large amount of data and processing it in a way that diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst index e03ba74f95c90..dd6ac37d88f08 100644 --- a/doc/source/user_guide/text.rst +++ b/doc/source/user_guide/text.rst @@ -266,7 +266,7 @@ i.e., from the end of the string to the beginning of the string: Some caution must be taken to keep regular expressions in mind! For example, the following code will cause trouble because of the regular expression meaning of -`$`: +``$``: .. ipython:: python diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 868bf5a1672ff..33d3b3fb75649 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -1800,12 +1800,12 @@ See :ref:`groupby.iterating-label` or :class:`Resampler.__iter__` for more. .. _timeseries.adjust-the-start-of-the-bins: -Use `origin` or `offset` to adjust the start of the bins +Use ``origin`` or ``offset`` to adjust the start of the bins ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
versionadded:: 1.1.0 -The bins of the grouping are adjusted based on the beginning of the day of the time series starting point. This works well with frequencies that are multiples of a day (like `30D`) or that divide a day evenly (like `90s` or `1min`). This can create inconsistencies with some frequencies that do not meet this criteria. To change this behavior you can specify a fixed Timestamp with the argument ``origin``. +The bins of the grouping are adjusted based on the beginning of the day of the time series starting point. This works well with frequencies that are multiples of a day (like ``30D``) or that divide a day evenly (like ``90s`` or ``1min``). This can create inconsistencies with some frequencies that do not meet this criteria. To change this behavior you can specify a fixed Timestamp with the argument ``origin``. For example: diff --git a/doc/source/user_guide/visualization.rst b/doc/source/user_guide/visualization.rst index 8ce4b30c717a4..088c63c8695f6 100644 --- a/doc/source/user_guide/visualization.rst +++ b/doc/source/user_guide/visualization.rst @@ -67,7 +67,7 @@ On DataFrame, :meth:`~DataFrame.plot` is a convenience to plot all of the column @savefig frame_plot_basic.png df.plot(); -You can plot one column versus another using the `x` and `y` keywords in +You can plot one column versus another using the ``x`` and ``y`` keywords in :meth:`~DataFrame.plot`: .. ipython:: python @@ -496,7 +496,7 @@ Area plot You can create area plots with :meth:`Series.plot.area` and :meth:`DataFrame.plot.area`. Area plots are stacked by default. To produce stacked area plot, each column must be either all positive or all negative values. -When input data contains `NaN`, it will be automatically filled by 0. If you want to drop or fill by different values, use :func:`dataframe.dropna` or :func:`dataframe.fillna` before calling `plot`. +When input data contains ``NaN``, it will be automatically filled by 0. 
If you want to drop or fill by different values, use :func:`dataframe.dropna` or :func:`dataframe.fillna` before calling ``plot``. .. ipython:: python :suppress: @@ -1078,7 +1078,7 @@ layout and formatting of the returned plot: plt.close('all') -For each kind of plot (e.g. `line`, `bar`, `scatter`) any additional arguments +For each kind of plot (e.g. ``line``, ``bar``, ``scatter``) any additional arguments keywords are passed along to the corresponding matplotlib function (:meth:`ax.plot() `, :meth:`ax.bar() `, @@ -1271,7 +1271,7 @@ Using the ``x_compat`` parameter, you can suppress this behavior: plt.close('all') If you have more than one plot that needs to be suppressed, the ``use`` method -in ``pandas.plotting.plot_params`` can be used in a `with statement`: +in ``pandas.plotting.plot_params`` can be used in a ``with`` statement: .. ipython:: python diff --git a/doc/source/whatsnew/v0.10.1.rst b/doc/source/whatsnew/v0.10.1.rst index 1e9eafd2700e9..3dc680c46a4d9 100644 --- a/doc/source/whatsnew/v0.10.1.rst +++ b/doc/source/whatsnew/v0.10.1.rst @@ -189,8 +189,8 @@ combined result, by using ``where`` on a selector table. - ``HDFStore`` now can read native PyTables table format tables - You can pass ``nan_rep = 'my_nan_rep'`` to append, to change the default nan - representation on disk (which converts to/from `np.nan`), this defaults to - `nan`. + representation on disk (which converts to/from ``np.nan``), this defaults to + ``nan``. - You can pass ``index`` to ``append``. This defaults to ``True``. This will automagically create indices on the *indexables* and *data columns* of the @@ -224,7 +224,7 @@ combined result, by using ``where`` on a selector table. - Function to reset Google Analytics token store so users can recover from improperly setup client secrets (:issue:`2687`).
- Fixed groupby bug resulting in segfault when passing in MultiIndex (:issue:`2706`) -- Fixed bug where passing a Series with datetime64 values into `to_datetime` +- Fixed bug where passing a Series with datetime64 values into ``to_datetime`` results in bogus output values (:issue:`2699`) - Fixed bug in ``pattern in HDFStore`` expressions when pattern is not a valid regex (:issue:`2694`) @@ -240,7 +240,7 @@ combined result, by using ``where`` on a selector table. - Fixed C file parser behavior when the file has more columns than data (:issue:`2668`) - Fixed file reader bug that misaligned columns with data in the presence of an - implicit column and a specified `usecols` value + implicit column and a specified ``usecols`` value - DataFrames with numerical or datetime indices are now sorted prior to plotting (:issue:`2609`) - Fixed DataFrame.from_records error when passed columns, index, but empty diff --git a/doc/source/whatsnew/v0.11.0.rst b/doc/source/whatsnew/v0.11.0.rst index c0bc74c9ff036..eb91ac427063f 100644 --- a/doc/source/whatsnew/v0.11.0.rst +++ b/doc/source/whatsnew/v0.11.0.rst @@ -425,13 +425,13 @@ Enhancements - Cursor coordinate information is now displayed in time-series plots. - - added option `display.max_seq_items` to control the number of + - added option ``display.max_seq_items`` to control the number of elements printed per sequence pprinting it. (:issue:`2979`) - - added option `display.chop_threshold` to control display of small numerical + - added option ``display.chop_threshold`` to control display of small numerical values. (:issue:`2739`) - - added option `display.max_info_rows` to prevent verbose_info from being + - added option ``display.max_info_rows`` to prevent verbose_info from being calculated for frames above 1M rows (configurable). 
(:issue:`2807`, :issue:`2918`) - value_counts() now accepts a "normalize" argument, for normalized @@ -440,7 +440,7 @@ Enhancements - DataFrame.from_records now accepts not only dicts but any instance of the collections.Mapping ABC. - - added option `display.mpl_style` providing a sleeker visual style + - added option ``display.mpl_style`` providing a sleeker visual style for plots. Based on https://gist.github.com/huyng/816622 (:issue:`3075`). - Treat boolean values as integers (values 1 and 0) for numeric diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst index 5a904d6c85c61..a522eab215751 100644 --- a/doc/source/whatsnew/v0.13.0.rst +++ b/doc/source/whatsnew/v0.13.0.rst @@ -214,7 +214,7 @@ These were announced changes in 0.12 or prior that are taking effect as of 0.13. - Remove deprecated ``read_clipboard/to_clipboard/ExcelFile/ExcelWriter`` from ``pandas.io.parsers`` (:issue:`3717`) These are available as functions in the main pandas namespace (e.g. ``pd.read_clipboard``) - default for ``tupleize_cols`` is now ``False`` for both ``to_csv`` and ``read_csv``. Fair warning in 0.12 (:issue:`3604`) -- default for `display.max_seq_len` is now 100 rather than `None`. This activates +- default for ``display.max_seq_len`` is now 100 rather than ``None``. This activates truncated display ("...") of long sequences in various places. (:issue:`3391`) Deprecations @@ -498,7 +498,7 @@ Enhancements - ``to_dict`` now takes ``records`` as a possible out type. Returns an array of column-keyed dictionaries. (:issue:`4936`) -- ``NaN`` handing in get_dummies (:issue:`4446`) with `dummy_na` +- ``NaN`` handing in get_dummies (:issue:`4446`) with ``dummy_na`` .. ipython:: python @@ -1071,7 +1071,7 @@ Bug fixes as the docstring says (:issue:`4362`).
- ``as_index`` is no longer ignored when doing groupby apply (:issue:`4648`, :issue:`3417`) -- JSON NaT handling fixed, NaTs are now serialized to `null` (:issue:`4498`) +- JSON NaT handling fixed, NaTs are now serialized to ``null`` (:issue:`4498`) - Fixed JSON handling of escapable characters in JSON object keys (:issue:`4593`) - Fixed passing ``keep_default_na=False`` when ``na_values=None`` @@ -1188,7 +1188,7 @@ Bug fixes single column and passing a list for ``ascending``, the argument for ``ascending`` was being interpreted as ``True`` (:issue:`4839`, :issue:`4846`) -- Fixed ``Panel.tshift`` not working. Added `freq` support to ``Panel.shift`` +- Fixed ``Panel.tshift`` not working. Added ``freq`` support to ``Panel.shift`` (:issue:`4853`) - Fix an issue in TextFileReader w/ Python engine (i.e. PythonParser) with thousands != "," (:issue:`4596`) diff --git a/doc/source/whatsnew/v0.13.1.rst b/doc/source/whatsnew/v0.13.1.rst index 6fe010be8fb2d..9e416f8eeb3f1 100644 --- a/doc/source/whatsnew/v0.13.1.rst +++ b/doc/source/whatsnew/v0.13.1.rst @@ -379,7 +379,7 @@ Performance improvements for 0.13.1 - Series datetime/timedelta binary operations (:issue:`5801`) - DataFrame ``count/dropna`` for ``axis=1`` -- Series.str.contains now has a `regex=False` keyword which can be faster for plain (non-regex) string patterns. (:issue:`5879`) +- Series.str.contains now has a ``regex=False`` keyword which can be faster for plain (non-regex) string patterns. (:issue:`5879`) - Series.str.extract (:issue:`5944`) - ``dtypes/ftypes`` methods (:issue:`5968`) - indexing with object dtypes (:issue:`5968`) @@ -399,7 +399,7 @@ Bug fixes - Bug in ``io.wb.get_countries`` not including all countries (:issue:`6008`) - Bug in Series replace with timestamp dict (:issue:`5797`) -- read_csv/read_table now respects the `prefix` kwarg (:issue:`5732`). +- read_csv/read_table now respects the ``prefix`` kwarg (:issue:`5732`). 
- Bug in selection with missing values via ``.ix`` from a duplicate indexed DataFrame failing (:issue:`5835`) - Fix issue of boolean comparison on empty DataFrames (:issue:`5808`) - Bug in isnull handling ``NaT`` in an object array (:issue:`5443`) diff --git a/doc/source/whatsnew/v0.14.0.rst b/doc/source/whatsnew/v0.14.0.rst index 847a42b3a7643..c774518873a8e 100644 --- a/doc/source/whatsnew/v0.14.0.rst +++ b/doc/source/whatsnew/v0.14.0.rst @@ -82,7 +82,7 @@ API changes - The :meth:`DataFrame.interpolate` keyword ``downcast`` default has been changed from ``infer`` to ``None``. This is to preserve the original dtype unless explicitly requested otherwise (:issue:`6290`). -- When converting a dataframe to HTML it used to return `Empty DataFrame`. This special case has +- When converting a dataframe to HTML it used to return ``Empty DataFrame``. This special case has been removed, instead a header with the column names is returned (:issue:`6062`). - ``Series`` and ``Index`` now internally share more common operations, e.g. ``factorize(),nunique(),value_counts()`` are now supported on ``Index`` types as well. The ``Series.weekday`` property from is removed @@ -291,12 +291,12 @@ Display changes - Regression in the display of a MultiIndexed Series with ``display.max_rows`` is less than the length of the series (:issue:`7101`) - Fixed a bug in the HTML repr of a truncated Series or DataFrame not showing the class name with the - `large_repr` set to 'info' (:issue:`7105`) -- The `verbose` keyword in ``DataFrame.info()``, which controls whether to shorten the ``info`` + ``large_repr`` set to 'info' (:issue:`7105`) +- The ``verbose`` keyword in ``DataFrame.info()``, which controls whether to shorten the ``info`` representation, is now ``None`` by default. This will follow the global setting in ``display.max_info_columns``. The global setting can be overridden with ``verbose=True`` or ``verbose=False``. 
-- Fixed a bug with the `info` repr not honoring the `display.max_info_columns` setting (:issue:`6939`) +- Fixed a bug with the ``info`` repr not honoring the ``display.max_info_columns`` setting (:issue:`6939`) - Offset/freq info now in Timestamp __repr__ (:issue:`4553`) .. _whatsnew_0140.parsing: @@ -603,11 +603,11 @@ Plotting - Following keywords are now acceptable for :meth:`DataFrame.plot` with ``kind='bar'`` and ``kind='barh'``: - - `width`: Specify the bar width. In previous versions, static value 0.5 was passed to matplotlib and it cannot be overwritten. (:issue:`6604`) - - `align`: Specify the bar alignment. Default is `center` (different from matplotlib). In previous versions, pandas passes `align='edge'` to matplotlib and adjust the location to `center` by itself, and it results `align` keyword is not applied as expected. (:issue:`4525`) - - `position`: Specify relative alignments for bar plot layout. From 0 (left/bottom-end) to 1(right/top-end). Default is 0.5 (center). (:issue:`6604`) + - ``width``: Specify the bar width. In previous versions, static value 0.5 was passed to matplotlib and it cannot be overwritten. (:issue:`6604`) + - ``align``: Specify the bar alignment. Default is ``center`` (different from matplotlib). In previous versions, pandas passes ``align='edge'`` to matplotlib and adjust the location to ``center`` by itself, and it results ``align`` keyword is not applied as expected. (:issue:`4525`) + - ``position``: Specify relative alignments for bar plot layout. From 0 (left/bottom-end) to 1(right/top-end). Default is 0.5 (center). (:issue:`6604`) - Because of the default `align` value changes, coordinates of bar plots are now located on integer values (0.0, 1.0, 2.0 ...). This is intended to make bar plot be located on the same coordinates as line plot. However, bar plot may differs unexpectedly when you manually adjust the bar location or drawing area, such as using `set_xlim`, `set_ylim`, etc. 
In this cases, please modify your script to meet with new coordinates. + Because of the default ``align`` value changes, coordinates of bar plots are now located on integer values (0.0, 1.0, 2.0 ...). This is intended to make bar plot be located on the same coordinates as line plot. However, bar plot may differs unexpectedly when you manually adjust the bar location or drawing area, such as using ``set_xlim``, ``set_ylim``, etc. In this cases, please modify your script to meet with new coordinates. - The :func:`parallel_coordinates` function now takes argument ``color`` instead of ``colors``. A ``FutureWarning`` is raised to alert that @@ -618,7 +618,7 @@ Plotting raised if the old ``data`` argument is used by name. (:issue:`6956`) - :meth:`DataFrame.boxplot` now supports ``layout`` keyword (:issue:`6769`) -- :meth:`DataFrame.boxplot` has a new keyword argument, `return_type`. It accepts ``'dict'``, +- :meth:`DataFrame.boxplot` has a new keyword argument, ``return_type``. It accepts ``'dict'``, ``'axes'``, or ``'both'``, in which case a namedtuple with the matplotlib axes and a dict of matplotlib Lines is returned. @@ -721,8 +721,8 @@ Deprecations - The following ``io.sql`` functions have been deprecated: ``tquery``, ``uquery``, ``read_frame``, ``frame_query``, ``write_frame``. -- The `percentile_width` keyword argument in :meth:`~DataFrame.describe` has been deprecated. - Use the `percentiles` keyword instead, which takes a list of percentiles to display. The +- The ``percentile_width`` keyword argument in :meth:`~DataFrame.describe` has been deprecated. + Use the ``percentiles`` keyword instead, which takes a list of percentiles to display. The default output is unchanged.
- The default return type of :func:`boxplot` will change from a dict to a matplotlib Axes @@ -851,7 +851,7 @@ Enhancements - Arrays of strings can be wrapped to a specified width (``str.wrap``) (:issue:`6999`) - Add :meth:`~Series.nsmallest` and :meth:`Series.nlargest` methods to Series, See :ref:`the docs ` (:issue:`3960`) -- `PeriodIndex` fully supports partial string indexing like `DatetimeIndex` (:issue:`7043`) +- ``PeriodIndex`` fully supports partial string indexing like ``DatetimeIndex`` (:issue:`7043`) .. ipython:: python @@ -868,7 +868,7 @@ Enhancements - ``Series.rank()`` now has a percentage rank option (:issue:`5971`) - ``Series.rank()`` and ``DataFrame.rank()`` now accept ``method='dense'`` for ranks without gaps (:issue:`6514`) - Support passing ``encoding`` with xlwt (:issue:`3710`) -- Refactor Block classes removing `Block.items` attributes to avoid duplication +- Refactor Block classes removing ``Block.items`` attributes to avoid duplication in item handling (:issue:`6745`, :issue:`6988`). 
- Testing statements updated to use specialized asserts (:issue:`6175`) @@ -1063,10 +1063,10 @@ Bug fixes - Bug in ``MultiIndex.get_level_values`` doesn't preserve ``DatetimeIndex`` and ``PeriodIndex`` attributes (:issue:`7092`) - Bug in ``Groupby`` doesn't preserve ``tz`` (:issue:`3950`) - Bug in ``PeriodIndex`` partial string slicing (:issue:`6716`) -- Bug in the HTML repr of a truncated Series or DataFrame not showing the class name with the `large_repr` set to 'info' +- Bug in the HTML repr of a truncated Series or DataFrame not showing the class name with the ``large_repr`` set to 'info' (:issue:`7105`) - Bug in ``DatetimeIndex`` specifying ``freq`` raises ``ValueError`` when passed value is too short (:issue:`7098`) -- Fixed a bug with the `info` repr not honoring the `display.max_info_columns` setting (:issue:`6939`) +- Fixed a bug with the ``info`` repr not honoring the ``display.max_info_columns`` setting (:issue:`6939`) - Bug ``PeriodIndex`` string slicing with out of bounds values (:issue:`5407`) - Fixed a memory error in the hashtable implementation/factorizer on resizing of large tables (:issue:`7157`) - Bug in ``isnull`` when applied to 0-dimensional object arrays (:issue:`7176`) diff --git a/doc/source/whatsnew/v0.14.1.rst b/doc/source/whatsnew/v0.14.1.rst index 5de193007474c..354d67a525d0e 100644 --- a/doc/source/whatsnew/v0.14.1.rst +++ b/doc/source/whatsnew/v0.14.1.rst @@ -108,7 +108,7 @@ Enhancements - ``PeriodIndex`` is represented as the same format as ``DatetimeIndex`` (:issue:`7601`) - ``StringMethods`` now work on empty Series (:issue:`7242`) - The file parsers ``read_csv`` and ``read_table`` now ignore line comments provided by - the parameter `comment`, which accepts only a single character for the C reader. + the parameter ``comment``, which accepts only a single character for the C reader. 
In particular, they allow for comments before file data begins (:issue:`2685`) - Add ``NotImplementedError`` for simultaneous use of ``chunksize`` and ``nrows`` for read_csv() (:issue:`6774`). @@ -150,7 +150,7 @@ Performance - Improvements in Series.transform for significant performance gains (:issue:`6496`) - Improvements in DataFrame.transform with ufuncs and built-in grouper functions for significant performance gains (:issue:`7383`) - Regression in groupby aggregation of datetime64 dtypes (:issue:`7555`) -- Improvements in `MultiIndex.from_product` for large iterables (:issue:`7627`) +- Improvements in ``MultiIndex.from_product`` for large iterables (:issue:`7627`) .. _whatsnew_0141.experimental: @@ -217,7 +217,7 @@ Bug fixes - Bug in ``.loc`` with a list of indexers on a single-multi index level (that is not nested) (:issue:`7349`) - Bug in ``Series.map`` when mapping a dict with tuple keys of different lengths (:issue:`7333`) - Bug all ``StringMethods`` now work on empty Series (:issue:`7242`) -- Fix delegation of `read_sql` to `read_sql_query` when query does not contain 'select' (:issue:`7324`). +- Fix delegation of ``read_sql`` to ``read_sql_query`` when query does not contain 'select' (:issue:`7324`). - Bug where a string column name assignment to a ``DataFrame`` with a ``Float64Index`` raised a ``TypeError`` during a call to ``np.isnan`` (:issue:`7366`). 
@@ -269,7 +269,7 @@ Bug fixes - Bug in ``pandas.core.strings.str_contains`` does not properly match in a case insensitive fashion when ``regex=False`` and ``case=False`` (:issue:`7505`) - Bug in ``expanding_cov``, ``expanding_corr``, ``rolling_cov``, and ``rolling_corr`` for two arguments with mismatched index (:issue:`7512`) - Bug in ``to_sql`` taking the boolean column as text column (:issue:`7678`) -- Bug in grouped `hist` doesn't handle `rot` kw and `sharex` kw properly (:issue:`7234`) +- Bug in grouped ``hist`` doesn't handle ``rot`` kw and ``sharex`` kw properly (:issue:`7234`) - Bug in ``.loc`` performing fallback integer indexing with ``object`` dtype indices (:issue:`7496`) - Bug (regression) in ``PeriodIndex`` constructor when passed ``Series`` objects (:issue:`7701`). diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst index b80ed7446f805..76cf6d009a732 100644 --- a/doc/source/whatsnew/v0.15.0.rst +++ b/doc/source/whatsnew/v0.15.0.rst @@ -61,7 +61,7 @@ New features Categoricals in Series/DataFrame ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:class:`~pandas.Categorical` can now be included in `Series` and `DataFrames` and gained new +:class:`~pandas.Categorical` can now be included in ``Series`` and ``DataFrames`` and gained new methods to manipulate. Thanks to Jan Schulz for much of this API/implementation. (:issue:`3943`, :issue:`5313`, :issue:`5314`, :issue:`7444`, :issue:`7839`, :issue:`7848`, :issue:`7864`, :issue:`7914`, :issue:`7768`, :issue:`8006`, :issue:`3678`, :issue:`8075`, :issue:`8076`, :issue:`8143`, :issue:`8453`, :issue:`8518`). @@ -808,7 +808,7 @@ Other notable API changes: .. _whatsnew_0150.blanklines: -- Made both the C-based and Python engines for `read_csv` and `read_table` ignore empty lines in input as well as +- Made both the C-based and Python engines for ``read_csv`` and ``read_table`` ignore empty lines in input as well as white space-filled lines, as long as ``sep`` is not white space.
This is an API change that can be controlled by the keyword parameter ``skip_blank_lines``. See :ref:`the docs ` (:issue:`4466`) @@ -830,7 +830,7 @@ Other notable API changes: Previously this would have yielded a column of ``datetime64`` dtype, but without timezone info. - The behaviour of assigning a column to an existing dataframe as `df['a'] = i` + The behaviour of assigning a column to an existing dataframe as ``df['a'] = i`` remains unchanged (this already returned an ``object`` column with a timezone). - When passing multiple levels to :meth:`~pandas.DataFrame.stack()`, it will now raise a ``ValueError`` when the @@ -894,7 +894,7 @@ a transparent change with only very limited API implications (:issue:`5080`, :is - you may need to unpickle pandas version < 0.15.0 pickles using ``pd.read_pickle`` rather than ``pickle.load``. See :ref:`pickle docs ` - when plotting with a ``PeriodIndex``, the matplotlib internal axes will now be arrays of ``Period`` rather than a ``PeriodIndex`` (this is similar to how a ``DatetimeIndex`` passes arrays of ``datetimes`` now) - MultiIndexes will now raise similarly to other pandas objects w.r.t. truth testing, see :ref:`here ` (:issue:`7897`). -- When plotting a DatetimeIndex directly with matplotlib's `plot` function, +- When plotting a DatetimeIndex directly with matplotlib's ``plot`` function, the axis labels will no longer be formatted as dates but as integers (the internal representation of a ``datetime64``). **UPDATE** This is fixed in 0.15.1, see :ref:`here `. diff --git a/doc/source/whatsnew/v0.15.1.rst b/doc/source/whatsnew/v0.15.1.rst index f9c17058dc3ee..da56f07e84d9f 100644 --- a/doc/source/whatsnew/v0.15.1.rst +++ b/doc/source/whatsnew/v0.15.1.rst @@ -249,7 +249,7 @@ Enhancements dfi.memory_usage(index=True) -- Added Index properties `is_monotonic_increasing` and `is_monotonic_decreasing` (:issue:`8680`). +- Added Index properties ``is_monotonic_increasing`` and ``is_monotonic_decreasing`` (:issue:`8680`). 
- Added option to select columns when importing Stata files (:issue:`7935`) @@ -305,7 +305,7 @@ Bug fixes - Fixed a bug where plotting a column ``y`` and specifying a label would mutate the index name of the original DataFrame (:issue:`8494`) - Fix regression in plotting of a DatetimeIndex directly with matplotlib (:issue:`8614`). - Bug in ``date_range`` where partially-specified dates would incorporate current date (:issue:`6961`) -- Bug in Setting by indexer to a scalar value with a mixed-dtype `Panel4d` was failing (:issue:`8702`) +- Bug in Setting by indexer to a scalar value with a mixed-dtype ``Panel4d`` was failing (:issue:`8702`) - Bug where ``DataReader``'s would fail if one of the symbols passed was invalid. Now returns data for valid symbols and np.nan for invalid (:issue:`8494`) - Bug in ``get_quote_yahoo`` that wouldn't allow non-float return values (:issue:`5229`). diff --git a/doc/source/whatsnew/v0.15.2.rst b/doc/source/whatsnew/v0.15.2.rst index a4eabb97471de..95ca925f18692 100644 --- a/doc/source/whatsnew/v0.15.2.rst +++ b/doc/source/whatsnew/v0.15.2.rst @@ -137,7 +137,7 @@ Enhancements - Added ability to export Categorical data to Stata (:issue:`8633`). See :ref:`here ` for limitations of categorical variables exported to Stata data files. - Added flag ``order_categoricals`` to ``StataReader`` and ``read_stata`` to select whether to order imported categorical data (:issue:`8836`). See :ref:`here ` for more information on importing categorical variables from Stata data files. - Added ability to export Categorical data to to/from HDF5 (:issue:`7621`). Queries work the same as if it was an object array. However, the ``category`` dtyped data is stored in a more efficient manner. See :ref:`here ` for an example and caveats w.r.t. prior versions of pandas. -- Added support for ``searchsorted()`` on `Categorical` class (:issue:`8420`). +- Added support for ``searchsorted()`` on ``Categorical`` class (:issue:`8420`). 
Other enhancements: @@ -171,7 +171,7 @@ Other enhancements: 3 False True False True 4 True True True True -- Added support for ``utcfromtimestamp()``, ``fromtimestamp()``, and ``combine()`` on `Timestamp` class (:issue:`5351`). +- Added support for ``utcfromtimestamp()``, ``fromtimestamp()``, and ``combine()`` on ``Timestamp`` class (:issue:`5351`). - Added Google Analytics (`pandas.io.ga`) basic documentation (:issue:`8835`). See `here `__. - ``Timedelta`` arithmetic returns ``NotImplemented`` in unknown cases, allowing extensions by custom classes (:issue:`8813`). - ``Timedelta`` now supports arithmetic with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`). @@ -241,7 +241,7 @@ Bug fixes - Bug in ``MultiIndex`` where ``__contains__`` returns wrong result if index is not lexically sorted or unique (:issue:`7724`) - BUG CSV: fix problem with trailing white space in skipped rows, (:issue:`8679`), (:issue:`8661`), (:issue:`8983`) - Regression in ``Timestamp`` does not parse 'Z' zone designator for UTC (:issue:`8771`) -- Bug in `StataWriter` the produces writes strings with 244 characters irrespective of actual size (:issue:`8969`) +- Bug in ``StataWriter`` the produces writes strings with 244 characters irrespective of actual size (:issue:`8969`) - Fixed ValueError raised by cummin/cummax when datetime64 Series contains NaT. (:issue:`8965`) - Bug in DataReader returns object dtype if there are missing values (:issue:`8980`) - Bug in plotting if sharex was enabled and index was a timeseries, would show labels on multiple axes (:issue:`3964`). diff --git a/doc/source/whatsnew/v0.16.0.rst b/doc/source/whatsnew/v0.16.0.rst index 4ad533e68e275..c4db277e79830 100644 --- a/doc/source/whatsnew/v0.16.0.rst +++ b/doc/source/whatsnew/v0.16.0.rst @@ -89,7 +89,7 @@ See the :ref:`documentation ` for more. 
(:issue:`922 Interaction with scipy.sparse ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:`here `). For example, given a SparseSeries with MultiIndex we can convert to a `scipy.sparse.coo_matrix` by specifying the row and column labels as index levels: +Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:`here `). For example, given a SparseSeries with MultiIndex we can convert to a ``scipy.sparse.coo_matrix`` by specifying the row and column labels as index levels: .. code-block:: python @@ -630,7 +630,7 @@ Bug fixes - Bug in ``Series.values_counts`` with excluding ``NaN`` for categorical type ``Series`` with ``dropna=True`` (:issue:`9443`) - Fixed missing numeric_only option for ``DataFrame.std/var/sem`` (:issue:`9201`) - Support constructing ``Panel`` or ``Panel4D`` with scalar data (:issue:`8285`) -- ``Series`` text representation disconnected from `max_rows`/`max_columns` (:issue:`7508`). +- ``Series`` text representation disconnected from ``max_rows``/``max_columns`` (:issue:`7508`). \ diff --git a/doc/source/whatsnew/v0.16.1.rst b/doc/source/whatsnew/v0.16.1.rst index 8dcac4c1044be..a89ede8f024a0 100644 --- a/doc/source/whatsnew/v0.16.1.rst +++ b/doc/source/whatsnew/v0.16.1.rst @@ -232,7 +232,7 @@ enhancements make string operations easier and more consistent with standard pyt idx = pd.Index([' jack', 'jill ', ' jesse ', 'frank']) idx.str.strip() - One special case for the `.str` accessor on ``Index`` is that if a string method returns ``bool``, the ``.str`` accessor + One special case for the ``.str`` accessor on ``Index`` is that if a string method returns ``bool``, the ``.str`` accessor will return a ``np.array`` instead of a boolean ``Index`` (:issue:`8875`).
This enables the following expression to work naturally: @@ -310,7 +310,7 @@ Other enhancements - ``get_dummies`` function now accepts ``sparse`` keyword. If set to ``True``, the return ``DataFrame`` is sparse, e.g. ``SparseDataFrame``. (:issue:`8823`) - ``Period`` now accepts ``datetime64`` as value input. (:issue:`9054`) -- Allow timedelta string conversion when leading zero is missing from time definition, ie `0:00:00` vs `00:00:00`. (:issue:`9570`) +- Allow timedelta string conversion when leading zero is missing from time definition, ie ``0:00:00`` vs ``00:00:00``. (:issue:`9570`) - Allow ``Panel.shift`` with ``axis='items'`` (:issue:`9890`) - Trying to write an excel file now raises ``NotImplementedError`` if the ``DataFrame`` has a ``MultiIndex`` instead of writing a broken Excel file. (:issue:`9794`) @@ -329,11 +329,11 @@ Other enhancements API changes ~~~~~~~~~~~ -- When passing in an ax to ``df.plot( ..., ax=ax)``, the `sharex` kwarg will now default to `False`. +- When passing in an ax to ``df.plot( ..., ax=ax)``, the ``sharex`` kwarg will now default to ``False``. The result is that the visibility of xlabels and xticklabels will not anymore be changed. You have to do that by yourself for the right axes in your figure or set ``sharex=True`` explicitly (but this changes the visible for all axes in the figure, not only the one which is passed in!). - If pandas creates the subplots itself (e.g. no passed in `ax` kwarg), then the + If pandas creates the subplots itself (e.g. no passed in ``ax`` kwarg), then the default is still ``sharex=True`` and the visibility changes are applied. - :meth:`~pandas.DataFrame.assign` now inserts new columns in alphabetical order. Previously @@ -442,7 +442,7 @@ Bug fixes - Bug in ``read_csv`` and ``read_table`` when using ``skip_rows`` parameter if blank lines are present. 
(:issue:`9832`) - Bug in ``read_csv()`` interprets ``index_col=True`` as ``1`` (:issue:`9798`) - Bug in index equality comparisons using ``==`` failing on Index/MultiIndex type incompatibility (:issue:`9785`) -- Bug in which ``SparseDataFrame`` could not take `nan` as a column name (:issue:`8822`) +- Bug in which ``SparseDataFrame`` could not take ``nan`` as a column name (:issue:`8822`) - Bug in ``to_msgpack`` and ``read_msgpack`` zlib and blosc compression support (:issue:`9783`) - Bug ``GroupBy.size`` doesn't attach index name properly if grouped by ``TimeGrouper`` (:issue:`9925`) - Bug causing an exception in slice assignments because ``length_of_indexer`` returns wrong results (:issue:`9995`) diff --git a/doc/source/whatsnew/v0.16.2.rst b/doc/source/whatsnew/v0.16.2.rst index a3c34db09f555..2cb0cbec68eff 100644 --- a/doc/source/whatsnew/v0.16.2.rst +++ b/doc/source/whatsnew/v0.16.2.rst @@ -89,7 +89,7 @@ See the :ref:`documentation ` for more. (:issue:`10129`) Other enhancements ^^^^^^^^^^^^^^^^^^ -- Added `rsplit` to Index/Series StringMethods (:issue:`10303`) +- Added ``rsplit`` to Index/Series StringMethods (:issue:`10303`) - Removed the hard-coded size limits on the ``DataFrame`` HTML representation in the IPython notebook, and leave this to IPython itself (only for IPython diff --git a/doc/source/whatsnew/v0.17.0.rst b/doc/source/whatsnew/v0.17.0.rst index 11c252192be6b..dce60e8363226 100644 --- a/doc/source/whatsnew/v0.17.0.rst +++ b/doc/source/whatsnew/v0.17.0.rst @@ -273,9 +273,9 @@ Support for math functions in .eval() df = pd.DataFrame({'a': np.random.randn(10)}) df.eval("b = sin(a)") -The support math functions are `sin`, `cos`, `exp`, `log`, `expm1`, `log1p`, -`sqrt`, `sinh`, `cosh`, `tanh`, `arcsin`, `arccos`, `arctan`, `arccosh`, -`arcsinh`, `arctanh`, `abs` and `arctan2`. 
+The support math functions are ``sin``, ``cos``, ``exp``, ``log``, ``expm1``, ``log1p``, +``sqrt``, ``sinh``, ``cosh``, ``tanh``, ``arcsin``, ``arccos``, ``arctan``, ``arccosh``, +``arcsinh``, ``arctanh``, ``abs`` and ``arctan2``. These functions map to the intrinsics for the ``NumExpr`` engine. For the Python engine, they are mapped to ``NumPy`` calls. @@ -519,7 +519,7 @@ Other enhancements - ``DataFrame.apply`` will return a Series of dicts if the passed function returns a dict and ``reduce=True`` (:issue:`8735`). -- Allow passing `kwargs` to the interpolation methods (:issue:`10378`). +- Allow passing ``kwargs`` to the interpolation methods (:issue:`10378`). - Improved error message when concatenating an empty iterable of ``Dataframe`` objects (:issue:`9157`) diff --git a/doc/source/whatsnew/v0.18.0.rst b/doc/source/whatsnew/v0.18.0.rst index fbe24675ddfe2..ef5242b0e33c8 100644 --- a/doc/source/whatsnew/v0.18.0.rst +++ b/doc/source/whatsnew/v0.18.0.rst @@ -290,7 +290,7 @@ A new, friendlier ``ValueError`` is added to protect against the mistake of supp .. code-block:: ipython In [2]: pd.Series(['a', 'b', np.nan, 'c']).str.cat(' ') - ValueError: Did you mean to supply a `sep` keyword? + ValueError: Did you mean to supply a ``sep`` keyword? .. _whatsnew_0180.enhancements.rounding: diff --git a/doc/source/whatsnew/v0.19.1.rst b/doc/source/whatsnew/v0.19.1.rst index 9e6b884e08587..f8b60f457b33f 100644 --- a/doc/source/whatsnew/v0.19.1.rst +++ b/doc/source/whatsnew/v0.19.1.rst @@ -29,7 +29,7 @@ Performance improvements - Fixed performance regression in ``Series.asof(where)`` when ``where`` is a scalar (:issue:`14461`) - Improved performance in ``DataFrame.asof(where)`` when ``where`` is a scalar (:issue:`14461`) - Improved performance in ``.to_json()`` when ``lines=True`` (:issue:`14408`) -- Improved performance in certain types of `loc` indexing with a MultiIndex (:issue:`14551`). 
+- Improved performance in certain types of ``loc`` indexing with a MultiIndex (:issue:`14551`). .. _whatsnew_0191.bug_fixes: diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index f91d89679dad1..dc51216ff6b2a 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -64,7 +64,7 @@ A ``DataFrame`` can now be written to and subsequently read back via JSON while new_df new_df.dtypes -Please note that the string `index` is not supported with the round trip format, as it is used by default in ``write_json`` to indicate a missing index name. +Please note that the string ``index`` is not supported with the round trip format, as it is used by default in ``write_json`` to indicate a missing index name. .. ipython:: python :okwarning: @@ -457,7 +457,7 @@ These bugs were squashed: Previously, :meth:`Series.str.cat` did not -- in contrast to most of ``pandas`` -- align :class:`Series` on their index before concatenation (see :issue:`18657`). The method has now gained a keyword ``join`` to control the manner of alignment, see examples below and :ref:`here `. -In v.0.23 `join` will default to None (meaning no alignment), but this default will change to ``'left'`` in a future version of pandas. +In v.0.23 ``join`` will default to None (meaning no alignment), but this default will change to ``'left'`` in a future version of pandas. .. ipython:: python :okwarning: @@ -836,7 +836,7 @@ Build changes Index division by zero fills correctly ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and `0 / 0` with ``np.nan``. This matches existing ``Series`` behavior. 
(:issue:`19322`, :issue:`19347`) +Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and ``0 / 0`` with ``np.nan``. This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`) Previous behavior: @@ -974,7 +974,7 @@ automatically so that the printed data frame fits within the current terminal width (``pd.options.display.max_columns=0``) (:issue:`17023`). If Python runs as a Jupyter kernel (such as the Jupyter QtConsole or a Jupyter notebook, as well as in many IDEs), this value cannot be inferred automatically and is thus -set to `20` as in previous versions. In a terminal, this results in a much +set to ``20`` as in previous versions. In a terminal, this results in a much nicer output: .. image:: ../_static/print_df_new.png @@ -1011,7 +1011,7 @@ Datetimelike API changes - Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). 
- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) - For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with ``freq=None``, addition or subtraction of integer-dtyped array or ``Index`` will raise ``NullFrequencyError`` instead of ``TypeError`` (:issue:`19895`) -- :class:`Timestamp` constructor now accepts a `nanosecond` keyword or positional argument (:issue:`18898`) +- :class:`Timestamp` constructor now accepts a ``nanosecond`` keyword or positional argument (:issue:`18898`) - :class:`DatetimeIndex` will now raise an ``AttributeError`` when the ``tz`` attribute is set after instantiation (:issue:`3746`) - :class:`DatetimeIndex` with a ``pytz`` timezone will now return a consistent ``pytz`` timezone (:issue:`18595`) @@ -1049,7 +1049,7 @@ Other API changes - :class:`DateOffset` objects render more simply, e.g. ```` instead of ```` (:issue:`19403`) - ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments.
It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`) - ``pd.to_datetime('today')`` now returns a datetime, consistent with ``pd.Timestamp('today')``; previously ``pd.to_datetime('today')`` returned a ``.normalized()`` datetime (:issue:`19935`) -- :func:`Series.str.replace` now takes an optional `regex` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`) +- :func:`Series.str.replace` now takes an optional ``regex`` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`) - :func:`DatetimeIndex.strftime` and :func:`PeriodIndex.strftime` now return an ``Index`` instead of a numpy array to be consistent with similar accessors (:issue:`20127`) - Constructing a Series from a list of length 1 no longer broadcasts this list when a longer index is specified (:issue:`19714`, :issue:`20391`). - :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`) @@ -1234,7 +1234,7 @@ Categorical - Bug in ``Categorical.__iter__`` not converting to Python types (:issue:`19909`) - Bug in :func:`pandas.factorize` returning the unique codes for the ``uniques``.
This now returns a ``Categorical`` with the same dtype as the input (:issue:`19721`) - Bug in :func:`pandas.factorize` including an item for missing values in the ``uniques`` return value (:issue:`19721`) -- Bug in :meth:`Series.take` with categorical data interpreting ``-1`` in `indices` as missing value markers, rather than the last element of the Series (:issue:`20664`) +- Bug in :meth:`Series.take` with categorical data interpreting ``-1`` in ``indices`` as missing value markers, rather than the last element of the Series (:issue:`20664`) Datetimelike ^^^^^^^^^^^^ @@ -1316,7 +1316,7 @@ Numeric Strings ^^^^^^^ -- Bug in :func:`Series.str.get` with a dictionary in the values and the index not in the keys, raising `KeyError` (:issue:`20671`) +- Bug in :func:`Series.str.get` with a dictionary in the values and the index not in the keys, raising ``KeyError`` (:issue:`20671`) Indexing @@ -1369,7 +1369,7 @@ IO ^^ - :func:`read_html` now rewinds seekable IO objects after parse failure, before attempting to parse with a new parser. If a parser errors and the object is non-seekable, an informative error is raised suggesting the use of a different parser (:issue:`17975`) -- :meth:`DataFrame.to_html` now has an option to add an id to the leading `` tag (:issue:`8496`) +- :meth:`DataFrame.to_html` now has an option to add an id to the leading ``
`` tag (:issue:`8496`) - Bug in :func:`read_msgpack` with a non existent file is passed in Python 2 (:issue:`15296`) - Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`) - Bug in :func:`read_csv` where missing values were not being handled properly when ``keep_default_na=False`` with dictionary ``na_values`` (:issue:`19227`) @@ -1378,7 +1378,7 @@ IO - Bug in :func:`DataFrame.to_latex()` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`) - Bug in :func:`DataFrame.to_latex()` where a ``NaN`` in a ``MultiIndex`` would cause an ``IndexError`` or incorrect output (:issue:`14249`) - Bug in :func:`DataFrame.to_latex()` where a non-string index-level name would result in an ``AttributeError`` (:issue:`19981`) -- Bug in :func:`DataFrame.to_latex()` where the combination of an index name and the `index_names=False` option would result in incorrect output (:issue:`18326`) +- Bug in :func:`DataFrame.to_latex()` where the combination of an index name and the ``index_names=False`` option would result in incorrect output (:issue:`18326`) - Bug in :func:`DataFrame.to_latex()` where a ``MultiIndex`` with an empty string as its name would result in incorrect output (:issue:`18669`) - Bug in :func:`DataFrame.to_latex()` where missing space characters caused wrong escaping and produced non-valid latex in some cases (:issue:`20859`) - Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`) @@ -1412,7 +1412,7 @@ GroupBy/resample/rolling - Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`) - Bug in :func:`DataFrame.groupby` where aggregation by ``first``/``last``/``min``/``max`` was causing timestamps to lose precision (:issue:`19526`) - Bug in :func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match 
the dtype(s) of the grouped data (:issue:`19200`) -- Bug in :func:`DataFrame.groupby` passing the `on=` kwarg, and subsequently using ``.apply()`` (:issue:`17813`) +- Bug in :func:`DataFrame.groupby` passing the ``on=`` kwarg, and subsequently using ``.apply()`` (:issue:`17813`) - Bug in :func:`DataFrame.resample().aggregate ` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`) - Bug in :func:`DataFrameGroupBy.cumsum` and :func:`DataFrameGroupBy.cumprod` when ``skipna`` was passed (:issue:`19806`) - Bug in :func:`DataFrame.resample` that dropped timezone information (:issue:`13238`) diff --git a/doc/source/whatsnew/v0.23.1.rst b/doc/source/whatsnew/v0.23.1.rst index 03b7d9db6bc63..b51368c87f991 100644 --- a/doc/source/whatsnew/v0.23.1.rst +++ b/doc/source/whatsnew/v0.23.1.rst @@ -74,10 +74,10 @@ In addition, ordering comparisons will raise a ``TypeError`` in the future. a tz-aware time instead of tz-naive (:issue:`21267`) and :attr:`DatetimeIndex.date` returned incorrect date when the input date has a non-UTC timezone (:issue:`21230`). - Fixed regression in :meth:`pandas.io.json.json_normalize` when called with ``None`` values - in nested levels in JSON, and to not drop keys with value as `None` (:issue:`21158`, :issue:`21356`). + in nested levels in JSON, and to not drop keys with value as ``None`` (:issue:`21158`, :issue:`21356`). 
- Bug in :meth:`~DataFrame.to_csv` causes encoding error when compression and encoding are specified (:issue:`21241`, :issue:`21118`) - Bug preventing pandas from being importable with -OO optimization (:issue:`21071`) -- Bug in :meth:`Categorical.fillna` incorrectly raising a ``TypeError`` when `value` the individual categories are iterable and `value` is an iterable (:issue:`21097`, :issue:`19788`) +- Bug in :meth:`Categorical.fillna` incorrectly raising a ``TypeError`` when ``value`` the individual categories are iterable and ``value`` is an iterable (:issue:`21097`, :issue:`19788`) - Fixed regression in constructors coercing NA values like ``None`` to strings when passing ``dtype=str`` (:issue:`21083`) - Regression in :func:`pivot_table` where an ordered ``Categorical`` with missing values for the pivot's ``index`` would give a mis-aligned result (:issue:`21133`) @@ -106,7 +106,7 @@ Bug fixes **Data-type specific** -- Bug in :meth:`Series.str.replace()` where the method throws `TypeError` on Python 3.5.2 (:issue:`21078`) +- Bug in :meth:`Series.str.replace()` where the method throws ``TypeError`` on Python 3.5.2 (:issue:`21078`) - Bug in :class:`Timedelta` where passing a float with a unit would prematurely round the float precision (:issue:`14156`) - Bug in :func:`pandas.testing.assert_index_equal` which raised ``AssertionError`` incorrectly, when comparing two :class:`CategoricalIndex` objects with param ``check_categorical=False`` (:issue:`19776`) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 5bfaa7a5a3e6b..357acbcc6bf67 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -376,7 +376,7 @@ Other enhancements - :func:`DataFrame.to_html` now accepts ``render_links`` as an argument, allowing the user to generate HTML with links to any URLs that appear in the DataFrame. See the :ref:`section on writing HTML ` in the IO docs for example usage. 
(:issue:`2679`) - :func:`pandas.read_csv` now supports pandas extension types as an argument to ``dtype``, allowing the user to use pandas extension types when reading CSVs. (:issue:`23228`) -- The :meth:`~DataFrame.shift` method now accepts `fill_value` as an argument, allowing the user to specify a value which will be used instead of NA/NaT in the empty periods. (:issue:`15486`) +- The :meth:`~DataFrame.shift` method now accepts ``fill_value`` as an argument, allowing the user to specify a value which will be used instead of NA/NaT in the empty periods. (:issue:`15486`) - :func:`to_datetime` now supports the ``%Z`` and ``%z`` directive when passed into ``format`` (:issue:`13486`) - :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether ``NaN``/``NaT`` values should be considered (:issue:`17534`) - :func:`DataFrame.to_csv` and :func:`Series.to_csv` now support the ``compression`` keyword when a file handle is passed. (:issue:`21227`) @@ -474,7 +474,7 @@ and replaced it with references to ``pyarrow`` (:issue:`21639` and :issue:`23053 .. _whatsnew_0240.api_breaking.csv_line_terminator: -`os.linesep` is used for ``line_terminator`` of ``DataFrame.to_csv`` +``os.linesep`` is used for ``line_terminator`` of ``DataFrame.to_csv`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :func:`DataFrame.to_csv` now uses :func:`os.linesep` rather than ``'\n'`` @@ -556,7 +556,7 @@ You must pass in the ``line_terminator`` explicitly, even in this case. .. 
_whatsnew_0240.bug_fixes.nan_with_str_dtype: -Proper handling of `np.NaN` in a string data-typed column with the Python engine +Proper handling of ``np.NaN`` in a string data-typed column with the Python engine ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ There was bug in :func:`read_excel` and :func:`read_csv` with the Python @@ -1198,7 +1198,7 @@ Other API changes - :meth:`DataFrame.set_index` now gives a better (and less frequent) KeyError, raises a ``ValueError`` for incorrect types, and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`) - Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`) -- :class:`DateOffset` attribute `_cacheable` and method `_should_cache` have been removed (:issue:`23118`) +- :class:`DateOffset` attribute ``_cacheable`` and method ``_should_cache`` have been removed (:issue:`23118`) - :meth:`Series.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23801`). - :meth:`Categorical.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23466`). - :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather that a ``ValueError``, if a searched for key is not found in its categories (:issue:`23466`). @@ -1317,7 +1317,7 @@ Deprecations - Timezone converting a tz-aware ``datetime.datetime`` or :class:`Timestamp` with :class:`Timestamp` and the ``tz`` argument is now deprecated. 
Instead, use :meth:`Timestamp.tz_convert` (:issue:`23579`) - :func:`pandas.api.types.is_period` is deprecated in favor of ``pandas.api.types.is_period_dtype`` (:issue:`23917`) - :func:`pandas.api.types.is_datetimetz` is deprecated in favor of ``pandas.api.types.is_datetime64tz`` (:issue:`23917`) -- Creating a :class:`TimedeltaIndex`, :class:`DatetimeIndex`, or :class:`PeriodIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range`, :func:`date_range`, or :func:`period_range` (:issue:`23919`) +- Creating a :class:`TimedeltaIndex`, :class:`DatetimeIndex`, or :class:`PeriodIndex` by passing range arguments ``start``, ``end``, and ``periods`` is deprecated in favor of :func:`timedelta_range`, :func:`date_range`, or :func:`period_range` (:issue:`23919`) - Passing a string alias like ``'datetime64[ns, UTC]'`` as the ``unit`` parameter to :class:`DatetimeTZDtype` is deprecated. Use :class:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`). - The ``skipna`` parameter of :meth:`~pandas.api.types.infer_dtype` will switch to ``True`` by default in a future version of pandas (:issue:`17066`, :issue:`24050`) - In :meth:`Series.where` with Categorical data, providing an ``other`` that is not present in the categories is deprecated. Convert the categorical to a different dtype or add the ``other`` to the categories first (:issue:`24077`).
@@ -1534,7 +1534,7 @@ Performance improvements - Improved the performance of :func:`pandas.get_dummies` with ``sparse=True`` (:issue:`21997`) - Improved performance of :func:`IndexEngine.get_indexer_non_unique` for sorted, non-unique indexes (:issue:`9466`) - Improved performance of :func:`PeriodIndex.unique` (:issue:`23083`) -- Improved performance of :func:`concat` for `Series` objects (:issue:`23404`) +- Improved performance of :func:`concat` for ``Series`` objects (:issue:`23404`) - Improved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone naive or UTC datetimes (:issue:`23634`) - Improved performance of :meth:`DatetimeIndex.tz_localize` and various ``DatetimeIndex`` attributes with dateutil UTC timezone (:issue:`23772`) - Fixed a performance regression on Windows with Python 3.7 of :func:`read_csv` (:issue:`23516`) @@ -1602,7 +1602,7 @@ Datetimelike - Bug in :class:`DataFrame` when creating a new column from an ndarray of :class:`Timestamp` objects with timezones creating an object-dtype column, rather than datetime with timezone (:issue:`23932`) - Bug in :class:`Timestamp` constructor which would drop the frequency of an input :class:`Timestamp` (:issue:`22311`) - Bug in :class:`DatetimeIndex` where calling ``np.array(dtindex, dtype=object)`` would incorrectly return an array of ``long`` objects (:issue:`23524`) -- Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and `dtype=object` would incorrectly raise a ``ValueError`` (:issue:`23524`) +- Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and ``dtype=object`` would incorrectly raise a ``ValueError`` (:issue:`23524`) - Bug in :class:`Index` where calling ``np.array(dtindex, dtype=object)`` on a timezone-naive :class:`DatetimeIndex` would return an array of ``datetime`` objects instead of :class:`Timestamp` objects, potentially losing nanosecond portions of the timestamps (:issue:`23524`) - Bug in
:class:`Categorical.__setitem__` not allowing setting with another ``Categorical`` when both are unordered and have the same categories, but in a different order (:issue:`24142`) - Bug in :func:`date_range` where using dates with millisecond resolution or higher could return incorrect values or the wrong number of values in the index (:issue:`24110`) @@ -1647,7 +1647,7 @@ Timezones - Bug in :class:`Series` constructor which would coerce tz-aware and tz-naive :class:`Timestamp` to tz-aware (:issue:`13051`) - Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`) - Bug in :class:`DatetimeIndex` where constructing with an integer and tz would not localize correctly (:issue:`12619`) -- Fixed bug where :meth:`DataFrame.describe` and :meth:`Series.describe` on tz-aware datetimes did not show `first` and `last` result (:issue:`21328`) +- Fixed bug where :meth:`DataFrame.describe` and :meth:`Series.describe` on tz-aware datetimes did not show ``first`` and ``last`` result (:issue:`21328`) - Bug in :class:`DatetimeIndex` comparisons failing to raise ``TypeError`` when comparing timezone-aware ``DatetimeIndex`` against ``np.datetime64`` (:issue:`22074`) - Bug in ``DataFrame`` assignment with a timezone-aware scalar (:issue:`19843`) - Bug in :func:`DataFrame.asof` that raised a ``TypeError`` when attempting to compare tz-naive and tz-aware timestamps (:issue:`21194`) @@ -1693,7 +1693,7 @@ Numeric - :meth:`Series.agg` can now handle numpy NaN-aware methods like :func:`numpy.nansum` (:issue:`19629`) - Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``pct=True`` and more than 2\ :sup:`24` rows are present resulted in percentages greater than 1.0 (:issue:`18271`) - Calls such as :meth:`DataFrame.round` with a non-unique :meth:`CategoricalIndex` now return expected data. Previously, data would be improperly duplicated (:issue:`21809`). 
-- Added ``log10``, `floor` and `ceil` to the list of supported functions in :meth:`DataFrame.eval` (:issue:`24139`, :issue:`24353`) +- Added ``log10``, ``floor`` and ``ceil`` to the list of supported functions in :meth:`DataFrame.eval` (:issue:`24139`, :issue:`24353`) - Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`) - Checking PEP 3141 numbers in :func:`~pandas.api.types.is_scalar` function returns ``True`` (:issue:`22903`) - Reduction methods like :meth:`Series.sum` now accept the default value of ``keepdims=False`` when called from a NumPy ufunc, rather than raising a ``TypeError``. Full support for ``keepdims`` has not been implemented (:issue:`24356`). @@ -1859,7 +1859,7 @@ Reshaping ^^^^^^^^^ - Bug in :func:`pandas.concat` when joining resampled DataFrames with timezone aware index (:issue:`13783`) -- Bug in :func:`pandas.concat` when joining only `Series` the `names` argument of `concat` is no longer ignored (:issue:`23490`) +- Bug in :func:`pandas.concat` when joining only ``Series`` the ``names`` argument of ``concat`` is no longer ignored (:issue:`23490`) - Bug in :meth:`Series.combine_first` with ``datetime64[ns, tz]`` dtype which would return tz-naive result (:issue:`21469`) - Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``datetime64[ns, tz]`` dtype (:issue:`21546`) - Bug in :meth:`DataFrame.where` with an empty DataFrame and empty ``cond`` having non-bool dtype (:issue:`21947`) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index aead8c48eb9b7..1918a1e8caf6c 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -33,7 +33,7 @@ This change will allow ``sort=True`` to mean "always sort" in a future release. The same change applies to :meth:`Index.difference` and :meth:`Index.symmetric_difference`, which would not sort the result when the values could not be compared.
-The `sort` option for :meth:`Index.intersection` has changed in three ways. +The ``sort`` option for :meth:`Index.intersection` has changed in three ways. 1. The default has changed from ``True`` to ``False``, to restore the pandas 0.23.4 and earlier behavior of not sorting by default. @@ -55,7 +55,7 @@ Fixed regressions - Fixed regression in :class:`Index.intersection` incorrectly sorting the values by default (:issue:`24959`). - Fixed regression in :func:`merge` when merging an empty ``DataFrame`` with multiple timezone-aware columns on one of the timezone-aware columns (:issue:`25014`). - Fixed regression in :meth:`Series.rename_axis` and :meth:`DataFrame.rename_axis` where passing ``None`` failed to remove the axis name (:issue:`25034`) -- Fixed regression in :func:`to_timedelta` with `box=False` incorrectly returning a ``datetime64`` object instead of a ``timedelta64`` object (:issue:`24961`) +- Fixed regression in :func:`to_timedelta` with ``box=False`` incorrectly returning a ``datetime64`` object instead of a ``timedelta64`` object (:issue:`24961`) - Fixed regression where custom hashable types could not be used as column keys in :meth:`DataFrame.set_index` (:issue:`24969`) .. _whatsnew_0241.bug_fixes: diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 0f0f009307c75..7b4440148677b 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -14,7 +14,7 @@ What's new in 0.25.0 (July 18, 2019) .. warning:: - `Panel` has been fully removed. For N-D labeled data structures, please + ``Panel`` has been fully removed. For N-D labeled data structures, please use `xarray `_ .. 
warning:: @@ -1167,7 +1167,7 @@ I/O - Fixed bug in :func:`pandas.read_csv` where a BOM would result in incorrect parsing using engine='python' (:issue:`26545`) - :func:`read_excel` now raises a ``ValueError`` when input is of type :class:`pandas.io.excel.ExcelFile` and ``engine`` param is passed since :class:`pandas.io.excel.ExcelFile` has an engine defined (:issue:`26566`) - Bug while selecting from :class:`HDFStore` with ``where=''`` specified (:issue:`26610`). -- Fixed bug in :func:`DataFrame.to_excel()` where custom objects (i.e. `PeriodIndex`) inside merged cells were not being converted into types safe for the Excel writer (:issue:`27006`) +- Fixed bug in :func:`DataFrame.to_excel()` where custom objects (i.e. ``PeriodIndex``) inside merged cells were not being converted into types safe for the Excel writer (:issue:`27006`) - Bug in :meth:`read_hdf` where reading a timezone aware :class:`DatetimeIndex` would raise a ``TypeError`` (:issue:`11926`) - Bug in :meth:`to_msgpack` and :meth:`read_msgpack` which would raise a ``ValueError`` rather than a ``FileNotFoundError`` for an invalid path (:issue:`27160`) - Fixed bug in :meth:`DataFrame.to_parquet` which would raise a ``ValueError`` when the dataframe had no columns (:issue:`27339`) @@ -1262,7 +1262,7 @@ Other - Removed unused C functions from vendored UltraJSON implementation (:issue:`26198`) - Allow :class:`Index` and :class:`RangeIndex` to be passed to numpy ``min`` and ``max`` functions (:issue:`26125`) - Use actual class name in repr of empty objects of a ``Series`` subclass (:issue:`27001`). -- Bug in :class:`DataFrame` where passing an object array of timezone-aware `datetime` objects would incorrectly raise ``ValueError`` (:issue:`13287`) +- Bug in :class:`DataFrame` where passing an object array of timezone-aware ``datetime`` objects would incorrectly raise ``ValueError`` (:issue:`13287`) .. 
_whatsnew_0.250.contributors: diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 944021ca0fcae..2a2b511356a69 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -9,10 +9,10 @@ including other versions of pandas. I/O and LZMA ~~~~~~~~~~~~ -Some users may unknowingly have an incomplete Python installation lacking the `lzma` module from the standard library. In this case, `import pandas` failed due to an `ImportError` (:issue:`27575`). -Pandas will now warn, rather than raising an `ImportError` if the `lzma` module is not present. Any subsequent attempt to use `lzma` methods will raise a `RuntimeError`. -A possible fix for the lack of the `lzma` module is to ensure you have the necessary libraries and then re-install Python. -For example, on MacOS installing Python with `pyenv` may lead to an incomplete Python installation due to unmet system dependencies at compilation time (like `xz`). Compilation will succeed, but Python might fail at run time. The issue can be solved by installing the necessary dependencies and then re-installing Python. +Some users may unknowingly have an incomplete Python installation lacking the ``lzma`` module from the standard library. In this case, ``import pandas`` failed due to an ``ImportError`` (:issue:`27575`). +Pandas will now warn, rather than raising an ``ImportError`` if the ``lzma`` module is not present. Any subsequent attempt to use ``lzma`` methods will raise a ``RuntimeError``. +A possible fix for the lack of the ``lzma`` module is to ensure you have the necessary libraries and then re-install Python. +For example, on MacOS installing Python with ``pyenv`` may lead to an incomplete Python installation due to unmet system dependencies at compilation time (like ``xz``). Compilation will succeed, but Python might fail at run time. The issue can be solved by installing the necessary dependencies and then re-installing Python. .. 
_whatsnew_0251.bug_fixes: @@ -52,7 +52,7 @@ Conversion Interval ^^^^^^^^ -- Bug in :class:`IntervalIndex` where `dir(obj)` would raise ``ValueError`` (:issue:`27571`) +- Bug in :class:`IntervalIndex` where ``dir(obj)`` would raise ``ValueError`` (:issue:`27571`) Indexing ^^^^^^^^ @@ -89,13 +89,13 @@ Groupby/resample/rolling - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`) - Bug in :meth:`pandas.core.groupby.GroupBy.nth` where ``observed=False`` was being ignored for Categorical groupers (:issue:`26385`) - Bug in windowing over read-only arrays (:issue:`27766`) -- Fixed segfault in `pandas.core.groupby.DataFrameGroupBy.quantile` when an invalid quantile was passed (:issue:`27470`) +- Fixed segfault in ``pandas.core.groupby.DataFrameGroupBy.quantile`` when an invalid quantile was passed (:issue:`27470`) Reshaping ^^^^^^^^^ - A ``KeyError`` is now raised if ``.unstack()`` is called on a :class:`Series` or :class:`DataFrame` with a flat :class:`Index` passing a name which is not the correct one (:issue:`18303`) -- Bug :meth:`merge_asof` could not merge :class:`Timedelta` objects when passing `tolerance` kwarg (:issue:`27642`) +- Bug :meth:`merge_asof` could not merge :class:`Timedelta` objects when passing ``tolerance`` kwarg (:issue:`27642`) - Bug in :meth:`DataFrame.crosstab` when ``margins`` set to ``True`` and ``normalize`` is not ``False``, an error is raised. 
(:issue:`27500`) - :meth:`DataFrame.join` now suppresses the ``FutureWarning`` when the sort parameter is specified (:issue:`21952`) - Bug in :meth:`DataFrame.join` raising with readonly arrays (:issue:`27943`) diff --git a/doc/source/whatsnew/v0.6.0.rst b/doc/source/whatsnew/v0.6.0.rst index f984b9ad71b63..1cb9dcbe159aa 100644 --- a/doc/source/whatsnew/v0.6.0.rst +++ b/doc/source/whatsnew/v0.6.0.rst @@ -52,7 +52,7 @@ New features Performance enhancements ~~~~~~~~~~~~~~~~~~~~~~~~ - VBENCH Cythonized ``cache_readonly``, resulting in substantial micro-performance enhancements throughout the code base (:issue:`361`) -- VBENCH Special Cython matrix iterator for applying arbitrary reduction operations with 3-5x better performance than `np.apply_along_axis` (:issue:`309`) +- VBENCH Special Cython matrix iterator for applying arbitrary reduction operations with 3-5x better performance than ``np.apply_along_axis`` (:issue:`309`) - VBENCH Improved performance of ``MultiIndex.from_tuples`` - VBENCH Special Cython matrix iterator for applying arbitrary reduction operations - VBENCH + DOCUMENT Add ``raw`` option to ``DataFrame.apply`` for getting better performance when diff --git a/doc/source/whatsnew/v0.6.1.rst b/doc/source/whatsnew/v0.6.1.rst index 8eea0a07f1f79..9a576f1d2b04a 100644 --- a/doc/source/whatsnew/v0.6.1.rst +++ b/doc/source/whatsnew/v0.6.1.rst @@ -16,12 +16,12 @@ New features - Add PyQt table widget to sandbox (:issue:`435`) - DataFrame.align can :ref:`accept Series arguments ` and an :ref:`axis option ` (:issue:`461`) -- Implement new :ref:`SparseArray ` and `SparseList` +- Implement new :ref:`SparseArray ` and ``SparseList`` data structures.
SparseSeries now derives from SparseArray (:issue:`463`) - :ref:`Better console printing options ` (:issue:`453`) - Implement fast :ref:`data ranking ` for Series and DataFrame, fast versions of scipy.stats.rankdata (:issue:`428`) -- Implement `DataFrame.from_items` alternate +- Implement ``DataFrame.from_items`` alternate constructor (:issue:`444`) - DataFrame.convert_objects method for :ref:`inferring better dtypes ` for object columns (:issue:`302`) @@ -37,7 +37,7 @@ New features Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- Improve memory usage of `DataFrame.describe` (do not copy data +- Improve memory usage of ``DataFrame.describe`` (do not copy data unnecessarily) (PR #425) - Optimize scalar value lookups in the general case by 25% or more in Series diff --git a/doc/source/whatsnew/v0.7.0.rst b/doc/source/whatsnew/v0.7.0.rst index a193b8049e951..2fe686d8858a2 100644 --- a/doc/source/whatsnew/v0.7.0.rst +++ b/doc/source/whatsnew/v0.7.0.rst @@ -20,7 +20,7 @@ New features ``DataFrame.append`` (:issue:`468`, :issue:`479`, :issue:`273`) - :ref:`Can ` pass multiple DataFrames to - `DataFrame.append` to concatenate (stack) and multiple Series to + ``DataFrame.append`` to concatenate (stack) and multiple Series to ``Series.append`` too - :ref:`Can` pass list of dicts (e.g., a @@ -282,7 +282,7 @@ Performance improvements - Substantially improve performance of multi-GroupBy aggregation when a Python function is passed, reuse ndarray object in Cython (:issue:`496`) - Can store objects indexed by tuples and floats in HDFStore (:issue:`492`) -- Don't print length by default in Series.to_string, add `length` option (:issue:`489`) +- Don't print length by default in Series.to_string, add ``length`` option (:issue:`489`) - Improve Cython code for multi-groupby to aggregate without having to sort the data (:issue:`93`) - Improve MultiIndex reindexing speed by storing tuples in the MultiIndex, diff --git a/doc/source/whatsnew/v0.8.0.rst b/doc/source/whatsnew/v0.8.0.rst 
index 2a49315cc3b12..9bba68d8c331d 100644 --- a/doc/source/whatsnew/v0.8.0.rst +++ b/doc/source/whatsnew/v0.8.0.rst @@ -69,15 +69,15 @@ Time Series changes and improvements series. Replaces now deprecated DateRange class - New ``PeriodIndex`` and ``Period`` classes for representing :ref:`time spans ` and performing **calendar logic**, - including the `12 fiscal quarterly frequencies `. + including the ``12 fiscal quarterly frequencies ``. This is a partial port of, and a substantial enhancement to, elements of the scikits.timeseries code base. Support for conversion between PeriodIndex and DatetimeIndex -- New Timestamp data type subclasses `datetime.datetime`, providing the same +- New Timestamp data type subclasses ``datetime.datetime``, providing the same interface while enabling working with nanosecond-resolution data. Also provides :ref:`easy time zone conversions `. - Enhanced support for :ref:`time zones `. Add - `tz_convert` and ``tz_localize`` methods to TimeSeries and DataFrame. All + ``tz_convert`` and ``tz_localize`` methods to TimeSeries and DataFrame. All timestamps are stored as UTC; Timestamps from DatetimeIndex objects with time zone set will be localized to local time. Time zone conversions are therefore essentially free. 
User needs to know very little about pytz library now; only @@ -91,7 +91,7 @@ Time Series changes and improvements matplotlib-based plotting code - New ``date_range``, ``bdate_range``, and ``period_range`` :ref:`factory functions ` -- Robust **frequency inference** function `infer_freq` and ``inferred_freq`` +- Robust **frequency inference** function ``infer_freq`` and ``inferred_freq`` property of DatetimeIndex, with option to infer frequency on construction of DatetimeIndex - to_datetime function efficiently **parses array of strings** to diff --git a/doc/source/whatsnew/v0.9.0.rst b/doc/source/whatsnew/v0.9.0.rst index 565b965c116db..5172b1989765d 100644 --- a/doc/source/whatsnew/v0.9.0.rst +++ b/doc/source/whatsnew/v0.9.0.rst @@ -8,7 +8,7 @@ Version 0.9.0 (October 7, 2012) This is a major release from 0.8.1 and includes several new features and enhancements along with a large number of bug fixes. New features include -vectorized unicode encoding/decoding for `Series.str`, `to_latex` method to +vectorized unicode encoding/decoding for ``Series.str``, ``to_latex`` method to DataFrame, more flexible parsing of boolean values, and enabling the download of options data from Yahoo! Finance. diff --git a/doc/source/whatsnew/v0.9.1.rst b/doc/source/whatsnew/v0.9.1.rst index 3b2924d175cdf..6b05e5bcded7e 100644 --- a/doc/source/whatsnew/v0.9.1.rst +++ b/doc/source/whatsnew/v0.9.1.rst @@ -15,7 +15,7 @@ DataFrame. New features ~~~~~~~~~~~~ - - `Series.sort`, `DataFrame.sort`, and `DataFrame.sort_index` can now be + - ``Series.sort``, ``DataFrame.sort``, and ``DataFrame.sort_index`` can now be specified in a per-column manner to support multiple sort orders (:issue:`928`) .. 
code-block:: ipython @@ -34,8 +34,8 @@ New features 1 1 0 0 5 1 0 0 - - `DataFrame.rank` now supports additional argument values for the - `na_option` parameter so missing values can be assigned either the largest + - ``DataFrame.rank`` now supports additional argument values for the + ``na_option`` parameter so missing values can be assigned either the largest or the smallest rank (:issue:`1508`, :issue:`2159`) .. ipython:: python @@ -51,10 +51,10 @@ New features df.rank(na_option='bottom') - - DataFrame has new `where` and `mask` methods to select values according to a + - DataFrame has new ``where`` and ``mask`` methods to select values according to a given boolean mask (:issue:`2109`, :issue:`2151`) - DataFrame currently supports slicing via a boolean vector the same length as the DataFrame (inside the `[]`). + DataFrame currently supports slicing via a boolean vector the same length as the DataFrame (inside the ``[]``). The returned DataFrame has the same number of columns as the original, but is sliced on its index. .. ipython:: python @@ -67,8 +67,8 @@ New features If a DataFrame is sliced with a DataFrame based boolean condition (with the same size as the original DataFrame), then a DataFrame the same size (index and columns) as the original is returned, with - elements that do not meet the boolean condition as `NaN`. This is accomplished via - the new method `DataFrame.where`. In addition, `where` takes an optional `other` argument for replacement. + elements that do not meet the boolean condition as ``NaN``. This is accomplished via + the new method ``DataFrame.where``. In addition, ``where`` takes an optional ``other`` argument for replacement. .. ipython:: python @@ -78,8 +78,8 @@ New features df.where(df>0,-df) - Furthermore, `where` now aligns the input boolean condition (ndarray or DataFrame), such that partial selection - with setting is possible. 
This is analogous to partial setting via `.ix` (but on the contents rather than the axis labels) + Furthermore, ``where`` now aligns the input boolean condition (ndarray or DataFrame), such that partial selection + with setting is possible. This is analogous to partial setting via ``.ix`` (but on the contents rather than the axis labels) .. ipython:: python @@ -87,7 +87,7 @@ New features df2[ df2[1:4] > 0 ] = 3 df2 - `DataFrame.mask` is the inverse boolean operation of `where`. + ``DataFrame.mask`` is the inverse boolean operation of ``where``. .. ipython:: python @@ -103,9 +103,9 @@ New features - Added option to disable pandas-style tick locators and formatters - using `series.plot(x_compat=True)` or `pandas.plot_params['x_compat'] = - True` (:issue:`2205`) - - Existing TimeSeries methods `at_time` and `between_time` were added to + using ``series.plot(x_compat=True)`` or ``pandas.plot_params['x_compat'] = + True`` (:issue:`2205`) + - Existing TimeSeries methods ``at_time`` and ``between_time`` were added to DataFrame (:issue:`2149`) - DataFrame.dot can now accept ndarrays (:issue:`2042`) - DataFrame.drop now supports non-unique indexes (:issue:`2101`) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4f0ca97310d85..7bbaa0b9702a4 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -250,7 +250,7 @@ Other enhancements - :func:`read_excel` now can read binary Excel (``.xlsb``) files by passing ``engine='pyxlsb'``. For more details and example usage, see the :ref:`Binary Excel files documentation `. Closes :issue:`8540`. 
- The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`) - :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`) -- DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) +- DataFrame constructor preserve ``ExtensionArray`` dtype with ``ExtensionArray`` (:issue:`11363`) - :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`) - :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` have gained ``ignore_index`` keyword to reset index (:issue:`30114`) - :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`) @@ -610,7 +610,7 @@ When :class:`Categorical` contains ``np.nan``, Default dtype of empty :class:`pandas.Series` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Initialising an empty :class:`pandas.Series` without specifying a dtype will raise a `DeprecationWarning` now +Initialising an empty :class:`pandas.Series` without specifying a dtype will raise a ``DeprecationWarning`` now (:issue:`17261`). The default dtype will change from ``float64`` to ``object`` in future releases so that it is consistent with the behaviour of :class:`DataFrame` and :class:`Index`. @@ -974,7 +974,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - The 'outer' method on Numpy ufuncs, e.g. 
``np.subtract.outer`` operating on :class:`Series` objects is no longer supported, and will raise ``NotImplementedError`` (:issue:`27198`) - Removed ``Series.get_dtype_counts`` and ``DataFrame.get_dtype_counts`` (:issue:`27145`) - Changed the default "fill_value" argument in :meth:`Categorical.take` from ``True`` to ``False`` (:issue:`20841`) -- Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` from ``None`` to ``False`` (:issue:`20584`) +- Changed the default value for the ``raw`` argument in :func:``Series.rolling().apply() ``, :func:`DataFrame.rolling().apply() `, :func:``Series.expanding().apply() ``, and :func:``DataFrame.expanding().apply() `` from ``None`` to ``False`` (:issue:`20584`) - Removed deprecated behavior of :meth:`Series.argmin` and :meth:`Series.argmax`, use :meth:`Series.idxmin` and :meth:`Series.idxmax` for the old behavior (:issue:`16955`) - Passing a tz-aware ``datetime.datetime`` or :class:`Timestamp` into the :class:`Timestamp` constructor with the ``tz`` argument now raises a ``ValueError`` (:issue:`23621`) - Removed ``Series.base``, ``Index.base``, ``Categorical.base``, ``Series.flags``, ``Index.flags``, ``PeriodArray.flags``, ``Series.strides``, ``Index.strides``, ``Series.itemsize``, ``Index.itemsize``, ``Series.data``, ``Index.data`` (:issue:`20721`) @@ -1058,7 +1058,7 @@ Datetimelike - Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`) - Bug in :meth:`Series.astype`, :meth:`Index.astype`, and :meth:`DataFrame.astype` failing to handle ``NaT`` when casting to an integer dtype (:issue:`28492`) - Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`) -- Bug in 
:class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`) +- Bug in :class:``DataFrame`` arithmetic operations when operating with a :class:``Series`` with dtype ``'timedelta64[ns]'`` (:issue:`28049`) - Bug in :func:`core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`) - Bug in :func:`pandas._config.localization.get_locales` where the ``locales -a`` encodes the locales list as windows-1252 (:issue:`23638`, :issue:`24760`, :issue:`27368`) - Bug in :meth:`Series.var` failing to raise ``TypeError`` when called with ``timedelta64[ns]`` dtype (:issue:`28289`) @@ -1066,7 +1066,7 @@ Datetimelike - Bug in masking datetime-like arrays with a boolean mask of an incorrect length not raising an ``IndexError`` (:issue:`30308`) - Bug in :attr:`Timestamp.resolution` being a property instead of a class attribute (:issue:`29910`) - Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`) -- Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`) +- Bug in :func:`pandas.to_datetime` failing for ``deques`` when using ``cache=True`` (the default) (:issue:`29403`) - Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`) - Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`) - Bug in :meth:`DataFrame.drop` where attempting to drop non-existent values from a DatetimeIndex would yield a confusing error message (:issue:`30399`) @@ -1095,10 +1095,10 @@ Numeric ^^^^^^^ - Bug in :meth:`DataFrame.quantile` with 
zero-column :class:`DataFrame` incorrectly raising (:issue:`23925`) - :class:`DataFrame` flex inequality comparisons methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth:`DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) -- Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) +- Bug in :class:``DataFrame`` logical operations (``&``, ``|``, ``^``) not matching :class:``Series`` behavior by filling NA values (:issue:`28741`) - Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29142`) - Bug in :meth:`Series.var` not computing the right value with a nullable integer dtype series not passing through ddof argument (:issue:`29128`) -- Improved error message when using `frac` > 1 and `replace` = False (:issue:`27451`) +- Improved error message when using ``frac`` > 1 and ``replace`` = False (:issue:`27451`) - Bug in numeric indexes resulted in it being possible to instantiate an :class:`Int64Index`, :class:`UInt64Index`, or :class:`Float64Index` with an invalid dtype (e.g. 
datetime-like) (:issue:`29539`) - Bug in :class:`UInt64Index` precision loss while constructing from a list with values in the ``np.uint64`` range (:issue:`29526`) - Bug in :class:`NumericIndex` construction that caused indexing to fail when integers in the ``np.uint64`` range were used (:issue:`28023`) @@ -1137,8 +1137,8 @@ Indexing - Bug in assignment using a reverse slicer (:issue:`26939`) - Bug in :meth:`DataFrame.explode` would duplicate frame in the presence of duplicates in the index (:issue:`28010`) -- Bug in reindexing a :meth:`PeriodIndex` with another type of index that contained a `Period` (:issue:`28323`) (:issue:`28337`) -- Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`) +- Bug in reindexing a :meth:`PeriodIndex` with another type of index that contained a ``Period`` (:issue:`28323`) (:issue:`28337`) +- Fix assignment of column via ``.loc`` with numpy non-ns datetime type (:issue:`27395`) - Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`) - :meth:`Index.union` could fail when the left contained duplicates (:issue:`28257`) - Bug when indexing with ``.loc`` where the index was a :class:`CategoricalIndex` with non-string categories didn't work (:issue:`17569`, :issue:`30225`) @@ -1159,7 +1159,7 @@ MultiIndex ^^^^^^^^^^ - Constructor for :class:`MultiIndex` verifies that the given ``sortorder`` is compatible with the actual ``lexsort_depth`` if ``verify_integrity`` parameter is ``True`` (the default) (:issue:`28735`) -- Series and MultiIndex `.drop` with `MultiIndex` raise exception if labels not in given in level (:issue:`8594`) +- Series and MultiIndex ``.drop`` with ``MultiIndex`` raise exception if labels not in given in level (:issue:`8594`) - I/O @@ -1171,7 +1171,7 @@ I/O - Bug in :meth:`DataFrame.to_csv` where values were truncated when the length of ``na_rep`` was shorter than the text input data. 
(:issue:`25099`) - Bug in :func:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`) - Bug in :meth:`DataFrame.to_json` where a datetime column label would not be written out in ISO format with ``orient="table"`` (:issue:`28130`) -- Bug in :func:`DataFrame.to_parquet` where writing to GCS would fail with `engine='fastparquet'` if the file did not already exist (:issue:`28326`) +- Bug in :func:`DataFrame.to_parquet` where writing to GCS would fail with ``engine='fastparquet'`` if the file did not already exist (:issue:`28326`) - Bug in :func:`read_hdf` closing stores that it didn't open when Exceptions are raised (:issue:`28699`) - Bug in :meth:`DataFrame.read_json` where using ``orient="index"`` would not maintain the order (:issue:`28557`) - Bug in :meth:`DataFrame.to_html` where the length of the ``formatters`` argument was not verified (:issue:`28469`) @@ -1183,9 +1183,9 @@ I/O - Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`) - Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`) - :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. 
(:issue:`29857`) -- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by `record_path` would raise a ``TypeError`` (:issue:`30148`) +- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by ``record_path`` would raise a ``TypeError`` (:issue:`30148`) - :func:`read_excel` now accepts binary data (:issue:`15914`) -- Bug in :meth:`read_csv` in which encoding handling was limited to just the string `utf-16` for the C engine (:issue:`24130`) +- Bug in :meth:`read_csv` in which encoding handling was limited to just the string ``utf-16`` for the C engine (:issue:`24130`) Plotting ^^^^^^^^ @@ -1236,7 +1236,7 @@ Reshaping - Bug in :func:`merge`, did not append suffixes correctly with MultiIndex (:issue:`28518`) - :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`) - Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`). -- Better error message in :func:`get_dummies` when `columns` isn't a list-like value (:issue:`28383`) +- Better error message in :func:`get_dummies` when ``columns`` isn't a list-like value (:issue:`28383`) - Bug in :meth:`Index.join` that caused infinite recursion error for mismatched ``MultiIndex`` name orders. 
(:issue:`25760`, :issue:`28956`) - Bug :meth:`Series.pct_change` where supplying an anchored frequency would throw a ``ValueError`` (:issue:`28664`) - Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`) @@ -1244,8 +1244,8 @@ Reshaping - Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`) - Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`) - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) -- Improved error message and docstring in :func:`cut` and :func:`qcut` when `labels=True` (:issue:`13318`) -- Bug in missing `fill_na` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`) +- Improved error message and docstring in :func:`cut` and :func:``qcut`` when ``labels=True`` (:issue:`13318`) +- Bug in missing ``fill_na`` parameter to :meth:``DataFrame.unstack`` with list of levels (:issue:`30740`) Sparse ^^^^^^ diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a49b29d691692..489b75fb5fb6a 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -66,10 +66,10 @@ For example: .. _whatsnew_110.dataframe_or_series_comparing: -Comparing two `DataFrame` or two `Series` and summarizing the differences +Comparing two ``DataFrame`` or two ``Series`` and summarizing the differences ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -We've added :meth:`DataFrame.compare` and :meth:`Series.compare` for comparing two `DataFrame` or two `Series` (:issue:`30429`) +We've added :meth:`DataFrame.compare` and :meth:`Series.compare` for comparing two ``DataFrame`` or two ``Series`` (:issue:`30429`) .. 
ipython:: python @@ -116,10 +116,10 @@ compatibility (:issue:`3729`) .. ipython:: python - # Default `dropna` is set to True, which will exclude NaNs in keys + # Default ``dropna`` is set to True, which will exclude NaNs in keys df_dropna.groupby(by=["b"], dropna=True).sum() - # In order to allow NaN in keys, set `dropna` to False + # In order to allow NaN in keys, set ``dropna`` to False df_dropna.groupby(by=["b"], dropna=False).sum() The default setting of ``dropna`` argument is ``True`` which means ``NA`` are not included in group keys. @@ -155,8 +155,8 @@ method, we get s.sort_values(key=lambda x: x.str.lower()) -When applied to a `DataFrame`, they key is applied per-column to all columns or a subset if -`by` is specified, e.g. +When applied to a ``DataFrame``, they key is applied per-column to all columns or a subset if +``by`` is specified, e.g. .. ipython:: python @@ -217,7 +217,7 @@ Grouper and resample now supports the arguments origin and offset :class:`Grouper` and :meth:`DataFrame.resample` now supports the arguments ``origin`` and ``offset``. It let the user control the timestamp on which to adjust the grouping. (:issue:`31809`) -The bins of the grouping are adjusted based on the beginning of the day of the time series starting point. This works well with frequencies that are multiples of a day (like `30D`) or that divides a day (like `90s` or `1min`). But it can create inconsistencies with some frequencies that do not meet this criteria. To change this behavior you can now specify a fixed timestamp with the argument ``origin``. +The bins of the grouping are adjusted based on the beginning of the day of the time series starting point. This works well with frequencies that are multiples of a day (like ``30D``) or that divides a day (like ``90s`` or ``1min``). But it can create inconsistencies with some frequencies that do not meet this criteria. To change this behavior you can now specify a fixed timestamp with the argument ``origin``. 
Two arguments are now deprecated (more information in the documentation of :meth:`DataFrame.resample`): @@ -289,7 +289,7 @@ Other enhancements - Added :meth:`api.extensions.ExtensionArray.argmax` and :meth:`api.extensions.ExtensionArray.argmin` (:issue:`24382`) - :func:`timedelta_range` will now infer a frequency when passed ``start``, ``stop``, and ``periods`` (:issue:`32377`) - Positional slicing on a :class:`IntervalIndex` now supports slices with ``step > 1`` (:issue:`31658`) -- :class:`Series.str` now has a `fullmatch` method that matches a regular expression against the entire string in each row of the :class:`Series`, similar to `re.fullmatch` (:issue:`32806`). +- :class:`Series.str` now has a ``fullmatch`` method that matches a regular expression against the entire string in each row of the :class:`Series`, similar to ``re.fullmatch`` (:issue:`32806`). - :meth:`DataFrame.sample` will now also allow array-like and BitGenerator objects to be passed to ``random_state`` as seeds (:issue:`32503`) - :meth:`Index.union` will now raise ``RuntimeWarning`` for :class:`MultiIndex` objects if the object inside are unsortable. Pass ``sort=False`` to suppress this warning (:issue:`33015`) - Added :meth:`Series.dt.isocalendar` and :meth:`DatetimeIndex.isocalendar` that returns a :class:`DataFrame` with year, week, and day calculated according to the ISO 8601 calendar (:issue:`33206`, :issue:`34392`). @@ -319,7 +319,7 @@ Other enhancements :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`26599`). - :meth:`HDFStore.put` now accepts a ``track_times`` parameter. This parameter is passed to the ``create_table`` method of ``PyTables`` (:issue:`32682`). -- :meth:`Series.plot` and :meth:`DataFrame.plot` now accepts `xlabel` and `ylabel` parameters to present labels on x and y axis (:issue:`9093`). 
+- :meth:`Series.plot` and :meth:`DataFrame.plot` now accepts ``xlabel`` and ``ylabel`` parameters to present labels on x and y axis (:issue:`9093`). - Made :class:`pandas.core.window.rolling.Rolling` and :class:`pandas.core.window.expanding.Expanding` iterable(:issue:`11704`) - Made ``option_context`` a :class:`contextlib.ContextDecorator`, which allows it to be used as a decorator over an entire function (:issue:`34253`). - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now accept an ``errors`` argument (:issue:`22610`) @@ -340,7 +340,7 @@ Other enhancements - :class:`pandas.core.window.ExponentialMovingWindow` now supports a ``times`` argument that allows ``mean`` to be calculated with observations spaced by the timestamps in ``times`` (:issue:`34839`) - :meth:`DataFrame.agg` and :meth:`Series.agg` now accept named aggregation for renaming the output columns/indexes. (:issue:`26513`) - ``compute.use_numba`` now exists as a configuration option that utilizes the numba engine when available (:issue:`33966`, :issue:`35374`) -- :meth:`Series.plot` now supports asymmetric error bars. Previously, if :meth:`Series.plot` received a "2xN" array with error values for `yerr` and/or `xerr`, the left/lower values (first row) were mirrored, while the right/upper values (second row) were ignored. Now, the first row represents the left/lower error values and the second row the right/upper error values. (:issue:`9536`) +- :meth:`Series.plot` now supports asymmetric error bars. Previously, if :meth:`Series.plot` received a "2xN" array with error values for ``yerr`` and/or ``xerr``, the left/lower values (first row) were mirrored, while the right/upper values (second row) were ignored. Now, the first row represents the left/lower error values and the second row the right/upper error values. (:issue:`9536`) .. 
--------------------------------------------------------------------------- From 764f80ec267190116bd6d3bbb5bafc8b72fca38e Mon Sep 17 00:00:00 2001 From: shubhamsm Date: Fri, 25 Sep 2020 21:10:59 +0530 Subject: [PATCH 02/17] remove unnecessary doubleTicks --- doc/source/user_guide/basics.rst | 2 +- doc/source/whatsnew/v0.15.0.rst | 2 +- doc/source/whatsnew/v0.23.0.rst | 2 +- doc/source/whatsnew/v0.24.0.rst | 4 ++-- doc/source/whatsnew/v1.0.0.rst | 4 ++-- doc/source/whatsnew/v1.1.3.rst | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index 4038f706b9338..5242460395546 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -1855,7 +1855,7 @@ to apply to the values being sorted. s1.sort_values() s1.sort_values(key=lambda x: x.str.lower()) -``key`` will be given the :class:``Series`` of values and should return a ``Series`` +``key`` will be given the :class:`Series` of values and should return a ``Series`` or array of the same shape with the transformed values. For ``DataFrame`` objects, the key is applied per column, so the key should still expect a Series and return a Series, e.g. diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst index 76cf6d009a732..1f054930b3709 100644 --- a/doc/source/whatsnew/v0.15.0.rst +++ b/doc/source/whatsnew/v0.15.0.rst @@ -61,7 +61,7 @@ New features Categoricals in Series/DataFrame ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:class:``~pandas.Categorical`` can now be included in ``Series`` and ``DataFrames`` and gained new +:class:`~pandas.Categorical` can now be included in ``Series`` and ``DataFrames`` and gained new methods to manipulate. Thanks to Jan Schulz for much of this API/implementation. 
(:issue:`3943`, :issue:`5313`, :issue:`5314`, :issue:`7444`, :issue:`7839`, :issue:`7848`, :issue:`7864`, :issue:`7914`, :issue:`7768`, :issue:`8006`, :issue:`3678`, :issue:`8075`, :issue:`8076`, :issue:`8143`, :issue:`8453`, :issue:`8518`). diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index dc51216ff6b2a..6b45b21793cdf 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -1011,7 +1011,7 @@ Datetimelike API changes - Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). - :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) - For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with ``freq=None``, addition or subtraction of integer-dtyped array or ``Index`` will raise ``NullFrequencyError`` instead of ``TypeError`` (:issue:`19895`) -- :class:``Timestamp`` constructor now accepts a ``nanosecond`` keyword or positional argument (:issue:`18898`) +- :class:`Timestamp` constructor now accepts a ``nanosecond`` keyword or positional argument (:issue:`18898`) - :class:`DatetimeIndex` will now raise an ``AttributeError`` when the ``tz`` attribute is set after instantiation (:issue:`3746`) - :class:`DatetimeIndex` with a ``pytz`` timezone will now return a consistent ``pytz`` timezone (:issue:`18595`) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 357acbcc6bf67..a90630fc3eee2 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1317,7 +1317,7 @@ Deprecations - Timezone converting a tz-aware ``datetime.datetime`` or :class:`Timestamp` with :class:`Timestamp` and the ``tz`` argument is now deprecated. 
Instead, use :meth:`Timestamp.tz_convert` (:issue:`23579`) - :func:`pandas.api.types.is_period` is deprecated in favor of ``pandas.api.types.is_period_dtype`` (:issue:`23917`) - :func:`pandas.api.types.is_datetimetz` is deprecated in favor of ``pandas.api.types.is_datetime64tz`` (:issue:`23917`) -- Creating a :class:`TimedeltaIndex`, :class:``DatetimeIndex``, or :class:``PeriodIndex`` by passing range arguments ``start``, ``end``, and ``periods`` is deprecated in favor of :func:`timedelta_range`, :func:`date_range`, or :func:`period_range` (:issue:`23919`) +- Creating a :class:`TimedeltaIndex`, :class:`DatetimeIndex`, or :class:`PeriodIndex` by passing range arguments ``start``, ``end``, and ``periods`` is deprecated in favor of :func:`timedelta_range`, :func:`date_range`, or :func:`period_range` (:issue:`23919`) - Passing a string alias like ``'datetime64[ns, UTC]'`` as the ``unit`` parameter to :class:`DatetimeTZDtype` is deprecated. Use :class:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`). - The ``skipna`` parameter of :meth:`~pandas.api.types.infer_dtype` will switch to ``True`` by default in a future version of pandas (:issue:`17066`, :issue:`24050`) - In :meth:`Series.where` with Categorical data, providing an ``other`` that is not present in the categories is deprecated. Convert the categorical to a different dtype or add the ``other`` to the categories first (:issue:`24077`). 
@@ -1602,7 +1602,7 @@ Datetimelike - Bug in :class:`DataFrame` when creating a new column from an ndarray of :class:`Timestamp` objects with timezones creating an object-dtype column, rather than datetime with timezone (:issue:`23932`) - Bug in :class:`Timestamp` constructor which would drop the frequency of an input :class:`Timestamp` (:issue:`22311`) - Bug in :class:`DatetimeIndex` where calling ``np.array(dtindex, dtype=object)`` would incorrectly return an array of ``long`` objects (:issue:`23524`) -- Bug in :class:`Index` where passing a timezone-aware :class:``DatetimeIndex`` and ``dtype=object`` would incorrectly raise a ``ValueError`` (:issue:`23524`) +- Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and ``dtype=object`` would incorrectly raise a ``ValueError`` (:issue:`23524`) - Bug in :class:`Index` where calling ``np.array(dtindex, dtype=object)`` on a timezone-naive :class:`DatetimeIndex` would return an array of ``datetime`` objects instead of :class:`Timestamp` objects, potentially losing nanosecond portions of the timestamps (:issue:`23524`) - Bug in :class:`Categorical.__setitem__` not allowing setting with another ``Categorical`` when both are unordered and have the same categories, but in a different order (:issue:`24142`) - Bug in :func:`date_range` where using dates with millisecond resolution or higher could return incorrect values or the wrong number of values in the index (:issue:`24110`) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7bbaa0b9702a4..88555df3af14b 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1058,7 +1058,7 @@ Datetimelike - Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`) - Bug in :meth:`Series.astype`, :meth:`Index.astype`, and :meth:`DataFrame.astype` failing to handle ``NaT`` when casting to an integer 
dtype (:issue:`28492`) - Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`) -- Bug in :class:``DataFrame`` arithmetic operations when operating with a :class:``Series`` with dtype ``'timedelta64[ns]'`` (:issue:`28049`) +- Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype ``'timedelta64[ns]'`` (:issue:`28049`) - Bug in :func:`core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`) - Bug in :func:`pandas._config.localization.get_locales` where the ``locales -a`` encodes the locales list as windows-1252 (:issue:`23638`, :issue:`24760`, :issue:`27368`) - Bug in :meth:`Series.var` failing to raise ``TypeError`` when called with ``timedelta64[ns]`` dtype (:issue:`28289`) @@ -1095,7 +1095,7 @@ Numeric ^^^^^^^ - Bug in :meth:`DataFrame.quantile` with zero-column :class:`DataFrame` incorrectly raising (:issue:`23925`) - :class:`DataFrame` flex inequality comparisons methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth:`DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) -- Bug in :class:``DataFrame`` logical operations (``&``, ``|``, ``^``) not matching :class:``Series`` behavior by filling NA values (:issue:`28741`) +- Bug in :class:`DataFrame` logical operations (``&``, ``|``, ``^``) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) - Bug in :meth:`DataFrame.interpolate` where specifying axis by name references variable before it is assigned (:issue:`29142`) - Bug in :meth:`Series.var` not computing the right value with a nullable integer dtype series not passing through ddof argument (:issue:`29128`) - Improved error message when using ``frac`` > 
1 and ``replace`` = False (:issue:`27451`) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index b382da2db01a4..3d66cfa363859 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -38,7 +38,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.apply` with ``raw=True`` and user-function returning string (:issue:`35940`) - Fixed regression when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`36527`) - Fixed regression in :class:`Period` incorrect value for ordinal over the maximum timestamp (:issue:`36430`) -- Fixed regression when adding a :meth:`timedelta_range` to a :class:``Timestamp`` raised an ``ValueError`` (:issue:`35897`) +- Fixed regression when adding a :meth:`timedelta_range` to a :class:`Timestamp` raised an ``ValueError`` (:issue:`35897`) .. --------------------------------------------------------------------------- From 8cdd77d723a8c808a7466a5430b2bd6b93c33d60 Mon Sep 17 00:00:00 2001 From: shubhamsm Date: Fri, 25 Sep 2020 21:28:13 +0530 Subject: [PATCH 03/17] removing unnecessary backticks --- doc/source/development/contributing_docstring.rst | 2 +- doc/source/user_guide/basics.rst | 4 ++-- doc/source/whatsnew/v0.14.0.rst | 4 ++-- doc/source/whatsnew/v0.23.0.rst | 4 ++-- doc/source/whatsnew/v0.24.0.rst | 2 +- doc/source/whatsnew/v1.0.0.rst | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst index fdba12c404990..136fee3afc494 100644 --- a/doc/source/development/contributing_docstring.rst +++ b/doc/source/development/contributing_docstring.rst @@ -128,7 +128,7 @@ backticks. The following are considered inline code: """ Add the values in ``arr``. - This is equivalent to Python ``sum`` of :meth:``pandas.Series.sum``. + This is equivalent to Python ``sum`` of :meth:`pandas.Series.sum`. 
Some sections are omitted here for simplicity. """ diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index 5242460395546..e348111fe7881 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -1812,8 +1812,8 @@ For information on key sorting by value, see :ref:`value sorting By values ~~~~~~~~~ -The :meth:``Series.sort_values`` method is used to sort a ``Series`` by its values. The -:meth:``DataFrame.sort_values`` method is used to sort a ``DataFrame`` by its column or row values. +The :meth:`Series.sort_values` method is used to sort a ``Series`` by its values. The +:meth:`DataFrame.sort_values` method is used to sort a ``DataFrame`` by its column or row values. The optional ``by`` parameter to :meth:`DataFrame.sort_values` may used to specify one or more columns to use to determine the sorted order. diff --git a/doc/source/whatsnew/v0.14.0.rst b/doc/source/whatsnew/v0.14.0.rst index c774518873a8e..421ef81427210 100644 --- a/doc/source/whatsnew/v0.14.0.rst +++ b/doc/source/whatsnew/v0.14.0.rst @@ -618,7 +618,7 @@ Plotting raised if the old ``data`` argument is used by name. (:issue:`6956`) - :meth:`DataFrame.boxplot` now supports ``layout`` keyword (:issue:`6769`) -- :meth:``DataFrame.boxplot`` has a new keyword argument, ``return_type``. It accepts ``'dict'``, +- :meth:`DataFrame.boxplot` has a new keyword argument, ``return_type``. It accepts ``'dict'``, ``'axes'``, or ``'both'``, in which case a namedtuple with the matplotlib axes and a dict of matplotlib Lines is returned. @@ -721,7 +721,7 @@ Deprecations - The following ``io.sql`` functions have been deprecated: ``tquery``, ``uquery``, ``read_frame``, ``frame_query``, ``write_frame``. -- The ``percentile_width`` keyword argument in :meth:``~DataFrame.describe`` has been deprecated. +- The ``percentile_width`` keyword argument in :meth:`~DataFrame.describe` has been deprecated. 
Use the ``percentiles`` keyword instead, which takes a list of percentiles to display. The default output is unchanged. diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index 6b45b21793cdf..215b2ebabf76e 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -1234,7 +1234,7 @@ Categorical - Bug in ``Categorical.__iter__`` not converting to Python types (:issue:`19909`) - Bug in :func:`pandas.factorize` returning the unique codes for the ``uniques``. This now returns a ``Categorical`` with the same dtype as the input (:issue:`19721`) - Bug in :func:`pandas.factorize` including an item for missing values in the ``uniques`` return value (:issue:`19721`) -- Bug in :meth:``Series.take`` with categorical data interpreting ``-1`` in ``indices`` as missing value markers, rather than the last element of the Series (:issue:`20664`) +- Bug in :meth:`Series.take` with categorical data interpreting ``-1`` in ``indices`` as missing value markers, rather than the last element of the Series (:issue:`20664`) Datetimelike ^^^^^^^^^^^^ @@ -1369,7 +1369,7 @@ IO ^^ - :func:`read_html` now rewinds seekable IO objects after parse failure, before attempting to parse with a new parser. If a parser errors and the object is non-seekable, an informative error is raised suggesting the use of a different parser (:issue:`17975`) -- :meth:``DataFrame.to_html`` now has an option to add an id to the leading ``
<table>`` tag (:issue:`8496`) +- :meth:`DataFrame.to_html` now has an option to add an id to the leading ``<table>
`` tag (:issue:`8496`) - Bug in :func:`read_msgpack` with a non existent file is passed in Python 2 (:issue:`15296`) - Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`) - Bug in :func:`read_csv` where missing values were not being handled properly when ``keep_default_na=False`` with dictionary ``na_values`` (:issue:`19227`) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index a90630fc3eee2..4df615a5524bf 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1693,7 +1693,7 @@ Numeric - :meth:`Series.agg` can now handle numpy NaN-aware methods like :func:`numpy.nansum` (:issue:`19629`) - Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``pct=True`` and more than 2\ :sup:`24` rows are present resulted in percentages greater than 1.0 (:issue:`18271`) - Calls such as :meth:`DataFrame.round` with a non-unique :meth:`CategoricalIndex` now return expected data. Previously, data would be improperly duplicated (:issue:`21809`). -- Added ``log10``, ``floor`` and ``ceil`` to the list of supported functions in :meth:``DataFrame.eval`` (:issue:`24139`, :issue:`24353`) +- Added ``log10``, ``floor`` and ``ceil`` to the list of supported functions in :meth:`DataFrame.eval` (:issue:`24139`, :issue:`24353`) - Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`) - Checking PEP 3141 numbers in :func:`~pandas.api.types.is_scalar` function returns ``True`` (:issue:`22903`) - Reduction methods like :meth:`Series.sum` now accept the default value of ``keepdims=False`` when called from a NumPy ufunc, rather than raising a ``TypeError``. Full support for ``keepdims`` has not been implemented (:issue:`24356`). 
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 88555df3af14b..a8cb60612886a 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1245,7 +1245,7 @@ Reshaping - Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`) - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) - Improved error message and docstring in :func:`cut` and :func:``qcut`` when ``labels=True`` (:issue:`13318`) -- Bug in missing ``fill_na`` parameter to :meth:``DataFrame.unstack`` with list of levels (:issue:`30740`) +- Bug in missing ``fill_na`` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`) Sparse ^^^^^^ From 7ef4b54dc1d7ce13f8c45664817ceed64a3f4b2a Mon Sep 17 00:00:00 2001 From: shubhamsm Date: Fri, 25 Sep 2020 21:29:13 +0530 Subject: [PATCH 04/17] remove unnecessary backticks --- doc/source/whatsnew/v0.16.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.16.0.rst b/doc/source/whatsnew/v0.16.0.rst index c4db277e79830..8acab2e2e8319 100644 --- a/doc/source/whatsnew/v0.16.0.rst +++ b/doc/source/whatsnew/v0.16.0.rst @@ -89,7 +89,7 @@ See the :ref:`documentation ` for more. (:issue:`922 Interaction with scipy.sparse ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Added :meth:``SparseSeries.to_coo`` and :meth:``SparseSeries.from_coo`` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:``here ``). For example, given a SparseSeries with MultiIndex we can convert to a ``scipy.sparse.coo_matrix`` by specifying the row and column labels as index levels: +Added :meth:`SparseSeries.to_coo`` and :meth:`SparseSeries.from_coo`` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:``here ``). 
For example, given a SparseSeries with MultiIndex we can convert to a ``scipy.sparse.coo_matrix`` by specifying the row and column labels as index levels: .. code-block:: python From d3834faf55871e07031fb92cd982476493877e6e Mon Sep 17 00:00:00 2001 From: shubhamsm Date: Fri, 25 Sep 2020 22:10:19 +0530 Subject: [PATCH 05/17] remove unnecessary backticks --- doc/source/user_guide/categorical.rst | 2 +- doc/source/user_guide/visualization.rst | 2 +- doc/source/whatsnew/v0.23.0.rst | 4 ++-- doc/source/whatsnew/v1.0.0.rst | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index f3f0f1ca652df..9d3a7098e511c 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -228,7 +228,7 @@ by default. CategoricalDtype() A :class:`~pandas.api.types.CategoricalDtype` can be used in any place pandas -expects a ``dtype``. For example :func:``pandas.read_csv``, +expects a ``dtype``. For example :func:`pandas.read_csv`, :func:`pandas.DataFrame.astype`, or in the ``Series`` constructor. .. note:: diff --git a/doc/source/user_guide/visualization.rst b/doc/source/user_guide/visualization.rst index 088c63c8695f6..4aa693436a516 100644 --- a/doc/source/user_guide/visualization.rst +++ b/doc/source/user_guide/visualization.rst @@ -496,7 +496,7 @@ Area plot You can create area plots with :meth:`Series.plot.area` and :meth:`DataFrame.plot.area`. Area plots are stacked by default. To produce stacked area plot, each column must be either all positive or all negative values. -When input data contains ``NaN``, it will be automatically filled by 0. If you want to drop or fill by different values, use :func:``dataframe.dropna`` or :func:``dataframe.fillna`` before calling ``plot``. +When input data contains ``NaN``, it will be automatically filled by 0. 
If you want to drop or fill by different values, use :func:`dataframe.dropna` or :func:`dataframe.fillna` before calling ``plot``. .. ipython:: python :suppress: diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index 215b2ebabf76e..4d85c06728fb4 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -1049,7 +1049,7 @@ Other API changes - :class:`DateOffset` objects render more simply, e.g. ```` instead of ```` (:issue:`19403`) - ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`) - ``pd.to_datetime('today')`` now returns a datetime, consistent with ``pd.Timestamp('today')``; previously ``pd.to_datetime('today')`` returned a ``.normalized()`` datetime (:issue:`19935`) -- :func:``Series.str.replace`` now takes an optional ``regex`` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`) +- :func:`Series.str.replace` now takes an optional ``regex`` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`) - :func:`DatetimeIndex.strftime` and :func:`PeriodIndex.strftime` now return an ``Index`` instead of a numpy array to be consistent with similar accessors (:issue:`20127`) - Constructing a Series from a list of length 1 no longer broadcasts this list when a longer index is specified (:issue:`19714`, :issue:`20391`). 
- :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`) @@ -1316,7 +1316,7 @@ Numeric Strings ^^^^^^^ -- Bug in :func:``Series.str.get`` with a dictionary in the values and the index not in the keys, raising ``KeyError`` (:issue:`20671`) +- Bug in :func:`Series.str.get` with a dictionary in the values and the index not in the keys, raising ``KeyError`` (:issue:`20671`) Indexing diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a8cb60612886a..32175d344c320 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -974,7 +974,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - The 'outer' method on Numpy ufuncs, e.g. ``np.subtract.outer`` operating on :class:`Series` objects is no longer supported, and will raise ``NotImplementedError`` (:issue:`27198`) - Removed ``Series.get_dtype_counts`` and ``DataFrame.get_dtype_counts`` (:issue:`27145`) - Changed the default "fill_value" argument in :meth:`Categorical.take` from ``True`` to ``False`` (:issue:`20841`) -- Changed the default value for the ``raw`` argument in :func:``Series.rolling().apply() ``, :func:`DataFrame.rolling().apply() `, :func:``Series.expanding().apply() ``, and :func:``DataFrame.expanding().apply() `` from ``None`` to ``False`` (:issue:`20584`) +- Changed the default value for the ``raw`` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` from ``None`` to ``False`` (:issue:`20584`) - Removed deprecated behavior of :meth:`Series.argmin` and :meth:`Series.argmax`, use :meth:`Series.idxmin` and :meth:`Series.idxmax` for the old behavior (:issue:`16955`) - Passing a tz-aware ``datetime.datetime`` or :class:`Timestamp` into the :class:`Timestamp` constructor with the ``tz`` argument now raises a ``ValueError`` 
(:issue:`23621`) - Removed ``Series.base``, ``Index.base``, ``Categorical.base``, ``Series.flags``, ``Index.flags``, ``PeriodArray.flags``, ``Series.strides``, ``Index.strides``, ``Series.itemsize``, ``Index.itemsize``, ``Series.data``, ``Index.data`` (:issue:`20721`) @@ -1244,7 +1244,7 @@ Reshaping - Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`) - Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`) - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) -- Improved error message and docstring in :func:`cut` and :func:``qcut`` when ``labels=True`` (:issue:`13318`) +- Improved error message and docstring in :func:`cut` and :func:`qcut` when ``labels=True`` (:issue:`13318`) - Bug in missing ``fill_na`` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`) Sparse From 52ad1802c1431ba9ebb6535a4ed43ae6b8c6f5d6 Mon Sep 17 00:00:00 2001 From: shubhamsm Date: Fri, 25 Sep 2020 23:18:54 +0530 Subject: [PATCH 06/17] remove unnecessary backticks --- doc/source/development/contributing_docstring.rst | 2 +- doc/source/user_guide/io.rst | 4 ++-- doc/source/whatsnew/v0.13.0.rst | 2 +- doc/source/whatsnew/v0.16.0.rst | 2 +- doc/source/whatsnew/v0.6.1.rst | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst index 136fee3afc494..caaa902ef2b52 100644 --- a/doc/source/development/contributing_docstring.rst +++ b/doc/source/development/contributing_docstring.rst @@ -144,7 +144,7 @@ backticks. The following are considered inline code: With several mistakes in the docstring. - It has a blank like after the signature ``def func():``. + It has a blank like after the signature `def func():`. 
The text 'Some function' should go in the line after the opening quotes of the docstring, not in the same line. diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 80fd54518ada1..cc7f9e9ebb897 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -279,8 +279,8 @@ iterator : boolean, default ``False`` Return ``TextFileReader`` object for iteration or getting chunks with ``get_chunk()``. chunksize : int, default ``None`` - Return ``TextFileReader`` object for iteration. See :ref:``iterating and chunking - `` below. + Return ``TextFileReader`` object for iteration. See :ref:`iterating and chunking + ` below. Quoting, compression, and file format +++++++++++++++++++++++++++++++++++++ diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst index a522eab215751..bc607409546c6 100644 --- a/doc/source/whatsnew/v0.13.0.rst +++ b/doc/source/whatsnew/v0.13.0.rst @@ -498,7 +498,7 @@ Enhancements - ``to_dict`` now takes ``records`` as a possible out type. Returns an array of column-keyed dictionaries. (:issue:`4936`) -- ``NaN`` handing in get_dummies (:issue:``4446``) with ``dummy_na`` +- ``NaN`` handing in get_dummies (:issue:`4446`) with ``dummy_na`` .. ipython:: python diff --git a/doc/source/whatsnew/v0.16.0.rst b/doc/source/whatsnew/v0.16.0.rst index 8acab2e2e8319..00e1264bfb80b 100644 --- a/doc/source/whatsnew/v0.16.0.rst +++ b/doc/source/whatsnew/v0.16.0.rst @@ -89,7 +89,7 @@ See the :ref:`documentation ` for more. (:issue:`922 Interaction with scipy.sparse ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Added :meth:`SparseSeries.to_coo`` and :meth:`SparseSeries.from_coo`` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:``here ``). 
For example, given a SparseSeries with MultiIndex we can convert to a ``scipy.sparse.coo_matrix`` by specifying the row and column labels as index levels: +Added :meth:`SparseSeries.to_coo`` and :meth:`SparseSeries.from_coo`` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:`here `). For example, given a SparseSeries with MultiIndex we can convert to a ``scipy.sparse.coo_matrix`` by specifying the row and column labels as index levels: .. code-block:: python diff --git a/doc/source/whatsnew/v0.6.1.rst b/doc/source/whatsnew/v0.6.1.rst index 9a576f1d2b04a..8ee80fa2c44b1 100644 --- a/doc/source/whatsnew/v0.6.1.rst +++ b/doc/source/whatsnew/v0.6.1.rst @@ -16,7 +16,7 @@ New features - Add PyQt table widget to sandbox (:issue:`435`) - DataFrame.align can :ref:`accept Series arguments ` and an :ref:`axis option ` (:issue:`461`) -- Implement new :ref:``SparseArray `` and ``SparseList`` +- Implement new :ref:`SparseArray ` and ``SparseList`` data structures. SparseSeries now derives from SparseArray (:issue:`463`) - :ref:`Better console printing options ` (:issue:`453`) - Implement fast :ref:`data ranking ` for Series and From 635f1f6742a886aa2db38f45d75fab32f74240b7 Mon Sep 17 00:00:00 2001 From: shubhamsm Date: Fri, 25 Sep 2020 23:41:20 +0530 Subject: [PATCH 07/17] update --- doc/source/development/contributing_docstring.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst index caaa902ef2b52..136fee3afc494 100644 --- a/doc/source/development/contributing_docstring.rst +++ b/doc/source/development/contributing_docstring.rst @@ -144,7 +144,7 @@ backticks. The following are considered inline code: With several mistakes in the docstring. - It has a blank like after the signature `def func():`. + It has a blank like after the signature ``def func():``. 
The text 'Some function' should go in the line after the opening quotes of the docstring, not in the same line. From 23713731745202bd377e98d5c522710f2da2aa47 Mon Sep 17 00:00:00 2001 From: shubhamsm Date: Sat, 26 Sep 2020 06:47:30 +0530 Subject: [PATCH 08/17] update --- .pre-commit-config.yaml | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f4b23e362ec1d..4b3e8e5fa9d65 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,4 +35,33 @@ repos: hooks: - id: rst-backticks # these exclusions should be removed and the files fixed - \ No newline at end of file + exclude: (?x)( + text\.rst| + timeseries\.rst| + visualization\.rst| + missing_data\.rst| + options\.rst| + reshaping\.rst| + scale\.rst| + merging\.rst| + cookbook\.rst| + enhancingperf\.rst| + groupby\.rst| + io\.rst| + overview\.rst| + panel\.rst| + plotting\.rst| + 10min\.rst| + basics\.rst| + categorical\.rst| + contributing\.rst| + contributing_docstring\.rst| + extending\.rst| + ecosystem\.rst| + comparison_with_sql\.rst| + install\.rst| + calculate_statistics\.rst| + combine_dataframes\.rst| + v0\.| + v1\.0\.| + v1\.1\.[012]) \ No newline at end of file From 01b6f5a524923f7449536f20053d1bf70466fcce Mon Sep 17 00:00:00 2001 From: shubhamsm Date: Sat, 26 Sep 2020 07:11:37 +0530 Subject: [PATCH 09/17] update --- .pre-commit-config.yaml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d44c0c9aed530..0bc551c4a50a3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -65,18 +65,6 @@ repos: v0\.| v1\.0\.| v1\.1\.[012]) - categorical\.rst| - contributing\.rst| - contributing_docstring\.rst| - extending\.rst| - ecosystem\.rst| - comparison_with_sql\.rst| - install\.rst| - calculate_statistics\.rst| - combine_dataframes\.rst| - v0\.| - v1\.0\.| - v1\.1\.[012]) - repo: local hooks: - id: pip_to_conda From 
a666e142f32f6b903a00a75068a878f204b4f0fe Mon Sep 17 00:00:00 2001 From: shubhamsm Date: Sat, 26 Sep 2020 08:06:55 +0530 Subject: [PATCH 10/17] allign titles --- doc/source/user_guide/categorical.rst | 4 ++-- doc/source/whatsnew/v0.24.0.rst | 4 ++-- doc/source/whatsnew/v1.1.0.rst | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index 9d3a7098e511c..c7326c4a6b32b 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -999,7 +999,7 @@ Methods for working with missing data, e.g. :meth:`~Series.isna`, :meth:`~Series s.fillna("a") Differences to R's ``factor`` ---------------------------- +---------------------------~~ The following differences to R's factor functions can be observed: @@ -1054,7 +1054,7 @@ an ``object`` dtype is a constant times the length of the data. ``Categorical`` is not a ``numpy`` array -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Currently, categorical data and the underlying ``Categorical`` is implemented as a Python object and not as a low-level NumPy array dtype. This leads to some problems. diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index efce2a131260e..9a2e96f717d9b 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -475,7 +475,7 @@ and replaced it with references to ``pyarrow`` (:issue:`21639` and :issue:`23053 .. _whatsnew_0240.api_breaking.csv_line_terminator: ``os.linesep`` is used for ``line_terminator`` of ``DataFrame.to_csv`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :func:`DataFrame.to_csv` now uses :func:`os.linesep` rather than ``'\n'`` for the default line terminator (:issue:`20353`). @@ -557,7 +557,7 @@ You must pass in the ``line_terminator`` explicitly, even in this case. .. 
_whatsnew_0240.bug_fixes.nan_with_str_dtype: Proper handling of ``np.NaN`` in a string data-typed column with the Python engine -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ There was bug in :func:`read_excel` and :func:`read_csv` with the Python engine, where missing values turned to ``'nan'`` with ``dtype=str`` and diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 489b75fb5fb6a..54ed407ed0a0a 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -67,7 +67,7 @@ For example: .. _whatsnew_110.dataframe_or_series_comparing: Comparing two ``DataFrame`` or two ``Series`` and summarizing the differences -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ We've added :meth:`DataFrame.compare` and :meth:`Series.compare` for comparing two ``DataFrame`` or two ``Series`` (:issue:`30429`) From 4d56c5236bc26934b38122022e56b4db1cc4dcf7 Mon Sep 17 00:00:00 2001 From: shubhamsm Date: Sat, 26 Sep 2020 19:58:28 +0530 Subject: [PATCH 11/17] update --- .pre-commit-config.yaml | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0bc551c4a50a3..b80e724f13073 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,36 +35,7 @@ repos: hooks: - id: rst-backticks # these exclusions should be removed and the files fixed - exclude: (?x)( - text\.rst| - timeseries\.rst| - visualization\.rst| - missing_data\.rst| - options\.rst| - reshaping\.rst| - scale\.rst| - merging\.rst| - cookbook\.rst| - enhancingperf\.rst| - groupby\.rst| - io\.rst| - overview\.rst| - panel\.rst| - plotting\.rst| - 10min\.rst| - basics\.rst| - categorical\.rst| - contributing\.rst| - contributing_docstring\.rst| - 
extending\.rst| - ecosystem\.rst| - comparison_with_sql\.rst| - install\.rst| - calculate_statistics\.rst| - combine_dataframes\.rst| - v0\.| - v1\.0\.| - v1\.1\.[012]) + - repo: local hooks: - id: pip_to_conda From a39c5aa7fd71887b97e68ac42d45e7536e4dba7c Mon Sep 17 00:00:00 2001 From: Shubham Mehra <43473352+Shubhamsm@users.noreply.github.com> Date: Sat, 26 Sep 2020 22:29:03 +0530 Subject: [PATCH 12/17] Update doc/source/development/contributing_docstring.rst Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> --- doc/source/development/contributing_docstring.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst index 136fee3afc494..70972bff37887 100644 --- a/doc/source/development/contributing_docstring.rst +++ b/doc/source/development/contributing_docstring.rst @@ -290,7 +290,7 @@ used. Instead of "str, default None", it is preferred to write "str, optional". When ``None`` is a value being used, we will keep the form "str, default None". For example, in ``df.to_csv(compression=None)``, ``None`` is not a value being used, but means that compression is optional, and no compression is being used if not -provided. In this case we will use ``str, optional``. Only in cases like +provided. In this case we will use ``"str, optional"``. Only in cases like ``func(value=None)`` and ``None`` is being used in the same way as ``0`` or ``foo`` would be used, then we will specify "str, int or None, default None". 
From 68837ebbaafa660521853967ac4fe2233880616c Mon Sep 17 00:00:00 2001 From: Shubham Mehra <43473352+Shubhamsm@users.noreply.github.com> Date: Sat, 26 Sep 2020 22:29:14 +0530 Subject: [PATCH 13/17] Update doc/source/user_guide/categorical.rst Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> --- doc/source/user_guide/categorical.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index c7326c4a6b32b..9da5d2a9fc92f 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -999,7 +999,7 @@ Methods for working with missing data, e.g. :meth:`~Series.isna`, :meth:`~Series s.fillna("a") Differences to R's ``factor`` ----------------------------~~ +----------------------------- The following differences to R's factor functions can be observed: From 5df9c9307ad7f38875851dcbdeeb94f9bbe15daa Mon Sep 17 00:00:00 2001 From: Shubham Mehra <43473352+Shubhamsm@users.noreply.github.com> Date: Sat, 26 Sep 2020 22:29:32 +0530 Subject: [PATCH 14/17] Update doc/source/whatsnew/v0.16.0.rst Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> --- doc/source/whatsnew/v0.16.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.16.0.rst b/doc/source/whatsnew/v0.16.0.rst index 00e1264bfb80b..8d0d6854cbf85 100644 --- a/doc/source/whatsnew/v0.16.0.rst +++ b/doc/source/whatsnew/v0.16.0.rst @@ -89,7 +89,7 @@ See the :ref:`documentation ` for more. (:issue:`922 Interaction with scipy.sparse ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Added :meth:`SparseSeries.to_coo`` and :meth:`SparseSeries.from_coo`` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:`here `). 
For example, given a SparseSeries with MultiIndex we can convert to a ``scipy.sparse.coo_matrix`` by specifying the row and column labels as index levels: +Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:`here <sparse.scipysparse>`). For example, given a SparseSeries with MultiIndex we can convert to a ``scipy.sparse.coo_matrix`` by specifying the row and column labels as index levels: .. code-block:: python From 12afc56ceff472ed2d5cbba7176fb575b3c4f436 Mon Sep 17 00:00:00 2001 From: Shubham Mehra <43473352+Shubhamsm@users.noreply.github.com> Date: Sat, 26 Sep 2020 22:30:00 +0530 Subject: [PATCH 15/17] Update doc/source/development/contributing_docstring.rst Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> --- doc/source/development/contributing_docstring.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst index 70972bff37887..26cdd0687706c 100644 --- a/doc/source/development/contributing_docstring.rst +++ b/doc/source/development/contributing_docstring.rst @@ -286,7 +286,7 @@ argument means, which can be added after a comma "int, default -1, meaning all cpus". In cases where the default value is ``None``, meaning that the value will not be -used. Instead of "str, default None", it is preferred to write "str, optional". +used. Instead of ``"str, default None"``, it is preferred to write ``"str, optional"``. When ``None`` is a value being used, we will keep the form "str, default None". 
For example, in ``df.to_csv(compression=None)``, ``None`` is not a value being used, but means that compression is optional, and no compression is being used if not From 47d10dd946920bbe6059ffb0677c21c046735ee1 Mon Sep 17 00:00:00 2001 From: shubhamsm Date: Sat, 26 Sep 2020 22:30:52 +0530 Subject: [PATCH 16/17] deleting comment --- .pre-commit-config.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b80e724f13073..5b64c52dbc721 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,6 @@ repos: rev: v1.6.0 hooks: - id: rst-backticks - # these exclusions should be removed and the files fixed - repo: local hooks: From a53b240d949100adfa892c960db89f8baea15f88 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sat, 26 Sep 2020 13:15:00 -0500 Subject: [PATCH 17/17] Remove empty line --- .pre-commit-config.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5b64c52dbc721..7f669ee77c3eb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,6 @@ repos: rev: v1.6.0 hooks: - id: rst-backticks - - repo: local hooks: - id: pip_to_conda