diff --git a/Makefile b/Makefile index c79175cd3c401..4a82566cf726e 100644 --- a/Makefile +++ b/Makefile @@ -23,3 +23,4 @@ doc: cd doc; \ python make.py clean; \ python make.py html + python make.py spellcheck diff --git a/doc/make.py b/doc/make.py index 4967f30453fd1..4d54a2415a194 100755 --- a/doc/make.py +++ b/doc/make.py @@ -224,8 +224,9 @@ def _sphinx_build(self, kind): -------- >>> DocBuilder(num_jobs=4)._sphinx_build('html') """ - if kind not in ('html', 'latex'): - raise ValueError('kind must be html or latex, not {}'.format(kind)) + if kind not in ('html', 'latex', 'spelling'): + raise ValueError('kind must be html, latex or ' + 'spelling, not {}'.format(kind)) self._run_os('sphinx-build', '-j{}'.format(self.num_jobs), @@ -304,6 +305,18 @@ def zip_html(self): '-q', *fnames) + def spellcheck(self): + """Spell check the documentation.""" + self._sphinx_build('spelling') + output_location = os.path.join('build', 'spelling', 'output.txt') + with open(output_location) as output: + lines = output.readlines() + if lines: + raise SyntaxError( + 'Found misspelled words.' + ' Check pandas/doc/build/spelling/output.txt' + ' for more details.') + def main(): cmds = [method for method in dir(DocBuilder) if not method.startswith('_')] diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index c81842d3d9212..19d745121ce17 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -342,7 +342,7 @@ As usual, **both sides** of the slicers are included as this is label indexing. columns=micolumns).sort_index().sort_index(axis=1) dfmi -Basic multi-index slicing using slices, lists, and labels. +Basic MultiIndex slicing using slices, lists, and labels. .. ipython:: python @@ -990,7 +990,7 @@ On the other hand, if the index is not monotonic, then both slice bounds must be KeyError: 'Cannot get right slice bound for non-unique label: 3' :meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` only check that -an index is weakly monotonic. To check for strict montonicity, you can combine one of those with +an index is weakly monotonic. To check for strict monotonicity, you can combine one of those with :meth:`Index.is_unique` .. ipython:: python diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 8d09f1fc04c1f..d4efa8a28f6c5 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -593,7 +593,7 @@ categorical columns: frame = pd.DataFrame({'a': ['Yes', 'Yes', 'No', 'No'], 'b': range(4)}) frame.describe() -This behaviour can be controlled by providing a list of types as ``include``/``exclude`` +This behavior can be controlled by providing a list of types as ``include``/``exclude`` arguments. The special value ``all`` can also be used: .. ipython:: python diff --git a/doc/source/conf.py b/doc/source/conf.py index d516e67b947ba..97081bec863b7 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -73,10 +73,14 @@ 'sphinx.ext.ifconfig', 'sphinx.ext.linkcode', 'nbsphinx', + 'sphinxcontrib.spelling' ] exclude_patterns = ['**.ipynb_checkpoints'] +spelling_word_list_filename = ['spelling_wordlist.txt', 'names_wordlist.txt'] +spelling_ignore_pypi_package_names = True + with open("index.rst") as f: index_rst_lines = f.readlines() diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index e9939250052f1..6ae93ba46fa5c 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -436,6 +436,25 @@ the documentation are also built by Travis-CI. 
These docs are then hosted `here `__, see also the :ref:`Continuous Integration ` section. +Spell checking documentation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When contributing documentation to **pandas**, it's good to check if your work +contains any spelling errors. Sphinx provides an easy way to spell check documentation +and docstrings. + +Running the spell check is easy. Just navigate to your local ``pandas/doc/`` directory and run:: + + python make.py spellcheck + +The spell check will take a few minutes to run (between 1 and 6 minutes). Sphinx will alert you +to any warnings and misspelt words - these misspelt words will be added to a file called +``output.txt``, which you can find in your local directory ``pandas/doc/build/spelling/``. + +The Sphinx spelling extension uses an EN-US dictionary to check words, which means that in +some cases you might need to add a word to this dictionary. You can do so by adding the word to +the word list file ``spelling_wordlist.txt`` located in the folder ``pandas/doc/source/``. + .. _contributing.code: Contributing to the code base diff --git a/doc/source/contributing_docstring.rst b/doc/source/contributing_docstring.rst index f80bfd9253764..6b2ecfe66d5e2 100644 --- a/doc/source/contributing_docstring.rst +++ b/doc/source/contributing_docstring.rst @@ -103,7 +103,7 @@ left before or after the docstring. The text starts in the next line after the opening quotes. The closing quotes have their own line (meaning that they are not at the end of the last sentence). -In rare occasions reST styles like bold text or itallics will be used in +In rare occasions reST styles like bold text or italics will be used in docstrings, but is it common to have inline code, which is presented between backticks. It is considered inline code: @@ -706,7 +706,7 @@ than 5, to show the example with the default values. If doing the ``mean``, we could use something like ``[1, 2, 3]``, so it is easy to see that the value returned is the mean. -For more complex examples (groupping for example), avoid using data without +For more complex examples (grouping for example), avoid using data without interpretation, like a matrix of random numbers with columns A, B, C, D... And instead use a meaningful example, which makes it easier to understand the concept. Unless required by the example, use names of animals, to keep examples @@ -877,7 +877,7 @@ be tricky. Here are some attention points: the actual error only the error name is sufficient. * If there is a small part of the result that can vary (e.g. a hash in an object - represenation), you can use ``...`` to represent this part. + representation), you can use ``...`` to represent this part. If you want to show that ``s.plot()`` returns a matplotlib AxesSubplot object, this will fail the doctest :: diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 893642410af02..fdc3b38cfdebc 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -286,7 +286,7 @@ New Columns df = pd.DataFrame( {'AAA' : [1,1,1,2,2,2,3,3], 'BBB' : [2,1,3,4,5,1,2,3]}); df -Method 1 : idxmin() to get the index of the mins +Method 1 : idxmin() to get the index of the minimums .. ipython:: python @@ -307,7 +307,7 @@ MultiIndexing The :ref:`multindexing ` docs. -`Creating a multi-index from a labeled frame +`Creating a MultiIndex from a labeled frame `__ .. ipython:: python @@ -330,7 +330,7 @@ The :ref:`multindexing ` docs.
Arithmetic ********** -`Performing arithmetic with a multi-index that needs broadcasting +`Performing arithmetic with a MultiIndex that needs broadcasting `__ .. ipython:: python @@ -342,7 +342,7 @@ Arithmetic Slicing ******* -`Slicing a multi-index with xs +`Slicing a MultiIndex with xs `__ .. ipython:: python @@ -363,7 +363,7 @@ To take the cross section of the 1st level and 1st axis the index: df.xs('six',level=1,axis=0) -`Slicing a multi-index with xs, method #2 +`Slicing a MultiIndex with xs, method #2 `__ .. ipython:: python @@ -386,13 +386,13 @@ To take the cross section of the 1st level and 1st axis the index: df.loc[(All,'Math'),('Exams')] df.loc[(All,'Math'),(All,'II')] -`Setting portions of a multi-index with xs +`Setting portions of a MultiIndex with xs `__ Sorting ******* -`Sort by specific column or an ordered list of columns, with a multi-index +`Sort by specific column or an ordered list of columns, with a MultiIndex `__ .. ipython:: python @@ -664,7 +664,7 @@ The :ref:`Pivot ` docs. `Plot pandas DataFrame with year over year data `__ -To create year and month crosstabulation: +To create year and month cross tabulation: .. ipython:: python @@ -677,7 +677,7 @@ To create year and month crosstabulation: Apply ***** -`Rolling Apply to Organize - Turning embedded lists into a multi-index frame +`Rolling Apply to Organize - Turning embedded lists into a MultiIndex frame `__ .. ipython:: python @@ -1029,8 +1029,8 @@ Skip row between header and data 01.01.1990 05:00;21;11;12;13 """ -Option 1: pass rows explicitly to skiprows -"""""""""""""""""""""""""""""""""""""""""" +Option 1: pass rows explicitly to skip rows +""""""""""""""""""""""""""""""""""""""""""" .. ipython:: python diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index ca6cefac9e842..b5b56fc6815c9 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -1014,7 +1014,7 @@ Deprecate Panel Over the last few years, pandas has increased in both breadth and depth, with new features, datatype support, and manipulation routines. As a result, supporting efficient indexing and functional routines for ``Series``, ``DataFrame`` and ``Panel`` has contributed to an increasingly fragmented and -difficult-to-understand codebase. +difficult-to-understand code base. The 3-D structure of a ``Panel`` is much less common for many types of data analysis, than the 1-D of the ``Series`` or the 2-D of the ``DataFrame``. Going forward it makes sense for @@ -1023,7 +1023,7 @@ pandas to focus on these areas exclusively. Oftentimes, one can simply use a MultiIndex ``DataFrame`` for easily working with higher dimensional data. In addition, the ``xarray`` package was built from the ground up, specifically in order to -support the multi-dimensional analysis that is one of ``Panel`` s main usecases. +support the multi-dimensional analysis that is one of ``Panel`` s main use cases. `Here is a link to the xarray panel-transition documentation `__. .. ipython:: python diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index 30cdb06b28487..8631ec7878af5 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -184,8 +184,8 @@ and metadata disseminated in `SDMX `_ 2.1, an ISO-standard widely used by institutions such as statistics offices, central banks, and international organisations. 
pandaSDMX can expose datasets and related -structural metadata including dataflows, code-lists, -and datastructure definitions as pandas Series +structural metadata including data flows, code-lists, +and data structure definitions as pandas Series or multi-indexed DataFrames. `fredapi `__ @@ -260,7 +260,7 @@ Data validation `Engarde `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Engarde is a lightweight library used to explicitly state your assumptions abour your datasets +Engarde is a lightweight library used to explicitly state your assumptions about your datasets and check that they're *actually* true. .. _ecosystem.extensions: diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst index b786b1d0c134a..979d025111df1 100644 --- a/doc/source/enhancingperf.rst +++ b/doc/source/enhancingperf.rst @@ -32,7 +32,7 @@ Cython (Writing C extensions for pandas) ---------------------------------------- For many use cases writing pandas in pure Python and NumPy is sufficient. In some -computationally heavy applications however, it can be possible to achieve sizeable +computationally heavy applications however, it can be possible to achieve sizable speed-ups by offloading work to `cython `__. This tutorial assumes you have refactored as much as possible in Python, for example @@ -806,7 +806,7 @@ truncate any strings that are more than 60 characters in length. Second, we can't pass ``object`` arrays to ``numexpr`` thus string comparisons must be evaluated in Python space. -The upshot is that this *only* applies to object-dtype'd expressions. So, if +The upshot is that this *only* applies to object-dtype expressions. So, if you have an expression--for example .. ipython:: python diff --git a/doc/source/extending.rst b/doc/source/extending.rst index f665b219a7bd1..431c69bc0b6b5 100644 --- a/doc/source/extending.rst +++ b/doc/source/extending.rst @@ -167,7 +167,7 @@ you can retain subclasses through ``pandas`` data manipulations. There are 3 constructor properties to be defined: -- ``_constructor``: Used when a manipulation result has the same dimesions as the original. +- ``_constructor``: Used when a manipulation result has the same dimensions as the original. - ``_constructor_sliced``: Used when a manipulation result has one lower dimension(s) as the original, such as ``DataFrame`` single columns slicing. - ``_constructor_expanddim``: Used when a manipulation result has one higher dimension as the original, such as ``Series.to_frame()`` and ``DataFrame.to_panel()``. diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index da13a34cccfea..1c4c3f93726a9 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -994,7 +994,7 @@ is only interesting over one column (here ``colname``), it may be filtered Handling of (un)observed Categorical values ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -When using a ``Categorical`` grouper (as a single grouper, or as part of multipler groupers), the ``observed`` keyword +When using a ``Categorical`` grouper (as a single grouper, or as part of multiple groupers), the ``observed`` keyword controls whether to return a cartesian product of all possible groupers values (``observed=False``) or only those that are observed groupers (``observed=True``). @@ -1010,7 +1010,7 @@ Show only the observed values: pd.Series([1, 1, 1]).groupby(pd.Categorical(['a', 'a', 'a'], categories=['a', 'b']), observed=True).count() -The returned dtype of the grouped will *always* include *all* of the catergories that were grouped. 
+The returned dtype of the grouped will *always* include *all* of the categories that were grouped. .. ipython:: python diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index e834efd1cb6d1..2b9fcf874ef22 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -700,7 +700,7 @@ Current Behavior Reindexing ~~~~~~~~~~ -The idiomatic way to achieve selecting potentially not-found elmenents is via ``.reindex()``. See also the section on :ref:`reindexing `. +The idiomatic way to achieve selecting potentially not-found elements is via ``.reindex()``. See also the section on :ref:`reindexing `. .. ipython:: python diff --git a/doc/source/install.rst b/doc/source/install.rst index 6054be112f52c..e655136904920 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -31,7 +31,7 @@ PyPI and through conda. Starting **January 1, 2019**, all releases will be Python 3 only. If there are people interested in continued support for Python 2.7 past December -31, 2018 (either backporting bugfixes or funding) please reach out to the +31, 2018 (either backporting bug fixes or funding) please reach out to the maintainers on the issue tracker. For more information, see the `Python 3 statement`_ and the `Porting to Python 3 guide`_. @@ -199,7 +199,7 @@ Running the test suite ---------------------- pandas is equipped with an exhaustive set of unit tests, covering about 97% of -the codebase as of this writing. To run it on your machine to verify that +the code base as of this writing. To run it on your machine to verify that everything is working (and that you have all of the dependencies, soft and hard, installed), make sure you have `pytest `__ and run: diff --git a/doc/source/internals.rst b/doc/source/internals.rst index b120e3a98db7f..caf5790fb24c6 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -41,7 +41,7 @@ There are functions that make the creation of a regular index easy: - ``date_range``: fixed frequency date range generated from a time rule or DateOffset. An ndarray of Python datetime objects - ``period_range``: fixed frequency date range generated from a time rule or - DateOffset. An ndarray of ``Period`` objects, representing Timespans + DateOffset. An ndarray of ``Period`` objects, representing timespans The motivation for having an ``Index`` class in the first place was to enable different implementations of indexing. This means that it's possible for you, diff --git a/doc/source/io.rst b/doc/source/io.rst index aa2484b0cb5c3..7bd56d52b3492 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -116,7 +116,7 @@ header : int or list of ints, default ``'infer'`` existing names. The header can be a list of ints that specify row locations - for a multi-index on the columns e.g. ``[0,1,3]``. Intervening rows + for a MultiIndex on the columns e.g. ``[0,1,3]``. Intervening rows that are not specified will be skipped (e.g. 2 in this example is skipped). Note that this parameter ignores commented lines and empty lines if ``skip_blank_lines=True``, so header=0 denotes the first @@ -503,7 +503,7 @@ This matches the behavior of :meth:`Categorical.set_categories`. converted using the :func:`to_numeric` function, or as appropriate, another converter such as :func:`to_datetime`. - When ``dtype`` is a ``CategoricalDtype`` with homogenous ``categories`` ( + When ``dtype`` is a ``CategoricalDtype`` with homogeneous ``categories`` ( all numeric, all datetimes, etc.), the conversion is done automatically. .. 
ipython:: python @@ -554,7 +554,7 @@ If the header is in a row other than the first, pass the row number to Default behavior is to infer the column names: if no names are passed the behavior is identical to ``header=0`` and column names - are inferred from the first nonblank line of the file, if column + are inferred from the first non-blank line of the file, if column names are passed explicitly then the behavior is identical to ``header=None``. @@ -868,7 +868,7 @@ data columns: df .. note:: - If a column or index contains an unparseable date, the entire column or + If a column or index contains an unparsable date, the entire column or index will be returned unaltered as an object data type. For non-standard datetime parsing, use :func:`to_datetime` after ``pd.read_csv``. @@ -1644,7 +1644,7 @@ over the string representation of the object. All arguments are optional: argument and returns a formatted string; to be applied to floats in the ``DataFrame``. - ``sparsify`` default True, set to False for a ``DataFrame`` with a hierarchical - index to print every multiindex key at each row. + index to print every MultiIndex key at each row. - ``index_names`` default True, will print the names of the indices - ``index`` default True, will print the index (ie, row labels) - ``header`` default True, will print the column labels @@ -2178,7 +2178,7 @@ A few notes on the generated table schema: - The ``schema`` object contains a ``pandas_version`` field. This contains the version of pandas' dialect of the schema, and will be incremented with each revision. -- All dates are converted to UTC when serializing. Even timezone naïve values, +- All dates are converted to UTC when serializing. Even timezone naive values, which are treated as UTC with an offset of 0. .. ipython:: python @@ -2245,7 +2245,7 @@ A few notes on the generated table schema: .. versionadded:: 0.23.0 ``read_json`` also accepts ``orient='table'`` as an argument. This allows for -the preserveration of metadata such as dtypes and index names in a +the preservation of metadata such as dtypes and index names in a round-trippable manner. .. ipython:: python @@ -2356,7 +2356,7 @@ Read a URL and match a table that contains specific text: Specify a header row (by default ```` or ```` elements located within a ```` are used to form the column index, if multiple rows are contained within -```` then a multiindex is created); if specified, the header row is taken +```` then a multi-index is created); if specified, the header row is taken from the data minus the parsed header elements (```` elements). .. code-block:: python @@ -3141,7 +3141,7 @@ any pickled pandas object (or any other pickled object) from file: .. warning:: - Several internal refactorings have been done while still preserving + Several internal refactoring have been done while still preserving compatibility with pickles created with older versions of pandas. However, for such cases, pickled ``DataFrames``, ``Series`` etc, must be read with ``pd.read_pickle``, rather than ``pickle.load``. @@ -4721,7 +4721,7 @@ writes ``data`` to the database in batches of 1000 rows at a time: .. note:: - The function :func:`~pandas.DataFrame.to_sql` will perform a multivalue + The function :func:`~pandas.DataFrame.to_sql` will perform a multi-value insert if the engine dialect ``supports_multivalues_insert``. This will greatly speed up the insert in some cases. 
diff --git a/doc/source/merging.rst b/doc/source/merging.rst index 1161656731f88..0de6b871712a3 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -1310,7 +1310,7 @@ For this, use the :meth:`~DataFrame.combine_first` method: Note that this method only takes values from the right ``DataFrame`` if they are missing in the left ``DataFrame``. A related method, :meth:`~DataFrame.update`, -alters non-NA values inplace: +alters non-NA values in place: .. ipython:: python :suppress: diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 3950e4c80749b..e4b5578af15f0 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -105,7 +105,7 @@ Datetimes For datetime64[ns] types, ``NaT`` represents missing values. This is a pseudo-native sentinel value that can be represented by NumPy in a singular dtype (datetime64[ns]). -pandas objects provide intercompatibility between ``NaT`` and ``NaN``. +pandas objects provide compatibility between ``NaT`` and ``NaN``. .. ipython:: python @@ -349,7 +349,7 @@ Interpolation The ``limit_area`` keyword argument was added. Both Series and DataFrame objects have :meth:`~DataFrame.interpolate` -that, by default, performs linear interpolation at missing datapoints. +that, by default, performs linear interpolation at missing data points. .. ipython:: python :suppress: diff --git a/doc/source/names_wordlist.txt b/doc/source/names_wordlist.txt new file mode 100644 index 0000000000000..032883b7febf6 --- /dev/null +++ b/doc/source/names_wordlist.txt @@ -0,0 +1,1652 @@ +Critchley +Villanova +del +Hohmann +Rychyk +Buchkovsky +Lenail +Schade +datetimeindex +Aly +Sivji +Költringer +Bui +András +Novoszáth +Anh +Anil +Pallekonda +Pitrou +Linde +Quinonez +Varshokar +Artem +Bogachev +Avi +Azeez +Oluwafemi +Auffarth +Thiel +Bhavesh +Poddar +Haffner +Naul +Guinta +Moreira +García +Márquez +Cheuk +Chitrank +Dixit +Catalfo +Mazzullo +Chwala +Cihan +Ceyhan +Brunner +Riemenschneider +Dixey +Garrido +Sakuma +Hirschfeld +Adrián +Cañones +Castellano +Arcos +Hoese +Stansby +Kamau +Niederhut +Dror +Atariah +Chea +Kisslinger +Retkowski +Sar +Maeztu +Gianpaolo +Macario +Giftlin +Rajaiah +Olimpio +Gjelt +Inggs +Grzegorz +Konefał +Guilherme +Beltramini +Pitkeathly +Mashkoor +Ferchland +Haochen +Hissashi +Sharaf +Ignasi +Fosch +Alves +Shelvinskyi +Imanflow +Ingolf +Saeta +Pérez +Koevska +Jakub +Nowacki +Werkmann +Zoutkamp +Bandlow +Jaume +Bonet +Alammar +Reback +Jing +Qiang +Goh +Miralles +Nothman +Joeun +Metz +Mease +Schulze +Jongwony +Jordi +Contestí +Joris +Bossche +José +Fonseca +Jovixe +Jörg +Döpfert +Ittoku +Surta +Kuhl +Krzysztof +Chomski +Ksenia +Ksenia +Bobrova +Kunal +Gosar +Kerstein +Laksh +Arora +Geffert +Licht +Takeuchi +Liudmila +Villalba +Manan +Singh +Manraj +Singh +Hemken +Bibiloni +Corchero +Woodbridge +Journois +Gallo +Heikkilä +Braymer +Maybeno +Rocklin +Roeschke +Bussonnier +Mikhaylov +Veksler +Roos +Maximiliano +Greco +Penkov +Röttger +Selik +Waskom +Mie +Kutzma +Mitar +Negus +Münst +Mortada +Mehyar +Braithwaite +Chmura +Karagiannakis +Nipun +Sadvilkar +Martensen +Noémi +Éltető +Bilodeau +Ondrej +Kokes +Onno +Ganssle +Mannino +Reidy +Oliveira +Hoffmann +Ngo +Battiston +Pranav +Suri +Priyanka +Ojha +Pulkit +Maloo +Magliocchetti +Ridhwan +Luthra +Kiplang'at +Rohan +Pandit +Rok +Mihevc +Rouz +Azari +Ryszard +Kaleta +Samir +Musali +Sinayoko +Sangwoong +Yoon +Sharad +Vijalapuram +Shubham +Chaudhary +Sietse +Brouwer +Delprete +Cianciulli +Childs +Stijn +Hoey +Talitha +Pumar +Tarbo +Fukazawa +Petrou +Caswell +Hoffmann +Swast 
+Augspurger +Tulio +Casagrande +Tushar +Tushar +Mittal +Upkar +Lidder +Vinícius +Figueiredo +Vipin +WBare +Wenhuan +Ayd +Xbar +Yaroslav +Halchenko +Yee +Mey +Yeongseon +Choe +Yian +Yimeng +Zhang +Zihao +Zhao +adatasetaday +akielbowicz +akosel +alinde +amuta +bolkedebruin +cbertinato +cgohlke +charlie +chris +csfarkas +dajcs +deflatSOCO +derestle +htwg +discort +dmanikowski +donK +elrubio +fivemok +fjdiod +fjetter +froessler +gabrielclow +gfyoung +ghasemnaddaf +vetinari +himanshu +awasthi +ignamv +jayfoad +jazzmuesli +jbrockmendel +jjames +joaoavf +joders +jschendel +juan +huguet +luzpaz +mdeboc +miguelmorin +miker +miquelcamprodon +orereta +ottiP +peterpanmj +rafarui +raph +readyready +rmihael +samghelms +scriptomation +sfoo +stefansimik +stonebig +tmnhat +tomneep +tv +verakai +xpvpc +zhanghui +API +Mazzullo +Riemenschneider +Hirschfeld +Stansby +Dror +Atariah +Kisslinger +Ingolf +Werkmann +Reback +Joris +Bossche +Jörg +Döpfert +Kuhl +Krzysztof +Chomski +Licht +Takeuchi +Manraj +Singh +Braymer +Waskom +Mie +Hoffmann +Sietse +Brouwer +Swast +Augspurger +Ayd +Yee +Mey +bolkedebruin +cgohlke +derestle +htwg +fjdiod +gabrielclow +gfyoung +ghasemnaddaf +jbrockmendel +jschendel +miker +pypy +Gleave +Liaw +Velasco +Yee +Marchenko +Amol +Winkler +亮 +André +Jonasson +Sweger +Berkay +Haffner +Tu +Chankey +Pathak +Billington +Filo +Gorgolewski +Mazzullo +Prinoth +Stade +Schuldt +Moehl +Himmelstein +Willmer +Niederhut +Wieser +Fredriksen +Kint +Giftlin +Giftlin +Rajaiah +Guilherme +Beltramini +Guillem +Borrell +Hanmin +Qin +Makait +Hussain +Tamboli +Miholic +Novotný +Helie +Schiratti +Deschenes +Knupp +Reback +Tratner +Nothman +Crall +Mease +Helmus +Joris +Bossche +Bochi +Kuhlmann +Brabandere +Keeton +Keiron +Pizzey +Kernc +Licht +Takeuchi +Kushner +Jelloul +Makarov +Malgorzata +Turzanska +Sy +Roeschke +Picus +Mehmet +Akmanalp +Gasvoda +Penkov +Eubank +Shteynbuk +Tillmann +Pankaj +Pandey +Luo +O'Melveny +Reidy +Quackenbush +Yanovich +Haessig +Battiston +Pradyumna +Reddy +Chinthala +Prasanjit +Prakash +Sangwoong +Yoon +Sudeep +Telt +Caswell +Swast +Augspurger +Tuan +Utkarsh +Upadhyay +Vivek +Aiyong +WBare +Yi +Liu +Yosuke +Nakabayashi +aaron +abarber +gh +aernlund +agustín +méndez +andymaheshw +aviolov +bpraggastis +cbertinato +cclauss +chernrick +chris +dkamm +dwkenefick +faic +fding +gfyoung +guygoldberg +hhuuggoo +huashuai +ian +iulia +jaredsnyder +jbrockmendel +jdeschenes +jebob +jschendel +keitakurita +kernc +kiwirob +kjford +linebp +lloydkirk +louispotok +majiang +manikbhandari +matthiashuschle +mattip +maxwasserman +mjlove +nmartensen +parchd +philipphanemann +rdk +reidy +ri +ruiann +rvernica +weigand +scotthavard +skwbc +tobycheese +tsdlovell +ysau +zzgao +cov +abaldenko +adrian +stepien +Saxena +Akash +Tandon +Aleksey +Bilogur +alexandercbooth +Amol +Kahat +Winkler +Kittredge +Anthonios +Partheniou +Arco +Ashish +Singal +atbd +bastewart +Baurzhan +Muftakhidinov +Kandel +bmagnusson +carlosdanielcsantos +Souza +chaimdemulder +chris +Aycock +Gohlke +Paulik +Warth +Brunner +Himmelstein +Willmer +Krych +dickreuter +Dimitris +Spathis +discort +Dmitry +Suria +Wijaya +Stanczak +dr +leo +dubourg +dwkenefick +Andrade +Ennemoser +Francesc +Alted +Fumito +Hamamura +funnycrab +gfyoung +Ferroni +goldenbull +Jeffries +Guilherme +Beltramini +Guilherme +Samora +Hao +Harshit +Patni +Ilya +Schurov +Iván +Vallés +Pérez +Leng +Jaehoon +Hwang +Goppert +Santucci +Reback +Crist +Jevnik +Nothman +Zwinck +jojomdt +Whitmore +Mease +Mease +Joost +Kranendonk +Joris +Bossche +Bradt +Santander +Julien +Marrec +Solinsky 
+Kacawi +Kamal +Kamalaldin +Shedden +Kernc +Keshav +Ramaswamy +Ren +linebp +Pedersen +Cestaro +Scarabello +Lukasz +paramstyle +Lababidi +Unserialized +manu +manuels +Roeschke +mattip +Picus +Roeschke +maxalbert +Roos +mcocdawc +Lamparski +Michiel +Mikolaj +Chwalisz +Miroslav +Šedivý +Mykola +Golubyev +Rud +Halen +Chmura +nuffe +Pankaj +Pandey +paul +mannino +Pawel +Kordek +pbreach +Csizsek +Petio +Petrov +Ruffwind +Battiston +Chromiec +Prasanjit +Prakash +Forgione +Rouz +Azari +Sahil +Dua +sakkemo +Sami +Salonen +Sarma +Tangirala +scls +Gsänger +Sébastien +Menten +Heide +Shyam +Saladi +sinhrks +Sinhrks +Rauch +stijnvanhoey +Adiseshan +themrmax +Thiago +Serafim +Thoralf +Thrasibule +Gustafsson +Augspurger +tomrod +Shen +tzinckgraf +Uwe +wandersoncferreira +watercrossing +wcwagner +Wiktor +Tomczak +xgdgsc +Yaroslav +Halchenko +Yimeng +Zhang +yui +knk +Saxena +Kandel +Aycock +Himmelstein +Willmer +gfyoung +hesham +shabana +Reback +Jevnik +Joris +Bossche +Santander +Shedden +Keshav +Ramaswamy +Scarabello +Picus +Roeschke +Roos +Mykola +Golubyev +Halen +Pawel +Kordek +Battiston +sinhrks +Adiseshan +Augspurger +wandersoncferreira +Yaroslav +Halchenko +Chainz +Anthonios +Partheniou +Arash +Rouhani +Kandel +chris +Warth +Krych +dubourg +gfyoung +Iván +Vallés +Pérez +Reback +Jevnik +Mease +Joris +Bossche +Keshav +Ramaswamy +Ren +mattrijk +paul +mannino +Chromiec +Sinhrks +Thiago +Serafim +adneu +agraboso +Alekseyev +Vig +Riddell +Amol +Amol +Agrawal +Anthonios +Partheniou +babakkeyvani +Kandel +Baxley +Camilo +Cota +chris +Grinolds +Hudon +Aycock +Warth +cmazzullo +cr +Siladji +Drewrey +Lupton +dsm +Blancas +Marsden +Marczinowski +O'Donovan +Gábor +Lipták +Geraint +gfyoung +Ferroni +Haleemur +harshul +Hassan +Shamim +iamsimha +Iulius +Nazarov +jackieleng +Reback +Crist +Jevnik +Liekezer +Zwinck +Erenrich +Joris +Bossche +Howes +Brandys +Kamil +Sindi +Ka +Wo +Shedden +Kernc +Brucher +Roos +Scherer +Mortada +Mehyar +mpuels +Haseeb +Tariq +Bonnotte +Virtanen +Mestemaker +Pawel +Kordek +Battiston +pijucha +Jucha +priyankjain +Nimmi +Gieseke +Keyes +Sahil +Dua +Sanjiv +Lobo +Sašo +Stanovnik +Heide +sinhrks +Sinhrks +Kappel +Choi +Sudarshan +Konge +Caswell +Augspurger +Uwe +Hoffmann +wcwagner +Xiang +Zhang +Yadunandan +Yaroslav +Halchenko +YG +Riku +Yuichiro +Kaneko +yui +knk +zhangjinjie +znmean +颜发才 +Yan +Facai +Fiore +Gartland +Bastiaan +Benoît +Vinot +Fustin +Freitas +Ter +Livschitz +Gábor +Lipták +Hassan +Kibirige +Iblis +Saeta +Pérez +Wolosonovich +Reback +Jevnik +Joris +Bossche +Storck +Ka +Wo +Shedden +Kieran +O'Mahony +Lababidi +Maoyuan +Liu +Wittmann +MaxU +Roos +Droettboom +Eubank +Bonnotte +Virtanen +Battiston +Prabhjot +Singh +Augspurger +Aiyong +Winand +Xbar +Yan +Facai +adneu +ajenkins +cargometrics +behzad +nouri +chinskiy +gfyoung +jeps +jonaslb +kotrfa +nileracecrew +onesandzeroes +sinhrks +tsdlovell +Alekseyev +Rosenfeld +Anthonios +Partheniou +Sipos +Carroux +Aycock +Scanlin +Da +Dorozhko +O'Donovan +Cleary +Gianluca +Jeffries +Horel +Schwabacher +Deschenes +Reback +Jevnik +Fremlin +Hoersch +Joris +Bossche +Joris +Vankerschaver +Ka +Wo +Keming +Zhang +Shedden +Farrugia +Lurie +Roos +Mayank +Asthana +Mortada +Mehyar +Moussa +Taifi +Navreet +Bonnotte +Reiners +Gura +Battiston +Carnevale +Rinoc +Rishipuri +Sangmin +Lasley +Sereger +Seabold +Thierry +Moisan +Caswell +Augspurger +Hauck +Varun +Yoong +Kang +Lim +Yoshiki +Vázquez +Baeza +Joong +Younggun +Yuval +Langer +argunov +behzad +nouri +boombard +brian +pantano +chromy +daniel +dgram +gfyoung +hcontrast +jfoo +kaustuv +deolal 
+llllllllll +ranarag +rockg +scls +seales +sinhrks +srib +surveymedia +tworec +Drozd +Anthonios +Partheniou +Berendt +Piersall +Hamed +Saljooghinejad +Iblis +Deschenes +Reback +Callin +Joris +Bossche +Ka +Wo +Loïc +Séguin +Luo +Yicheng +Magnus +Jöud +Leonhardt +Roos +Bonnotte +Pastafarianist +Chong +Schaf +Philipp +deCarvalho +Khomenko +Rémy +Léone +Thierry +Moisan +Augspurger +Varun +Hoffmann +Winterflower +Younggun +ajcr +azuranski +behzad +nouri +cel +emilydolson +hironow +lexual +llllllllll +rockg +silentquasar +sinhrks +taeold +unparseable +Rothberg +Bedini +Rosenfeld +Anthonios +Partheniou +Artemy +Kolchinsky +Willers +Gohlke +Clearfield +Ringwalt +Cottrell +Gagne +Schettino +Panfilov +Araujo +Gianluca +Poulin +Nisar +Henriksen +Hoegen +Jaidev +Deshpande +Swails +Reback +Buyl +Joris +Bossche +Joris +Vankerschaver +Julien +Danjou +Ka +Wo +Kehoe +Jordahl +Shedden +Buitinck +Gambogi +Savoie +Roos +D'Agostino +Mortada +Mehyar +Eubank +Nipun +Batra +Ondřej +Čertík +Pratap +Vardhan +Rafal +Skolasinski +Rinoc +Gieseke +Safia +Abdalla +Saumitra +Shahapure +Pölsterl +Rubbert +Sinhrks +Siu +Kwan +Seabold +Carrucciu +Hoyer +Pascoe +Santegoeds +Grainger +Tjerk +Santegoeds +Augspurger +Winterflower +Yaroslav +Halchenko +agijsberts +ajcr +behzad +nouri +cel +cyrusmaher +davidovitch +ganego +jreback +juricast +larvian +maximilianr +msund +rekcahpassyla +robertzk +scls +seth +sinhrks +springcoil +terrytangyuan +tzinckgraf +Rosenfeld +Artemy +Kolchinsky +Willers +Christer +der +Meeren +Hudon +Lasiman +Brundu +Gaëtan +Menten +Hiebert +Reback +Joris +Bossche +Ka +Wo +Mortada +Mehyar +Grainger +Ajamian +Augspurger +Yoshiki +Vázquez +Baeza +Younggun +austinc +behzad +nouri +jreback +lexual +rekcahpassyla +scls +sinhrks +Artemy +Kolchinsky +Gilmer +Grinolds +Birken +Hirschfeld +Dunné +Hatem +Nassrat +Sperr +Herter +Blackburne +Reback +Crist +Abernot +Joris +Bossche +Shedden +Razoumov +Riel +Mortada +Mehyar +Eubank +Grisel +Battiston +Hyunjin +Zhang +Hoyer +Tiago +Antao +Ajamian +Augspurger +Tomaz +Berisa +Shirgur +Filimonov +Hogman +Yasin +Younggun +behzad +nouri +dsm +floydsoft +gfr +jnmclarty +jreback +ksanghai +lucas +mschmohl +ptype +rockg +scls +sinhrks +Toth +Amici +Artemy +Kolchinsky +Ashwini +Chaudhary +Letson +Chau +Hoang +Christer +der +Meeren +Cottrell +Ehsan +Azarnasab +Torcasso +Sexauer +Reback +Joris +Bossche +Joschka +zur +Jacobsmühlen +Bochi +Junya +Hayashi +Shedden +Kieran +O'Mahony +Kodi +Arfer +Airas +Mortada +Mehyar +Lasley +Lasley +Pascual +Seabold +Hoyer +Grainger +Augspurger +Filimonov +Vyomkesh +Tripathi +Holmgren +Yulong +behzad +nouri +bertrandhaut +bjonen +cel +clham +hsperr +ischwabacher +jnmclarty +josham +jreback +omtinez +roch +sinhrks +unutbu +Angelos +Evripiotis +Artemy +Kolchinsky +Pointet +Jacobowski +Charalampos +Papaloizou +Warth +Zanini +Francesc +Kleynhans +Reback +Tratner +Joris +Bossche +Suggit +Lasley +Hoyer +Sylvain +Corlay +Grainger +Tiago +Antao +Hauck +Chaves +Salgado +Bhandoh +Aiyong +Holmgren +behzad +nouri +broessli +charalampos +papaloizou +immerrr +jnmclarty +jreback +mgilbert +onesandzeroes +peadarcoyle +rockg +seth +sinhrks +unutbu +wavedatalab +Åsmund +Hjulstad +Rosenfeld +Sipos +Artemy +Kolchinsky +Letson +Horel +Reback +Joris +Bossche +Sanghee +Hoyer +Aiyong +behzad +nouri +immerrr +jnmclarty +jreback +pallav +fdsi +unutbu +Greenhall +Artemy +Kolchinsky +behzad +nouri +Sauer +benjamin +Thyreau +bjonen +Stoafer +dlovell +dsm +Herrero +Hsiaoming +Huan +hunterowens +Hyungtae +immerrr +Slavitt +ischwabacher +Schaer +Tratner +Farnham +jmorris +jnmclarty 
+Bradish +Joerg +Rittinger +Joris +Bossche +jreback +klonuo +lexual +mcjcode +Schatzow +Mortada +Mehyar +mtrbean +Typanski +onesandzeroes +Masurel +Battiston +rockg +Petchler +seth +Shahul +Hameed +Shashank +Agarwal +sinhrks +someben +stahlous +stas +sl +Hoyer +thatneat +alcorn +Augspurger +unutbu +Yevgeniy +Grechka +Yoshiki +VÃ +zquez +Baeza +zachcp +Rosenfeld +Quistorff +Wignall +bwignall +clham +Waeber +Bew +dsm +helger +immerrr +Schaer +jaimefrio +Reaver +Joris +Bossche +jreback +Julien +Danjou +lexual +Wittmann +Mortada +Mehyar +onesandzeroes +rockg +sanguineturtle +Schaer +seth +sinhrks +Hoyer +Kluyver +yelite +hexbin +Acanthostega +agijsberts +akittredge +Gaudio +Rothberg +Rosenfeld +ankostis +anomrake +Mazières +anton +bashtage +Sauer +benjamin +Buran +bwignall +cgohlke +chebee +clham +Birken +danielballan +Waeber +Drapala +Gouthaman +Balaraman +Poulin +hshimizu +hugo +immerrr +ischwabacher +Schaer +jaimefrio +Sexauer +Reback +Tratner +Reaver +Joris +Bossche +jreback +jsexauer +Júlio +kdiether +Jordahl +Wittmann +Grender +Gruen +michaelws +mikebailey +Nipun +Batra +ojdo +onesandzeroes +phaebz +Battiston +Carnevale +ribonoous +Gibboni +rockg +sinhrks +Seabold +Hoyer +Cera +Augspurger +unutbu +westurner +Yaroslav +Halchenko +lexual +danbirken +travis +Billington +Cobzarenco +Gamboa +Cavazos +Gaudecker +Gerigk +Yaroslav +Halchenko +sharey +Vytautas +Jancauskas +Hammerbacher +Hilboll +Luc +Kesters +JanSchulz +Negusse +Wouter +Overmeire +Reeson +Aman +Thakral +Uga +Vandenbussche +Pinxing +astype +Buglet +Beltrame +Hilboll +Jev +Kuznetsov +Wouter +Overmeire +Reyfman +Joon +Ro +Uga +Vandenbussche +setupegg +Hammerbacher +Jev +Kuznetsov +Wouter +Overmeire +Aman +Thakral +Uga +Vandenbussche +carljv +rsamson +newaxis +Fortunov +Aman +Thakral +Beltrame +Wouter +Overmeire +rsamson +Laserson +Pentreath +Joon +Ro +Uga +Fortunov +Berka +Vandenbussche +krogh +akima +BPoly +isna +kurt diff --git a/doc/source/options.rst b/doc/source/options.rst index 48247eb48baaf..697cc0682e39a 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -149,7 +149,7 @@ More information can be found in the `ipython documentation Frequently Used Options ----------------------- -The following is a walkthrough of the more frequently used display options. +The following is a walk-through of the more frequently used display options. ``display.max_rows`` and ``display.max_columns`` sets the maximum number of rows and columns displayed when a frame is pretty-printed. Truncated diff --git a/doc/source/release.rst b/doc/source/release.rst index 32db2ff5ebb24..04c499ff6797b 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -2429,7 +2429,7 @@ New Features - ``plot(kind='kde')`` now accepts the optional parameters ``bw_method`` and ``ind``, passed to scipy.stats.gaussian_kde() (for scipy >= 0.11.0) to set - the bandwidth, and to gkde.evaluate() to specify the indicies at which it + the bandwidth, and to gkde.evaluate() to specify the indices at which it is evaluated, respectively. See scipy docs. (:issue:`4298`) - Added ``isin`` method to DataFrame (:issue:`4211`) - ``df.to_clipboard()`` learned a new ``excel`` keyword that let's you @@ -2540,7 +2540,7 @@ Improvements to existing features - ``read_json`` now raises a (more informative) ``ValueError`` when the dict contains a bad key and ``orient='split'`` (:issue:`4730`, :issue:`4838`) - ``read_stata`` now accepts Stata 13 format (:issue:`4291`) -- ``ExcelWriter`` and ``ExcelFile`` can be used as contextmanagers. 
+- ``ExcelWriter`` and ``ExcelFile`` can be used as context managers. (:issue:`3441`, :issue:`4933`) - ``pandas`` is now tested with two different versions of ``statsmodels`` (0.4.3 and 0.5.0) (:issue:`4981`). @@ -2553,7 +2553,7 @@ Improvements to existing features that cannot be concatenated (:issue:`4608`). - Add ``halflife`` option to exponentially weighted moving functions (PR :issue:`4998`) -- ``to_dict`` now takes ``records`` as a possible outtype. Returns an array +- ``to_dict`` now takes ``records`` as a possible out type. Returns an array of column-keyed dictionaries. (:issue:`4936`) - ``tz_localize`` can infer a fall daylight savings transition based on the structure of unlocalized data (:issue:`4230`) @@ -2664,13 +2664,13 @@ API Changes - ``select_as_coordinates`` will now return an ``Int64Index`` of the resultant selection set - support ``timedelta64[ns]`` as a serialization type (:issue:`3577`) - - store `datetime.date` objects as ordinals rather then timetuples to avoid + - store `datetime.date` objects as ordinals rather then time-tuples to avoid timezone issues (:issue:`2852`), thanks @tavistmorph and @numpand - ``numexpr`` 2.2.2 fixes incompatibility in PyTables 2.4 (:issue:`4908`) - ``flush`` now accepts an ``fsync`` parameter, which defaults to ``False`` (:issue:`5364`) - ``unicode`` indices not supported on ``table`` formats (:issue:`5386`) - - pass thru store creation arguments; can be used to support in-memory stores + - pass through store creation arguments; can be used to support in-memory stores - ``JSON`` - added ``date_unit`` parameter to specify resolution of timestamps. @@ -2736,7 +2736,7 @@ API Changes created when passing floating values in index creation. This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the same. Indexing on other index types - are preserved (and positional fallback for ``[],ix``), with the exception, + are preserved (and positional fall back for ``[],ix``), with the exception, that floating point slicing on indexes on non ``Float64Index`` will raise a ``TypeError``, e.g. ``Series(range(5))[3.5:4.5]`` (:issue:`263`,:issue:`5375`) - Make Categorical repr nicer (:issue:`4368`) @@ -2765,7 +2765,7 @@ API Changes (:issue:`5339`) - default for `display.max_seq_len` is now 100 rather then `None`. This activates truncated display ("...") of long sequences in various places. (:issue:`3391`) -- **All** division with ``NDFrame`` - likes is now truedivision, regardless +- **All** division with ``NDFrame`` - likes is now true division, regardless of the future import. You can use ``//`` and ``floordiv`` to do integer division. @@ -2787,7 +2787,7 @@ API Changes dtype: float64 - raise/warn ``SettingWithCopyError/Warning`` exception/warning when setting of a - copy thru chained assignment is detected, settable via option ``mode.chained_assignment`` + copy through chained assignment is detected, settable via option ``mode.chained_assignment`` - test the list of ``NA`` values in the csv parser. add ``N/A``, ``#NA`` as independent default na values (:issue:`5521`) - The refactoring involving``Series`` deriving from ``NDFrame`` breaks ``rpy2<=2.3.8``. an Issue @@ -2888,7 +2888,7 @@ See :ref:`Internal Refactoring` (datetime/timedelta/time etc.) into a separate, cleaned up wrapper class. (:issue:`4613`) - Complex compat for ``Series`` with ``ndarray``. 
(:issue:`4819`) -- Removed unnecessary ``rwproperty`` from codebase in favor of builtin +- Removed unnecessary ``rwproperty`` from code base in favor of builtin property. (:issue:`4843`) - Refactor object level numeric methods (mean/sum/min/max...) from object level modules to ``core/generic.py`` (:issue:`4435`). @@ -3014,7 +3014,7 @@ Bug Fixes - Fix boolean indexing on an empty series loses index names (:issue:`4235`), infer_dtype works with empty arrays. - Fix reindexing with multiple axes; if an axes match was not replacing the - current axes, leading to a possible lazay frequency inference issue + current axes, leading to a possible lazy frequency inference issue (:issue:`3317`) - Fixed issue where ``DataFrame.apply`` was reraising exceptions incorrectly (causing the original stack trace to be truncated). @@ -3036,7 +3036,7 @@ Bug Fixes (:issue:`4727`) - Fix some inconsistencies with ``Index.rename`` and ``MultiIndex.rename``, etc. (:issue:`4718`, :issue:`4628`) -- Bug in using ``iloc/loc`` with a cross-sectional and duplicate indicies +- Bug in using ``iloc/loc`` with a cross-sectional and duplicate indices (:issue:`4726`) - Bug with using ``QUOTE_NONE`` with ``to_csv`` causing ``Exception``. (:issue:`4328`) @@ -3171,7 +3171,7 @@ Bug Fixes - Fixed bug in Excel writers where frames with duplicate column names weren't written correctly. (:issue:`5235`) - Fixed issue with ``drop`` and a non-unique index on Series (:issue:`5248`) -- Fixed seg fault in C parser caused by passing more names than columns in +- Fixed segfault in C parser caused by passing more names than columns in the file. (:issue:`5156`) - Fix ``Series.isin`` with date/time-like dtypes (:issue:`5021`) - C and Python Parser can now handle the more common multi-index column @@ -3377,7 +3377,7 @@ API Changes - more consistency in the to_datetime return types (give string/array of string inputs) (:issue:`3888`) - The internal ``pandas`` class hierarchy has changed (slightly). The previous ``PandasObject`` now is called ``PandasContainer`` and a new - ``PandasObject`` has become the baseclass for ``PandasContainer`` as well + ``PandasObject`` has become the base class for ``PandasContainer`` as well as ``Index``, ``Categorical``, ``GroupBy``, ``SparseList``, and ``SparseArray`` (+ their base classes). Currently, ``PandasObject`` provides string methods (from ``StringMixin``). 
(:issue:`4090`, :issue:`4092`) @@ -3729,7 +3729,7 @@ Bug Fixes - Bug in value_counts of ``datetime64[ns]`` Series (:issue:`3002`) - Fixed printing of ``NaT`` in an index - Bug in idxmin/idxmax of ``datetime64[ns]`` Series with ``NaT`` (:issue:`2982`) -- Bug in ``icol, take`` with negative indicies was producing incorrect return +- Bug in ``icol, take`` with negative indices was producing incorrect return values (see :issue:`2922`, :issue:`2892`), also check for out-of-bounds indices (:issue:`3029`) - Bug in DataFrame column insertion when the column creation fails, existing frame is left in an irrecoverable state (:issue:`3010`) @@ -3752,7 +3752,7 @@ Bug Fixes - Fix upsampling bug with closed='left' and daily to daily data (:issue:`3020`) - Fixed missing tick bars on scatter_matrix plot (:issue:`3063`) - Fixed bug in Timestamp(d,tz=foo) when d is date() rather then datetime() (:issue:`2993`) -- series.plot(kind='bar') now respects pylab color schem (:issue:`3115`) +- series.plot(kind='bar') now respects pylab color scheme (:issue:`3115`) - Fixed bug in reshape if not passed correct input, now raises TypeError (:issue:`2719`) - Fixed a bug where Series ctor did not respect ordering if OrderedDict passed in (:issue:`3282`) - Fix NameError issue on RESO_US (:issue:`2787`) @@ -3790,7 +3790,7 @@ Bug Fixes a simple index (:issue:`2893`) - Fix Python ASCII file parsing when integer falls outside of floating point spacing (:issue:`3258`) -- fixed pretty priniting of sets (:issue:`3294`) +- fixed pretty printing of sets (:issue:`3294`) - Panel() and Panel.from_dict() now respects ordering when give OrderedDict (:issue:`3303`) - DataFrame where with a datetimelike incorrectly selecting (:issue:`3311`) - Ensure index casts work even in Int64Index @@ -3837,7 +3837,7 @@ Improvements to existing features keyword to append - support automagic indexing via ``index`` keyword to append - support ``expectedrows`` keyword in append to inform ``PyTables`` about - the expected tablesize + the expected table size - support ``start`` and ``stop`` keywords in select to limit the row selection space - added ``get_store`` context manager to automatically import with pandas @@ -3908,7 +3908,7 @@ Bug Fixes - Fix setitem on a Series with a boolean key and a non-scalar as value (:issue:`2686`) - Box datetime64 values in Series.apply/map (:issue:`2627`, :issue:`2689`) -- Upconvert datetime + datetime64 values when concatenating frames (:issue:`2624`) +- Up convert datetime + datetime64 values when concatenating frames (:issue:`2624`) - Raise a more helpful error message in merge operations when one DataFrame has duplicate columns (:issue:`2649`) - Fix partial date parsing issue occurring only when code is run at EOM @@ -4115,7 +4115,7 @@ Bug Fixes datetime64 when calling DataFrame.apply. (:issue:`2374`) - Raise exception when calling to_panel on non uniquely-indexed frame (:issue:`2441`) - Improved detection of console encoding on IPython zmq frontends (:issue:`2458`) -- Preserve time zone when .append-ing two time series (:issue:`2260`) +- Preserve time zone when .appending two time series (:issue:`2260`) - Box timestamps when calling reset_index on time-zone-aware index rather than creating a tz-less datetime64 column (:issue:`2262`) - Enable searching non-string columns in DataFrame.filter(like=...) 
(:issue:`2467`) @@ -4359,7 +4359,7 @@ Bug Fixes - Fix DatetimeIndex.isin to function properly (:issue:`1763`) - Fix conversion of array of tz-aware datetime.datetime to DatetimeIndex with right time zone (:issue:`1777`) -- Fix DST issues with generating ancxhored date ranges (:issue:`1778`) +- Fix DST issues with generating anchored date ranges (:issue:`1778`) - Fix issue calling sort on result of Series.unique (:issue:`1807`) - Fix numerical issue leading to square root of negative number in rolling_std (:issue:`1840`) @@ -4612,14 +4612,14 @@ New Features - Add keys() method on DataFrame (:issue:`1240`) - Add new ``match`` function to API (similar to R) (:issue:`502`) - Add dayfirst option to parsers (:issue:`854`) -- Add ``method`` argument to ``align`` method for forward/backward fillin +- Add ``method`` argument to ``align`` method for forward/backward filling (:issue:`216`) - Add Panel.transpose method for rearranging axes (:issue:`695`) - Add new ``cut`` function (patterned after R) for discretizing data into equal range-length bins or arbitrary breaks of your choosing (:issue:`415`) - Add new ``qcut`` for cutting with quantiles (:issue:`1378`) - Add ``value_counts`` top level array method (:issue:`1392`) -- Added Andrews curves plot tupe (:issue:`1325`) +- Added Andrews curves plot type (:issue:`1325`) - Add lag plot (:issue:`1440`) - Add autocorrelation_plot (:issue:`1425`) - Add support for tox and Travis CI (:issue:`1382`) @@ -4690,7 +4690,7 @@ API Changes - Remove deprecated DataMatrix name - Default merge suffixes for overlap now have underscores instead of periods to facilitate tab completion, etc. (:issue:`1239`) -- Deprecation of offset, time_rule timeRule parameters throughout codebase +- Deprecation of offset, time_rule timeRule parameters throughout code base - Series.append and DataFrame.append no longer check for duplicate indexes by default, add verify_integrity parameter (:issue:`1394`) - Refactor Factor class, old constructor moved to Factor.from_array @@ -4879,7 +4879,7 @@ Bug Fixes - Fix combineAdd NotImplementedError for SparseDataFrame (:issue:`887`) - Fix DataFrame.to_html encoding and columns (:issue:`890`, :issue:`891`, :issue:`909`) - Fix na-filling handling in mixed-type DataFrame (:issue:`910`) -- Fix to DataFrame.set_value with non-existant row/col (:issue:`911`) +- Fix to DataFrame.set_value with non-existent row/col (:issue:`911`) - Fix malformed block in groupby when excluding nuisance columns (:issue:`916`) - Fix inconsistent NA handling in dtype=object arrays (:issue:`925`) - Fix missing center-of-mass computation in ewmcov (:issue:`862`) @@ -4935,7 +4935,7 @@ Bug Fixes - Fix indexing operation for floating point values (:issue:`780`, :issue:`798`) - Fix groupby case resulting in malformed dataframe (:issue:`814`) - Fix behavior of reindex of Series dropping name (:issue:`812`) -- Improve on redudant groupby computation (:issue:`775`) +- Improve on redundant groupby computation (:issue:`775`) - Catch possible NA assignment to int/bool series with exception (:issue:`839`) pandas 0.7.0 @@ -5116,7 +5116,7 @@ Bug Fixes - Raise exception in out-of-bounds indexing of Series instead of seg-faulting, regression from earlier releases (:issue:`495`) - Fix error when joining DataFrames of different dtypes within the same - typeclass (e.g. float32 and float64) (:issue:`486`) + type class (e.g. 
float32 and float64) (:issue:`486`) - Fix bug in Series.min/Series.max on objects like datetime.datetime (GH :issue:`487`) - Preserve index names in Index.union (:issue:`501`) @@ -5162,7 +5162,7 @@ Bug Fixes - Format floats to default to same number of digits (:issue:`395`) - Added decorator to copy docstring from one function to another (:issue:`449`) - Fix error in monotonic many-to-one left joins -- Fix __eq__ comparison between DateOffsets with different relativedelta +- Fix __eq__ comparison between DateOffsets with different relative delta keywords passed - Fix exception caused by parser converter returning strings (:issue:`583`) - Fix MultiIndex formatting bug with integer names (:issue:`601`) @@ -5461,7 +5461,7 @@ Improvements to existing features `Series.map` significantly when passed elementwise Python function, motivated by :issue:`355` - Cythonized `cache_readonly`, resulting in substantial micro-performance - enhancements throughout the codebase (:issue:`361`) + enhancements throughout the code base (:issue:`361`) - Special Cython matrix iterator for applying arbitrary reduction operations with 3-5x better performance than `np.apply_along_axis` (:issue:`309`) - Add `raw` option to `DataFrame.apply` for getting better performance when @@ -5751,7 +5751,7 @@ pandas 0.4.3 **Release date:** 10/9/2011 -This is largely a bugfix release from 0.4.2 but also includes a handful of new +This is largely a bug fix release from 0.4.2 but also includes a handful of new and enhanced features. Also, pandas can now be installed and used on Python 3 (thanks Thomas Kluyver!). @@ -5803,7 +5803,7 @@ Bug Fixes - Fix Python ndarray access in Cython code for sparse blocked index integrity check - Fix bug writing Series to CSV in Python 3 (:issue:`209`) -- Miscellaneous Python 3 bugfixes +- Miscellaneous Python 3 bug fixes Thanks ~~~~~~ @@ -5828,7 +5828,7 @@ New Features int64-based time series (e.g. using NumPy's datetime64 one day) and also faster operations on DataFrame objects storing record array-like data. - Refactored `Index` classes to have a `join` method and associated data - alignment routines throughout the codebase to be able to leverage optimized + alignment routines throughout the code base to be able to leverage optimized joining / merging routines. - Added `Series.align` method for aligning two series with choice of join method @@ -6164,7 +6164,7 @@ API Changes - Removed `pandas.core.pytools` module. 
Code has been moved to `pandas.core.common` - Tacked on `groupName` attribute for groups in GroupBy renamed to `name` -- Panel/LongPanel `dims` attribute renamed to `shape` to be more conformant +- Panel/LongPanel `dims` attribute renamed to `shape` to be more conforming - Slicing a `Series` returns a view now - More Series deprecations / renaming: `toCSV` to `to_csv`, `asOf` to `asof`, `merge` to `map`, `applymap` to `apply`, `toDict` to `to_dict`, diff --git a/doc/source/spelling_wordlist.txt b/doc/source/spelling_wordlist.txt new file mode 100644 index 0000000000000..4c355a1b9c435 --- /dev/null +++ b/doc/source/spelling_wordlist.txt @@ -0,0 +1,916 @@ +IPython +ipython +numpy +NumPy +Reindexing +reindexing +ga +fe +reindexed +automagic +Histogramming +histogramming +concat +resampling +iterables +sparsified +df +loc +gc +Timeseries +ndarrays +ndarray +dtype +dtypes +dtyped +reindex +sliceable +timedelta +Timedeltas +timedeltas +subpackages +subpackage +filepath +io +nthreads +kwargs +kwarg +arg +args +Datetimelike +datetime +datetimes +tz +builtin +NaN +nan +behaviour +quantiling +aggregators +aggregator +Dtypes +groupby +GroupBy +Tablewise +Elementwise +ufunc +ufuncs +dict +namedtuples +namedtuple +iterrows +upcasted +upcasting +upcast +searchsorted +downcasting +Likert +categoricals +Groupby +Unioning +csv +Upcase +resampling +Upcase +Lowcase +Propcase +Interop +Stata +stata +bysort +Spearman +Wikipedia +debiasing +docstrings +docstring +Docstrings +autosummary +linting +toolchain +Appveyor +Akogun +online +pdf +reStructuredText +reST +backticks +cpus +str +idxmin +mins +agg +DataFrame +dataframes +NaT +len +Statsmodels +Bokeh +Protovis +Seaborn +Wickham +shareability +apps +app +Plotly +Spyder +Fama +Eurostat +organisations +Geopandas +Dask +Scikit +backends +Engarde +Cyberpandas +Accessor +Numba +optimising +Cython +cython +cythonizing +cythonized +Vectorize +ol +subclassing +IPv +iteritems +itertuples +dt +upcast +subsetting +programmatically +stderr +scipy +SparseArray +doctests +nd +refactored +Jit +stdout +Typeclass +Pythonic +zscore +SQL +broadcastable +resample +resamples +groupbys +metaprogramming +upcast +un +dropna +ints +int +boxplot +groupwise +indices +pre +datetimelike +dev +gd +colname +intemname +nd +isin +backporting +admin +Debian +Ubuntu +Centos +RHEL +xlsx +xz +ftp +impl +timespans +pre +Regex +regex +sortedness +delim +usecols +skipinitialspace +skiprows +skipfooter +nrows +na +iso +dayfirst +chunksize +gz +bz +lineterminator +quotechar +doublequote +escapechar +tupleize +prepended +colspecs +NONNUMERIC +serializer +localhost +json +strtod +deserialization +Hadoop +ns +stringified +xclip +xsel +gtk +gtpy +Msgpacks +msgpack +msgpacks +foo +ptrepack +sqlalchemy +sqlite +Sqlite +dta +bdat +netCDF +backend +deserialising +deserializing +qtpy +indexables +itemsize +de +sas +Miniconda +itemname +ndims +ndim +mergands +Timeseries +timeseries +asof +Nans +DataFrames +fillna +ffill +bfill +alignable +sim +py +ipy +colheader +yearfirst +repr +EngFormatter +frontends +frontend +longtable +multirow +cline +clines +colwidth +Sparsify +html +pprint +mathjax +Jupyter +xls +xlsm +hdf +numexpr +matplotlib +timedeltas +lexual +danbirken +isnull +Timestamp +np +xs +locs +datelike +dups +recarray +setitem +rhs +gaussian +kde +gkde +fwf +iNf +astyping +vbench +lgautier +jnothman +roundtrip +xlrd +buf +jtratner +tavistmorph +numpand +unserialiable +tseries +mul +completers +refactor +Refactor +subclassed +consolidatable +setitem +DataFrame +klass +jtratner +bs +lxml +rockg +inplace 
+pyt +tslib +vals +pos +cparser +locs +repr'd +cumsum +cumprod +rhs +datetimeindex +reraising +iloc +setitem +lhs +ticklocs +ticklabels +immerrr +np +kwds +travis +ci +yarikoptic +setitem +delitem +cpcloud +pprinting +hoechenberger +Faq +FAQ +faq +mtkini +spearman +SleepingPills +astypes +cov +timedeltalike +weekmasks +Weekmasks +xlrd +unioning +uint +iget +applymap +stonebig +recarrays +tdsmith +tokenization +google +xN +sharex +famafrench +strptime +stephenwlin +nans +diff +ohlc +util +seg +getitem +queryables +Dataframe +idxmax +putmasking +argsort +unsampling +pylab +fromordinal +andrews +strftime +wb +gzipped +gzip +aggfunc +multithreading +unicode +bork +tokenizer +sortlevel +Scikits +isnull +ndpanel +notnul +ctor +tzinfo +tzoffset +endianness +Upsampling +upsampling +upsampled +locators +locator +astimezone +iget +qcut +ewma +icol +printoption +quantileTS +UTC +utc +bool +init +OLS +Isnull +nansum +Cythonize +extlinks +utcoffset +khash +kendall +tolist +unhandled +downsampling +dayofyear +setops +discretizing +klib +ylabel +bday +BDay +timeRule +unmergeable +navar +pyplot +multiindex +combineAdd +ewmcov +algos +unpickling +MultiIndex +Memoize +Unbox +nanops +vectorize +DataFame +fallback +sharey +xlabel +notnull +asfreq +crit +rpy +nanvar +ddof +ols +printoptions +rankdata +pyo +camelCased +cacheable +unindexed +reduceat +blosc +aggregatable +idx +tradeoff +nPeriods +camelCasing +camelCased +LongPanel +truediv +px +parseCSV +unpivoted +extractall +weekofyear +dayofweek +CDay +Nano +parameterised +sunday +monday +tuesday +friday +upsample +resampled +tzfile +bools +xlsxwriter +ggplot +Colormaps +colormaps +trippable +callables +pivotting +GBQ +intersphinx +hashable +compat +Compat +rollforward +seekable +endian +subrecords +readonly +orderedness +eval +datetimelikes +pytables +argmax +argmin +utf +segfault +segfaults +xlims +CPython +MultiIndexed +blosc +blosclz +hc +lz +zlib +zstd +tput +boxplot +UInt +unioned +hashtable +saslib +resampled +dicts +datetimetz +ascii +evals +Compat +lexsorted +errstate +incompat +boxplots +honour +UTF +subclasse +ungrouped +xport +writeable +unencodable +serialising +serialise +Segfault +ceiled +xarray +jupyter +ified +isoformat +downsample +upsample +aggregator +ascii +compat +src +ness +unencoded +submethods +gbq +vectorised +nanos +Bigquery +complib +overridable +xlabels +xticklabels +listlike +jobComplete +cummin +cummax +undeprecated +triang +errored +unpickle +ngroups +multiindexes +xticks +yticks +errorbars +barplots +rcParams +dfs +nw +Openpyxl +barh +timestamp +inv +Welford +tarball +hdfstore +Pandonic +Perf +factorizer +sharey +yyyy +dd +xxx +bdays +nfrequencies +XYZ +Vytautas +Jancauskas +rankdata +Astype +astyped +mergesort +nano +unpickled +dataframe +serialised +serialisation +numpies +deserialize +hashtables +unpivoting +cubehelix +unparsable +fu +Unpivots +rownames +retbins +objs +sep +stubnames +expr +func +skipna +halflife +cond +ceil +fillchar +swapcased +deletechars +figsize +bw +xlabelsize +ftypes +ge +Unpivots +lsuffix +fname +fo +ftypes +rsuffix +sparsifying +tup +cls +nonunique +xrange +periodIndex +pytz +ctime +dst +localtime +proleptic +tzname +stddev +resampler +Resampler +searchpath +cmap +visualising +figsize +desc +Iterable +da +ta +CategoricalIndex +specialised +takeable +iter +upcase +Outlier +fontsize +pearson +corrwith +eq +ewm +floordiv +ftype +iat +typeR +slinear +krogh +akima +BPoly +isna +kurt +le +lt +ne +notna +nsmallest +Deutsche +Colormap +colorbar +silverman +gridsize +radd +rdiv +regexes +rfloordiv +rmod 
+rmul +rpow +rsub +rtruediv +RandomState +sem +quicksort +heapsort +organised +swapaxes +swaplevel +OAuth +defaultdict +tablename +HDFStore +appendable +searchable +serialisable +lzo +usepackage +booktabs +coereced +spellcheck +misspelt +rcl +multicolumns +gfc +automagically +fastparquet +brotli +sql +nullable +performant +lexsorted +tw +latin +StrL +tshift +basestring +DatetimeIndex +periodIndex +pydatetime +perioddelta +ExcelFile +noqa +deepcopy +Discretize +hasnans +nbytes +nlevels +DateOffset +stringr +orderable +IntervalIndex +versionadded +lexsort +droplevel +swaplevel +kurt +IGNORECASE +findall +isalnum +isalpha +isdecimal +isdigit +islower +isnumeric +isspace +istitle +isupper +ljust +lstrip +rfind +rindex +rpartition +rsplit +rstrip +startswith +deletechars +whitespaces +insecable +stringr +zfill +tshift +SparseSeries +isoweekday +isocalendar +fromtimestamp +dateutil +utcfromtimestamp +utcnow +utctimetuple +api +ExtensionArray +nbytes +abc +ABCMeta +Typecode +ExtensionDtype +biufcmMOSUV +accessor +CategoricalDtype +DataFrameGroupBy +Weekmask +walkthrough +wieldy +stubnames +unix +asian +Eg +recomputation +useQueryCache +LocalPath +fspath +params +datatypes +connectable +multirows +sparsify +parseable +TimedeltaIndex +baz +pathlib +radviz +axvline +xtick +unpivot +StataWriter +StataReader +IndexSlice +uuid +cellstyle +tablewise +rowwise +columnwise +env +fba +Regexp +sparsify +multiline +UnsupportedFunctionCall +UnsortedIndexError +PerformanceWarning +ParserWarning +ParserError +OutOfBoundsDatetime +EmptyDataError +DtypeWarning +crosstab +SeriesGroupBy +nunique +nlargest +Truthy +cumcount +ngroup +bdate +toordinal +julian +timetz +timetuple +freqstr +daysinmonth +asm +TimedeltaIndex +pytimedelta +autodetect +coords +endswith +SparseDataFrame +spmatrix +swapcase +rjust +ndarrary +regexs +ptp +imag +gca +keywors +intercalary +daysinmonth +divmod +autocorr +asobject +Argsorts +xrot +RangeIndex +PeriodIndex +qyear +timeries +scikits +fromDict +levshape +putmask +asi +repl \ No newline at end of file diff --git a/doc/source/text.rst b/doc/source/text.rst index 4af64d9f791cc..34bb1a07dfc08 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -55,8 +55,8 @@ Since ``df.columns`` is an Index object, we can use the ``.str`` accessor df.columns.str.lower() These string methods can then be used to clean up the columns as needed. -Here we are removing leading and trailing whitespaces, lowercasing all names, -and replacing any remaining whitespaces with underscores: +Here we are removing leading and trailing white spaces, lower casing all names, +and replacing any remaining white spaces with underscores: .. ipython:: python diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 73e3e721aad71..f1011f7c5c3c6 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1738,7 +1738,7 @@ If ``Period`` freq is daily or higher (``D``, ``H``, ``T``, ``S``, ``L``, ``U``, ... ValueError: Input has different freq from Period(freq=H) -If ``Period`` has other freqs, only the same ``offsets`` can be added. Otherwise, ``ValueError`` will be raised. +If ``Period`` has other frequencies, only the same ``offsets`` can be added. Otherwise, ``ValueError`` will be raised. .. 
ipython:: python diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 09a52ee527cb5..17197b805e86a 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -1061,7 +1061,7 @@ Plot Formatting Setting the plot style ~~~~~~~~~~~~~~~~~~~~~~ -From version 1.5 and up, matplotlib offers a range of preconfigured plotting styles. Setting the +From version 1.5 and up, matplotlib offers a range of pre-configured plotting styles. Setting the style can be used to easily give plots the general look that you want. Setting the style is as easy as calling ``matplotlib.style.use(my_plot_style)`` before creating your plot. For example you could write ``matplotlib.style.use('ggplot')`` for ggplot-style diff --git a/doc/source/whatsnew/v0.10.0.txt b/doc/source/whatsnew/v0.10.0.txt index 3fc05158b7fe7..3a269e53a2404 100644 --- a/doc/source/whatsnew/v0.10.0.txt +++ b/doc/source/whatsnew/v0.10.0.txt @@ -370,7 +370,7 @@ Updated PyTables Support df1.get_dtype_counts() - performance improvements on table writing -- support for arbitrarily indexed dimensions +- support for arbitrarily indexed dimensions - ``SparseSeries`` now has a ``density`` property (:issue:`2384`) - enable ``Series.str.strip/lstrip/rstrip`` methods to take an input argument to strip arbitrary characters (:issue:`2411`) diff --git a/doc/source/whatsnew/v0.10.1.txt b/doc/source/whatsnew/v0.10.1.txt index 2d5843101dec2..bb405c283ba24 100644 --- a/doc/source/whatsnew/v0.10.1.txt +++ b/doc/source/whatsnew/v0.10.1.txt @@ -149,7 +149,7 @@ combined result, by using ``where`` on a selector table. `nan`. - You can pass ``index`` to ``append``. This defaults to ``True``. This will - automagically create indicies on the *indexables* and *data columns* of the + automagically create indices on the *indexables* and *data columns* of the table - You can pass ``chunksize=an integer`` to ``append``, to change the writing @@ -157,7 +157,7 @@ combined result, by using ``where`` on a selector table. on writing. - You can pass ``expectedrows=an integer`` to the first ``append``, to set the - TOTAL number of expectedrows that ``PyTables`` will expected. This will + TOTAL number of expected rows that ``PyTables`` will expect. This will optimize read/write performance. - ``Select`` now supports passing ``start`` and ``stop`` to provide selection @@ -191,7 +191,7 @@ combined result, by using ``where`` on a selector table. levels with a very large number of combinatorial values (:issue:`2684`) - Fixed bug that causes plotting to fail when the index is a DatetimeIndex with a fixed-offset timezone (:issue:`2683`) -- Corrected businessday subtraction logic when the offset is more than 5 bdays +- Corrected business day subtraction logic when the offset is more than 5 bdays and the starting date is on a weekend (:issue:`2680`) - Fixed C file parser behavior when the file has more columns than data (:issue:`2668`) diff --git a/doc/source/whatsnew/v0.11.0.txt b/doc/source/whatsnew/v0.11.0.txt index b90a597815ec5..3c9cfda49aebd 100644 --- a/doc/source/whatsnew/v0.11.0.txt +++ b/doc/source/whatsnew/v0.11.0.txt @@ -33,7 +33,7 @@ three types of multi-axis indexing. See more at :ref:`Selection by Label ` -- ``.iloc`` is strictly integer position based (from ``0`` to ``length-1`` of the axis), will raise ``IndexError`` when the requested indicies are out of bounds.
Allowed inputs are: +- ``.iloc`` is strictly integer position based (from ``0`` to ``length-1`` of the axis), will raise ``IndexError`` when the requested indices are out of bounds. Allowed inputs are: - An integer e.g. ``5`` - A list or array of integers ``[4, 3, 0]`` @@ -44,7 +44,7 @@ three types of multi-axis indexing. - ``.ix`` supports mixed integer and label based access. It is primarily label based, but will fallback to integer positional access. ``.ix`` is the most general and will support any of the inputs to ``.loc`` and ``.iloc``, as well as support for floating point label schemes. ``.ix`` is especially useful when dealing with mixed positional and label - based hierarchial indexes. + based hierarchical indexes. As using integer slices with ``.ix`` have different behavior depending on whether the slice is interpreted as position based or label based, it's usually better to be @@ -211,7 +211,7 @@ Astype conversion on ``datetime64[ns]`` to ``object``, implicitly converts ``NaT API changes ~~~~~~~~~~~ - - Added to_series() method to indicies, to facilitate the creation of indexers + - Added to_series() method to indices, to facilitate the creation of indexers (:issue:`3275`) - ``HDFStore`` diff --git a/doc/source/whatsnew/v0.12.0.txt b/doc/source/whatsnew/v0.12.0.txt index ad33c49792d9f..69483b18a5490 100644 --- a/doc/source/whatsnew/v0.12.0.txt +++ b/doc/source/whatsnew/v0.12.0.txt @@ -73,7 +73,7 @@ API changes e.g. a boolean Series, even with integer labels, will raise. Since ``iloc`` is purely positional based, the labels on the Series are not alignable (:issue:`3631`) - This case is rarely used, and there are plently of alternatives. This preserves the + This case is rarely used, and there are plenty of alternatives. This preserves the ``iloc`` API to be *purely* positional based. .. ipython:: python @@ -166,7 +166,7 @@ API changes - The internal ``pandas`` class hierarchy has changed (slightly). The previous ``PandasObject`` now is called ``PandasContainer`` and a new - ``PandasObject`` has become the baseclass for ``PandasContainer`` as well + ``PandasObject`` has become the base class for ``PandasContainer`` as well as ``Index``, ``Categorical``, ``GroupBy``, ``SparseList``, and ``SparseArray`` (+ their base classes). Currently, ``PandasObject`` provides string methods (from ``StringMixin``). (:issue:`4090`, :issue:`4092`) @@ -296,7 +296,7 @@ Other Enhancements df.replace(regex=r'\s*\.\s*', value=np.nan) to replace all occurrences of the string ``'.'`` with zero or more - instances of surrounding whitespace with ``NaN``. + instances of surrounding white space with ``NaN``. Regular string replacement still works as expected. For example, you can do @@ -403,7 +403,7 @@ Bug Fixes :issue:`3572`, :issue:`3911`, :issue:`3912`), but they will try to convert object arrays to numeric arrays if possible so that you can still plot, for example, an object array with floats. This happens before any drawing takes place which - elimnates any spurious plots from showing up. + eliminates any spurious plots from showing up. - ``fillna`` methods now raise a ``TypeError`` if the ``value`` parameter is a list or tuple. 
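As a quick illustration of the ``replace``-with-regex call quoted in the v0.12.0 notes above, a minimal sketch follows; the column names and values are illustrative assumptions, not taken from the release notes::

    import numpy as np
    import pandas as pd

    # toy frame in which '.' padded by optional white space marks a missing value
    df = pd.DataFrame({'a': ['1', ' . ', '3'], 'b': ['.', '5', ' .']})

    # replace every occurrence of '.' with zero or more instances of
    # surrounding white space by NaN, exactly as described above
    cleaned = df.replace(regex=r'\s*\.\s*', value=np.nan)
    print(cleaned)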
diff --git a/doc/source/whatsnew/v0.13.0.txt b/doc/source/whatsnew/v0.13.0.txt index 02ddc362255ec..94cd451196ead 100644 --- a/doc/source/whatsnew/v0.13.0.txt +++ b/doc/source/whatsnew/v0.13.0.txt @@ -414,7 +414,7 @@ HDFStore API Changes - add the keyword ``dropna=True`` to ``append`` to change whether ALL nan rows are not written to the store (default is ``True``, ALL nan rows are NOT written), also settable via the option ``io.hdf.dropna_table`` (:issue:`4625`) -- pass thru store creation arguments; can be used to support in-memory stores +- pass through store creation arguments; can be used to support in-memory stores DataFrame repr Changes ~~~~~~~~~~~~~~~~~~~~~~ @@ -443,7 +443,7 @@ Enhancements - Clipboard functionality now works with PySide (:issue:`4282`) - Added a more informative error message when plot arguments contain overlapping color and style arguments (:issue:`4402`) -- ``to_dict`` now takes ``records`` as a possible outtype. Returns an array +- ``to_dict`` now takes ``records`` as a possible out type. Returns an array of column-keyed dictionaries. (:issue:`4936`) - ``NaN`` handing in get_dummies (:issue:`4446`) with `dummy_na` diff --git a/doc/source/whatsnew/v0.14.0.txt b/doc/source/whatsnew/v0.14.0.txt index 92c699017fc13..4408470c52feb 100644 --- a/doc/source/whatsnew/v0.14.0.txt +++ b/doc/source/whatsnew/v0.14.0.txt @@ -78,10 +78,10 @@ API changes - ``df.iloc[len(df)::-1]`` now enumerates all elements in reverse - The :meth:`DataFrame.interpolate` keyword ``downcast`` default has been changed from ``infer`` to - ``None``. This is to preseve the original dtype unless explicitly requested otherwise (:issue:`6290`). + ``None``. This is to preserve the original dtype unless explicitly requested otherwise (:issue:`6290`). - When converting a dataframe to HTML it used to return `Empty DataFrame`. This special case has been removed, instead a header with the column names is returned (:issue:`6062`). -- ``Series`` and ``Index`` now internall share more common operations, e.g. ``factorize(),nunique(),value_counts()`` are +- ``Series`` and ``Index`` now internally share more common operations, e.g. ``factorize(),nunique(),value_counts()`` are now supported on ``Index`` types as well. The ``Series.weekday`` property from is removed from Series for API consistency. Using a ``DatetimeIndex/PeriodIndex`` method on a Series will now raise a ``TypeError``. (:issue:`4551`, :issue:`4056`, :issue:`5519`, :issue:`6380`, :issue:`7206`). @@ -294,7 +294,7 @@ Display Changes Text Parsing API Changes ~~~~~~~~~~~~~~~~~~~~~~~~ -:func:`read_csv`/:func:`read_table` will now be noiser w.r.t invalid options rather than falling back to the ``PythonParser``. +:func:`read_csv`/:func:`read_table` will now be noisier w.r.t invalid options rather than falling back to the ``PythonParser``. - Raise ``ValueError`` when ``sep`` specified with ``delim_whitespace=True`` in :func:`read_csv`/:func:`read_table` @@ -714,7 +714,7 @@ Deprecations Use the `percentiles` keyword instead, which takes a list of percentiles to display. The default output is unchanged. -- The default return type of :func:`boxplot` will change from a dict to a matpltolib Axes +- The default return type of :func:`boxplot` will change from a dict to a matplotlib Axes in a future release. You can use the future behavior now by passing ``return_type='axes'`` to boxplot. @@ -781,7 +781,7 @@ Enhancements noon, January 1, 4713 BC. 
Because nanoseconds are used to define the time in pandas the actual range of dates that you can use is 1678 AD to 2262 AD. (:issue:`4041`) - ``DataFrame.to_stata`` will now check data for compatibility with Stata data types - and will upcast when needed. When it is not possible to losslessly upcast, a warning + and will upcast when needed. When a lossless upcast is not possible, a warning is issued (:issue:`6327`) - ``DataFrame.to_stata`` and ``StataWriter`` will accept keyword arguments time_stamp and data_label which allow the time stamp and dataset label to be set when creating a @@ -881,7 +881,7 @@ Bug Fixes - Prevent segfault due to MultiIndex not being supported in HDFStore table format (:issue:`1848`) - Bug in ``pd.DataFrame.sort_index`` where mergesort wasn't stable when ``ascending=False`` (:issue:`6399`) -- Bug in ``pd.tseries.frequencies.to_offset`` when argument has leading zeroes (:issue:`6391`) +- Bug in ``pd.tseries.frequencies.to_offset`` when argument has leading zeros (:issue:`6391`) - Bug in version string gen. for dev versions with shallow clones / install from tarball (:issue:`6127`) - Inconsistent tz parsing ``Timestamp`` / ``to_datetime`` for current year (:issue:`5958`) - Indexing bugs with reordered indexes (:issue:`6252`, :issue:`6254`) @@ -922,7 +922,7 @@ Bug Fixes - Bug in ``Series.reindex`` when specifying a ``method`` with some nan values was inconsistent (noted on a resample) (:issue:`6418`) - Bug in :meth:`DataFrame.replace` where nested dicts were erroneously depending on the order of dictionary keys and values (:issue:`5338`). -- Perf issue in concatting with empty objects (:issue:`3259`) +- Performance issue in concatenating with empty objects (:issue:`3259`) - Clarify sorting of ``sym_diff`` on ``Index`` objects with ``NaN`` values (:issue:`6444`) - Regression in ``MultiIndex.from_product`` with a ``DatetimeIndex`` as input (:issue:`6439`) - Bug in ``str.extract`` when passed a non-default index (:issue:`6348`) @@ -966,8 +966,8 @@ Bug Fixes - Bug in downcasting inference with empty arrays (:issue:`6733`) - Bug in ``obj.blocks`` on sparse containers dropping all but the last items of same for dtype (:issue:`6748`) - Bug in unpickling ``NaT (NaTType)`` (:issue:`4606`) -- Bug in ``DataFrame.replace()`` where regex metacharacters were being treated - as regexs even when ``regex=False`` (:issue:`6777`). +- Bug in ``DataFrame.replace()`` where regex meta characters were being treated + as regex even when ``regex=False`` (:issue:`6777`). - Bug in timedelta ops on 32-bit platforms (:issue:`6808`) - Bug in setting a tz-aware index directly via ``.index`` (:issue:`6785`) - Bug in expressions.py where numexpr would try to evaluate arithmetic ops @@ -983,7 +983,7 @@ Bug Fixes - would only replace the first occurrence of a value (:issue:`6689`) - Better error message when passing a frequency of 'MS' in ``Period`` construction (GH5332) - Bug in ``Series.__unicode__`` when ``max_rows=None`` and the Series has more than 1000 rows.
(:issue:`6863`) -- Bug in ``groupby.get_group`` where a datetlike wasn't always accepted (:issue:`5267`) +- Bug in ``groupby.get_group`` where a datelike wasn't always accepted (:issue:`5267`) - Bug in ``groupBy.get_group`` created by ``TimeGrouper`` raises ``AttributeError`` (:issue:`6914`) - Bug in ``DatetimeIndex.tz_localize`` and ``DatetimeIndex.tz_convert`` converting ``NaT`` incorrectly (:issue:`5546`) - Bug in arithmetic operations affecting ``NaT`` (:issue:`6873`) @@ -994,7 +994,7 @@ Bug Fixes - Bug in ``DataFrame.plot`` and ``Series.plot``, where the legend behave inconsistently when plotting to the same axes repeatedly (:issue:`6678`) - Internal tests for patching ``__finalize__`` / bug in merge not finalizing (:issue:`6923`, :issue:`6927`) - accept ``TextFileReader`` in ``concat``, which was affecting a common user idiom (:issue:`6583`) -- Bug in C parser with leading whitespace (:issue:`3374`) +- Bug in C parser with leading white space (:issue:`3374`) - Bug in C parser with ``delim_whitespace=True`` and ``\r``-delimited lines - Bug in python parser with explicit multi-index in row following column header (:issue:`6893`) - Bug in ``Series.rank`` and ``DataFrame.rank`` that caused small floats (<1e-13) to all receive the same rank (:issue:`6886`) diff --git a/doc/source/whatsnew/v0.14.1.txt b/doc/source/whatsnew/v0.14.1.txt index 32a2391c75531..f7f69218e0ef5 100644 --- a/doc/source/whatsnew/v0.14.1.txt +++ b/doc/source/whatsnew/v0.14.1.txt @@ -172,7 +172,7 @@ Bug Fixes - Bug in Panel indexing with a multi-index axis (:issue:`7516`) - Regression in datetimelike slice indexing with a duplicated index and non-exact end-points (:issue:`7523`) - Bug in setitem with list-of-lists and single vs mixed types (:issue:`7551`:) -- Bug in timeops with non-aligned Series (:issue:`7500`) +- Bug in time ops with non-aligned Series (:issue:`7500`) - Bug in timedelta inference when assigning an incomplete Series (:issue:`7592`) - Bug in groupby ``.nth`` with a Series and integer-like column name (:issue:`7559`) - Bug in ``Series.get`` with a boolean accessor (:issue:`7407`) @@ -209,7 +209,7 @@ Bug Fixes - Bug in inferred_freq results in None for eastern hemisphere timezones (:issue:`7310`) - Bug in ``Easter`` returns incorrect date when offset is negative (:issue:`7195`) - Bug in broadcasting with ``.div``, integer dtypes and divide-by-zero (:issue:`7325`) -- Bug in ``CustomBusinessDay.apply`` raiases ``NameError`` when ``np.datetime64`` object is passed (:issue:`7196`) +- Bug in ``CustomBusinessDay.apply`` raises ``NameError`` when ``np.datetime64`` object is passed (:issue:`7196`) - Bug in ``MultiIndex.append``, ``concat`` and ``pivot_table`` don't preserve timezone (:issue:`6606`) - Bug in ``.loc`` with a list of indexers on a single-multi index level (that is not nested) (:issue:`7349`) - Bug in ``Series.map`` when mapping a dict with tuple keys of different lengths (:issue:`7333`) diff --git a/doc/source/whatsnew/v0.15.0.txt b/doc/source/whatsnew/v0.15.0.txt index 0f1a8c324de54..94093b2cfb16c 100644 --- a/doc/source/whatsnew/v0.15.0.txt +++ b/doc/source/whatsnew/v0.15.0.txt @@ -44,7 +44,7 @@ users upgrade to this version. .. warning:: - The refactorings in :class:`~pandas.Categorical` changed the two argument constructor from + The refactoring in :class:`~pandas.Categorical` changed the two argument constructor from "codes/labels and levels" to "values and levels (now called 'categories')". This can lead to subtle bugs. 
If you use :class:`~pandas.Categorical` directly, please audit your code before updating to this pandas version and change it to use the :meth:`~pandas.Categorical.from_codes` constructor. See more on ``Categorical`` :ref:`here ` @@ -139,7 +139,7 @@ This type is very similar to how ``Timestamp`` works for ``datetimes``. It is a The arguments to ``pd.to_timedelta`` are now ``(arg,unit='ns',box=True,coerce=False)``, previously were ``(arg,box=True,unit='ns')`` as these are more logical. -Consruct a scalar +Construct a scalar .. ipython:: python @@ -794,7 +794,7 @@ Other notable API changes: .. _whatsnew_0150.blanklines: - Made both the C-based and Python engines for `read_csv` and `read_table` ignore empty lines in input as well as - whitespace-filled lines, as long as ``sep`` is not whitespace. This is an API change + white space-filled lines, as long as ``sep`` is not white space. This is an API change that can be controlled by the keyword parameter ``skip_blank_lines``. See :ref:`the docs ` (:issue:`4466`) - A timeseries/index localized to UTC when inserted into a Series/DataFrame will preserve the UTC timezone @@ -940,7 +940,7 @@ Enhancements Enhancements in the importing/exporting of Stata files: -- Added support for bool, uint8, uint16 and uint32 datatypes in ``to_stata`` (:issue:`7097`, :issue:`7365`) +- Added support for bool, uint8, uint16 and uint32 data types in ``to_stata`` (:issue:`7097`, :issue:`7365`) - Added conversion option when importing Stata files (:issue:`8527`) - ``DataFrame.to_stata`` and ``StataWriter`` check string length for compatibility with limitations imposed in dta files where fixed-width @@ -988,7 +988,7 @@ Other: - Added ``split`` as an option to the ``orient`` argument in ``pd.DataFrame.to_dict``. (:issue:`7840`) - The ``get_dummies`` method can now be used on DataFrames. By default only - catagorical columns are encoded as 0's and 1's, while other columns are + categorical columns are encoded as 0's and 1's, while other columns are left untouched. .. ipython:: python @@ -1070,7 +1070,7 @@ Other: idx.duplicated() idx.drop_duplicates() -- add ``copy=True`` argument to ``pd.concat`` to enable pass thru of complete blocks (:issue:`8252`) +- add ``copy=True`` argument to ``pd.concat`` to enable pass through of complete blocks (:issue:`8252`) - Added support for numpy 1.8+ data types (``bool_``, ``int_``, ``float_``, ``string_``) for conversion to R dataframe (:issue:`8400`) diff --git a/doc/source/whatsnew/v0.15.1.txt b/doc/source/whatsnew/v0.15.1.txt index 345fc9f1b5da7..918eab3a9763e 100644 --- a/doc/source/whatsnew/v0.15.1.txt +++ b/doc/source/whatsnew/v0.15.1.txt @@ -72,7 +72,7 @@ API changes df.groupby(ts, as_index=False).max() -- ``groupby`` will not erroneously exclude columns if the column name conflics +- ``groupby`` will not erroneously exclude columns if the column name conflicts with the grouper name (:issue:`8112`): .. ipython:: python diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index f1dfab0f57ed3..16a57676c89c0 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -165,7 +165,7 @@ Other enhancements: - Added support for ``utcfromtimestamp()``, ``fromtimestamp()``, and ``combine()`` on `Timestamp` class (:issue:`5351`). - Added Google Analytics (`pandas.io.ga`) basic documentation (:issue:`8835`). See `here `__. - ``Timedelta`` arithmetic returns ``NotImplemented`` in unknown cases, allowing extensions by custom classes (:issue:`8813`). 
-- ``Timedelta`` now supports arithemtic with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`). +- ``Timedelta`` now supports arithmetic with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`). - Added ``Timedelta.to_timedelta64()`` method to the public API (:issue:`8884`). - Added ``gbq.generate_bq_schema()`` function to the gbq module (:issue:`8325`). - ``Series`` now works with map objects the same way as generators (:issue:`8909`). @@ -173,7 +173,7 @@ Other enhancements: - ``to_datetime`` gains an ``exact`` keyword to allow for a format to not require an exact match for a provided format string (if its ``False``). ``exact`` defaults to ``True`` (meaning that exact matching is still the default) (:issue:`8904`) - Added ``axvlines`` boolean option to parallel_coordinates plot function, determines whether vertical lines will be printed, default is True - Added ability to read table footers to read_html (:issue:`8552`) -- ``to_sql`` now infers datatypes of non-NA values for columns that contain NA values and have dtype ``object`` (:issue:`8778`). +- ``to_sql`` now infers data types of non-NA values for columns that contain NA values and have dtype ``object`` (:issue:`8778`). .. _whatsnew_0152.performance: @@ -215,7 +215,7 @@ Bug Fixes - ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo and when it receives no data from Yahoo (:issue:`8761`), (:issue:`8783`). - Fix: The font size was only set on x axis if vertical or the y axis if horizontal. (:issue:`8765`) - Fixed division by 0 when reading big csv files in python 3 (:issue:`8621`) -- Bug in outputting a Multindex with ``to_html,index=False`` which would add an extra column (:issue:`8452`) +- Bug in outputting a MultiIndex with ``to_html,index=False`` which would add an extra column (:issue:`8452`) - Imported categorical variables from Stata files retain the ordinal information in the underlying data (:issue:`8836`). - Defined ``.size`` attribute across ``NDFrame`` objects to provide compat with numpy >= 1.9.1; buggy with ``np.array_split`` (:issue:`8846`) - Skip testing of histogram plots for matplotlib <= 1.2 (:issue:`8648`). @@ -230,11 +230,11 @@ Bug Fixes - Bug where index name was still used when plotting a series with ``use_index=False`` (:issue:`8558`). - Bugs when trying to stack multiple columns, when some (or all) of the level names are numbers (:issue:`8584`). - Bug in ``MultiIndex`` where ``__contains__`` returns wrong result if index is not lexically sorted or unique (:issue:`7724`) -- BUG CSV: fix problem with trailing whitespace in skipped rows, (:issue:`8679`), (:issue:`8661`), (:issue:`8983`) +- BUG CSV: fix problem with trailing white space in skipped rows, (:issue:`8679`), (:issue:`8661`), (:issue:`8983`) - Regression in ``Timestamp`` does not parse 'Z' zone designator for UTC (:issue:`8771`) - Bug in `StataWriter` the produces writes strings with 244 characters irrespective of actual size (:issue:`8969`) - Fixed ValueError raised by cummin/cummax when datetime64 Series contains NaT. (:issue:`8965`) -- Bug in Datareader returns object dtype if there are missing values (:issue:`8980`) +- Bug in DataReader returns object dtype if there are missing values (:issue:`8980`) - Bug in plotting if sharex was enabled and index was a timeseries, would show labels on multiple axes (:issue:`3964`). - Bug where passing a unit to the TimedeltaIndex constructor applied the to nano-second conversion twice. 
(:issue:`9011`). - Bug in plotting of a period-like array (:issue:`9012`) diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 48af06d124f2e..214a08ef0bbff 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -133,7 +133,7 @@ from a ``scipy.sparse.coo_matrix``: String Methods Enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- Following new methods are accesible via ``.str`` accessor to apply the function to each values. This is intended to make it more consistent with standard methods on strings. (:issue:`9282`, :issue:`9352`, :issue:`9386`, :issue:`9387`, :issue:`9439`) +- Following new methods are accessible via ``.str`` accessor to apply the function to each values. This is intended to make it more consistent with standard methods on strings. (:issue:`9282`, :issue:`9352`, :issue:`9386`, :issue:`9387`, :issue:`9439`) ============= ============= ============= =============== =============== .. .. Methods .. .. @@ -530,7 +530,7 @@ Deprecations We refer users to the external package `pandas-qt `_. (:issue:`9615`) - The ``pandas.rpy`` interface is deprecated and will be removed in a future version. - Similar functionaility can be accessed thru the `rpy2 `_ project (:issue:`9602`) + Similar functionality can be accessed through the `rpy2 `_ project (:issue:`9602`) - Adding ``DatetimeIndex/PeriodIndex`` to another ``DatetimeIndex/PeriodIndex`` is being deprecated as a set-operation. This will be changed to a ``TypeError`` in a future version. ``.union()`` should be used for the union set operation. (:issue:`9094`) - Subtracting ``DatetimeIndex/PeriodIndex`` from another ``DatetimeIndex/PeriodIndex`` is being deprecated as a set-operation. This will be changed to an actual numeric subtraction yielding a ``TimeDeltaIndex`` in a future version. ``.difference()`` should be used for the differencing set operation. (:issue:`9094`) @@ -601,7 +601,7 @@ Bug Fixes - Bug in binary operator method (eg ``.mul()``) alignment with integer levels (:issue:`9463`). - Bug in boxplot, scatter and hexbin plot may show an unnecessary warning (:issue:`8877`) - Bug in subplot with ``layout`` kw may show unnecessary warning (:issue:`9464`) -- Bug in using grouper functions that need passed thru arguments (e.g. axis), when using wrapped function (e.g. ``fillna``), (:issue:`9221`) +- Bug in using grouper functions that need passed through arguments (e.g. axis), when using wrapped function (e.g. ``fillna``), (:issue:`9221`) - ``DataFrame`` now properly supports simultaneous ``copy`` and ``dtype`` arguments in constructor (:issue:`9099`) - Bug in ``read_csv`` when using skiprows on a file with CR line endings with the c engine. (:issue:`9079`) - ``isnull`` now detects ``NaT`` in ``PeriodIndex`` (:issue:`9129`) @@ -613,7 +613,7 @@ Bug Fixes - Fixed division by zero error for ``Series.kurt()`` when all values are equal (:issue:`9197`) - Fixed issue in the ``xlsxwriter`` engine where it added a default 'General' format to cells if no other format was applied. This prevented other row or column formatting being applied. (:issue:`9167`) - Fixes issue with ``index_col=False`` when ``usecols`` is also specified in ``read_csv``. (:issue:`9082`) -- Bug where ``wide_to_long`` would modify the input stubnames list (:issue:`9204`) +- Bug where ``wide_to_long`` would modify the input stub names list (:issue:`9204`) - Bug in ``to_sql`` not storing float64 values using double precision. 
(:issue:`9009`) - ``SparseSeries`` and ``SparsePanel`` now accept zero argument constructors (same as their non-sparse counterparts) (:issue:`9272`). - Regression in merging ``Categorical`` and ``object`` dtypes (:issue:`9426`) @@ -624,7 +624,7 @@ Bug Fixes - Fixed bug with reading CSV files from Amazon S3 on python 3 raising a TypeError (:issue:`9452`) - Bug in the Google BigQuery reader where the 'jobComplete' key may be present but False in the query results (:issue:`8728`) - Bug in ``Series.values_counts`` with excluding ``NaN`` for categorical type ``Series`` with ``dropna=True`` (:issue:`9443`) -- Fixed mising numeric_only option for ``DataFrame.std/var/sem`` (:issue:`9201`) +- Fixed missing numeric_only option for ``DataFrame.std/var/sem`` (:issue:`9201`) - Support constructing ``Panel`` or ``Panel4D`` with scalar data (:issue:`8285`) - ``Series`` text representation disconnected from `max_rows`/`max_columns` (:issue:`7508`). diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 5c716f6ad45c1..e2da12fc94b58 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -133,7 +133,7 @@ groupby operations on the index will preserve the index nature as well reindexing operations, will return a resulting index based on the type of the passed indexer, meaning that passing a list will return a plain-old-``Index``; indexing with a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the categories -of the PASSED ``Categorical`` dtype. This allows one to arbitrarly index these even with +of the PASSED ``Categorical`` dtype. This allows one to arbitrarily index these even with values NOT in the categories, similarly to how you can reindex ANY pandas index. .. code-block:: ipython @@ -237,7 +237,7 @@ enhancements make string operations easier and more consistent with standard pyt idx.str.startswith('a') s[s.index.str.startswith('a')] -- The following new methods are accesible via ``.str`` accessor to apply the function to each values. (:issue:`9766`, :issue:`9773`, :issue:`10031`, :issue:`10045`, :issue:`10052`) +- The following new methods are accessible via the ``.str`` accessor to apply the function to each value. (:issue:`9766`, :issue:`9773`, :issue:`10031`, :issue:`10045`, :issue:`10052`) ================ =============== =============== =============== ================ .. .. Methods .. .. @@ -348,7 +348,7 @@ Deprecations Index Representation ~~~~~~~~~~~~~~~~~~~~ -The string representation of ``Index`` and its sub-classes have now been unified. These will show a single-line display if there are few values; a wrapped multi-line display for a lot of values (but less than ``display.max_seq_items``; if lots of items (> ``display.max_seq_items``) will show a truncated display (the head and tail of the data). The formatting for ``MultiIndex`` is unchanges (a multi-line wrapped display). The display width responds to the option ``display.max_seq_items``, which is defaulted to 100. (:issue:`6482`) +The string representation of ``Index`` and its sub-classes has now been unified. These will show a single-line display if there are few values; a wrapped multi-line display for a lot of values (but less than ``display.max_seq_items``; if lots of items (> ``display.max_seq_items``) will show a truncated display (the head and tail of the data). The formatting for ``MultiIndex`` is unchanged (a multi-line wrapped display). The display width responds to the option ``display.max_seq_items``, which is defaulted to 100.
(:issue:`6482`) Previous Behavior @@ -437,8 +437,8 @@ Bug Fixes - Bug in ``to_msgpack`` and ``read_msgpack`` zlib and blosc compression support (:issue:`9783`) - Bug ``GroupBy.size`` doesn't attach index name properly if grouped by ``TimeGrouper`` (:issue:`9925`) - Bug causing an exception in slice assignments because ``length_of_indexer`` returns wrong results (:issue:`9995`) -- Bug in csv parser causing lines with initial whitespace plus one non-space character to be skipped. (:issue:`9710`) -- Bug in C csv parser causing spurious NaNs when data started with newline followed by whitespace. (:issue:`10022`) +- Bug in csv parser causing lines with initial white space plus one non-space character to be skipped. (:issue:`9710`) +- Bug in C csv parser causing spurious NaNs when data started with newline followed by white space. (:issue:`10022`) - Bug causing elements with a null group to spill into the final group when grouping by a ``Categorical`` (:issue:`9603`) - Bug where .iloc and .loc behavior is not consistent on empty dataframes (:issue:`9964`) - Bug in invalid attribute access on a ``TimedeltaIndex`` incorrectly raised ``ValueError`` instead of ``AttributeError`` (:issue:`9680`) diff --git a/doc/source/whatsnew/v0.16.2.txt b/doc/source/whatsnew/v0.16.2.txt index 29f6832b48aaf..047da4c94093b 100644 --- a/doc/source/whatsnew/v0.16.2.txt +++ b/doc/source/whatsnew/v0.16.2.txt @@ -125,7 +125,7 @@ Bug Fixes - Bug where ``HDFStore.select`` modifies the passed columns list (:issue:`7212`) - Bug in ``Categorical`` repr with ``display.width`` of ``None`` in Python 3 (:issue:`10087`) - Bug in ``to_json`` with certain orients and a ``CategoricalIndex`` would segfault (:issue:`10317`) -- Bug where some of the nan funcs do not have consistent return dtypes (:issue:`10251`) +- Bug where some of the nan functions do not have consistent return dtypes (:issue:`10251`) - Bug in ``DataFrame.quantile`` on checking that a valid axis was passed (:issue:`9543`) - Bug in ``groupby.apply`` aggregation for ``Categorical`` not preserving categories (:issue:`10138`) - Bug in ``to_csv`` where ``date_format`` is ignored if the ``datetime`` is fractional (:issue:`10209`) @@ -155,7 +155,7 @@ Bug Fixes - Bug in ``GroupBy.get_group`` raises ``ValueError`` when group key contains ``NaT`` (:issue:`6992`) - Bug in ``SparseSeries`` constructor ignores input data name (:issue:`10258`) - Bug in ``Categorical.remove_categories`` causing a ``ValueError`` when removing the ``NaN`` category if underlying dtype is floating-point (:issue:`10156`) -- Bug where infer_freq infers timerule (WOM-5XXX) unsupported by to_offset (:issue:`9425`) +- Bug where infer_freq infers time rule (WOM-5XXX) unsupported by to_offset (:issue:`9425`) - Bug in ``DataFrame.to_hdf()`` where table format would raise a seemingly unrelated error for invalid (non-string) column names. This is now explicitly forbidden. (:issue:`9057`) - Bug to handle masking empty ``DataFrame`` (:issue:`10126`). - Bug where MySQL interface could not handle numeric table/column names (:issue:`10255`) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index ec8f318b72fef..1b98ebd0e19c5 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -308,7 +308,7 @@ See the :ref:`documentation ` for more details. os.remove('test.xlsx') Previously, it was necessary to specify the ``has_index_names`` argument in ``read_excel``, -if the serialized data had index names. 
For version 0.17.0 the ouptput format of ``to_excel`` +if the serialized data had index names. For version 0.17.0 the output format of ``to_excel`` has been changed to make this keyword unnecessary - the change is shown below. **Old** @@ -1042,7 +1042,7 @@ Performance Improvements Bug Fixes ~~~~~~~~~ -- Bug in incorrection computation of ``.mean()`` on ``timedelta64[ns]`` because of overflow (:issue:`9442`) +- Bug in incorrect computation of ``.mean()`` on ``timedelta64[ns]`` because of overflow (:issue:`9442`) - Bug in ``.isin`` on older numpies (:issue:`11232`) - Bug in ``DataFrame.to_html(index=False)`` renders unnecessary ``name`` row (:issue:`10344`) - Bug in ``DataFrame.to_latex()`` the ``column_format`` argument could not be passed (:issue:`9402`) diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index e1b561c4deacb..990f27950d982 100644 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -41,7 +41,7 @@ Conditional HTML Formatting We've added *experimental* support for conditional HTML formatting: the visual styling of a DataFrame based on the data. The styling is accomplished with HTML and CSS. -Acesses the styler class with the :attr:`pandas.DataFrame.style`, attribute, +Accesses the styler class with the :attr:`pandas.DataFrame.style`, attribute, an instance of :class:`~pandas.core.style.Styler` with your data attached. Here's a quick example: diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index bfd314639aa60..8dc49dbc319a6 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -330,7 +330,7 @@ Timedeltas t[0].round('2h') -In addition, ``.round()``, ``.floor()`` and ``.ceil()`` will be available thru the ``.dt`` accessor of ``Series``. +In addition, ``.round()``, ``.floor()`` and ``.ceil()`` will be available through the ``.dt`` accessor of ``Series``. .. ipython:: python @@ -414,7 +414,7 @@ New Behavior: df.loc[ix, 'b'] = df.loc[ix, 'b'] df.dtypes -When a DataFrame's integer slice is partially updated with a new slice of floats that could potentially be downcasted to integer without losing precision, the dtype of the slice will be set to float instead of integer. +When a DataFrame's integer slice is partially updated with a new slice of floats that could potentially be down-casted to integer without losing precision, the dtype of the slice will be set to float instead of integer. Previous Behavior: @@ -516,19 +516,19 @@ Other enhancements - ``Series`` gained an ``is_unique`` attribute (:issue:`11946`) - ``DataFrame.quantile`` and ``Series.quantile`` now accept ``interpolation`` keyword (:issue:`10174`). - Added ``DataFrame.style.format`` for more flexible formatting of cell values (:issue:`11692`) -- ``DataFrame.select_dtypes`` now allows the ``np.float16`` typecode (:issue:`11990`) +- ``DataFrame.select_dtypes`` now allows the ``np.float16`` type code (:issue:`11990`) - ``pivot_table()`` now accepts most iterables for the ``values`` parameter (:issue:`12017`) - Added Google ``BigQuery`` service account authentication support, which enables authentication on remote servers. (:issue:`11881`, :issue:`12572`). For further details see `here `__ - ``HDFStore`` is now iterable: ``for k in store`` is equivalent to ``for k in store.keys()`` (:issue:`12221`). - Add missing methods/fields to ``.dt`` for ``Period`` (:issue:`8848`) -- The entire codebase has been ``PEP``-ified (:issue:`12096`) +- The entire code base has been ``PEP``-ified (:issue:`12096`) .. 
_whatsnew_0180.api_breaking: Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- the leading whitespaces have been removed from the output of ``.to_string(index=False)`` method (:issue:`11833`) +- the leading white spaces have been removed from the output of ``.to_string(index=False)`` method (:issue:`11833`) - the ``out`` parameter has been removed from the ``Series.round()`` method. (:issue:`11763`) - ``DataFrame.round()`` leaves non-numeric columns unchanged in its return, rather than raises. (:issue:`11885`) - ``DataFrame.head(0)`` and ``DataFrame.tail(0)`` return empty frames, rather than ``self``. (:issue:`11937`) @@ -1186,7 +1186,7 @@ Performance Improvements - Improved performance in construction of ``Categoricals`` with ``Series`` of datetimes containing ``NaT`` (:issue:`12077`) -- Improved performance of ISO 8601 date parsing for dates without separators (:issue:`11899`), leading zeros (:issue:`11871`) and with whitespace preceding the time zone (:issue:`9714`) +- Improved performance of ISO 8601 date parsing for dates without separators (:issue:`11899`), leading zeros (:issue:`11871`) and with white space preceding the time zone (:issue:`9714`) diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index de9a5d5d8afae..34921505a46bf 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -106,7 +106,7 @@ Now you can do: .. _whatsnew_0181.enhancements.method_chain: -Method chaininng improvements +Method chaining improvements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The following methods / indexers now accept a ``callable``. It is intended to make @@ -598,14 +598,14 @@ Bug Fixes - Bug in ``.resample(...)`` with a ``PeriodIndex`` when resampling to an existing frequency (:issue:`12770`) - Bug in printing data which contains ``Period`` with different ``freq`` raises ``ValueError`` (:issue:`12615`) - Bug in ``Series`` construction with ``Categorical`` and ``dtype='category'`` is specified (:issue:`12574`) -- Bugs in concatenation with a coercable dtype was too aggressive, resulting in different dtypes in outputformatting when an object was longer than ``display.max_rows`` (:issue:`12411`, :issue:`12045`, :issue:`11594`, :issue:`10571`, :issue:`12211`) +- Bugs in concatenation with a coercible dtype was too aggressive, resulting in different dtypes in output formatting when an object was longer than ``display.max_rows`` (:issue:`12411`, :issue:`12045`, :issue:`11594`, :issue:`10571`, :issue:`12211`) - Bug in ``float_format`` option with option not being validated as a callable. (:issue:`12706`) - Bug in ``GroupBy.filter`` when ``dropna=False`` and no groups fulfilled the criteria (:issue:`12768`) - Bug in ``__name__`` of ``.cum*`` functions (:issue:`12021`) - Bug in ``.astype()`` of a ``Float64Inde/Int64Index`` to an ``Int64Index`` (:issue:`12881`) -- Bug in roundtripping an integer based index in ``.to_json()/.read_json()`` when ``orient='index'`` (the default) (:issue:`12866`) +- Bug in round tripping an integer based index in ``.to_json()/.read_json()`` when ``orient='index'`` (the default) (:issue:`12866`) - Bug in plotting ``Categorical`` dtypes cause error when attempting stacked bar plot (:issue:`13019`) -- Compat with >= ``numpy`` 1.11 for ``NaT`` comparions (:issue:`12969`) +- Compat with >= ``numpy`` 1.11 for ``NaT`` comparisons (:issue:`12969`) - Bug in ``.drop()`` with a non-unique ``MultiIndex``. 
(:issue:`12701`) - Bug in ``.concat`` of datetime tz-aware and naive DataFrames (:issue:`12467`) - Bug in correctly raising a ``ValueError`` in ``.resample(..).fillna(..)`` when passing a non-string (:issue:`12952`) @@ -673,7 +673,7 @@ Bug Fixes - Bug in ``pd.concat`` raises ``AttributeError`` when input data contains tz-aware datetime and timedelta (:issue:`12620`) - Bug in ``pd.concat`` did not handle empty ``Series`` properly (:issue:`11082`) -- Bug in ``.plot.bar`` alginment when ``width`` is specified with ``int`` (:issue:`12979`) +- Bug in ``.plot.bar`` alignment when ``width`` is specified with ``int`` (:issue:`12979`) - Bug in ``fill_value`` is ignored if the argument to a binary operator is a constant (:issue:`12723`) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 50d7877a9cd48..73fb124afef87 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -386,7 +386,7 @@ Google BigQuery Enhancements Fine-grained numpy errstate ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Previous versions of pandas would permanently silence numpy's ufunc error handling when ``pandas`` was imported. Pandas did this in order to silence the warnings that would arise from using numpy ufuncs on missing data, which are usually represented as ``NaN`` s. Unfortunately, this silenced legitimate warnings arising in non-pandas code in the application. Starting with 0.19.0, pandas will use the ``numpy.errstate`` context manager to silence these warnings in a more fine-grained manner, only around where these operations are actually used in the pandas codebase. (:issue:`13109`, :issue:`13145`) +Previous versions of pandas would permanently silence numpy's ufunc error handling when ``pandas`` was imported. Pandas did this in order to silence the warnings that would arise from using numpy ufuncs on missing data, which are usually represented as ``NaN`` s. Unfortunately, this silenced legitimate warnings arising in non-pandas code in the application. Starting with 0.19.0, pandas will use the ``numpy.errstate`` context manager to silence these warnings in a more fine-grained manner, only around where these operations are actually used in the pandas code base. (:issue:`13109`, :issue:`13145`) After upgrading pandas, you may see *new* ``RuntimeWarnings`` being issued from your code. These are likely legitimate, and the underlying cause likely existed in the code when using previous versions of pandas that simply silenced the warning. Use `numpy.errstate `__ around the source of the ``RuntimeWarning`` to control how these conditions are handled. @@ -750,7 +750,7 @@ This will now convert integers/floats with the default unit of ``ns``. Bug fixes related to ``.to_datetime()``: - Bug in ``pd.to_datetime()`` when passing integers or floats, and no ``unit`` and ``errors='coerce'`` (:issue:`13180`). -- Bug in ``pd.to_datetime()`` when passing invalid datatypes (e.g. bool); will now respect the ``errors`` keyword (:issue:`13176`) +- Bug in ``pd.to_datetime()`` when passing invalid data types (e.g. 
bool); will now respect the ``errors`` keyword (:issue:`13176`) - Bug in ``pd.to_datetime()`` which overflowed on ``int8``, and ``int16`` dtypes (:issue:`13451`) - Bug in ``pd.to_datetime()`` raise ``AttributeError`` with ``NaN`` and the other string is not valid when ``errors='ignore'`` (:issue:`12424`) - Bug in ``pd.to_datetime()`` did not cast floats correctly when ``unit`` was specified, resulting in truncated datetime (:issue:`13834`) @@ -1512,7 +1512,7 @@ Bug Fixes - Bug in ``.set_index`` raises ``AmbiguousTimeError`` if new index contains DST boundary and multi levels (:issue:`12920`) - Bug in ``.shift`` raises ``AmbiguousTimeError`` if data contains datetime near DST boundary (:issue:`13926`) - Bug in ``pd.read_hdf()`` returns incorrect result when a ``DataFrame`` with a ``categorical`` column and a query which doesn't match any values (:issue:`13792`) -- Bug in ``.iloc`` when indexing with a non lex-sorted MultiIndex (:issue:`13797`) +- Bug in ``.iloc`` when indexing with a non lexsorted MultiIndex (:issue:`13797`) - Bug in ``.loc`` when indexing with date strings in a reverse sorted ``DatetimeIndex`` (:issue:`14316`) - Bug in ``Series`` comparison operators when dealing with zero dim NumPy arrays (:issue:`13006`) - Bug in ``.combine_first`` may return incorrect ``dtype`` (:issue:`7630`, :issue:`10567`) diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index b8afe18e0f871..1c577dddf1cd4 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -43,7 +43,7 @@ Bug Fixes - Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`) - Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow in the negative direction was not being caught (:issue:`14068`, :issue:`14453`) - Bug in string indexing against data with ``object`` ``Index`` may raise ``AttributeError`` (:issue:`14424`) -- Corrrecly raise ``ValueError`` on empty input to ``pd.eval()`` and ``df.query()`` (:issue:`13139`) +- Correctly raise ``ValueError`` on empty input to ``pd.eval()`` and ``df.query()`` (:issue:`13139`) - Bug in ``RangeIndex.intersection`` when result is a empty set (:issue:`14364`). - Bug in groupby-transform broadcasting that could cause incorrect dtype coercion (:issue:`14457`) - Bug in ``Series.__setitem__`` which allowed mutating read-only arrays (:issue:`14359`). diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 5fb725a76770e..bd90e371597dc 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -24,7 +24,7 @@ Highlights include: .. warning:: - Pandas has changed the internal structure and layout of the codebase. + Pandas has changed the internal structure and layout of the code base. This can affect imports that are not from the top-level ``pandas.*`` namespace, please see the changes :ref:`here `. Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. @@ -324,7 +324,7 @@ this JSON Table schema representation of the Series or DataFrame if you are using IPython (or another frontend like `nteract`_ using the Jupyter messaging protocol). This gives frontends like the Jupyter notebook and `nteract`_ -more flexiblity in how they display pandas objects, since they have +more flexibility in how they display pandas objects, since they have more information about the data. You must enable this by setting the ``display.html.table_schema`` option to ``True``. 
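The ``display.html.table_schema`` option mentioned above is toggled like any other pandas option; a minimal sketch, assuming a Jupyter or nteract front end is attached (the example frame is illustrative only)::

    import pandas as pd

    # opt in to publishing a JSON Table Schema repr alongside the default repr
    pd.set_option('display.html.table_schema', True)

    df = pd.DataFrame({'a': [1, 2, 3], 'b': list('xyz')})
    df  # front ends that understand the schema can now render richer output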
@@ -462,7 +462,7 @@ Selecting via a scalar value that is contained *in* the intervals. Other Enhancements ^^^^^^^^^^^^^^^^^^ -- ``DataFrame.rolling()`` now accepts the parameter ``closed='right'|'left'|'both'|'neither'`` to choose the rolling window-endpoint closedness. See the :ref:`documentation ` (:issue:`13965`) +- ``DataFrame.rolling()`` now accepts the parameter ``closed='right'|'left'|'both'|'neither'`` to choose which endpoints of the rolling window are closed. See the :ref:`documentation ` (:issue:`13965`) - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. - ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) - ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) @@ -1389,7 +1389,7 @@ list, and a dict of column names to scalars or lists. This provides a useful syn (potentially different) aggregations. However, ``.agg(..)`` can *also* accept a dict that allows 'renaming' of the result columns. This is a complicated and confusing syntax, as well as not consistent -between ``Series`` and ``DataFrame``. We are deprecating this 'renaming' functionaility. +between ``Series`` and ``DataFrame``. We are deprecating this 'renaming' functionality. - We are deprecating passing a dict to a grouped/rolled/resampled ``Series``. This allowed one to ``rename`` the resulting aggregation, but this had a completely different @@ -1528,7 +1528,7 @@ Removal of prior version deprecations/changes - The ``pandas.io.ga`` module with a ``google-analytics`` interface is removed (:issue:`11308`). Similar functionality can be found in the `Google2Pandas `__ package. - ``pd.to_datetime`` and ``pd.to_timedelta`` have dropped the ``coerce`` parameter in favor of ``errors`` (:issue:`13602`) -- ``pandas.stats.fama_macbeth``, ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var``, as well as the top-level ``pandas.fama_macbeth`` and ``pandas.ols`` routines are removed. Similar functionaility can be found in the `statsmodels `__ package. (:issue:`11898`) +- ``pandas.stats.fama_macbeth``, ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var``, as well as the top-level ``pandas.fama_macbeth`` and ``pandas.ols`` routines are removed. Similar functionality can be found in the `statsmodels `__ package. (:issue:`11898`) - The ``TimeSeries`` and ``SparseTimeSeries`` classes, aliases of ``Series`` and ``SparseSeries``, are removed (:issue:`10890`, :issue:`15098`).
- ``Series.is_time_series`` is dropped in favor of ``Series.index.is_all_dates`` (:issue:`15098`) @@ -1640,7 +1640,7 @@ I/O - Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`) - Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`) - Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`) -- Bug in ``pd.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) +- Bug in ``pd.read_csv()`` with ``parse_dates`` when multi-line headers are specified (:issue:`15376`) - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) - Bug in ``pd.read_csv()`` when an index was specified and no values were specified as null values (:issue:`15835`) - Bug in ``pd.read_csv()`` in which certain invalid file objects caused the Python interpreter to crash (:issue:`15337`) @@ -1722,7 +1722,7 @@ Numeric - Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`) - Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) - Bug in ``pandas.tools.utils.cartesian_product()`` with large input can cause overflow on windows (:issue:`15265`) -- Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) +- Bug in ``.eval()`` which caused multi-line evals to fail with local variables not on the first line (:issue:`15342`) Other ^^^^^ diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 2e9e616daf3a7..2c147736d79a8 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -124,7 +124,7 @@ I/O - Bug in :meth:`DataFrame.to_csv` when the table had ``MultiIndex`` columns, and a list of strings was passed in for ``header`` (:issue:`5539`) - Bug in parsing integer datetime-like columns with specified format in ``read_sql`` (:issue:`17855`). 
- Bug in :meth:`DataFrame.to_msgpack` when serializing data of the ``numpy.bool_`` datatype (:issue:`18390`) -- Bug in :func:`read_json` not decoding when reading line deliminted JSON from S3 (:issue:`17200`) +- Bug in :func:`read_json` not decoding when reading line delimited JSON from S3 (:issue:`17200`) - Bug in :func:`pandas.io.json.json_normalize` to avoid modification of ``meta`` (:issue:`18610`) - Bug in :func:`to_latex` where repeated multi-index values were not printed even though a higher level index differed from the previous row (:issue:`14484`) - Bug when reading NaN-only categorical columns in :class:`HDFStore` (:issue:`18413`) @@ -139,7 +139,7 @@ Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`) -- Bug in ``DataFrame.resample(...)`` when there is a time change (DST) and resampling frequecy is 12h or higher (:issue:`15549`) +- Bug in ``DataFrame.resample(...)`` when there is a time change (DST) and resampling frequency is 12h or higher (:issue:`15549`) - Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`) - Bug in ``rolling.var`` where calculation is inaccurate with a zero-valued array (:issue:`18430`) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a099fb40c35a7..2430b6ac2bbd4 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -243,7 +243,7 @@ Grouping by a categorical includes the unobserved categories in the output. When grouping by multiple categorical columns, this means you get the cartesian product of all the categories, including combinations where there are no observations, which can result in a large number of groups. We have added a keyword ``observed`` to control this behavior, it defaults to -``observed=False`` for backward-compatiblity. (:issue:`14942`, :issue:`8138`, :issue:`15217`, :issue:`17594`, :issue:`8669`, :issue:`20583`, :issue:`20902`) +``observed=False`` for backward-compatibility. (:issue:`14942`, :issue:`8138`, :issue:`15217`, :issue:`17594`, :issue:`8669`, :issue:`20583`, :issue:`20902`) .. ipython:: python @@ -535,8 +535,8 @@ Other Enhancements - :func:`DataFrame.replace` now supports the ``method`` parameter, which can be used to specify the replacement method when ``to_replace`` is a scalar, list or tuple and ``value`` is ``None`` (:issue:`19632`) - :meth:`Timestamp.month_name`, :meth:`DatetimeIndex.month_name`, and :meth:`Series.dt.month_name` are now available (:issue:`12805`) - :meth:`Timestamp.day_name` and :meth:`DatetimeIndex.day_name` are now available to return day names with a specified locale (:issue:`12806`) -- :meth:`DataFrame.to_sql` now performs a multivalue insert if the underlying connection supports itk rather than inserting row by row. - ``SQLAlchemy`` dialects supporting multivalue inserts include: ``mysql``, ``postgresql``, ``sqlite`` and any dialect with ``supports_multivalues_insert``. (:issue:`14315`, :issue:`8953`) +- :meth:`DataFrame.to_sql` now performs a multi-value insert if the underlying connection supports it, rather than inserting row by row. + ``SQLAlchemy`` dialects supporting multi-value inserts include: ``mysql``, ``postgresql``, ``sqlite`` and any dialect with ``supports_multivalues_insert``.
(:issue:`14315`, :issue:`8953`) - :func:`read_html` now accepts a ``displayed_only`` keyword argument to controls whether or not hidden elements are parsed (``True`` by default) (:issue:`20027`) - :func:`read_html` now reads all ```` elements in a ````, not just the first. (:issue:`20690`) - :meth:`~pandas.core.window.Rolling.quantile` and :meth:`~pandas.core.window.Expanding.quantile` now accept the ``interpolation`` keyword, ``linear`` by default (:issue:`20497`) @@ -836,7 +836,7 @@ Extraction of matching patterns from strings By default, extracting matching patterns from strings with :func:`str.extract` used to return a ``Series`` if a single group was being extracted (a ``DataFrame`` if more than one group was extracted). As of Pandas 0.23.0 :func:`str.extract` always returns a ``DataFrame``, unless -``expand`` is set to ``False``. Finallay, ``None`` was an accepted value for +``expand`` is set to ``False``. Finally, ``None`` was an accepted value for the ``expand`` parameter (which was equivalent to ``False``), but now raises a ``ValueError``. (:issue:`11386`) Previous Behavior: @@ -896,7 +896,7 @@ New Behavior: Notice in the example above that the converted ``Categorical`` has retained ``ordered=True``. Had the default value for ``ordered`` remained as ``False``, the converted ``Categorical`` would have become unordered, despite ``ordered=False`` never being explicitly specified. To change the value of ``ordered``, explicitly pass it to the new dtype, e.g. ``CategoricalDtype(categories=list('cbad'), ordered=False)``. -Note that the unintenional conversion of ``ordered`` discussed above did not arise in previous versions due to separate bugs that prevented ``astype`` from doing any type of category to category conversion (:issue:`10696`, :issue:`18593`). These bugs have been fixed in this release, and motivated changing the default value of ``ordered``. +Note that the unintentional conversion of ``ordered`` discussed above did not arise in previous versions due to separate bugs that prevented ``astype`` from doing any type of category to category conversion (:issue:`10696`, :issue:`18593`). These bugs have been fixed in this release, and motivated changing the default value of ``ordered``. .. _whatsnew_0230.api_breaking.pretty_printing: @@ -1107,7 +1107,7 @@ Performance Improvements - Improved performance of :func:`pandas.core.groupby.GroupBy.any` and :func:`pandas.core.groupby.GroupBy.all` (:issue:`15435`) - Improved performance of :func:`pandas.core.groupby.GroupBy.pct_change` (:issue:`19165`) - Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`) -- Improved performance of ``getattr(Series, attr)`` when the Series has certain index types. This manifiested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`) +- Improved performance of ``getattr(Series, attr)`` when the Series has certain index types. 
This manifested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`) - Fixed a performance regression for :func:`GroupBy.nth` and :func:`GroupBy.last` with some object columns (:issue:`19283`) - Improved performance of :func:`pandas.core.arrays.Categorical.from_codes` (:issue:`18501`) @@ -1243,7 +1243,7 @@ Offsets - Bug in :class:`WeekOfMonth` and :class:`LastWeekOfMonth` where default keyword arguments for constructor raised ``ValueError`` (:issue:`19142`) - Bug in :class:`FY5253Quarter`, :class:`LastWeekOfMonth` where rollback and rollforward behavior was inconsistent with addition and subtraction behavior (:issue:`18854`) - Bug in :class:`FY5253` where ``datetime`` addition and subtraction incremented incorrectly for dates on the year-end but not normalized to midnight (:issue:`18854`) -- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operatons (:issue:`14774`) +- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`) Numeric @@ -1329,9 +1329,9 @@ I/O - :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`, :issue:`9155`, :issue:`19900`) - Bug in :meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`) - Bug in :func:`read_pickle` when unpickling objects with :class:`TimedeltaIndex` or :class:`Float64Index` created with pandas prior to version 0.20 (:issue:`19939`) -- Bug in :meth:`pandas.io.json.json_normalize` where subrecords are not properly normalized if any subrecords values are NoneType (:issue:`20030`) +- Bug in :meth:`pandas.io.json.json_normalize` where sub-records are not properly normalized if any sub-records values are NoneType (:issue:`20030`) - Bug in ``usecols`` parameter in :func:`read_csv` where error is not raised correctly when passing a string. (:issue:`20529`) -- Bug in :func:`HDFStore.keys` when reading a file with a softlink causes exception (:issue:`20523`) +- Bug in :func:`HDFStore.keys` when reading a file with a soft link causes exception (:issue:`20523`) - Bug in :func:`HDFStore.select_column` where a key which is not a valid store raised an ``AttributeError`` instead of a ``KeyError`` (:issue:`17912`) Plotting @@ -1390,7 +1390,7 @@ Reshaping - Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`) - Bug in :func:`DataFrame.stack`, :func:`DataFrame.unstack`, :func:`Series.unstack` which were not returning subclasses (:issue:`15563`) - Bug in timezone comparisons, manifesting as a conversion of the index to UTC in ``.concat()`` (:issue:`18523`) -- Bug in :func:`concat` when concatting sparse and dense series it returns only a ``SparseDataFrame``. Should be a ``DataFrame``. (:issue:`18914`, :issue:`18686`, and :issue:`16874`) +- Bug in :func:`concat` when concatenating sparse and dense series it returns only a ``SparseDataFrame``. Should be a ``DataFrame``. 
(:issue:`18914`, :issue:`18686`, and :issue:`16874`) - Improved error message for :func:`DataFrame.merge` when there is no common merge key (:issue:`19427`) - Bug in :func:`DataFrame.join` which does an ``outer`` instead of a ``left`` join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`) - :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`) @@ -1411,5 +1411,5 @@ Other ^^^^^ - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) -- Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existant option key in some cases (:issue:`19789`) +- Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existent option key in some cases (:issue:`19789`) - Bug in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` for Series or DataFrames with differing unicode data (:issue:`20503`) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index b94377af770f4..1626508c3ba31 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -64,7 +64,7 @@ Performance Improvements Documentation Changes ~~~~~~~~~~~~~~~~~~~~~ -- +- Added sphinx spelling extension, updated documentation on how to use the spell check (:issue:`21079`) - - diff --git a/doc/source/whatsnew/v0.6.0.txt b/doc/source/whatsnew/v0.6.0.txt index 55a67a75e0fd1..bd01dd0a90a59 100644 --- a/doc/source/whatsnew/v0.6.0.txt +++ b/doc/source/whatsnew/v0.6.0.txt @@ -43,7 +43,7 @@ New Features Performance Enhancements ~~~~~~~~~~~~~~~~~~~~~~~~ -- VBENCH Cythonized ``cache_readonly``, resulting in substantial micro-performance enhancements throughout the codebase (:issue:`361`) +- VBENCH Cythonized ``cache_readonly``, resulting in substantial micro-performance enhancements throughout the code base (:issue:`361`) - VBENCH Special Cython matrix iterator for applying arbitrary reduction operations with 3-5x better performance than `np.apply_along_axis` (:issue:`309`) - VBENCH Improved performance of ``MultiIndex.from_tuples`` - VBENCH Special Cython matrix iterator for applying arbitrary reduction operations diff --git a/doc/source/whatsnew/v0.8.0.txt b/doc/source/whatsnew/v0.8.0.txt index b5ec5aa73ee9a..29d6fe563d047 100644 --- a/doc/source/whatsnew/v0.8.0.txt +++ b/doc/source/whatsnew/v0.8.0.txt @@ -33,7 +33,7 @@ clear of NumPy 1.6's datetime64 API functions (though limited as they are) and only interact with this data using the interface that pandas provides. See the end of the 0.8.0 section for a "porting" guide listing potential issues -for users migrating legacy codebases from pandas 0.7 or earlier to 0.8.0. +for users migrating legacy code bases from pandas 0.7 or earlier to 0.8.0. Bug fixes to the 0.7.x series for legacy NumPy < 1.6 users will be provided as they arise. There will be no more further development in 0.7.x beyond bug @@ -68,7 +68,7 @@ Time series changes and improvements :ref:`time spans ` and performing **calendar logic**, including the `12 fiscal quarterly frequencies `. This is a partial port of, and a substantial enhancement to, - elements of the scikits.timeseries codebase. Support for conversion between + elements of the scikits.timeseries code base. 
Support for conversion between PeriodIndex and DatetimeIndex - New Timestamp data type subclasses `datetime.datetime`, providing the same interface while enabling working with nanosecond-resolution data. Also @@ -76,7 +76,7 @@ Time series changes and improvements - Enhanced support for :ref:`time zones `. Add `tz_convert` and ``tz_lcoalize`` methods to TimeSeries and DataFrame. All timestamps are stored as UTC; Timestamps from DatetimeIndex objects with time - zone set will be localized to localtime. Time zone conversions are therefore + zone set will be localized to local time. Time zone conversions are therefore essentially free. User needs to know very little about pytz library now; only time zone names as as strings are required. Time zone-aware timestamps are equal if and only if their UTC timestamps match. Operations between time diff --git a/doc/source/whatsnew/v0.9.1.txt b/doc/source/whatsnew/v0.9.1.txt index e2d6d7a275086..1f58170b30244 100644 --- a/doc/source/whatsnew/v0.9.1.txt +++ b/doc/source/whatsnew/v0.9.1.txt @@ -8,7 +8,7 @@ v0.9.1 (November 14, 2012) -------------------------- -This is a bugfix release from 0.9.0 and includes several new features and +This is a bug fix release from 0.9.0 and includes several new features and enhancements along with a large number of bug fixes. The new features include by-column sort order for DataFrame and Series, improved NA handling for the rank method, masking functions for DataFrame, and intraday time-series filtering for diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 741e5553141f7..41047d9c25c22 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -304,7 +304,7 @@ cdef class IndexEngine: """ return an indexer suitable for takng from a non unique index return the labels in the same order ast the target and a missing indexer into the targets (which correspond - to the -1 indicies in the results """ + to the -1 indices in the results """ cdef: ndarray values, x diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f7bb6c1dbb304..f93748a75e609 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -943,7 +943,7 @@ class Timedelta(_Timedelta): days, seconds, microseconds, milliseconds, minutes, hours, weeks : numeric, optional Values for construction in compat with datetime.timedelta. - np ints and floats will be coereced to python ints and floats. + np ints and floats will be coerced to python ints and floats. Notes ----- diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 88bc497f9f22d..63520fdd74299 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -513,7 +513,7 @@ def _factorize_array(values, na_sentinel=-1, size_hint=None, See Also -------- pandas.cut : Discretize continuous-valued array. - pandas.unique : Find the unique valuse in an array. + pandas.unique : Find the unique value in an array. Examples -------- @@ -558,7 +558,7 @@ def _factorize_array(values, na_sentinel=-1, size_hint=None, [a, c] Categories (3, object): [a, b, c] - Notice that ``'b'`` is in ``uniques.categories``, desipite not being + Notice that ``'b'`` is in ``uniques.categories``, despite not being present in ``cat.values``. For all other pandas objects, an Index of the appropriate type is @@ -576,8 +576,8 @@ def _factorize_array(values, na_sentinel=-1, size_hint=None, @Substitution( values=dedent("""\ values : sequence - A 1-D seqeunce. 
Sequences that aren't pandas objects are - coereced to ndarrays before factorization. + A 1-D sequence. Sequences that aren't pandas objects are + coerced to ndarrays before factorization. """), order=dedent("""\ order @@ -1457,7 +1457,7 @@ def take(arr, indices, axis=0, allow_fill=False, fill_value=None): Parameters ---------- arr : sequence - Non array-likes (sequences without a dtype) are coereced + Non array-likes (sequences without a dtype) are coerced to an ndarray. indices : sequence of integers Indices to be taken. diff --git a/pandas/core/apply.py b/pandas/core/apply.py index ac173c5182bc7..27ac5038276d6 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -113,7 +113,7 @@ def get_result(self): if isinstance(self.f, compat.string_types): # Support for `frame.transform('method')` # Some methods (shift, etc.) require the axis argument, others - # don't, so inspect and insert if nescessary. + # don't, so inspect and insert if necessary. func = getattr(self.obj, self.f) sig = compat.signature(func) if 'axis' in sig.args: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 1922801c30719..ce87c0a8b0c5a 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -491,7 +491,7 @@ def take(self, indices, allow_fill=False, fill_value=None): `fill_value`: a user-facing "boxed" scalar, and a low-level physical NA value. `fill_value` should be the user-facing version, and the implementation should handle translating that to the - physical version for processing the take if nescessary. + physical version for processing the take if necessary. Returns ------- @@ -510,7 +510,7 @@ def take(self, indices, allow_fill=False, fill_value=None): ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``, ``iloc``, when `indices` is a sequence of values. Additionally, it's called by :meth:`Series.reindex`, or any other method - that causes realignemnt, with a `fill_value`. + that causes realignment, with a `fill_value`. See Also -------- diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index abcb9ae3494b5..eff8c9b4f4cbf 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -51,7 +51,7 @@ _take_msg = textwrap.dedent("""\ Interpreting negative values in 'indexer' as missing values. - In the future, this will change to meaning positional indicies + In the future, this will change to meaning positional indices from the right. Use 'allow_fill=True' to retain the previous behavior and silence this @@ -1478,7 +1478,7 @@ def argsort(self, *args, **kwargs): # TODO(PY2): use correct signature # We have to do *args, **kwargs to avoid a a py2-only signature # issue since np.argsort differs from argsort. - """Return the indicies that would sort the Categorical. + """Return the indices that would sort the Categorical. Parameters ---------- diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index d9dc73434f5ac..ad4588f254174 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -33,7 +33,7 @@ def isna(obj): """ Detect missing values for an array-like object. - This function takes a scalar or array-like object and indictates + This function takes a scalar or array-like object and indicates whether values are missing (``NaN`` in numeric arrays, ``None`` or ``NaN`` in object arrays, ``NaT`` in datetimelike). @@ -52,7 +52,7 @@ def isna(obj): See Also -------- notna : boolean inverse of pandas.isna. 
- Series.isna : Detetct missing values in a Series. + Series.isna : Detect missing values in a Series. DataFrame.isna : Detect missing values in a DataFrame. Index.isna : Detect missing values in an Index. @@ -260,7 +260,7 @@ def notna(obj): """ Detect non-missing values for an array-like object. - This function takes a scalar or array-like object and indictates + This function takes a scalar or array-like object and indicates whether values are valid (not missing, which is ``NaN`` in numeric arrays, ``None`` or ``NaN`` in object arrays, ``NaT`` in datetimelike). @@ -279,7 +279,7 @@ def notna(obj): See Also -------- isna : boolean inverse of pandas.notna. - Series.notna : Detetct valid values in a Series. + Series.notna : Detect valid values in a Series. DataFrame.notna : Detect valid values in a DataFrame. Index.notna : Detect valid values in an Index. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 77a67c048a48d..5e5cde05cafbc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1136,7 +1136,7 @@ def to_gbq(self, destination_table, project_id, chunksize=None, Number of rows to be inserted in each chunk from the dataframe. Set to ``None`` to load the whole dataframe at once. reauth : bool, default False - Force Google BigQuery to reauthenticate the user. This is useful + Force Google BigQuery to re-authenticate the user. This is useful if multiple accounts are used. if_exists : str, default 'fail' Behavior when the destination table exists. Value can be one of: @@ -5922,7 +5922,7 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, -------- DataFrame.applymap: For elementwise operations DataFrame.aggregate: only perform aggregating type operations - DataFrame.transform: only perform transformating type operations + DataFrame.transform: only perform transforming type operations Examples -------- @@ -6565,7 +6565,7 @@ def cov(self, min_periods=None): See Also -------- pandas.Series.cov : compute covariance with another Series - pandas.core.window.EWM.cov: expoential weighted sample covariance + pandas.core.window.EWM.cov: exponential weighted sample covariance pandas.core.window.Expanding.cov : expanding sample covariance pandas.core.window.Rolling.cov : rolling sample covariance diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9e4eda1bc4dc7..38def81e73231 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3607,7 +3607,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, If desired, we can fill in the missing values using one of several options. - For example, to backpropagate the last valid value to fill the ``NaN`` + For example, to back-propagate the last valid value to fill the ``NaN`` values, pass ``bfill`` as an argument to the ``method`` keyword. >>> df2.reindex(date_index2, method='bfill') @@ -4541,7 +4541,7 @@ def as_matrix(self, columns=None): e.g. If the dtypes are float16 and float32, dtype will be upcast to float32. If dtypes are int32 and uint8, dtype will be upcase to int32. By numpy.find_common_type convention, mixing int64 and uint64 - will result in a flot64 dtype. + will result in a float64 dtype. This method is provided for backwards compatibility. Generally, it is recommended to use '.values'. 
@@ -4622,7 +4622,7 @@ def values(self): See Also -------- - pandas.DataFrame.index : Retrievie the index labels + pandas.DataFrame.index : Retrieve the index labels pandas.DataFrame.columns : Retrieving the column names """ self._consolidate_inplace() @@ -5702,7 +5702,7 @@ def bfill(self, axis=None, inplace=False, limit=None, downcast=None): the correct type for replacement. Compare the behavior of ``s.replace({'a': None})`` and - ``s.replace('a', None)`` to understand the pecularities + ``s.replace('a', None)`` to understand the peculiarities of the `to_replace` parameter: >>> s = pd.Series([10, 'a', 'a', 'b', 'a']) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index df7a5dc9dc173..3bc59157055ce 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2069,7 +2069,7 @@ def shift(self, periods=1, freq=None, axis=0): @Appender(_doc_template) def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, axis=0): - """Calcuate pct_change of each value to previous entry in group""" + """Calculate pct_change of each value to previous entry in group""" if freq is not None or axis != 0: return self.apply(lambda x: x.pct_change(periods=periods, fill_method=fill_method, diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index df39eb5fd8312..82147e3ad2f38 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2629,7 +2629,7 @@ def shift(self, periods=1, freq=None): def argsort(self, *args, **kwargs): """ - Return the integer indicies that would sort the index. + Return the integer indices that would sort the index. Parameters ---------- @@ -2641,7 +2641,7 @@ def argsort(self, *args, **kwargs): Returns ------- numpy.ndarray - Integer indicies that would sort the index if used as + Integer indices that would sort the index if used as an indexer. See also diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 83950f1d71633..bc4b729cbfe15 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2046,7 +2046,7 @@ def normalize(self): """ Convert times to midnight. - The time component of the date-timeise converted to midnight i.e. + The time component of the date-time is converted to midnight i.e. 00:00:00. This is useful in cases, when the time does not matter. Length is unaltered. The timezones are unaffected. diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index fbcf06a28c1e5..a9c65b7c2c864 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2141,7 +2141,7 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): Notes ----- - This method only works if the MultiIndex is properly lex-sorted. So, + This method only works if the MultiIndex is properly lexsorted. So, if only the first 2 levels of a 3-level MultiIndex are lexsorted, you can only pass two levels to ``.slice_locs``. diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b9e8f9028dbf7..c163e3d53e634 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -587,7 +587,7 @@ def asfreq(self, freq=None, how='E'): 'S', 'START', or 'BEGIN' for start. Whether the elements should be aligned to the end or start within pa period. January 31st ('END') vs. - Janury 1st ('START') for example. + January 1st ('START') for example. 
Returns ------- diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 31c489e2f8941..e9b9a734ec5f5 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -761,7 +761,7 @@ def _interp_limit(invalid, fw_limit, bw_limit): """ # handle forward first; the backward direction is the same except # 1. operate on the reversed array - # 2. subtract the returned indicies from N - 1 + # 2. subtract the returned indices from N - 1 N = len(invalid) f_idx = set() b_idx = set() diff --git a/pandas/core/panel.py b/pandas/core/panel.py index fe46b8a66e5ef..c4aa471b8b944 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1405,7 +1405,7 @@ def _get_join_index(self, other, how): # miscellaneous data creation @staticmethod def _extract_axes(self, data, axes, **kwargs): - """ return a list of the axis indicies """ + """ return a list of the axis indices """ return [self._extract_axis(self, data, axis=i, **kwargs) for i, a in enumerate(axes)] @@ -1447,11 +1447,11 @@ def _homogenize_dict(self, frames, intersect=True, dtype=None): Returns ------- - dict of aligned results & indicies + dict of aligned results & indices """ result = dict() - # caller differs dict/ODict, presered type + # caller differs dict/ODict, preserved type if isinstance(frames, OrderedDict): result = OrderedDict() diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 0707cc756682e..0b0fcacc1bc48 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -425,7 +425,7 @@ def backfill(self, limit=None): appear (e.g., when the resampling frequency is higher than the original frequency). The backward fill will replace NaN values that appeared in the resampled data with the next value in the original sequence. - Missing values that existed in the orginal data will not be modified. + Missing values that existed in the original data will not be modified. Parameters ---------- @@ -529,7 +529,7 @@ def fillna(self, method, limit=None): appear (e.g., when the resampling frequency is higher than the original frequency). - Missing values that existed in the orginal data will + Missing values that existed in the original data will not be modified. Parameters diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index ce99d2f8c9a63..b3e3c52f6e363 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -166,7 +166,8 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'): Wide panel to long format. Less flexible but more user-friendly than melt. With stubnames ['A', 'B'], this function expects to find one or more - group of columns with format Asuffix1, Asuffix2,..., Bsuffix1, Bsuffix2,... + group of columns with format + A-suffix1, A-suffix2,..., B-suffix1, B-suffix2,... You specify what you want to call this suffix in the resulting long format with `j` (for example `j='year'`) @@ -185,7 +186,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'): i : str or list-like Column(s) to use as id variable(s) j : str - The name of the subobservation variable. What you wish to name your + The name of the sub-observation variable. What you wish to name your suffix in the long format. sep : str, default "" A character indicating the separation of the variable names @@ -200,7 +201,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'): numeric suffixes. Suffixes with no numbers could be specified with the negated character class '\\D+'. 
You can also further disambiguate suffixes, for example, if your wide variables are of the form - Aone, Btwo,.., and you have an unrelated column Arating, you can + A-one, B-two,.., and you have an unrelated column A-rating, you can ignore the last one by specifying `suffix='(!?one|two)'` .. versionadded:: 0.20.0 @@ -242,7 +243,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'): 1 1980 0.997345 e 1.3 2 1980 0.282978 f 0.1 - With multuple id columns + With multiple id columns >>> df = pd.DataFrame({ ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 4d8897fb7c811..73aba4d4e044b 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -319,7 +319,7 @@ def merge_asof(left, right, on=None, - If True, allow matching with the same 'on' value (i.e. less-than-or-equal-to / greater-than-or-equal-to) - If False, don't match the same 'on' value - (i.e., stricly less-than / strictly greater-than) + (i.e., strictly less-than / strictly greater-than) direction : 'backward' (default), 'forward', or 'nearest' Whether to search for prior, subsequent, or closest matches. diff --git a/pandas/core/series.py b/pandas/core/series.py index 0e2ae22f35af7..c92825abf45a3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3088,7 +3088,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): -------- Series.map: For element-wise operations Series.agg: only perform aggregating type operations - Series.transform: only perform transformating type operations + Series.transform: only perform transforming type operations Examples -------- diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 81d775157cf62..cb1e8c067f537 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -207,7 +207,7 @@ def str_count(arr, pat, flags=0): Flags for the `re` module. For a complete list, `see here `_. **kwargs - For compatability with other string methods. Not used. + For compatibility with other string methods. Not used. Returns ------- @@ -1358,7 +1358,7 @@ def str_split(arr, pat=None, n=None): Limit number of splits in output. ``None``, 0 and -1 will be interpreted as return all splits. expand : bool, default False - Expand the splitted strings into separate columns. + Expand the split strings into separate columns. * If ``True``, return DataFrame/MultiIndex expanding dimensionality. * If ``False``, return Series/Index, containing lists of strings. diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 1de43116d0b49..8ecb81397edb3 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -138,7 +138,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, as dateutil). Warning: yearfirst=True is not strict, but will prefer to parse - with year first (this is a known bug, based on dateutil beahavior). + with year first (this is a known bug, based on dateutil behavior). .. versionadded:: 0.16.1 @@ -181,8 +181,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, .. versionadded:: 0.20.0 cache : boolean, default False If True, use a cache of unique, converted dates to apply the datetime - conversion. May produce sigificant speed-up when parsing duplicate date - strings, especially ones with timezone offsets. + conversion. May produce significant speed-up when parsing duplicate + date strings, especially ones with timezone offsets. .. 
versionadded:: 0.23.0 diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index f876ceb8a26bf..5203cf036c146 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -60,7 +60,7 @@ class Styler(object): table_styles: list-like, default None list of {selector: (attr, value)} dicts; see Notes uuid: str, default None - a unique identifier to avoid CSS collisons; generated automatically + a unique identifier to avoid CSS collisions; generated automatically caption: str, default None caption to attach to the table @@ -79,7 +79,7 @@ class Styler(object): If using in the Jupyter notebook, Styler has defined a ``_repr_html_`` to automatically render itself. Otherwise call Styler.render to get - the genterated HTML. + the generated HTML. CSS classes are attached to the generated HTML @@ -120,7 +120,7 @@ def __init__(self, data, precision=None, table_styles=None, uuid=None, if data.ndim == 1: data = data.to_frame() if not data.index.is_unique or not data.columns.is_unique: - raise ValueError("style is not supported for non-unique indicies.") + raise ValueError("style is not supported for non-unique indices.") self.data = data self.index = data.index @@ -549,7 +549,7 @@ def _apply(self, func, axis=0, subset=None, **kwargs): def apply(self, func, axis=0, subset=None, **kwargs): """ - Apply a function column-wise, row-wise, or table-wase, + Apply a function column-wise, row-wise, or table-wise, updating the HTML representation with the result. Parameters @@ -1051,7 +1051,8 @@ def _bar_center_mid(s, color, width, base): def bar(self, subset=None, axis=0, color='#d65f5f', width=100, align='left'): """ - Color the background ``color`` proptional to the values in each column. + Color the background ``color`` proportional to the values in each + column. Excludes non-numeric data by default. Parameters diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 236d70609e76c..c7c16598ee432 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -56,7 +56,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, List of BigQuery column names in the desired order for results DataFrame. reauth : boolean, default False - Force Google BigQuery to reauthenticate the user. This is useful + Force Google BigQuery to re-authenticate the user. This is useful if multiple accounts are used. private_key : str, optional Service account private key in JSON format. Can be file path diff --git a/pandas/io/json/table_schema.py b/pandas/io/json/table_schema.py index 01f7db7d68664..6f663f8ff8433 100644 --- a/pandas/io/json/table_schema.py +++ b/pandas/io/json/table_schema.py @@ -219,7 +219,7 @@ def build_table_schema(data, index=True, primary_key=None, version=True): ----- See `_as_json_table_type` for conversion types. Timedeltas as converted to ISO8601 duration format with - 9 decimal places after the secnods field for nanosecond precision. + 9 decimal places after the seconds field for nanosecond precision. Categoricals are converted to the `any` dtype, and use the `enum` field constraint to list the allowed values. The `ordered` attribute is included diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index daa370d0ca61a..aa39e341792c7 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -297,7 +297,7 @@ def read_hdf(path_or_buf, key=None, mode='r', **kwargs): objects. .. versionadded:: 0.19.0 support for pathlib, py.path. - .. versionadded:: 0.21.0 support for __fspath__ proptocol. + .. versionadded:: 0.21.0 support for __fspath__ protocol. 
key : object, optional The group identifier in the store. Can be omitted if the HDF file @@ -3790,13 +3790,13 @@ class WORMTable(Table): table_type = u('worm') def read(self, **kwargs): - """ read the indicies and the indexing array, calculate offset rows and + """ read the indices and the indexing array, calculate offset rows and return """ raise NotImplementedError("WORMTable needs to implement read") def write(self, **kwargs): """ write in a format that we can search later on (but cannot append - to): write out the indicies and the values using _write_array + to): write out the indices and the values using _write_array (e.g. a CArray) create an indexing table so that we can search """ raise NotImplementedError("WORKTable needs to implement write") @@ -4694,7 +4694,7 @@ class Selection(object): ---------- table : a Table object where : list of Terms (or convertible to) - start, stop: indicies to start and/or stop selection + start, stop: indices to start and/or stop selection """ diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 87b7d13251f28..0819df97ba5fa 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1394,7 +1394,7 @@ def orientation(self): In statistics, `kernel density estimation`_ (KDE) is a non-parametric way to estimate the probability density function (PDF) of a random variable. This function uses Gaussian kernels and includes automatic - bandwith determination. + bandwidth determination. .. _kernel density estimation: https://en.wikipedia.org/wiki/Kernel_density_estimation @@ -2031,7 +2031,7 @@ def plot_series(data, kind='line', ax=None, # Series unique Tick label font size in points or as a string (e.g., `large`). rot : int or float, default 0 The rotation angle of labels (in degrees) - with respect to the screen coordinate sytem. + with respect to the screen coordinate system. grid : boolean, default True Setting this to True will show the grid. figsize : A tuple (width, height) in inches @@ -2063,7 +2063,7 @@ def plot_series(data, kind='line', ax=None, # Series unique * 'axes' : object of class matplotlib.axes.Axes * 'dict' : dict of matplotlib.lines.Line2D objects - * 'both' : a nametuple with strucure (ax, lines) + * 'both' : a namedtuple with structure (ax, lines) For data grouped with ``by``: @@ -2848,8 +2848,8 @@ def hist(self, bins=10, **kwds): >>> ax = s.plot.kde() A scalar bandwidth can be specified. Using a small bandwidth value can - lead to overfitting, while using a large bandwidth value may result - in underfitting: + lead to over-fitting, while using a large bandwidth value may result + in under-fitting: .. plot:: :context: close-figs @@ -3284,8 +3284,8 @@ def hist(self, by=None, bins=10, **kwds): >>> ax = df.plot.kde() A scalar bandwidth can be specified. Using a small bandwidth value can - lead to overfitting, while using a large bandwidth value may result - in underfitting: + lead to over-fitting, while using a large bandwidth value may result + in under-fitting: .. plot:: :context: close-figs @@ -3415,7 +3415,7 @@ def scatter(self, x, y, s=None, c=None, **kwds): - A sequence of color strings referred to by name, RGB or RGBA code, which will be used for each point's color recursively. For - intance ['green','yellow'] all points will be filled in green or + instance ['green','yellow'] all points will be filled in green or yellow, alternatively. 
- A column name or position whose values will be used to color the diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 6dd38187f7277..300e1acdea911 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1638,7 +1638,7 @@ def test_constructor_series_copy(self): def test_constructor_with_nas(self): # GH 5016 - # na's in indicies + # na's in indices def check(df): for i in range(len(df.columns)): diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index f0ba1851b28dd..a77c170221bea 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -130,7 +130,7 @@ def setup_method(self, method): setattr(self, o, d) def generate_indices(self, f, values=False): - """ generate the indicies + """ generate the indices if values is True , use the axis values is False, use the range """ diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index bfc74db73b813..49047e1da0996 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -391,13 +391,13 @@ def test_iloc_getitem_frame(self): expected = df.ix[[0, 2, 6], [0, 2]] tm.assert_frame_equal(result, expected) - # neg indicies + # neg indices result = df.iloc[[-1, 1, 3], [-1, 1]] with catch_warnings(record=True): expected = df.ix[[18, 2, 6], [6, 2]] tm.assert_frame_equal(result, expected) - # dups indicies + # dups indices result = df.iloc[[-1, -1, 1, 3], [-1, 1]] with catch_warnings(record=True): expected = df.ix[[18, 18, 2, 6], [6, 2]] diff --git a/pandas/tests/indexing/test_panel.py b/pandas/tests/indexing/test_panel.py index 81265c9f2941d..1085e2a61be48 100644 --- a/pandas/tests/indexing/test_panel.py +++ b/pandas/tests/indexing/test_panel.py @@ -43,12 +43,12 @@ def test_iloc_getitem_panel(self): expected = p.loc[['A', 'C']] tm.assert_panel_equal(result, expected) - # neg indicies + # neg indices result = p.iloc[[-1, 1], [-1, 1]] expected = p.loc[['D', 'B'], ['c', 'b']] tm.assert_panel_equal(result, expected) - # dups indicies + # dups indices result = p.iloc[[-1, -1, 1], [-1, 1]] expected = p.loc[['D', 'D', 'B'], ['c', 'b']] tm.assert_panel_equal(result, expected) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 381a059244858..d590cfd6b6c64 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -1484,7 +1484,7 @@ def test_append_with_data_columns(self): store.append('df', df[2:]) tm.assert_frame_equal(store['df'], df) - # check that we have indicies created + # check that we have indices created assert(store._handle.root.df.table.cols.index.is_indexed is True) assert(store._handle.root.df.table.cols.B.is_indexed is True) @@ -4511,7 +4511,7 @@ def do_copy(f, new_f=None, keys=None, keys = store.keys() assert set(keys) == set(tstore.keys()) - # check indicies & nrows + # check indices & nrows for k in tstore.keys(): if tstore.get_storer(k).is_table: new_t = tstore.get_storer(k) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 12d803a76e7f3..d95a2ad2d7f76 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1524,7 +1524,7 @@ def test_take(self): expected = self.panel.reindex(minor=['D', 'A', 'B', 'C']) assert_panel_equal(result, expected) - # neg indicies ok + # neg indices ok expected = self.panel.reindex(minor=['D', 'D', 'B', 'C']) result = self.panel.take([3, -1, 1, 2], axis=2) assert_panel_equal(result, expected)