From 2acac6e208ffafa63bd816b0fbe1ebcd6c6244de Mon Sep 17 00:00:00 2001 From: Min ho Kim Date: Sun, 23 Jun 2019 17:37:31 +1000 Subject: [PATCH] Fix typo Fix typo --- asv_bench/benchmarks/offset.py | 2 +- .../comparison/comparison_with_sas.rst | 2 +- .../comparison/comparison_with_stata.rst | 2 +- doc/source/user_guide/io.rst | 6 +++--- doc/source/user_guide/timeseries.rst | 2 +- doc/source/whatsnew/v0.10.1.rst | 2 +- doc/source/whatsnew/v0.14.0.rst | 2 +- doc/source/whatsnew/v0.14.1.rst | 2 +- doc/source/whatsnew/v0.19.0.rst | 2 +- doc/source/whatsnew/v0.21.0.rst | 4 ++-- doc/source/whatsnew/v0.23.0.rst | 2 +- doc/source/whatsnew/v0.23.1.rst | 2 +- doc/source/whatsnew/v0.24.0.rst | 10 +++++----- doc/source/whatsnew/v0.25.0.rst | 4 ++-- doc/source/whatsnew/v0.8.0.rst | 2 +- pandas/_libs/tslibs/fields.pyx | 2 +- pandas/compat/numpy/__init__.py | 2 +- pandas/conftest.py | 2 +- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/integer.py | 2 +- pandas/core/arrays/period.py | 2 +- pandas/core/arrays/sparse.py | 4 ++-- pandas/core/base.py | 2 +- pandas/core/computation/pytables.py | 2 +- pandas/core/dtypes/common.py | 4 ++-- pandas/core/frame.py | 12 ++++++------ pandas/core/generic.py | 12 ++++++------ pandas/core/groupby/generic.py | 2 +- pandas/core/groupby/grouper.py | 2 +- pandas/core/indexes/base.py | 6 +++--- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/multi.py | 6 +++--- pandas/core/indexes/range.py | 2 +- pandas/core/indexing.py | 2 +- pandas/core/internals/construction.py | 2 +- pandas/core/missing.py | 2 +- pandas/core/panel.py | 2 +- pandas/core/resample.py | 4 ++-- pandas/core/reshape/tile.py | 2 +- pandas/core/series.py | 2 +- pandas/core/sparse/scipy_sparse.py | 2 +- pandas/core/strings.py | 8 ++++---- pandas/core/tools/datetimes.py | 2 +- pandas/io/formats/excel.py | 2 +- pandas/io/json/json.py | 2 +- 
pandas/io/json/normalize.py | 2 +- pandas/io/json/table_schema.py | 2 +- pandas/io/pytables.py | 10 +++++----- pandas/io/stata.py | 2 +- pandas/plotting/_matplotlib/tools.py | 2 +- pandas/tests/arithmetic/test_numeric.py | 4 ++-- pandas/tests/arithmetic/test_object.py | 2 +- pandas/tests/arithmetic/test_period.py | 2 +- pandas/tests/arithmetic/test_timedelta64.py | 4 ++-- pandas/tests/arrays/test_datetimelike.py | 2 +- pandas/tests/arrays/test_integer.py | 4 ++-- pandas/tests/dtypes/test_common.py | 2 +- pandas/tests/extension/base/ops.py | 2 +- pandas/tests/extension/json/test_json.py | 2 +- pandas/tests/frame/test_combine_concat.py | 2 +- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/frame/test_indexing.py | 4 ++-- pandas/tests/frame/test_nonunique_indexes.py | 2 +- pandas/tests/groupby/test_apply.py | 2 +- pandas/tests/groupby/test_groupby.py | 2 +- pandas/tests/groupby/test_timegrouper.py | 2 +- .../tests/indexes/datetimes/test_partial_slicing.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 6 +++--- pandas/tests/indexes/interval/test_construction.py | 2 +- pandas/tests/indexes/multi/test_missing.py | 2 +- pandas/tests/indexes/test_category.py | 2 +- pandas/tests/indexing/multiindex/test_xs.py | 2 +- pandas/tests/indexing/test_coercion.py | 2 +- pandas/tests/indexing/test_floats.py | 2 +- pandas/tests/indexing/test_iloc.py | 2 +- pandas/tests/indexing/test_loc.py | 2 +- pandas/tests/io/excel/test_writers.py | 2 +- pandas/tests/io/formats/test_format.py | 2 +- pandas/tests/io/pytables/test_pytables.py | 10 +++++----- pandas/tests/io/test_parquet.py | 2 +- pandas/tests/io/test_sql.py | 2 +- pandas/tests/plotting/test_frame.py | 2 +- pandas/tests/plotting/test_series.py | 2 +- pandas/tests/reshape/merge/test_merge.py | 6 +++--- pandas/tests/reshape/test_concat.py | 4 ++-- pandas/tests/scalar/timedelta/test_timedelta.py | 2 +- pandas/tests/scalar/timestamp/test_unary_ops.py | 2 +- pandas/tests/series/test_missing.py | 2 +- 
pandas/tests/test_algos.py | 2 +- pandas/tests/test_base.py | 2 +- pandas/tests/test_multilevel.py | 2 +- pandas/tests/test_window.py | 6 +++--- pandas/tests/tseries/offsets/test_offsets.py | 2 +- pandas/tseries/offsets.py | 2 +- pandas/util/testing.py | 6 +++--- scripts/validate_docstrings.py | 2 +- 99 files changed, 148 insertions(+), 148 deletions(-) diff --git a/asv_bench/benchmarks/offset.py b/asv_bench/benchmarks/offset.py index 26e344758596f..9b738e699a5b3 100644 --- a/asv_bench/benchmarks/offset.py +++ b/asv_bench/benchmarks/offset.py @@ -9,7 +9,7 @@ pass hcal = pd.tseries.holiday.USFederalHolidayCalendar() -# These offests currently raise a NotImplimentedError with .apply_index() +# These offsets currently raise a NotImplimentedError with .apply_index() non_apply = [pd.offsets.Day(), pd.offsets.BYearEnd(), pd.offsets.BYearBegin(), diff --git a/doc/source/getting_started/comparison/comparison_with_sas.rst b/doc/source/getting_started/comparison/comparison_with_sas.rst index fc12c8524d3bf..cbedeec737ec0 100644 --- a/doc/source/getting_started/comparison/comparison_with_sas.rst +++ b/doc/source/getting_started/comparison/comparison_with_sas.rst @@ -660,7 +660,7 @@ example, to subtract the mean for each observation by smoker group. run; -pandas ``groubpy`` provides a ``transform`` mechanism that allows +pandas ``groupby`` provides a ``transform`` mechanism that allows these type of operations to be succinctly expressed in one operation. diff --git a/doc/source/getting_started/comparison/comparison_with_stata.rst b/doc/source/getting_started/comparison/comparison_with_stata.rst index bf2b03176ecd8..c354ed7872cb4 100644 --- a/doc/source/getting_started/comparison/comparison_with_stata.rst +++ b/doc/source/getting_started/comparison/comparison_with_stata.rst @@ -634,7 +634,7 @@ For example, to subtract the mean for each observation by smoker group. 
generate adj_total_bill = total_bill - group_bill -pandas ``groubpy`` provides a ``transform`` mechanism that allows +pandas ``groupby`` provides a ``transform`` mechanism that allows these type of operations to be succinctly expressed in one operation. diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 30a42de2ab287..7caaec62c0a8a 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -488,7 +488,7 @@ specification: .. versionadded:: 0.21.0 -Specifying ``dtype='cateogry'`` will result in an unordered ``Categorical`` +Specifying ``dtype='category'`` will result in an unordered ``Categorical`` whose ``categories`` are the unique values observed in the data. For more control on the categories and order, create a :class:`~pandas.api.types.CategoricalDtype` ahead of time, and pass that for @@ -1679,7 +1679,7 @@ S3 URLs are handled as well but require installing the `S3Fs df = pd.read_csv('s3://pandas-test/tips.csv') -If your S3 bucket requires cedentials you will need to set them as environment +If your S3 bucket requires credentials you will need to set them as environment variables or in the ``~/.aws/credentials`` config file, refer to the `S3Fs documentation on credentials `_. 
@@ -2078,7 +2078,7 @@ Dates written in nanoseconds need to be read back in nanoseconds: json = dfj2.to_json(date_unit='ns') - # Try to parse timestamps as millseconds -> Won't Work + # Try to parse timestamps as milliseconds -> Won't Work dfju = pd.read_json(json, date_unit='ms') dfju diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index f27e9c677d925..7bdec001a688f 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -1133,7 +1133,7 @@ Valid business hours are distinguished by whether it started from valid ``Busine pd.Timestamp('2014-08-01 17:00') + bh pd.Timestamp('2014-08-01 23:00') + bh - # Although 2014-08-02 is Satuaday, + # Although 2014-08-02 is Saturday, # it is valid because it starts from 08-01 (Friday). pd.Timestamp('2014-08-02 04:00') + bh diff --git a/doc/source/whatsnew/v0.10.1.rst b/doc/source/whatsnew/v0.10.1.rst index b5b2b889732cd..7d51ded1cad19 100644 --- a/doc/source/whatsnew/v0.10.1.rst +++ b/doc/source/whatsnew/v0.10.1.rst @@ -170,7 +170,7 @@ combined result, by using ``where`` on a selector table. 
df_mt, selector='df1_mt') store - # indiviual tables were created + # individual tables were created store.select('df1_mt') store.select('df2_mt') diff --git a/doc/source/whatsnew/v0.14.0.rst b/doc/source/whatsnew/v0.14.0.rst index d61b9a40438f8..f049006808c0f 100644 --- a/doc/source/whatsnew/v0.14.0.rst +++ b/doc/source/whatsnew/v0.14.0.rst @@ -816,7 +816,7 @@ Enhancements - Implemented ``Panel.pct_change`` (:issue:`6904`) - Added ``how`` option to rolling-moment functions to dictate how to handle resampling; :func:`rolling_max` defaults to max, :func:`rolling_min` defaults to min, and all others default to mean (:issue:`6297`) -- ``CustomBuisnessMonthBegin`` and ``CustomBusinessMonthEnd`` are now available (:issue:`6866`) +- ``CustomBusinessMonthBegin`` and ``CustomBusinessMonthEnd`` are now available (:issue:`6866`) - :meth:`Series.quantile` and :meth:`DataFrame.quantile` now accept an array of quantiles. - :meth:`~DataFrame.describe` now accepts an array of percentiles to include in the summary statistics (:issue:`4196`) diff --git a/doc/source/whatsnew/v0.14.1.rst b/doc/source/whatsnew/v0.14.1.rst index 98ebbd6a52344..fcfb22d074554 100644 --- a/doc/source/whatsnew/v0.14.1.rst +++ b/doc/source/whatsnew/v0.14.1.rst @@ -247,7 +247,7 @@ Bug Fixes - Bug in ``DatetimeIndex`` comparison doesn't handle ``NaT`` properly (:issue:`7529`) - Bug in passing input with ``tzinfo`` to some offsets ``apply``, ``rollforward`` or ``rollback`` resets ``tzinfo`` or raises ``ValueError`` (:issue:`7465`) - Bug in ``DatetimeIndex.to_period``, ``PeriodIndex.asobject``, ``PeriodIndex.to_timestamp`` doesn't preserve ``name`` (:issue:`7485`) -- Bug in ``DatetimeIndex.to_period`` and ``PeriodIndex.to_timestanp`` handle ``NaT`` incorrectly (:issue:`7228`) +- Bug in ``DatetimeIndex.to_period`` and ``PeriodIndex.to_timestamp`` handle ``NaT`` incorrectly (:issue:`7228`) - Bug in ``offsets.apply``, ``rollforward`` and ``rollback`` may return normal ``datetime`` (:issue:`7502`) - Bug in 
``resample`` raises ``ValueError`` when target contains ``NaT`` (:issue:`7227`) - Bug in ``Timestamp.tz_localize`` resets ``nanosecond`` info (:issue:`7534`) diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index de29a1eb93709..fe9fdd7448923 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -1513,7 +1513,7 @@ Bug Fixes - Bug in ``Series`` comparison may output incorrect result if rhs contains ``NaT`` (:issue:`9005`) - Bug in ``Series`` and ``Index`` comparison may output incorrect result if it contains ``NaT`` with ``object`` dtype (:issue:`13592`) - Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`) -- Bug in ``Peirod`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`) +- Bug in ``Period`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`) - Bug in ``pd.set_eng_float_format()`` that would prevent NaN and Inf from formatting (:issue:`11981`) - Bug in ``.unstack`` with ``Categorical`` dtype resets ``.ordered`` to ``True`` (:issue:`13249`) - Clean some compile time warnings in datetime parsing (:issue:`13607`) diff --git a/doc/source/whatsnew/v0.21.0.rst b/doc/source/whatsnew/v0.21.0.rst index 5c6f1d1af6b54..44b50437a6dfe 100644 --- a/doc/source/whatsnew/v0.21.0.rst +++ b/doc/source/whatsnew/v0.21.0.rst @@ -263,7 +263,7 @@ Now, to find prices per store/product, we can simply do: See the :ref:`documentation ` for more. -.. _whatsnew_0210.enhancements.reanme_categories: +.. _whatsnew_0210.enhancements.rename_categories: ``Categorical.rename_categories`` accepts a dict-like ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -418,7 +418,7 @@ New Behavior, without regard to the bottleneck installation: s.sum() -Note that this also changes the sum of an empty ``Series``. 
Previously this always returned 0 regardless of a ``bottlenck`` installation: +Note that this also changes the sum of an empty ``Series``. Previously this always returned 0 regardless of a ``bottleneck`` installation: .. code-block:: ipython diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index 98479fa30eb15..51efa37b55add 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -276,7 +276,7 @@ To show only observed values: df.groupby(['A', 'B', 'C'], observed=True).count() -For pivotting operations, this behavior is *already* controlled by the ``dropna`` keyword: +For pivoting operations, this behavior is *already* controlled by the ``dropna`` keyword: .. ipython:: python diff --git a/doc/source/whatsnew/v0.23.1.rst b/doc/source/whatsnew/v0.23.1.rst index f6af2990c935b..0218c3b02a413 100644 --- a/doc/source/whatsnew/v0.23.1.rst +++ b/doc/source/whatsnew/v0.23.1.rst @@ -26,7 +26,7 @@ Fixed Regressions **Comparing Series with datetime.date** We've reverted a 0.23.0 change to comparing a :class:`Series` holding datetimes and a ``datetime.date`` object (:issue:`21152`). -In pandas 0.22 and earlier, comparing a Series holding datetimes and ``datetime.date`` objects would coerce the ``datetime.date`` to a datetime before comapring. +In pandas 0.22 and earlier, comparing a Series holding datetimes and ``datetime.date`` objects would coerce the ``datetime.date`` to a datetime before comparing. This was inconsistent with Python, NumPy, and :class:`DatetimeIndex`, which never consider a datetime and ``datetime.date`` equal. In 0.23.0, we unified operations between DatetimeIndex and Series, and in the process changed comparisons between a Series of datetimes and ``datetime.date`` without warning. 
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 05d6a03639a2d..086519ad75192 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1061,7 +1061,7 @@ The affected cases are: .. code-block:: ipython - # Comparison operations and arithmetic opeartions both raise ValueError. + # Comparison operations and arithmetic operations both raise ValueError. In [6]: df == (1, 2, 3) ... ValueError: Unable to coerce to Series, length must be 2: given 3 @@ -1324,7 +1324,7 @@ Deprecations - :meth:`Series.clip_lower`, :meth:`Series.clip_upper`, :meth:`DataFrame.clip_lower` and :meth:`DataFrame.clip_upper` are deprecated and will be removed in a future version. Use ``Series.clip(lower=threshold)``, ``Series.clip(upper=threshold)`` and the equivalent ``DataFrame`` methods (:issue:`24203`) - :meth:`Series.nonzero` is deprecated and will be removed in a future version (:issue:`18262`) - Passing an integer to :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtypes is deprecated, will raise ``TypeError`` in a future version. Use ``obj.fillna(pd.Timedelta(...))`` instead (:issue:`24694`) -- ``Series.cat.categorical``, ``Series.cat.name`` and ``Sersies.cat.index`` have been deprecated. Use the attributes on ``Series.cat`` or ``Series`` directly. (:issue:`24751`). +- ``Series.cat.categorical``, ``Series.cat.name`` and ``Series.cat.index`` have been deprecated. Use the attributes on ``Series.cat`` or ``Series`` directly. (:issue:`24751`). - Passing a dtype without a precision like ``np.dtype('datetime64')`` or ``timedelta64`` to :class:`Index`, :class:`DatetimeIndex` and :class:`TimedeltaIndex` is now deprecated. Use the nanosecond-precision dtype instead (:issue:`24753`). .. 
_whatsnew_0240.deprecations.datetimelike_int_ops: @@ -1604,7 +1604,7 @@ Datetimelike - Bug in :class:`DatetimeIndex` where calling ``np.array(dtindex, dtype=object)`` would incorrectly return an array of ``long`` objects (:issue:`23524`) - Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and `dtype=object` would incorrectly raise a ``ValueError`` (:issue:`23524`) - Bug in :class:`Index` where calling ``np.array(dtindex, dtype=object)`` on a timezone-naive :class:`DatetimeIndex` would return an array of ``datetime`` objects instead of :class:`Timestamp` objects, potentially losing nanosecond portions of the timestamps (:issue:`23524`) -- Bug in :class:`Categorical.__setitem__` not allowing setting with another ``Categorical`` when both are undordered and have the same categories, but in a different order (:issue:`24142`) +- Bug in :class:`Categorical.__setitem__` not allowing setting with another ``Categorical`` when both are unordered and have the same categories, but in a different order (:issue:`24142`) - Bug in :func:`date_range` where using dates with millisecond resolution or higher could return incorrect values or the wrong number of values in the index (:issue:`24110`) - Bug in :class:`DatetimeIndex` where constructing a :class:`DatetimeIndex` from a :class:`Categorical` or :class:`CategoricalIndex` would incorrectly drop timezone information (:issue:`18664`) - Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where indexing with ``Ellipsis`` would incorrectly lose the index's ``freq`` attribute (:issue:`21282`) @@ -1670,7 +1670,7 @@ Timezones Offsets ^^^^^^^ -- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operatons (:issue:`14774`) +- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`) - Bug in :class:`DateOffset` where keyword arguments ``week`` and ``milliseconds`` were accepted and 
ignored. Passing these will now raise ``ValueError`` (:issue:`19398`) - Bug in adding :class:`DateOffset` with :class:`DataFrame` or :class:`PeriodIndex` incorrectly raising ``TypeError`` (:issue:`23215`) - Bug in comparing :class:`DateOffset` objects with non-DateOffset objects, particularly strings, raising ``ValueError`` instead of returning ``False`` for equality checks and ``True`` for not-equal checks (:issue:`23524`) @@ -1838,7 +1838,7 @@ Groupby/Resample/Rolling ``SeriesGroupBy`` when the grouping variable only contains NaNs and numpy version < 1.13 (:issue:`21956`). - Multiple bugs in :func:`pandas.core.window.Rolling.min` with ``closed='left'`` and a datetime-like index leading to incorrect results and also segfault. (:issue:`21704`) -- Bug in :meth:`pandas.core.resample.Resampler.apply` when passing postiional arguments to applied func (:issue:`14615`). +- Bug in :meth:`pandas.core.resample.Resampler.apply` when passing positional arguments to applied func (:issue:`14615`). - Bug in :meth:`Series.resample` when passing ``numpy.timedelta64`` to ``loffset`` kwarg (:issue:`7687`). - Bug in :meth:`pandas.core.resample.Resampler.asfreq` when frequency of ``TimedeltaIndex`` is a subperiod of a new frequency (:issue:`13022`). - Bug in :meth:`pandas.core.groupby.SeriesGroupBy.mean` when values were integral but could not fit inside of int64, overflowing instead. 
(:issue:`22487`) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 19636f42c6129..109005364fca6 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -119,7 +119,7 @@ Other Enhancements - ``Series.str`` has gained :meth:`Series.str.casefold` method to removes all case distinctions present in a string (:issue:`25405`) - :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`) - :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behavior of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`) -- :meth:`RangeIndex.union` now supports the ``sort`` argument. If ``sort=False`` an unsorted ``Int64Index`` is always returned. ``sort=None`` is the default and returns a mononotically increasing ``RangeIndex`` if possible or a sorted ``Int64Index`` if not (:issue:`24471`) +- :meth:`RangeIndex.union` now supports the ``sort`` argument. If ``sort=False`` an unsorted ``Int64Index`` is always returned. 
``sort=None`` is the default and returns a monotonically increasing ``RangeIndex`` if possible or a sorted ``Int64Index`` if not (:issue:`24471`) - :meth:`TimedeltaIndex.intersection` now also supports the ``sort`` keyword (:issue:`24471`) - :meth:`DataFrame.rename` now supports the ``errors`` argument to raise errors when attempting to rename nonexistent keys (:issue:`13473`) - Added :ref:`api.frame.sparse` for working with a ``DataFrame`` whose values are sparse (:issue:`25681`) @@ -694,7 +694,7 @@ I/O - Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`) - Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to :class:`Timestamp`, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`) - Bug in :func:`json_normalize` for ``errors='ignore'`` where missing values in the input data, were filled in resulting ``DataFrame`` with the string ``"nan"`` instead of ``numpy.nan`` (:issue:`25468`) -- :meth:`DataFrame.to_html` now raises ``TypeError`` when using an invalid type for the ``classes`` parameter instead of ``AsseertionError`` (:issue:`25608`) +- :meth:`DataFrame.to_html` now raises ``TypeError`` when using an invalid type for the ``classes`` parameter instead of ``AssertionError`` (:issue:`25608`) - Bug in :meth:`DataFrame.to_string` and :meth:`DataFrame.to_latex` that would lead to incorrect output when the ``header`` keyword is used (:issue:`16718`) - Bug in :func:`read_csv` not properly interpreting the UTF8 encoded filenames on Windows on Python 3.6+ (:issue:`15086`) - Improved performance in :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` when converting columns that have missing values (:issue:`25772`) diff --git a/doc/source/whatsnew/v0.8.0.rst b/doc/source/whatsnew/v0.8.0.rst 
index 575ec6b7d19f4..664325ac063c0 100644 --- a/doc/source/whatsnew/v0.8.0.rst +++ b/doc/source/whatsnew/v0.8.0.rst @@ -77,7 +77,7 @@ Time series changes and improvements interface while enabling working with nanosecond-resolution data. Also provides :ref:`easy time zone conversions `. - Enhanced support for :ref:`time zones `. Add - `tz_convert` and ``tz_lcoalize`` methods to TimeSeries and DataFrame. All + `tz_convert` and ``tz_localize`` methods to TimeSeries and DataFrame. All timestamps are stored as UTC; Timestamps from DatetimeIndex objects with time zone set will be localized to local time. Time zone conversions are therefore essentially free. User needs to know very little about pytz library now; only diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 4ebf5e587a727..2a41b5ff2339c 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -171,7 +171,7 @@ def get_start_end_field(int64_t[:] dtindex, object field, # YearBegin(), BYearBegin() use month = starting month of year. # QuarterBegin(), BQuarterBegin() use startingMonth = starting - # month of year. Other offests use month, startingMonth as ending + # month of year. Other offsets use month, startingMonth as ending # month of year. 
if (freqstr[0:2] in ['MS', 'QS', 'AS']) or ( diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index c738cc74e46a4..22bfab8b7c6d6 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -1,4 +1,4 @@ -""" support numpy compatiblitiy across versions """ +""" support numpy compatibility across versions """ from distutils.version import LooseVersion import re diff --git a/pandas/conftest.py b/pandas/conftest.py index 4bcd0ea8442e6..058361af343b6 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -122,7 +122,7 @@ def observed(request): """ pass in the observed keyword to groupby for [True, False] This indicates whether categoricals should return values for values which are not in the grouper [False / None], or only values which - appear in the grouper [True]. [None] is supported for future compatiblity + appear in the grouper [True]. [None] is supported for future compatibility if we decide to change the default (and would need to warn if this parameter is not passed)""" return request.param diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index c709cd9e9f0b2..20fd582179dc6 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -653,7 +653,7 @@ def factorize( ----- :meth:`pandas.factorize` offers a `sort` keyword as well. """ - # Impelmentor note: There are two ways to override the behavior of + # Implementer note: There are two ways to override the behavior of # pandas.factorize # 1. _values_for_factorize and _from_factorize. 
# Specify the values passed to pandas' internal factorization diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d415dbbdaf0a3..6e7217762a3fb 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -676,7 +676,7 @@ def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs): def _has_same_tz(self, other): zzone = self._timezone - # vzone sholdn't be None if value is non-datetime like + # vzone shouldn't be None if value is non-datetime like if isinstance(other, np.datetime64): # convert to Timestamp as np.datetime64 doesn't have tz attr other = Timestamp(other) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 589e98f016f69..07d5664f98714 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -28,7 +28,7 @@ class _IntegerDtype(ExtensionDtype): An ExtensionDtype to hold a single size & kind of integer dtype. These specific implementations are subclasses of the non-public - _IntegerDtype. For example we have Int8Dtype to represnt signed int 8s. + _IntegerDtype. For example we have Int8Dtype to represent signed int 8s. The attributes name & type are set when these subclasses are created. """ diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index ece05567d3343..3a9322773fc69 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -854,7 +854,7 @@ def dt64arr_to_periodarr(data, freq, tz=None): ------- ordinals : ndarray[int] freq : Tick - The frequencey extracted from the Series or DatetimeIndex if that's + The frequency extracted from the Series or DatetimeIndex if that's used. 
""" diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 3dda6868a80da..d692fe6d7cabe 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -562,7 +562,7 @@ class SparseArray(PandasObject, ExtensionArray, ExtensionOpsMixin): * 'block': Stores a `block` and `block_length` for each contiguous *span* of sparse values. This is best when sparse data tends to be clumped together, with large - regsions of ``fill-value`` values between sparse values. + regions of ``fill-value`` values between sparse values. * 'integer': uses an integer to store the location of each sparse value. @@ -1316,7 +1316,7 @@ def _concat_same_type(cls, to_concat): sp_index = IntIndex(length, indices) else: - # when concatentating block indices, we don't claim that you'll + # when concatenating block indices, we don't claim that you'll # get an identical index as concating the values and then # creating a new index. We don't want to spend the time trying # to merge blocks across arrays in `to_concat`, so the resulting diff --git a/pandas/core/base.py b/pandas/core/base.py index ab9d8b9d778e5..30e800cb9bd73 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1137,7 +1137,7 @@ def __iter__(self): ------- iterator """ - # We are explicity making element iterators. + # We are explicitly making element iterators. 
if is_datetimelike(self._values): return map(com.maybe_box_datetimelike, self._values) elif is_extension_array_dtype(self._values): diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 2a762b5ee24b6..25cfa8fe17697 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -410,7 +410,7 @@ def visit_Assign(self, node, **kwargs): return self.visit(cmpr) def visit_Subscript(self, node, **kwargs): - # only allow simple suscripts + # only allow simple subscripts value = self.visit(node.value) slobj = self.visit(node.slice) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index ce99d150880c6..b2b74e2a70ca9 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1835,7 +1835,7 @@ def is_complex_dtype(arr_or_dtype): Returns ------- boolean - Whether or not the array or dtype is of a compex dtype. + Whether or not the array or dtype is of a complex dtype. Examples -------- @@ -1929,7 +1929,7 @@ def _is_dtype_type(arr_or_dtype, condition): Returns ------- - bool : if the condition is satisifed for the arr_or_dtype + bool : if the condition is satisfied for the arr_or_dtype """ if arr_or_dtype is None: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6746844f4b1fa..fd2e1e3e41ced 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2145,7 +2145,7 @@ def to_parquet(self, fname, engine='auto', compression='snappy', col_space='The minimum width of each column in CSS length ' 'units. An int is assumed to be px units.\n\n' ' .. 
versionadded:: 0.25.0\n' - ' Abillity to use str') + ' Ability to use str') @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) def to_html(self, buf=None, columns=None, col_space=None, header=True, @@ -5312,7 +5312,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): this_mask = isna(series) other_mask = isna(otherSeries) - # don't overwrite columns unecessarily + # don't overwrite columns unnecessarily # DO propagate if this column is not in the intersection if not overwrite and other_mask.all(): result[col] = this[col].copy() @@ -5572,7 +5572,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, else: mask = notna(this) - # don't overwrite columns unecessarily + # don't overwrite columns unnecessarily if mask.all(): continue @@ -6508,7 +6508,7 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, 2 13 dtype: int64 - Retuning a list-like will result in a Series + Returning a list-like will result in a Series >>> df.apply(lambda x: [1, 2], axis=1) 0 [1, 2] @@ -6993,7 +6993,7 @@ def round(self, decimals=0, *args, **kwargs): 3 0.2 0.2 With a dict, the number of places for specific columns can be - specfified with the column names as key and the number of decimal + specified with the column names as key and the number of decimal places as value >>> df.round({'dogs': 1, 'cats': 0}) @@ -7004,7 +7004,7 @@ def round(self, decimals=0, *args, **kwargs): 3 0.2 0.0 Using a Series, the number of places for specific columns can be - specfified with the column names as index and the number of + specified with the column names as index and the number of decimal places as value >>> decimals = pd.Series([0, 1], index=['cats', 'dogs']) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0e2a6a0cac414..992c83e66090e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3193,7 +3193,7 @@ def _slice(self, slobj, axis=0, kind=None): result = result.__finalize__(self) # 
this could be a view - # but only in a single-dtyped view slicable case + # but only in a single-dtyped view sliceable case is_copy = axis != 0 or result._is_view result._set_is_copy(self, copy=is_copy) return result @@ -3243,7 +3243,7 @@ def _check_setitem_copy(self, stacklevel=4, t='setting', force=False): force : boolean, default False if True, then force showing an error - validate if we are doing a settitem on a chained copy. + validate if we are doing a setitem on a chained copy. If you call this function, be sure to set the stacklevel such that the user will see the error *at the level of setting* @@ -3644,7 +3644,7 @@ class animal locomotion result.index = new_index # this could be a view - # but only in a single-dtyped view slicable case + # but only in a single-dtyped view sliceable case result._set_is_copy(self, copy=not result._is_view) return result @@ -6488,7 +6488,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, for c, src in to_replace.items(): if c in value and c in self: # object conversion is handled in - # series.replace which is called recursivelly + # series.replace which is called recursively res[c] = res[c].replace(to_replace=src, value=value[c], inplace=False, @@ -6724,7 +6724,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Note how the last entry in column 'a' is interpolated differently, because there is no entry after it to use for interpolation. Note how the first entry in column 'b' remains ``NaN``, because there - is no entry befofe it to use for interpolation. + is no entry before it to use for interpolation. >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0), ... (np.nan, 2.0, np.nan, np.nan), @@ -9576,7 +9576,7 @@ def describe(self, percentiles=None, include=None, exclude=None): DataFrame.max: Maximum of the values in the object. DataFrame.min: Minimum of the values in the object. DataFrame.mean: Mean of the values. - DataFrame.std: Standard deviation of the obersvations. 
+ DataFrame.std: Standard deviation of the observations. DataFrame.select_dtypes: Subset of a DataFrame including/excluding columns based on their dtype. diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 35ffa552913ae..91be320a3e674 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1316,7 +1316,7 @@ def _apply_to_column_groupbys(self, func): return func(self) def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None): - """Calcuate pct_change of each value to previous entry in group""" + """Calculate pct_change of each value to previous entry in group""" # TODO: Remove this conditional when #23918 is fixed if freq: return self.apply(lambda x: x.pct_change(periods=periods, diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index febfdc7bdf908..d0f28bed4399b 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -494,7 +494,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, # not an iterable of keys. In the meantime, we attempt to provide # a warning. We can assume that the user wanted a list of keys when # the key is not in the index. We just have to be careful with - # unhashble elements of `key`. Any unhashable elements implies that + # unhashable elements of `key`. Any unhashable elements implies that # they wanted a list of keys. # https://github.com/pandas-dev/pandas/issues/18314 is_tuple = isinstance(key, tuple) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 73abd708415a1..cb5b4a6c8993c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1131,7 +1131,7 @@ def to_flat_index(self): .. versionadded:: 0.24.0 - This is implemented for compatability with subclass implementations + This is implemented for compatibility with subclass implementations when chaining. 
Returns @@ -1486,7 +1486,7 @@ def _get_level_values(self, level): Return an Index of values for requested level. This is primarily useful to get an individual level of values from a - MultiIndex, but is provided on Index as well for compatability. + MultiIndex, but is provided on Index as well for compatibility. Parameters ---------- @@ -3885,7 +3885,7 @@ def _try_convert_to_int_index(cls, data, copy, name, dtype): from .numeric import Int64Index, UInt64Index if not is_unsigned_integer_dtype(dtype): # skip int64 conversion attempt if uint-like dtype is passed, as - # this could return Int64Index when UInt64Index is what's desrired + # this could return Int64Index when UInt64Index is what's desired try: res = data.astype('i8', copy=False) if (res == data).all(): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 122c30ae7dfd5..3d3774ce48e8b 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -534,7 +534,7 @@ def _can_reindex(self, indexer): @Appender(_index_shared_docs['where']) def where(self, cond, other=None): # TODO: Investigate an alternative implementation with - # 1. copy the underyling Categorical + # 1. copy the underlying Categorical # 2. setitem with `cond` and `other` # 3. Rebuild CategoricalIndex. 
if other is None: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e6d876436c986..5ce670d9fe33e 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -376,7 +376,7 @@ def _is_dates_only(self): def __reduce__(self): - # we use a special reudce here because we need + # we use a special reduce here because we need # to simply set the .tz (and not reinterpret it) d = dict(data=self._data) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 577d0221cd8da..49f657332bbbf 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -390,7 +390,7 @@ def itemsize(self): 'a future version') warnings.warn(msg, FutureWarning, stacklevel=2) - # supress the warning from the underlying left/right itemsize + # suppress the warning from the underlying left/right itemsize with warnings.catch_warnings(): warnings.simplefilter('ignore') return self.left.itemsize + self.right.itemsize diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0d6e75f95f863..a06d304fb5a22 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -947,7 +947,7 @@ def f(l): def memory_usage(self, deep=False): # we are overwriting our base class to avoid # computing .values here which could materialize - # a tuple representation uncessarily + # a tuple representation unnecessarily return self._nbytes(deep) @cache_readonly @@ -1074,7 +1074,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, sentinel = '' # GH3547 # use value of sparsify as sentinel, unless it's an obvious - # "Truthey" value + # "Truthy" value if sparsify not in [True, 1]: sentinel = sparsify # little bit of a kludge job for #1217 @@ -2729,7 +2729,7 @@ def convert_indexer(start, stop, step, indexer=indexer, return m if isinstance(key, slice): - # handle a slice, returnig a slice if we can + # handle a slice, returning a slice if we can # otherwise a 
boolean indexer try: diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index ab39969af8db0..47dad1788e021 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -525,7 +525,7 @@ def _union(self, other, sort): sort : False or None, default None Whether to sort resulting index. ``sort=None`` returns a - mononotically increasing ``RangeIndex`` if possible or a sorted + monotonically increasing ``RangeIndex`` if possible or a sorted ``Int64Index`` if not. ``sort=False`` always returns an unsorted ``Int64Index`` diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6a21adb1d16ae..f6aa54f4836d9 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -955,7 +955,7 @@ def _getitem_lowerdim(self, tup): def _getitem_nested_tuple(self, tup): # we have a nested tuple so have at least 1 multi-index level - # we should be able to match up the dimensionaility here + # we should be able to match up the dimensionality here # we have too many indexers for our dim, but have at least 1 # multi-index dimension, try to see if we have something like diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index f564ac13dc41d..d766d7f06d34a 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -588,7 +588,7 @@ def sanitize_array(data, index, dtype=None, copy=False, subarr = data # everything else in this block must also handle ndarray's, - # becuase we've unwrapped PandasArray into an ndarray. + # because we've unwrapped PandasArray into an ndarray. 
if dtype is not None: subarr = data.astype(dtype) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index cdb3b77567829..4230b212f567a 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -619,7 +619,7 @@ def mask_zero_div_zero(x, y, result, copy=False): def dispatch_missing(op, left, right, result): """ - Fill nulls caused by division by zero, casting to a diffferent dtype + Fill nulls caused by division by zero, casting to a different dtype if necessary. Parameters diff --git a/pandas/core/panel.py b/pandas/core/panel.py index c65a73bd0d3f0..9d6b7333ca39f 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1392,7 +1392,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, Parameters ---------- other : Panel, or object coercible to Panel - The object from which the caller will be udpated. + The object from which the caller will be updated. join : {'left', 'right', 'outer', 'inner'}, default 'left' How individual DataFrames are joined. overwrite : bool, default True diff --git a/pandas/core/resample.py b/pandas/core/resample.py index d1d99d28e59b6..632b5a9c5e002 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -48,7 +48,7 @@ class Resampler(_GroupBy): groupby : a TimeGrouper object axis : int, default 0 kind : str or None - 'period', 'timestamp' to override default index treatement + 'period', 'timestamp' to override default index treatment Returns ------- @@ -1602,7 +1602,7 @@ def _take_new_index(obj, indexer, new_index, axis=0): def _get_timestamp_range_edges(first, last, offset, closed='left', base=0): """ - Adjust the `first` Timestamp to the preceeding Timestamp that resides on + Adjust the `first` Timestamp to the preceding Timestamp that resides on the provided offset. Adjust the `last` Timestamp to the following Timestamp that resides on the provided offset. 
Input Timestamps that already reside on the offset will be adjusted depending on the type of diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 8c29bdc2a974c..96124331e43ef 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -429,7 +429,7 @@ def _convert_bin_to_numeric_type(bins, dtype): def _convert_bin_to_datelike_type(bins, dtype): """ - Convert bins to a DatetimeIndex or TimedeltaIndex if the orginal dtype is + Convert bins to a DatetimeIndex or TimedeltaIndex if the original dtype is datelike Parameters diff --git a/pandas/core/series.py b/pandas/core/series.py index 11e578e74f6e7..730a96f5435a1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1481,7 +1481,7 @@ def iteritems(self): Lazily iterate over (index, value) tuples. This method returns an iterable tuple (index, value). This is - convienient if you want to create a lazy iterator. Note that the + convenient if you want to create a lazy iterator. Note that the methods Series.items and Series.iteritems are the same methods. 
Returns diff --git a/pandas/core/sparse/scipy_sparse.py b/pandas/core/sparse/scipy_sparse.py index 0dd8958e93c13..7ff0f46575661 100644 --- a/pandas/core/sparse/scipy_sparse.py +++ b/pandas/core/sparse/scipy_sparse.py @@ -42,7 +42,7 @@ def get_indexers(levels): values_ilabels = [x[0] for x in values_ilabels] # # performance issues with groupby ################################### - # TODO: these two lines can rejplace the code below but + # TODO: these two lines can replace the code below but # groupby is too slow (in some cases at least) # labels_to_i = ss.groupby(level=levels, sort=sort_labels).first() # labels_to_i[:] = np.arange(labels_to_i.shape[0]) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 6ebfbc8bb0ee0..710b29c6a6536 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2058,7 +2058,7 @@ def _get_series_list(self, others, ignore_index=False): # self._orig is either Series or Index idx = self._orig if isinstance(self._orig, Index) else self._orig.index - err_msg = ('others must be Series, Index, DataFrame, np.ndarrary or ' + err_msg = ('others must be Series, Index, DataFrame, np.ndarray or ' 'list-like (either containing only strings or containing ' 'only objects of type Series/Index/list-like/np.ndarray)') @@ -2155,7 +2155,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): Parameters ---------- - others : Series, Index, DataFrame, np.ndarrary or list-like + others : Series, Index, DataFrame, np.ndarray or list-like Series, Index, DataFrame, np.ndarray (one- or two-dimensional) and other list-likes of strings must have the same length as the calling Series/Index, with the exception of indexed objects (i.e. 
@@ -2571,7 +2571,7 @@ def rsplit(self, pat=None, n=-1, expand=False): 0 Linda van der Berg 1 George Pitt - Rivers - To return a Series containining tuples instead of a DataFrame: + To return a Series containing tuples instead of a DataFrame: >>> s.str.partition('-', expand=False) 0 (Linda van der Berg, , ) @@ -3292,7 +3292,7 @@ def rindex(self, sub, start=0, end=None): The ``s5.str.istitle`` method checks for whether all words are in title case (whether only the first letter of each word is capitalized). Words are - assumed to be as any sequence of non-numeric characters seperated by + assumed to be as any sequence of non-numeric characters separated by whitespace characters. >>> s5.str.istitle() diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 8e6331fe44e6b..5893ff0e0dd8f 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -363,7 +363,7 @@ def _adjust_to_origin(arg, origin, unit): raise ValueError("incompatible 'arg' type for given " "'origin'='julian'") - # premptively check this for a nice range + # preemptively check this for a nice range j_max = Timestamp.max.to_julian_date() - j0 j_min = Timestamp.min.to_julian_date() - j0 if np.any(arg > j_max) or np.any(arg < j_min): diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 4db00e34b39e2..5792f6e2a5a08 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -557,7 +557,7 @@ def _format_hierarchical_rows(self): # MultiIndex columns require an extra row # with index names (blank if None) for - # unambigous round-trip, unless not merging, + # unambiguous round-trip, unless not merging, # in which case the names all go on one row Issue #11328 if isinstance(self.columns, ABCMultiIndex) and self.merge_cells: self.rowcounter += 1 diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 7bafa15bb1979..f14b615471ccc 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -180,7 +180,7 @@ def 
__init__(self, obj, orient, date_format, double_precision, self.schema = build_table_schema(obj, index=self.index) - # NotImplementd on a column MultiIndex + # NotImplemented on a column MultiIndex if obj.ndim == 2 and isinstance(obj.columns, MultiIndex): raise NotImplementedError( "orient='table' is not supported for MultiIndex") diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index fa4e35b08bf6e..2d8bc20b1195e 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -79,7 +79,7 @@ def nested_to_record(ds, prefix="", sep=".", level=0): else: newkey = prefix + sep + k - # only dicts gets recurse-flattend + # only dicts gets recurse-flattened # only at level>1 do we rename the rest of the keys if not isinstance(v, dict): if level != 0: # so we skip copying for top level, common case diff --git a/pandas/io/json/table_schema.py b/pandas/io/json/table_schema.py index 7742bc717b184..a54f5cdf723a3 100644 --- a/pandas/io/json/table_schema.py +++ b/pandas/io/json/table_schema.py @@ -142,7 +142,7 @@ def convert_json_field_to_pandas_type(field): 'int64' >>> convert_json_field_to_pandas_type({'name': 'a_categorical', 'type': 'any', - 'contraints': {'enum': [ + 'constraints': {'enum': [ 'a', 'b', 'c']}, 'ordered': True}) 'CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)' diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 17d580bae5cf1..97d5b1dd2a1e5 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -824,7 +824,7 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, raise ValueError( "all tables must have exactly the same nrows!") - # axis is the concentation axes + # axis is the concatenation axes axis = list({t.non_index_axes[0][0] for t in tbls})[0] def func(_start, _stop, _where): @@ -948,7 +948,7 @@ def append(self, key, value, format=None, append=True, columns=None, of the object are indexed. See `here `__. 
min_itemsize : dict of columns that specify minimum string sizes - nan_rep : string to use as string nan represenation + nan_rep : string to use as string nan representation chunksize : size to chunk the writing expectedrows : expected TOTAL row size of this table encoding : default None, provide an encoding for strings @@ -1343,7 +1343,7 @@ def error(t): else: - # distiguish between a frame/table + # distinguish between a frame/table tt = 'legacy_panel' try: fields = group.table._v_attrs.fields @@ -3316,7 +3316,7 @@ def validate_version(self, where=None): warnings.warn(ws, IncompatibilityWarning) def validate_min_itemsize(self, min_itemsize): - """validate the min_itemisze doesn't contain items that are not in the + """validate the min_itemsize doesn't contain items that are not in the axes this needs data_columns to be defined """ if min_itemsize is None: @@ -3500,7 +3500,7 @@ def validate_data_columns(self, data_columns, min_itemsize): def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, min_itemsize=None, **kwargs): """ create and return the axes - leagcy tables create an indexable column, indexable index, + legacy tables create an indexable column, indexable index, non-indexable fields Parameters diff --git a/pandas/io/stata.py b/pandas/io/stata.py index d8dfd15477974..00b7a29b27b63 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -2708,7 +2708,7 @@ class StataWriter117(StataWriter): Each label must be 80 characters or smaller. convert_strl : list List of columns names to convert to Stata StrL format. Columns with - more than 2045 characters are aautomatically written as StrL. + more than 2045 characters are automatically written as StrL. Smaller columns can be converted by including the column name. Using StrLs can reduce output file size when strings are longer than 8 characters, and either frequently repeated or sparse. 
diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index e491cfc3309a0..acb5ab7b8e04b 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -133,7 +133,7 @@ def _subplots(naxes=None, sharex=False, sharey=False, squeeze=True, Number of rows and columns of the subplot grid. If not specified, calculated from naxes and layout_type - layout_type : {'box', 'horziontal', 'vertical'}, default 'box' + layout_type : {'box', 'horizontal', 'vertical'}, default 'box' Specify how to layout the subplot grid. fig_kw : Other keyword arguments to be passed to the figure() call. diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 256ee930b4cda..f58f8981317df 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -1,4 +1,4 @@ -# Arithmetc tests for DataFrame/Series/Index/Array classes that should +# Arithmetic tests for DataFrame/Series/Index/Array classes that should # behave identically. # Specifically for numeric dtypes from collections import abc @@ -587,7 +587,7 @@ def test_operators_frame(self): tm.assert_series_equal(ts / ts, ts / df['A'], check_names=False) - # TODO: this came from tests.series.test_analytics, needs cleannup and + # TODO: this came from tests.series.test_analytics, needs cleanup and # de-duplication with test_modulo above def test_modulo2(self): with np.errstate(all='ignore'): diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index 8d67e02d514ff..dd931939ddf51 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -1,4 +1,4 @@ -# Arithmetc tests for DataFrame/Series/Index/Array classes that should +# Arithmetic tests for DataFrame/Series/Index/Array classes that should # behave identically. 
# Specifically for object dtype from decimal import Decimal diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index e254312e39724..bc1b78bf944d1 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1,4 +1,4 @@ -# Arithmetc tests for DataFrame/Series/Index/Array classes that should +# Arithmetic tests for DataFrame/Series/Index/Array classes that should # behave identically. # Specifically for Period dtype import operator diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 2dff9a6088de8..047900c3d7586 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1,4 +1,4 @@ -# Arithmetc tests for DataFrame/Series/Index/Array classes that should +# Arithmetic tests for DataFrame/Series/Index/Array classes that should # behave identically. from datetime import datetime, timedelta @@ -48,7 +48,7 @@ def test_compare_timedelta64_zerodim(self): tdi >= np.array(4) def test_compare_timedelta_series(self): - # regresssion test for GH#5963 + # regression test for GH#5963 s = pd.Series([timedelta(days=1), timedelta(days=2)]) actual = s > timedelta(days=1) expected = pd.Series([False, True]) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 2f42ec5bae2b0..2337d8363155c 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -450,7 +450,7 @@ def test_concat_same_type_invalid(self, datetime_index): arr._concat_same_type([arr, other]) def test_concat_same_type_different_freq(self): - # we *can* concatentate DTI with different freqs. + # we *can* concatenate DTI with different freqs. 
a = DatetimeArray(pd.date_range('2000', periods=2, freq='D', tz='US/Central')) b = DatetimeArray(pd.date_range('2000', periods=2, freq='H', diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 066eadc9b68bc..65f7628370ad4 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -164,13 +164,13 @@ def _check_op(self, s, op_name, other, exc=None): self._check_op_integer(result, expected, mask, s, op_name, other) def _check_op_float(self, result, expected, mask, s, op_name, other): - # check comparisions that are resulting in float dtypes + # check comparisons that are resulting in float dtypes expected[mask] = np.nan tm.assert_series_equal(result, expected) def _check_op_integer(self, result, expected, mask, s, op_name, other): - # check comparisions that are resulting in integer dtypes + # check comparisons that are resulting in integer dtypes # to compare properly, we convert the expected # to float, mask to nans and convert infs diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index c7a62dfe77c37..675abec661b5a 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -417,7 +417,7 @@ def test_is_datetime_or_timedelta_dtype(): assert not com.is_datetime_or_timedelta_dtype(pd.Series([1, 2])) assert not com.is_datetime_or_timedelta_dtype(np.array(['a', 'b'])) - # TODO(jreback), this is sligthly suspect + # TODO(jreback), this is slightly suspect assert not com.is_datetime_or_timedelta_dtype( DatetimeTZDtype("ns", "US/Eastern")) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 2ac68c52d53c7..708eb9c7c8c43 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -36,7 +36,7 @@ def _check_op(self, s, op, other, op_name, exc=NotImplementedError): op(s, other) def _check_divmod_op(self, s, op, other, exc=Exception): - # divmod has multiple return 
values, so check separatly + # divmod has multiple return values, so check separately if exc is None: result_div, result_mod = op(s, other) if op is divmod: diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 97c329e0a5c92..89d30b0a3cc06 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -25,7 +25,7 @@ def data(): # Why the while loop? NumPy is unable to construct an ndarray from # equal-length ndarrays. Many of our operations involve coercing the # EA to an ndarray of objects. To avoid random test failures, we ensure - # that our data is coercable to an ndarray. Several tests deal with only + # that our data is coercible to an ndarray. Several tests deal with only # the first two elements, so that's what we'll check. while len(data[0]) == len(data[1]): diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 9683beb20def5..faa86acb1584f 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -664,7 +664,7 @@ def test_combine_first_mixed_bug(self): expected = Series([True, True, False], name=2) assert_series_equal(result, expected) - # GH 3593, converting datetime64[ns] incorrecly + # GH 3593, converting datetime64[ns] incorrectly df0 = DataFrame({"a": [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]}) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 7dc74961a2adc..c6508072cb8c7 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -149,7 +149,7 @@ def _check_mixed_dtypes(df, dtypes=None): if d in df: assert(df.dtypes[d] == d) - # mixed floating and integer coexinst in the same frame + # mixed floating and integer coexist in the same frame df = _make_mixed_dtypes_df('float') _check_mixed_dtypes(df) diff --git a/pandas/tests/frame/test_indexing.py 
b/pandas/tests/frame/test_indexing.py index 40785c6a1d321..3c9558d5cbd10 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2729,7 +2729,7 @@ def _check_set(df, cond, check_dtypes=True): cond = df >= 0 _check_set(df, cond) - # aligining + # aligning cond = (df >= 0)[1:] _check_set(df, cond) @@ -3691,7 +3691,7 @@ def test_assigning_ops(self): df.at["j", "cats"] = "c" # Assigning a Category to parts of a int/... column uses the values of - # the Catgorical + # the Categorical df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")}) exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")}) df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"]) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index d46ce41fc7f03..e7583adff403b 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -307,7 +307,7 @@ def check(result, expected=None): with pytest.raises(ValueError, match=msg): df[df.A > 6] - # dup aligining operations should work + # dup aligning operations should work # GH 5185 df1 = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3]) df2 = DataFrame([1, 2, 3], index=[1, 2, 3]) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 5bea749febc76..0fb8673e6274a 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -133,7 +133,7 @@ def f(g): def test_group_apply_once_per_group(df, group_names): # GH2936, GH7739, GH10519, GH2656, GH12155, GH20084, GH21417 - # This test should ensure that a function is only evaluted + # This test should ensure that a function is only evaluated # once per group. 
Previously the function has been evaluated twice # on the first group to check if the Cython index slider is safe to use # This test ensures that the side effect (append to list) is only triggered diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 87b57b0609b36..3da3ab22b643b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -130,7 +130,7 @@ def func(dataf): assert isinstance(result, DataFrame) # GH5592 - # inconcistent return type + # inconsistent return type df = DataFrame(dict(A=['Tiger', 'Tiger', 'Tiger', 'Lamb', 'Lamb', 'Pony', 'Pony'], B=Series( np.arange(7), dtype='int64'), C=date_range( diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index ef05e6ada4890..4ca470d316e5c 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -92,7 +92,7 @@ def test_groupby_with_timegrouper_methods(self, should_sort): def test_timegrouper_with_reg_groups(self): # GH 3794 - # allow combinateion of timegrouper/reg groups + # allow combination of timegrouper/reg groups df_original = DataFrame({ 'Branch': 'A A A A A A A B'.split(), diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 085e62ed9341e..6ec8568ce7242 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -94,7 +94,7 @@ def test_slice_duplicate_monotonic(self): def test_monotone_DTI_indexing_bug(self): # GH 19362 - # Testing accessing the first element in a montononic descending + # Testing accessing the first element in a monotonic descending # partial string indexing. 
df = pd.DataFrame(list(range(5))) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index ea33e563b31be..2a5ae92cb59f5 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -201,7 +201,7 @@ def test_to_datetime_with_non_exact(self, cache): def test_parse_nanoseconds_with_formula(self, cache): # GH8989 - # trunctaing the nanoseconds when a format was provided + # truncating the nanoseconds when a format was provided for v in ["2012-01-01 09:00:00.000000001", "2012-01-01 09:00:00.000001", "2012-01-01 09:00:00.001", @@ -383,7 +383,7 @@ def test_to_datetime_now(self): def test_to_datetime_today(self): # See GH#18666 # Test with one timezone far ahead of UTC and another far behind, so - # one of these will _almost_ alawys be in a different day from UTC. + # one of these will _almost_ always be in a different day from UTC. # Unfortunately this test between 12 and 1 AM Samoa time # this both of these timezones _and_ UTC will all be in the same day, # so this test will not detect the regression introduced in #18666. 
@@ -606,7 +606,7 @@ def test_to_datetime_tz_psycopg2(self, cache): ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)) assert is_datetime64_ns_dtype(i) - # tz coerceion + # tz coercion result = pd.to_datetime(i, errors='coerce', cache=cache) tm.assert_index_equal(result, i) diff --git a/pandas/tests/indexes/interval/test_construction.py b/pandas/tests/indexes/interval/test_construction.py index 7a54ad5c180a4..eb9b573cce91d 100644 --- a/pandas/tests/indexes/interval/test_construction.py +++ b/pandas/tests/indexes/interval/test_construction.py @@ -272,7 +272,7 @@ def test_constructor_errors(self): IntervalIndex.from_tuples(tuples) def test_na_tuples(self): - # tuple (NA, NA) evaluates the same as NA as an elemenent + # tuple (NA, NA) evaluates the same as NA as an element na_tuple = [(0, 1), (np.nan, np.nan), (2, 3)] idx_na_tuple = IntervalIndex.from_tuples(na_tuple) idx_na_element = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)]) diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py index 518c12bb20e13..1928c303a1bcd 100644 --- a/pandas/tests/indexes/multi/test_missing.py +++ b/pandas/tests/indexes/multi/test_missing.py @@ -74,7 +74,7 @@ def test_dropna(): idx.dropna(how='xxx') # GH26408 - # test if missing values are dropped for mutiindex constructed + # test if missing values are dropped for multiindex constructed # from codes and values idx = MultiIndex(levels=[[np.nan, None, pd.NaT, "128", 2], [np.nan, None, pd.NaT, "128", 2]], diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index d38fa20a9335c..d89d282fb785b 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -676,7 +676,7 @@ def test_get_loc(self): with pytest.raises(KeyError): i.get_loc('NOT-EXIST') - # non-unique, slicable + # non-unique, sliceable cidx3 = CategoricalIndex(list('aabbb'), categories=list('abc')) idx3 = Index(list('aabbb')) diff --git 
a/pandas/tests/indexing/multiindex/test_xs.py b/pandas/tests/indexing/multiindex/test_xs.py index f9117341e3a78..bbc55c75c5b77 100644 --- a/pandas/tests/indexing/multiindex/test_xs.py +++ b/pandas/tests/indexing/multiindex/test_xs.py @@ -52,7 +52,7 @@ def test_xs_loc_equality(multiindex_dataframe_random_data): def test_xs_missing_values_in_index(): # see gh-6574 - # missing values in returned index should be preserrved + # missing values in returned index should be preserved acc = [ ('a', 'abcde', 1), ('b', 'bbcde', 2), diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 8b2b0b349e203..e9c1b85e7d40c 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -665,7 +665,7 @@ def test_where_index_period(self): class TestFillnaSeriesCoercion(CoercionBase): - # not indexing, but place here for consisntency + # not indexing, but place here for consistency method = 'fillna' diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 9a2aae08dbb15..ada613110d9bf 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -35,7 +35,7 @@ def test_scalar_error(self): # float_indexers should raise exceptions # on appropriate Index types & accessors # this duplicates the code below - # but is spefically testing for the error + # but is specifically testing for the error # message for index in [tm.makeStringIndex, tm.makeUnicodeIndex, diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 4fa26dc67ba0c..6b5ad66e268df 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -20,7 +20,7 @@ def test_iloc_exceeds_bounds(self): # iloc should allow indexers that exceed the bounds df = DataFrame(np.random.random_sample((20, 5)), columns=list('ABCDE')) - # lists of positions should raise IndexErrror! + # lists of positions should raise IndexError! 
msg = 'positional indexers are out-of-bounds' with pytest.raises(IndexError, match=msg): df.iloc[:, [0, 1, 2, 3, 4, 5]] diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 5f5718fe3eac3..11d0fa2602baa 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -16,7 +16,7 @@ class TestLoc(Base): def test_loc_getitem_dups(self): # GH 5678 - # repeated gettitems on a dup index returning a ndarray + # repeated getitems on a dup index returning a ndarray df = DataFrame( np.random.random_sample((20, 5)), index=['ABCDE' [x % 5] for x in range(20)]) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 961d781764b67..ea75e97bace0b 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -730,7 +730,7 @@ def test_to_excel_multiindex_dates( assert recons.index.names == ('time', 'foo') def test_to_excel_multiindex_no_write_index(self, engine, ext): - # Test writing and re-reading a MI witout the index. GH 5616. + # Test writing and re-reading a MI without the index. GH 5616. # Initial non-MI frame. 
frame1 = DataFrame({'a': [10, 20], 'b': [30, 40], 'c': [50, 60]}) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index edb7c2136825d..0eeb0e6eb2f2d 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -533,7 +533,7 @@ def test_to_string_with_formatters_unicode(self): assert result == ' c/\u03c3\n' + '0 1\n1 2\n2 3' def test_east_asian_unicode_false(self): - # not alighned properly because of east asian width + # not aligned properly because of east asian width # mid col df = DataFrame({'a': ['あ', 'いいい', 'う', 'ええええええ'], diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index ef9dbc63d873d..413c11ba2f9fe 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -225,7 +225,7 @@ def test_long_strings(self): def test_api(self): # GH4584 - # API issue when to_hdf doesn't acdept append AND format args + # API issue when to_hdf doesn't accept append AND format args with ensure_clean_path(self.path) as path: df = tm.makeDataFrame() @@ -2656,7 +2656,7 @@ def test_select(self): expected = df.reindex(columns=['A', 'B']) tm.assert_frame_equal(expected, result) - # equivalentsly + # equivalently result = store.select('df', [("columns=['A', 'B']")]) expected = df.reindex(columns=['A', 'B']) tm.assert_frame_equal(expected, result) @@ -3284,7 +3284,7 @@ def test_frame_select_complex2(self): expected = read_hdf(hh, 'df', where='l1=[2, 3, 4]') - # sccope with list like + # scope with list like l = selection.index.tolist() # noqa store = HDFStore(hh) result = store.select('df', where='l1=l') @@ -3308,7 +3308,7 @@ def test_frame_select_complex2(self): result = read_hdf(hh, 'df', where='l1=list(selection.index)') assert_frame_equal(result, expected) - # sccope with index + # scope with index store = HDFStore(hh) result = store.select('df', where='l1=index') @@ -5164,7 +5164,7 @@ def 
test_legacy_datetimetz_object(self, datapath): assert_frame_equal(result, expected) def test_dst_transitions(self): - # make sure we are not failing on transaitions + # make sure we are not failing on transitions with ensure_clean_store(self.path) as store: times = pd.date_range("2013-10-26 23:00", "2013-10-27 01:00", tz="Europe/London", diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index afdd83ba9bb8c..db5c92fb681a2 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -305,7 +305,7 @@ def test_write_index(self, engine): check_round_trip(df, engine) def test_write_multiindex(self, pa): - # Not suppoprted in fastparquet as of 0.1.3 or older pyarrow version + # Not supported in fastparquet as of 0.1.3 or older pyarrow version engine = pa df = pd.DataFrame({'A': [1, 2, 3]}) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index e651892bde0a0..b053afa4dd7d5 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1338,7 +1338,7 @@ def check(col): # this is parsed on Travis (linux), but not on macosx for some reason # even with the same versions of psycopg2 & sqlalchemy, possibly a - # Postgrsql server version difference + # Postgresql server version difference col = df.DateColWithTz assert is_datetime64tz_dtype(col.dtype) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 4ee918fa48dab..06c753d1b8e21 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -2952,7 +2952,7 @@ def test_plain_axes(self): fig.add_axes([0.2, 0.2, 0.2, 0.2]) Series(rand(10)).plot(ax=ax) - # suppliad ax itself is a plain Axes, but because the cmap keyword + # supplied ax itself is a plain Axes, but because the cmap keyword # a new ax is created for the colorbar -> also multiples axes (GH11520) df = DataFrame({'a': randn(8), 'b': randn(8)}) fig = self.plt.figure() diff --git 
a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 9dabb35196741..9a954b522333d 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -529,7 +529,7 @@ def test_df_series_secondary_legend(self): assert ax.right_ax.get_yaxis().get_visible() tm.close() - # seconcary -> secondary (without passing ax) + # secondary -> secondary (without passing ax) _, ax = self.plt.subplots() ax = df.plot(secondary_y=True, ax=ax) s.plot(legend=True, secondary_y=True, ax=ax) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index b487f865b68a4..8eb4141555260 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -603,7 +603,7 @@ def test_other_datetime_unit(self): 'datetime64[ns]']: df2 = s.astype(dtype).to_frame('days') - # coerces to datetime64[ns], thus sholuld not be affected + # coerces to datetime64[ns], thus should not be affected assert df2['days'].dtype == 'datetime64[ns]' result = df1.merge(df2, left_on='entity_id', right_index=True) @@ -1243,9 +1243,9 @@ def test_merge_incompat_infer_boolean_object(self): ([0, 1], pd.Series([False, True], dtype=bool)), ]) def test_merge_incompat_dtypes_are_ok(self, df1_vals, df2_vals): - # these are explicity allowed incompat merges, that pass thru + # these are explicitly allowed incompat merges, that pass thru # the result type is dependent on if the values on the rhs are - # inferred, otherwise these will be coereced to object + # inferred, otherwise these will be coerced to object df1 = DataFrame({'A': df1_vals}) df2 = DataFrame({'A': df2_vals}) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 1420d4420e430..4f65251ebd923 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2198,7 +2198,7 @@ def test_categorical_concat(self, sort): def test_categorical_concat_gh7864(self): # GH 7864 - # 
make sure ordering is preserverd + # make sure ordering is preserved df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": list('abbaae')}) df["grade"] = Categorical(df["raw_grade"]) df['grade'].cat.set_categories(['e', 'a', 'b']) @@ -2265,7 +2265,7 @@ def test_categorical_index_preserver(self): }).set_index('B') tm.assert_frame_equal(result, expected) - # wrong catgories + # wrong categories df3 = DataFrame({'A': a, 'B': Categorical(b, categories=list('abe')) }).set_index('B') msg = "categories must match existing categories when appending" diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index f10876531e66a..469072970133d 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -213,7 +213,7 @@ def test_conversion(self): assert isinstance(td64, np.timedelta64) - # this is NOT equal and cannot be roundtriped (because of the nanos) + # this is NOT equal and cannot be roundtripped (because of the nanos) td = Timedelta('1 days, 10:11:12.012345678') assert td != td.to_pytimedelta() diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 657008856482f..8b13458050ce8 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -112,7 +112,7 @@ def test_ceil_floor_edge(self, test_input, rounder, freq, expected): ]) @pytest.mark.parametrize('rounder', ['ceil', 'floor', 'round']) def test_round_minute_freq(self, test_input, freq, expected, rounder): - # Ensure timestamps that shouldnt round dont! + # Ensure timestamps that shouldn't round don't! 
# GH#21262 dt = Timestamp(test_input) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 5328a58e3fbff..94050f7526444 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -634,7 +634,7 @@ def test_timedelta64_nan(self): # td np.float64 -> another float-object somewher on + # casting to -> np.float64 -> another float-object somewhere on # the way could lead jepardize this behavior comps = [np.nan] # could be casted to float64 values = [np.nan] diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index d24ed9433f4f7..d82b205803b09 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -206,7 +206,7 @@ def check_ops_properties(self, props, filter=None, ignore_failures=False): result = getattr(o, op) - # these couuld be series, arrays or scalars + # these could be series, arrays or scalars if isinstance(result, Series) and isinstance(expected, Series): tm.assert_series_equal(result, expected) elif isinstance(result, Index) and isinstance(expected, Index): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index e8d6b3bcaa77f..aa9c9bb05f877 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1840,7 +1840,7 @@ def test_sort_index_and_reconstruction(self): # 15622 # lexsortedness should be identical - # across MultiIndex consruction methods + # across MultiIndex construction methods df = DataFrame([[1, 1], [2, 2]], index=list('ab')) expected = DataFrame([[1, 1], [2, 2], [1, 1], [2, 2]], diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 9524a78dae16c..4dfdd1c96728b 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1387,7 +1387,7 @@ def quantile_func(x): def test_rolling_quantile_np_percentile(self): # #9413: Tests that rolling window's quantile default behavior - # is analogus to Numpy's percentile + # is analogous to Numpy's percentile row = 
10 col = 5 idx = pd.date_range('20100101', periods=row, freq='B') @@ -2003,7 +2003,7 @@ def test_pairwise_with_self(self, f): # DataFrame with itself, pairwise=True # note that we may construct the 1st level of the MI - # in a non-motononic way, so compare accordingly + # in a non-monotonic way, so compare accordingly results = [] for i, df in enumerate(self.df1s): result = f(df) @@ -2154,7 +2154,7 @@ def is_constant(x): def no_nans(x): return x.notna().all().all() - # data is a tuple(object, is_contant, no_nans) + # data is a tuple(object, is_constant, no_nans) data = create_series() + create_dataframes() return [(x, is_constant(x), no_nans(x)) for x in data] diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index a1ad792e57bde..151cd2a42ecef 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -813,7 +813,7 @@ def test_call(self): assert self.offset4(self.d) == datetime(2014, 6, 30, 14) def test_sub(self): - # we have to override test_sub here becasue self.offset2 is not + # we have to override test_sub here because self.offset2 is not # defined as self._offset(2) off = self.offset2 msg = "Cannot subtract datetime from offset" diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 00837d36d9508..ac20ad1669638 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -116,7 +116,7 @@ class DateOffset(BaseOffset): off specifying n in the keywords you use, but regardless it is there for you. n is needed for DateOffset subclasses. - DateOffets work as follows. Each offset specify a set of dates + DateOffsets work as follows. Each offset specifies a set of dates that conform to the DateOffset. For example, Bday defines this set to be the set of dates that are weekdays (M-F). 
To test if a date is in the set of a DateOffset dateOffset we can use the diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 107c17c5253fb..f14b202b034d6 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1666,7 +1666,7 @@ def index_subclass_makers_generator(): def all_timeseries_index_generator(k=10): """Generator which can be iterated over to get instances of all the classes - which represent time-seires. + which represent time-series. Parameters ---------- @@ -1793,7 +1793,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, # pass None to index constructor for no name names = None - # make singelton case uniform + # make singleton case uniform if isinstance(names, str) and nlevels == 1: names = [names] @@ -1872,7 +1872,7 @@ def makeCustomDataframe(nrows, ncols, c_idx_names=True, r_idx_names=True, N < idx_nlevels, for just the first N levels. If ndupe doesn't divide nrows/ncol, the last label might have lower multiplicity. dtype - passed to the DataFrame constructor as is, in case you wish to - have more control in conjuncion with a custom `data_gen_f` + have more control in conjunction with a custom `data_gen_f` r_idx_type, c_idx_type - "i"/"f"/"s"/"u"/"dt"/"td". If idx_type is not None, `idx_nlevels` must be 1. "i"/"f" creates an integer/float index, diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 64eaf45376b2f..dddd5eb1f1eab 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -150,7 +150,7 @@ def error(code, **kwargs): code : str Error code. message : str - Error message with varaibles replaced. + Error message with variables replaced. """ return (code, ERROR_MSGS[code].format(**kwargs))