From 3f5d728f871e6709b9ce3520b38c0746b03ed62f Mon Sep 17 00:00:00 2001 From: Michael Charlton Date: Fri, 9 Dec 2016 11:30:51 +0000 Subject: [PATCH 1/6] astype method now takes dict mapping col names to datatypes #14761 Updating documentation to reflect change --- doc/source/basics.rst | 4 +++- doc/source/whatsnew/v0.20.0.txt | 10 ++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index e5aa6b577270a..ada7877c4505c 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1755,12 +1755,14 @@ then the more *general* one will be used as the result of the operation. # conversion of dtypes df3.astype('float32').dtypes +.. versionadded:: 0.20.0 + Convert a subset of columns to a specified type using :meth:`~DataFrame.astype` .. ipython:: python dft = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6], 'c': [7, 8, 9]}) - dft[['a','b']] = dft[['a','b']].astype(np.uint8) + dft = dft.astype({'a': np.float64, 'c': np.uint8}) dft dft.dtypes diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 0bfd755aae40c..35543b3f025d3 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -43,6 +43,16 @@ fixed-width text files, and :func:`read_excel` for parsing Excel files. pd.read_fwf(StringIO(data)).dtypes pd.read_fwf(StringIO(data), dtype={'a':'float64', 'b':'object'}).dtypes +You can now pass a dictionary mapping column names to desired data types for that +column to :meth:`~DataFrame.astype`. + +.. ipython:: python + + dft = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6], 'c': [7, 8, 9]}) + dft = dft.astype({'a': np.float64, 'c': np.uint8}) + dft + dft.dtypes + .. 
_whatsnew_0200.enhancements.other: Other enhancements From fddbb2ed0537cd32c3e37f61983972e1957399d4 Mon Sep 17 00:00:00 2001 From: Michael Charlton Date: Thu, 15 Dec 2016 15:39:18 +0000 Subject: [PATCH 2/6] Updates after review DataFrame.astype now allows changing the dtype of a column by passing a dict mapping column name to dtype. --- doc/source/basics.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index e7db814483905..58da24d889507 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1779,6 +1779,18 @@ Convert a subset of columns to a specified type using :meth:`~DataFrame.astype` dft.loc[:, ['a', 'b']] = dft.loc[:, ['a', 'b']].astype(np.uint8) dft.dtypes + +.. versionadded:: 0.19.0 + +Convert certain columns to a specific dtype by passing a dict to :meth:`~DataFrame.astype` + +.. ipython:: python + + dft1 = pd.DataFrame({'a': [1,0,1], 'b': [4,5,6], 'c': [7, 8, 9]}) + dft1 = dft1.astype({'a': np.bool, 'c': np.float64}) + dft1 + dft1.dtypes + .. _basics.object_conversion: object conversion From a61ec51dfd32a6d434318a78da36822f531518e6 Mon Sep 17 00:00:00 2001 From: Michael Charlton Date: Thu, 15 Dec 2016 16:31:13 +0000 Subject: [PATCH 3/6] Corrections to docs after review feedback DataFrame.astype now allows setting the type of columns by passing a dict mapping column to dtype. --- doc/source/basics.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index e7db814483905..58da24d889507 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1779,6 +1779,18 @@ Convert a subset of columns to a specified type using :meth:`~DataFrame.astype` dft.loc[:, ['a', 'b']] = dft.loc[:, ['a', 'b']].astype(np.uint8) dft.dtypes + +.. versionadded:: 0.19.0 + +Convert certain columns to a specific dtype by passing a dict to :meth:`~DataFrame.astype` + +.. 
ipython:: python + + dft1 = pd.DataFrame({'a': [1,0,1], 'b': [4,5,6], 'c': [7, 8, 9]}) + dft1 = dft1.astype({'a': np.bool, 'c': np.float64}) + dft1 + dft1.dtypes + .. _basics.object_conversion: object conversion From 0c9078562c771e139d2e8ea3291dd52f41a68719 Mon Sep 17 00:00:00 2001 From: Michael Charlton Date: Thu, 15 Dec 2016 17:22:08 +0000 Subject: [PATCH 4/6] Removing unneeded changes Mistakenly added changes carried out in v0.19 to v0.20 --- doc/source/whatsnew/v0.20.0.txt | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 07e8154f557b3..2855cde95ac2a 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -43,15 +43,27 @@ fixed-width text files, and :func:`read_excel` for parsing Excel files. pd.read_fwf(StringIO(data)).dtypes pd.read_fwf(StringIO(data), dtype={'a':'float64', 'b':'object'}).dtypes -You can now pass a dictionary mapping column names to desired data types for that -column to :meth:`~DataFrame.astype`. +.. _whatsnew_0200.enhancements.groupby_access: + +Groupby Enhancements +^^^^^^^^^^^^^^^^^^^^ + +Strings passed to ``DataFrame.groupby()`` as the ``by`` parameter may now reference either column names or index level names (:issue:`5677`) .. ipython:: python - dft = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6], 'c': [7, 8, 9]}) - dft = dft.astype({'a': np.float64, 'c': np.uint8}) - dft - dft.dtypes + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + + index = pd.MultiIndex.from_arrays(arrays, names=['first', 'second']) + + df = pd.DataFrame({'A': [1, 1, 1, 1, 2, 2, 3, 3], + 'B': np.arange(8)}, + index=index) + df + + df.groupby(['second', 'A']).sum() + .. 
_whatsnew_0200.enhancements.other: From f51dbdf7ccc703326906935c0e7e890f52f64d76 Mon Sep 17 00:00:00 2001 From: Michael Charlton Date: Thu, 22 Dec 2016 17:44:15 +0000 Subject: [PATCH 5/6] removing stray orig file after merge --- doc/source/basics.rst.orig | 2008 ------------------------------------ 1 file changed, 2008 deletions(-) delete mode 100644 doc/source/basics.rst.orig diff --git a/doc/source/basics.rst.orig b/doc/source/basics.rst.orig deleted file mode 100644 index 2e8abe0a5c329..0000000000000 --- a/doc/source/basics.rst.orig +++ /dev/null @@ -1,2008 +0,0 @@ -.. currentmodule:: pandas - -.. ipython:: python - :suppress: - - import numpy as np - import pandas as pd - np.set_printoptions(precision=4, suppress=True) - pd.options.display.max_rows = 15 - -.. _basics: - -============================== - Essential Basic Functionality -============================== - -Here we discuss a lot of the essential functionality common to the pandas data -structures. Here's how to create some of the objects used in the examples from -the previous section: - -.. ipython:: python - - index = pd.date_range('1/1/2000', periods=8) - s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e']) - df = pd.DataFrame(np.random.randn(8, 3), index=index, - columns=['A', 'B', 'C']) - wp = pd.Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=pd.date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - -.. _basics.head_tail: - -Head and Tail -------------- - -To view a small sample of a Series or DataFrame object, use the -:meth:`~DataFrame.head` and :meth:`~DataFrame.tail` methods. The default number -of elements to display is five, but you may pass a custom number. - -.. ipython:: python - - long_series = pd.Series(np.random.randn(1000)) - long_series.head() - long_series.tail(3) - -.. 
_basics.attrs: - -Attributes and the raw ndarray(s) ---------------------------------- - -pandas objects have a number of attributes enabling you to access the metadata - - * **shape**: gives the axis dimensions of the object, consistent with ndarray - * Axis labels - - * **Series**: *index* (only axis) - * **DataFrame**: *index* (rows) and *columns* - * **Panel**: *items*, *major_axis*, and *minor_axis* - -Note, **these attributes can be safely assigned to**! - -.. ipython:: python - - df[:2] - df.columns = [x.lower() for x in df.columns] - df - -To get the actual data inside a data structure, one need only access the -**values** property: - -.. ipython:: python - - s.values - df.values - wp.values - -If a DataFrame or Panel contains homogeneously-typed data, the ndarray can -actually be modified in-place, and the changes will be reflected in the data -structure. For heterogeneous data (e.g. some of the DataFrame's columns are not -all the same dtype), this will not be the case. The values attribute itself, -unlike the axis labels, cannot be assigned to. - -.. note:: - - When working with heterogeneous data, the dtype of the resulting ndarray - will be chosen to accommodate all of the data involved. For example, if - strings are involved, the result will be of object dtype. If there are only - floats and integers, the resulting array will be of float dtype. - -.. _basics.accelerate: - -Accelerated operations ----------------------- - -pandas has support for accelerating certain types of binary numerical and boolean operations using -the ``numexpr`` library (starting in 0.11.0) and the ``bottleneck`` libraries. - -These libraries are especially useful when dealing with large data sets, and provide large -speedups. ``numexpr`` uses smart chunking, caching, and multiple cores. ``bottleneck`` is -a set of specialized cython routines that are especially fast when dealing with arrays that have -``nans``. 
- -Here is a sample (using 100 column x 100,000 row ``DataFrames``): - -.. csv-table:: - :header: "Operation", "0.11.0 (ms)", "Prior Version (ms)", "Ratio to Prior" - :widths: 25, 25, 25, 25 - :delim: ; - - ``df1 > df2``; 13.32; 125.35; 0.1063 - ``df1 * df2``; 21.71; 36.63; 0.5928 - ``df1 + df2``; 22.04; 36.50; 0.6039 - -You are highly encouraged to install both libraries. See the section -:ref:`Recommended Dependencies ` for more installation info. - -.. _basics.binop: - -Flexible binary operations --------------------------- - -With binary operations between pandas data structures, there are two key points -of interest: - - * Broadcasting behavior between higher- (e.g. DataFrame) and - lower-dimensional (e.g. Series) objects. - * Missing data in computations - -We will demonstrate how to manage these issues independently, though they can -be handled simultaneously. - -Matching / broadcasting behavior -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -DataFrame has the methods :meth:`~DataFrame.add`, :meth:`~DataFrame.sub`, -:meth:`~DataFrame.mul`, :meth:`~DataFrame.div` and related functions -:meth:`~DataFrame.radd`, :meth:`~DataFrame.rsub`, ... -for carrying out binary operations. For broadcasting behavior, -Series input is of primary interest. Using these functions, you can use to -either match on the *index* or *columns* via the **axis** keyword: - -.. ipython:: python - - df = pd.DataFrame({'one' : pd.Series(np.random.randn(3), index=['a', 'b', 'c']), - 'two' : pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd']), - 'three' : pd.Series(np.random.randn(3), index=['b', 'c', 'd'])}) - df - row = df.ix[1] - column = df['two'] - - df.sub(row, axis='columns') - df.sub(row, axis=1) - - df.sub(column, axis='index') - df.sub(column, axis=0) - -.. ipython:: python - :suppress: - - df_orig = df - -Furthermore you can align a level of a multi-indexed DataFrame with a Series. - -.. 
ipython:: python - - dfmi = df.copy() - dfmi.index = pd.MultiIndex.from_tuples([(1,'a'),(1,'b'),(1,'c'),(2,'a')], - names=['first','second']) - dfmi.sub(column, axis=0, level='second') - -With Panel, describing the matching behavior is a bit more difficult, so -the arithmetic methods instead (and perhaps confusingly?) give you the option -to specify the *broadcast axis*. For example, suppose we wished to demean the -data over a particular axis. This can be accomplished by taking the mean over -an axis and broadcasting over the same axis: - -.. ipython:: python - - major_mean = wp.mean(axis='major') - major_mean - wp.sub(major_mean, axis='major') - -And similarly for ``axis="items"`` and ``axis="minor"``. - -.. note:: - - I could be convinced to make the **axis** argument in the DataFrame methods - match the broadcasting behavior of Panel. Though it would require a - transition period so users can change their code... - -Series and Index also support the :func:`divmod` builtin. This function takes -the floor division and modulo operation at the same time returning a two-tuple -of the same type as the left hand side. For example: - -.. ipython:: python - - s = pd.Series(np.arange(10)) - s - div, rem = divmod(s, 3) - div - rem - - idx = pd.Index(np.arange(10)) - idx - div, rem = divmod(idx, 3) - div - rem - -We can also do elementwise :func:`divmod`: - -.. ipython:: python - - div, rem = divmod(s, [2, 2, 3, 3, 4, 4, 5, 5, 6, 6]) - div - rem - -Missing data / operations with fill values -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In Series and DataFrame (though not yet in Panel), the arithmetic functions -have the option of inputting a *fill_value*, namely a value to substitute when -at most one of the values at a location are missing. 
For example, when adding -two DataFrame objects, you may wish to treat NaN as 0 unless both DataFrames -are missing that value, in which case the result will be NaN (you can later -replace NaN with some other value using ``fillna`` if you wish). - -.. ipython:: python - :suppress: - - df2 = df.copy() - df2['three']['a'] = 1. - -.. ipython:: python - - df - df2 - df + df2 - df.add(df2, fill_value=0) - -.. _basics.compare: - -Flexible Comparisons -~~~~~~~~~~~~~~~~~~~~ - -Starting in v0.8, pandas introduced binary comparison methods eq, ne, lt, gt, -le, and ge to Series and DataFrame whose behavior is analogous to the binary -arithmetic operations described above: - -.. ipython:: python - - df.gt(df2) - df2.ne(df) - -These operations produce a pandas object the same type as the left-hand-side input -that if of dtype ``bool``. These ``boolean`` objects can be used in indexing operations, -see :ref:`here` - -.. _basics.reductions: - -Boolean Reductions -~~~~~~~~~~~~~~~~~~ - -You can apply the reductions: :attr:`~DataFrame.empty`, :meth:`~DataFrame.any`, -:meth:`~DataFrame.all`, and :meth:`~DataFrame.bool` to provide a -way to summarize a boolean result. - -.. ipython:: python - - (df > 0).all() - (df > 0).any() - -You can reduce to a final boolean value. - -.. ipython:: python - - (df > 0).any().any() - -You can test if a pandas object is empty, via the :attr:`~DataFrame.empty` property. - -.. ipython:: python - - df.empty - pd.DataFrame(columns=list('ABC')).empty - -To evaluate single-element pandas objects in a boolean context, use the method -:meth:`~DataFrame.bool`: - -.. ipython:: python - - pd.Series([True]).bool() - pd.Series([False]).bool() - pd.DataFrame([[True]]).bool() - pd.DataFrame([[False]]).bool() - -.. warning:: - - You might be tempted to do the following: - - .. code-block:: python - - >>> if df: - ... - - Or - - .. code-block:: python - - >>> df and df2 - - These both will raise as you are trying to compare multiple values. - - .. 
code-block:: python - - ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all(). - -See :ref:`gotchas` for a more detailed discussion. - -.. _basics.equals: - -Comparing if objects are equivalent -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Often you may find there is more than one way to compute the same -result. As a simple example, consider ``df+df`` and ``df*2``. To test -that these two computations produce the same result, given the tools -shown above, you might imagine using ``(df+df == df*2).all()``. But in -fact, this expression is False: - -.. ipython:: python - - df+df == df*2 - (df+df == df*2).all() - -Notice that the boolean DataFrame ``df+df == df*2`` contains some False values! -That is because NaNs do not compare as equals: - -.. ipython:: python - - np.nan == np.nan - -So, as of v0.13.1, NDFrames (such as Series, DataFrames, and Panels) -have an :meth:`~DataFrame.equals` method for testing equality, with NaNs in -corresponding locations treated as equal. - -.. ipython:: python - - (df+df).equals(df*2) - -Note that the Series or DataFrame index needs to be in the same order for -equality to be True: - -.. ipython:: python - - df1 = pd.DataFrame({'col':['foo', 0, np.nan]}) - df2 = pd.DataFrame({'col':[np.nan, 0, 'foo']}, index=[2,1,0]) - df1.equals(df2) - df1.equals(df2.sort_index()) - -Comparing array-like objects -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can conveniently do element-wise comparisons when comparing a pandas -data structure with a scalar value: - -.. ipython:: python - - pd.Series(['foo', 'bar', 'baz']) == 'foo' - pd.Index(['foo', 'bar', 'baz']) == 'foo' - -Pandas also handles element-wise comparisons between different array-like -objects of the same length: - -.. 
ipython:: python - - pd.Series(['foo', 'bar', 'baz']) == pd.Index(['foo', 'bar', 'qux']) - pd.Series(['foo', 'bar', 'baz']) == np.array(['foo', 'bar', 'qux']) - -Trying to compare ``Index`` or ``Series`` objects of different lengths will -raise a ValueError: - -.. code-block:: ipython - - In [55]: pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo', 'bar']) - ValueError: Series lengths must match to compare - - In [56]: pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo']) - ValueError: Series lengths must match to compare - -Note that this is different from the numpy behavior where a comparison can -be broadcast: - -.. ipython:: python - - np.array([1, 2, 3]) == np.array([2]) - -or it can return False if broadcasting can not be done: - -.. ipython:: python - :okwarning: - - np.array([1, 2, 3]) == np.array([1, 2]) - -Combining overlapping data sets -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -A problem occasionally arising is the combination of two similar data sets -where values in one are preferred over the other. An example would be two data -series representing a particular economic indicator where one is considered to -be of "higher quality". However, the lower quality series might extend further -back in history or have more complete data coverage. As such, we would like to -combine two DataFrame objects where missing values in one DataFrame are -conditionally filled with like-labeled values from the other DataFrame. The -function implementing this operation is :meth:`~DataFrame.combine_first`, -which we illustrate: - -.. ipython:: python - - df1 = pd.DataFrame({'A' : [1., np.nan, 3., 5., np.nan], - 'B' : [np.nan, 2., 3., np.nan, 6.]}) - df2 = pd.DataFrame({'A' : [5., 2., 4., np.nan, 3., 7.], - 'B' : [np.nan, np.nan, 3., 4., 6., 8.]}) - df1 - df2 - df1.combine_first(df2) - -General DataFrame Combine -~~~~~~~~~~~~~~~~~~~~~~~~~ - -The :meth:`~DataFrame.combine_first` method above calls the more general -DataFrame method :meth:`~DataFrame.combine`. 
This method takes another DataFrame -and a combiner function, aligns the input DataFrame and then passes the combiner -function pairs of Series (i.e., columns whose names are the same). - -So, for instance, to reproduce :meth:`~DataFrame.combine_first` as above: - -.. ipython:: python - - combiner = lambda x, y: np.where(pd.isnull(x), y, x) - df1.combine(df2, combiner) - -.. _basics.stats: - -Descriptive statistics ----------------------- - -A large number of methods for computing descriptive statistics and other related -operations on :ref:`Series `, :ref:`DataFrame -`, and :ref:`Panel `. Most of these -are aggregations (hence producing a lower-dimensional result) like -:meth:`~DataFrame.sum`, :meth:`~DataFrame.mean`, and :meth:`~DataFrame.quantile`, -but some of them, like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod`, -produce an object of the same size. Generally speaking, these methods take an -**axis** argument, just like *ndarray.{sum, std, ...}*, but the axis can be -specified by name or integer: - - - **Series**: no axis argument needed - - **DataFrame**: "index" (axis=0, default), "columns" (axis=1) - - **Panel**: "items" (axis=0), "major" (axis=1, default), "minor" - (axis=2) - -For example: - -.. ipython:: python - - df - df.mean(0) - df.mean(1) - -All such methods have a ``skipna`` option signaling whether to exclude missing -data (``True`` by default): - -.. ipython:: python - - df.sum(0, skipna=False) - df.sum(axis=1, skipna=True) - -Combined with the broadcasting / arithmetic behavior, one can describe various -statistical procedures, like standardization (rendering data zero mean and -standard deviation 1), very concisely: - -.. ipython:: python - - ts_stand = (df - df.mean()) / df.std() - ts_stand.std() - xs_stand = df.sub(df.mean(1), axis=0).div(df.std(1), axis=0) - xs_stand.std(1) - -Note that methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod` -preserve the location of ``NaN`` values. 
This is somewhat different from -:meth:`~DataFrame.expanding` and :meth:`~DataFrame.rolling`. -For more details please see :ref:`this note `. - -.. ipython:: python - - df.cumsum() - -Here is a quick reference summary table of common functions. Each also takes an -optional ``level`` parameter which applies only if the object has a -:ref:`hierarchical index`. - -.. csv-table:: - :header: "Function", "Description" - :widths: 20, 80 - - ``count``, Number of non-null observations - ``sum``, Sum of values - ``mean``, Mean of values - ``mad``, Mean absolute deviation - ``median``, Arithmetic median of values - ``min``, Minimum - ``max``, Maximum - ``mode``, Mode - ``abs``, Absolute Value - ``prod``, Product of values - ``std``, Bessel-corrected sample standard deviation - ``var``, Unbiased variance - ``sem``, Standard error of the mean - ``skew``, Sample skewness (3rd moment) - ``kurt``, Sample kurtosis (4th moment) - ``quantile``, Sample quantile (value at %) - ``cumsum``, Cumulative sum - ``cumprod``, Cumulative product - ``cummax``, Cumulative maximum - ``cummin``, Cumulative minimum - -Note that by chance some NumPy methods, like ``mean``, ``std``, and ``sum``, -will exclude NAs on Series input by default: - -.. ipython:: python - - np.mean(df['one']) - np.mean(df['one'].values) - -``Series`` also has a method :meth:`~Series.nunique` which will return the -number of unique non-null values: - -.. ipython:: python - - series = pd.Series(np.random.randn(500)) - series[20:500] = np.nan - series[10:20] = 5 - series.nunique() - -.. _basics.describe: - -Summarizing data: describe -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -There is a convenient :meth:`~DataFrame.describe` function which computes a variety of summary -statistics about a Series or the columns of a DataFrame (excluding NAs of -course): - -.. 
ipython:: python - - series = pd.Series(np.random.randn(1000)) - series[::2] = np.nan - series.describe() - frame = pd.DataFrame(np.random.randn(1000, 5), columns=['a', 'b', 'c', 'd', 'e']) - frame.ix[::2] = np.nan - frame.describe() - -You can select specific percentiles to include in the output: - -.. ipython:: python - - series.describe(percentiles=[.05, .25, .75, .95]) - -By default, the median is always included. - -For a non-numerical Series object, :meth:`~Series.describe` will give a simple -summary of the number of unique values and most frequently occurring values: - -.. ipython:: python - - s = pd.Series(['a', 'a', 'b', 'b', 'a', 'a', np.nan, 'c', 'd', 'a']) - s.describe() - -Note that on a mixed-type DataFrame object, :meth:`~DataFrame.describe` will -restrict the summary to include only numerical columns or, if none are, only -categorical columns: - -.. ipython:: python - - frame = pd.DataFrame({'a': ['Yes', 'Yes', 'No', 'No'], 'b': range(4)}) - frame.describe() - -This behaviour can be controlled by providing a list of types as ``include``/``exclude`` -arguments. The special value ``all`` can also be used: - -.. ipython:: python - - frame.describe(include=['object']) - frame.describe(include=['number']) - frame.describe(include='all') - -That feature relies on :ref:`select_dtypes `. Refer to -there for details about accepted inputs. - -.. _basics.idxmin: - -Index of Min/Max Values -~~~~~~~~~~~~~~~~~~~~~~~ - -The :meth:`~DataFrame.idxmin` and :meth:`~DataFrame.idxmax` functions on Series -and DataFrame compute the index labels with the minimum and maximum -corresponding values: - -.. 
ipython:: python - - s1 = pd.Series(np.random.randn(5)) - s1 - s1.idxmin(), s1.idxmax() - - df1 = pd.DataFrame(np.random.randn(5,3), columns=['A','B','C']) - df1 - df1.idxmin(axis=0) - df1.idxmax(axis=1) - -When there are multiple rows (or columns) matching the minimum or maximum -value, :meth:`~DataFrame.idxmin` and :meth:`~DataFrame.idxmax` return the first -matching index: - -.. ipython:: python - - df3 = pd.DataFrame([2, 1, 1, 3, np.nan], columns=['A'], index=list('edcba')) - df3 - df3['A'].idxmin() - -.. note:: - - ``idxmin`` and ``idxmax`` are called ``argmin`` and ``argmax`` in NumPy. - -.. _basics.discretization: - -Value counts (histogramming) / Mode -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The :meth:`~Series.value_counts` Series method and top-level function computes a histogram -of a 1D array of values. It can also be used as a function on regular arrays: - -.. ipython:: python - - data = np.random.randint(0, 7, size=50) - data - s = pd.Series(data) - s.value_counts() - pd.value_counts(data) - -Similarly, you can get the most frequently occurring value(s) (the mode) of the values in a Series or DataFrame: - -.. ipython:: python - - s5 = pd.Series([1, 1, 3, 3, 3, 5, 5, 7, 7, 7]) - s5.mode() - df5 = pd.DataFrame({"A": np.random.randint(0, 7, size=50), - "B": np.random.randint(-10, 15, size=50)}) - df5.mode() - - -Discretization and quantiling -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Continuous values can be discretized using the :func:`cut` (bins based on values) -and :func:`qcut` (bins based on sample quantiles) functions: - -.. ipython:: python - - arr = np.random.randn(20) - factor = pd.cut(arr, 4) - factor - - factor = pd.cut(arr, [-5, -1, 0, 1, 5]) - factor - -:func:`qcut` computes sample quantiles. For example, we could slice up some -normally distributed data into equal-size quartiles like so: - -.. 
ipython:: python - - arr = np.random.randn(30) - factor = pd.qcut(arr, [0, .25, .5, .75, 1]) - factor - pd.value_counts(factor) - -We can also pass infinite values to define the bins: - -.. ipython:: python - - arr = np.random.randn(20) - factor = pd.cut(arr, [-np.inf, 0, np.inf]) - factor - -.. _basics.apply: - -Function application --------------------- - -To apply your own or another library's functions to pandas objects, -you should be aware of the three methods below. The appropriate -method to use depends on whether your function expects to operate -on an entire ``DataFrame`` or ``Series``, row- or column-wise, or elementwise. - -1. `Tablewise Function Application`_: :meth:`~DataFrame.pipe` -2. `Row or Column-wise Function Application`_: :meth:`~DataFrame.apply` -3. Elementwise_ function application: :meth:`~DataFrame.applymap` - -.. _basics.pipe: - -Tablewise Function Application -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. versionadded:: 0.16.2 - -``DataFrames`` and ``Series`` can of course just be passed into functions. -However, if the function needs to be called in a chain, consider using the :meth:`~DataFrame.pipe` method. -Compare the following - -.. code-block:: python - - # f, g, and h are functions taking and returning ``DataFrames`` - >>> f(g(h(df), arg1=1), arg2=2, arg3=3) - -with the equivalent - -.. code-block:: python - - >>> (df.pipe(h) - .pipe(g, arg1=1) - .pipe(f, arg2=2, arg3=3) - ) - -Pandas encourages the second style, which is known as method chaining. -``pipe`` makes it easy to use your own or another library's functions -in method chains, alongside pandas' methods. - -In the example above, the functions ``f``, ``g``, and ``h`` each expected the ``DataFrame`` as the first positional argument. -What if the function you wish to apply takes its data as, say, the second argument? -In this case, provide ``pipe`` with a tuple of ``(callable, data_keyword)``. -``.pipe`` will route the ``DataFrame`` to the argument specified in the tuple. 
- -For example, we can fit a regression using statsmodels. Their API expects a formula first and a ``DataFrame`` as the second argument, ``data``. We pass in the function, keyword pair ``(sm.poisson, 'data')`` to ``pipe``: - -.. ipython:: python - - import statsmodels.formula.api as sm - - bb = pd.read_csv('data/baseball.csv', index_col='id') - - (bb.query('h > 0') - .assign(ln_h = lambda df: np.log(df.h)) - .pipe((sm.poisson, 'data'), 'hr ~ ln_h + year + g + C(lg)') - .fit() - .summary() - ) - -The pipe method is inspired by unix pipes and more recently dplyr_ and magrittr_, which -have introduced the popular ``(%>%)`` (read pipe) operator for R_. -The implementation of ``pipe`` here is quite clean and feels right at home in python. -We encourage you to view the source code (``pd.DataFrame.pipe??`` in IPython). - -.. _dplyr: https://github.com/hadley/dplyr -.. _magrittr: https://github.com/smbache/magrittr -.. _R: http://www.r-project.org - - -Row or Column-wise Function Application -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Arbitrary functions can be applied along the axes of a DataFrame or Panel -using the :meth:`~DataFrame.apply` method, which, like the descriptive -statistics methods, take an optional ``axis`` argument: - -.. ipython:: python - - df.apply(np.mean) - df.apply(np.mean, axis=1) - df.apply(lambda x: x.max() - x.min()) - df.apply(np.cumsum) - df.apply(np.exp) - -Depending on the return type of the function passed to :meth:`~DataFrame.apply`, -the result will either be of lower dimension or the same dimension. - -:meth:`~DataFrame.apply` combined with some cleverness can be used to answer many questions -about a data set. For example, suppose we wanted to extract the date where the -maximum value for each column occurred: - -.. 
ipython:: python - - tsdf = pd.DataFrame(np.random.randn(1000, 3), columns=['A', 'B', 'C'], - index=pd.date_range('1/1/2000', periods=1000)) - tsdf.apply(lambda x: x.idxmax()) - -You may also pass additional arguments and keyword arguments to the :meth:`~DataFrame.apply` -method. For instance, consider the following function you would like to apply: - -.. code-block:: python - - def subtract_and_divide(x, sub, divide=1): - return (x - sub) / divide - -You may then apply this function as follows: - -.. code-block:: python - - df.apply(subtract_and_divide, args=(5,), divide=3) - -Another useful feature is the ability to pass Series methods to carry out some -Series operation on each column or row: - -.. ipython:: python - :suppress: - - tsdf = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], - index=pd.date_range('1/1/2000', periods=10)) - tsdf.values[3:7] = np.nan - -.. ipython:: python - - tsdf - tsdf.apply(pd.Series.interpolate) - - -Finally, :meth:`~DataFrame.apply` takes an argument ``raw`` which is False by default, which -converts each row or column into a Series before applying the function. When -set to True, the passed function will instead receive an ndarray object, which -has positive performance implications if you do not need the indexing -functionality. - -.. seealso:: - - The section on :ref:`GroupBy ` demonstrates related, flexible - functionality for grouping by some criterion, applying, and combining the - results into a Series, DataFrame, etc. - -.. _Elementwise: - -Applying elementwise Python functions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Since not all functions can be vectorized (accept NumPy arrays and return -another array or value), the methods :meth:`~DataFrame.applymap` on DataFrame -and analogously :meth:`~Series.map` on Series accept any Python function taking -a single value and returning a single value. For example: - -.. ipython:: python - :suppress: - - df4 = df_orig.copy() - -.. 
ipython:: python - - df4 - f = lambda x: len(str(x)) - df4['one'].map(f) - df4.applymap(f) - -:meth:`Series.map` has an additional feature which is that it can be used to easily -"link" or "map" values defined by a secondary series. This is closely related -to :ref:`merging/joining functionality `: - -.. ipython:: python - - s = pd.Series(['six', 'seven', 'six', 'seven', 'six'], - index=['a', 'b', 'c', 'd', 'e']) - t = pd.Series({'six' : 6., 'seven' : 7.}) - s - s.map(t) - - -.. _basics.apply_panel: - -Applying with a Panel -~~~~~~~~~~~~~~~~~~~~~ - -Applying with a ``Panel`` will pass a ``Series`` to the applied function. If the applied -function returns a ``Series``, the result of the application will be a ``Panel``. If the applied function -reduces to a scalar, the result of the application will be a ``DataFrame``. - -.. note:: - - Prior to 0.13.1 ``apply`` on a ``Panel`` would only work on ``ufuncs`` (e.g. ``np.sum/np.max``). - -.. ipython:: python - - import pandas.util.testing as tm - panel = tm.makePanel(5) - panel - panel['ItemA'] - -A transformational apply. - -.. ipython:: python - - result = panel.apply(lambda x: x*2, axis='items') - result - result['ItemA'] - -A reduction operation. - -.. ipython:: python - - panel.apply(lambda x: x.dtype, axis='items') - -A similar reduction type operation - -.. ipython:: python - - panel.apply(lambda x: x.sum(), axis='major_axis') - -This last reduction is equivalent to - -.. ipython:: python - - panel.sum('major_axis') - -A transformation operation that returns a ``Panel``, but is computing -the z-score across the ``major_axis``. - -.. ipython:: python - - result = panel.apply( - lambda x: (x-x.mean())/x.std(), - axis='major_axis') - result - result['ItemA'] - -Apply can also accept multiple axes in the ``axis`` argument. This will pass a -``DataFrame`` of the cross-section to the applied function. - -.. 
ipython:: python - - f = lambda x: ((x.T-x.mean(1))/x.std(1)).T - - result = panel.apply(f, axis = ['items','major_axis']) - result - result.loc[:,:,'ItemA'] - -This is equivalent to the following - -.. ipython:: python - - result = pd.Panel(dict([ (ax, f(panel.loc[:,:,ax])) - for ax in panel.minor_axis ])) - result - result.loc[:,:,'ItemA'] - - -.. _basics.reindexing: - -Reindexing and altering labels ------------------------------- - -:meth:`~Series.reindex` is the fundamental data alignment method in pandas. -It is used to implement nearly all other features relying on label-alignment -functionality. To *reindex* means to conform the data to match a given set of -labels along a particular axis. This accomplishes several things: - - * Reorders the existing data to match a new set of labels - * Inserts missing value (NA) markers in label locations where no data for - that label existed - * If specified, **fill** data for missing labels using logic (highly relevant - to working with time series data) - -Here is a simple example: - -.. ipython:: python - - s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e']) - s - s.reindex(['e', 'b', 'f', 'd']) - -Here, the ``f`` label was not contained in the Series and hence appears as -``NaN`` in the result. - -With a DataFrame, you can simultaneously reindex the index and columns: - -.. ipython:: python - - df - df.reindex(index=['c', 'f', 'b'], columns=['three', 'two', 'one']) - -For convenience, you may utilize the :meth:`~Series.reindex_axis` method, which -takes the labels and a keyword ``axis`` parameter. - -Note that the ``Index`` objects containing the actual axis labels can be -**shared** between objects. So if we have a Series and a DataFrame, the -following can be done: - -.. ipython:: python - - rs = s.reindex(df.index) - rs - rs.index is df.index - -This means that the reindexed Series's index is the same Python object as the -DataFrame's index. - - -.. 
seealso:: - - :ref:`MultiIndex / Advanced Indexing ` is an even more concise way of - doing reindexing. - -.. note:: - - When writing performance-sensitive code, there is a good reason to spend - some time becoming a reindexing ninja: **many operations are faster on - pre-aligned data**. Adding two unaligned DataFrames internally triggers a - reindexing step. For exploratory analysis you will hardly notice the - difference (because ``reindex`` has been heavily optimized), but when CPU - cycles matter sprinkling a few explicit ``reindex`` calls here and there can - have an impact. - -.. _basics.reindex_like: - -Reindexing to align with another object -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You may wish to take an object and reindex its axes to be labeled the same as -another object. While the syntax for this is straightforward albeit verbose, it -is a common enough operation that the :meth:`~DataFrame.reindex_like` method is -available to make this simpler: - -.. ipython:: python - :suppress: - - df2 = df.reindex(['a', 'b', 'c'], columns=['one', 'two']) - df3 = df2 - df2.mean() - - -.. ipython:: python - - df2 - df3 - df.reindex_like(df2) - -.. _basics.align: - -Aligning objects with each other with ``align`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The :meth:`~Series.align` method is the fastest way to simultaneously align two objects. It -supports a ``join`` argument (related to :ref:`joining and merging `): - - - ``join='outer'``: take the union of the indexes (default) - - ``join='left'``: use the calling object's index - - ``join='right'``: use the passed object's index - - ``join='inner'``: intersect the indexes - -It returns a tuple with both of the reindexed Series: - -.. ipython:: python - - s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e']) - s1 = s[:4] - s2 = s[1:] - s1.align(s2) - s1.align(s2, join='inner') - s1.align(s2, join='left') - -.. 
_basics.df_join: - -For DataFrames, the join method will be applied to both the index and the -columns by default: - -.. ipython:: python - - df.align(df2, join='inner') - -You can also pass an ``axis`` option to only align on the specified axis: - -.. ipython:: python - - df.align(df2, join='inner', axis=0) - -.. _basics.align.frame.series: - -If you pass a Series to :meth:`DataFrame.align`, you can choose to align both -objects either on the DataFrame's index or columns using the ``axis`` argument: - -.. ipython:: python - - df.align(df2.ix[0], axis=1) - -.. _basics.reindex_fill: - -Filling while reindexing -~~~~~~~~~~~~~~~~~~~~~~~~ - -:meth:`~Series.reindex` takes an optional parameter ``method`` which is a -filling method chosen from the following table: - -.. csv-table:: - :header: "Method", "Action" - :widths: 30, 50 - - pad / ffill, Fill values forward - bfill / backfill, Fill values backward - nearest, Fill from the nearest index value - -We illustrate these fill methods on a simple Series: - -.. ipython:: python - - rng = pd.date_range('1/3/2000', periods=8) - ts = pd.Series(np.random.randn(8), index=rng) - ts2 = ts[[0, 3, 6]] - ts - ts2 - - ts2.reindex(ts.index) - ts2.reindex(ts.index, method='ffill') - ts2.reindex(ts.index, method='bfill') - ts2.reindex(ts.index, method='nearest') - -These methods require that the indexes are **ordered** increasing or -decreasing. - -Note that the same result could have been achieved using -:ref:`fillna ` (except for ``method='nearest'``) or -:ref:`interpolate `: - -.. ipython:: python - - ts2.reindex(ts.index).fillna(method='ffill') - -:meth:`~Series.reindex` will raise a ValueError if the index is not monotonic -increasing or decreasing. :meth:`~Series.fillna` and :meth:`~Series.interpolate` -will not make any checks on the order of the index. - -.. 
_basics.limits_on_reindex_fill: - -Limits on filling while reindexing -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The ``limit`` and ``tolerance`` arguments provide additional control over -filling while reindexing. Limit specifies the maximum count of consecutive -matches: - -.. ipython:: python - - ts2.reindex(ts.index, method='ffill', limit=1) - -In contrast, tolerance specifies the maximum distance between the index and -indexer values: - -.. ipython:: python - - ts2.reindex(ts.index, method='ffill', tolerance='1 day') - -Notice that when used on a ``DatetimeIndex``, ``TimedeltaIndex`` or -``PeriodIndex``, ``tolerance`` will be coerced into a ``Timedelta`` if possible. -This allows you to specify tolerance with appropriate strings. - -.. _basics.drop: - -Dropping labels from an axis -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -A method closely related to ``reindex`` is the :meth:`~DataFrame.drop` function. -It removes a set of labels from an axis: - -.. ipython:: python - - df - df.drop(['a', 'd'], axis=0) - df.drop(['one'], axis=1) - -Note that the following also works, but is a bit less obvious / clean: - -.. ipython:: python - - df.reindex(df.index.difference(['a', 'd'])) - -.. _basics.rename: - -Renaming / mapping labels -~~~~~~~~~~~~~~~~~~~~~~~~~ - -The :meth:`~DataFrame.rename` method allows you to relabel an axis based on some -mapping (a dict or Series) or an arbitrary function. - -.. ipython:: python - - s - s.rename(str.upper) - -If you pass a function, it must return a value when called with any of the -labels (and must produce a set of unique values). A dict or -Series can also be used: - -.. ipython:: python - - df.rename(columns={'one' : 'foo', 'two' : 'bar'}, - index={'a' : 'apple', 'b' : 'banana', 'd' : 'durian'}) - -If the mapping doesn't include a column/index label, it isn't renamed. Also -extra labels in the mapping don't throw an error. 
- -The :meth:`~DataFrame.rename` method also provides an ``inplace`` named -parameter that is by default ``False`` and copies the underlying data. Pass -``inplace=True`` to rename the data in place. - -.. versionadded:: 0.18.0 - -Finally, :meth:`~Series.rename` also accepts a scalar or list-like -for altering the ``Series.name`` attribute. - -.. ipython:: python - - s.rename("scalar-name") - -.. _basics.rename_axis: - -The Panel class has a related :meth:`~Panel.rename_axis` class which can rename -any of its three axes. - -.. _basics.iteration: - -Iteration ---------- - -The behavior of basic iteration over pandas objects depends on the type. -When iterating over a Series, it is regarded as array-like, and basic iteration -produces the values. Other data structures, like DataFrame and Panel, -follow the dict-like convention of iterating over the "keys" of the -objects. - -In short, basic iteration (``for i in object``) produces: - -* **Series**: values -* **DataFrame**: column labels -* **Panel**: item labels - -Thus, for example, iterating over a DataFrame gives you the column names: - -.. ipython:: - - In [0]: df = pd.DataFrame({'col1' : np.random.randn(3), 'col2' : np.random.randn(3)}, - ...: index=['a', 'b', 'c']) - - In [0]: for col in df: - ...: print(col) - ...: - -Pandas objects also have the dict-like :meth:`~DataFrame.iteritems` method to -iterate over the (key, value) pairs. - -To iterate over the rows of a DataFrame, you can use the following methods: - -* :meth:`~DataFrame.iterrows`: Iterate over the rows of a DataFrame as (index, Series) pairs. - This converts the rows to Series objects, which can change the dtypes and has some - performance implications. -* :meth:`~DataFrame.itertuples`: Iterate over the rows of a DataFrame - as namedtuples of the values. This is a lot faster than - :meth:`~DataFrame.iterrows`, and is in most cases preferable to use - to iterate over the values of a DataFrame. - -.. 
warning:: - - Iterating through pandas objects is generally **slow**. In many cases, - iterating manually over the rows is not needed and can be avoided with - one of the following approaches: - - * Look for a *vectorized* solution: many operations can be performed using - built-in methods or numpy functions, (boolean) indexing, ... - - * When you have a function that cannot work on the full DataFrame/Series - at once, it is better to use :meth:`~DataFrame.apply` instead of iterating - over the values. See the docs on :ref:`function application `. - - * If you need to do iterative manipulations on the values but performance is - important, consider writing the inner loop using e.g. cython or numba. - See the :ref:`enhancing performance ` section for some - examples of this approach. - -.. warning:: - - You should **never modify** something you are iterating over. - This is not guaranteed to work in all cases. Depending on the - data types, the iterator returns a copy and not a view, and writing - to it will have no effect! - - For example, in the following case setting the value has no effect: - - .. ipython:: python - - df = pd.DataFrame({'a': [1, 2, 3], 'b': ['a', 'b', 'c']}) - - for index, row in df.iterrows(): - row['a'] = 10 - - df - -iteritems -~~~~~~~~~ - -Consistent with the dict-like interface, :meth:`~DataFrame.iteritems` iterates -through key-value pairs: - -* **Series**: (index, scalar value) pairs -* **DataFrame**: (column, Series) pairs -* **Panel**: (item, DataFrame) pairs - -For example: - -.. ipython:: - - In [0]: for item, frame in wp.iteritems(): - ...: print(item) - ...: print(frame) - ...: - -.. _basics.iterrows: - -iterrows -~~~~~~~~ - -:meth:`~DataFrame.iterrows` allows you to iterate through the rows of a -DataFrame as Series objects. It returns an iterator yielding each -index value along with a Series containing the data in each row: - -.. 
ipython:: - - In [0]: for row_index, row in df.iterrows(): - ...: print('%s\n%s' % (row_index, row)) - ...: - -.. note:: - - Because :meth:`~DataFrame.iterrows` returns a Series for each row, - it does **not** preserve dtypes across the rows (dtypes are - preserved across columns for DataFrames). For example, - - .. ipython:: python - - df_orig = pd.DataFrame([[1, 1.5]], columns=['int', 'float']) - df_orig.dtypes - row = next(df_orig.iterrows())[1] - row - - All values in ``row``, returned as a Series, are now upcast - to floats, including the original integer value in column ``int``: - - .. ipython:: python - - row['int'].dtype - df_orig['int'].dtype - - To preserve dtypes while iterating over the rows, it is better - to use :meth:`~DataFrame.itertuples` which returns namedtuples of the values - and which is generally much faster than ``iterrows``. - -For instance, a contrived way to transpose the DataFrame would be: - -.. ipython:: python - - df2 = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) - print(df2) - print(df2.T) - - df2_t = pd.DataFrame(dict((idx,values) for idx, values in df2.iterrows())) - print(df2_t) - -itertuples -~~~~~~~~~~ - -The :meth:`~DataFrame.itertuples` method will return an iterator -yielding a namedtuple for each row in the DataFrame. The first element -of the tuple will be the row's corresponding index value, while the -remaining values are the row values. - -For instance, - -.. ipython:: python - - for row in df.itertuples(): - print(row) - -This method does not convert the row to a Series object but just -returns the values inside a namedtuple. Therefore, -:meth:`~DataFrame.itertuples` preserves the data type of the values -and is generally faster than :meth:`~DataFrame.iterrows`. - -.. note:: - - The column names will be renamed to positional names if they are - invalid Python identifiers, repeated, or start with an underscore. - With a large number of columns (>255), regular tuples are returned. - -.. 
_basics.dt_accessors: - -.dt accessor ------------- - -``Series`` has an accessor to succinctly return datetime like properties for the -*values* of the Series, if it is a datetime/period like Series. -This will return a Series, indexed like the existing Series. - -.. ipython:: python - - # datetime - s = pd.Series(pd.date_range('20130101 09:10:12', periods=4)) - s - s.dt.hour - s.dt.second - s.dt.day - -This enables nice expressions like this: - -.. ipython:: python - - s[s.dt.day==2] - -You can easily produce tz aware transformations: - -.. ipython:: python - - stz = s.dt.tz_localize('US/Eastern') - stz - stz.dt.tz - -You can also chain these types of operations: - -.. ipython:: python - - s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern') - -You can also format datetime values as strings with :meth:`Series.dt.strftime` which -supports the same format as the standard :meth:`~datetime.datetime.strftime`. - -.. ipython:: python - - # DatetimeIndex - s = pd.Series(pd.date_range('20130101', periods=4)) - s - s.dt.strftime('%Y/%m/%d') - -.. ipython:: python - - # PeriodIndex - s = pd.Series(pd.period_range('20130101', periods=4)) - s - s.dt.strftime('%Y/%m/%d') - -The ``.dt`` accessor works for period and timedelta dtypes. - -.. ipython:: python - - # period - s = pd.Series(pd.period_range('20130101', periods=4, freq='D')) - s - s.dt.year - s.dt.day - -.. ipython:: python - - # timedelta - s = pd.Series(pd.timedelta_range('1 day 00:00:05', periods=4, freq='s')) - s - s.dt.days - s.dt.seconds - s.dt.components - -.. note:: - - ``Series.dt`` will raise a ``TypeError`` if you access it with non-datetimelike values - -Vectorized string methods -------------------------- - -Series is equipped with a set of string processing methods that make it easy to -operate on each element of the array. Perhaps most importantly, these methods -exclude missing/NA values automatically. 
These are accessed via the Series's -``str`` attribute and generally have names matching the equivalent (scalar) -built-in string methods. For example: - - .. ipython:: python - - s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat']) - s.str.lower() - -Powerful pattern-matching methods are provided as well, but note that -pattern-matching generally uses `regular expressions -`__ by default (and in some cases -always uses them). - -Please see :ref:`Vectorized String Methods ` for a complete -description. - -.. _basics.sorting: - -Sorting -------- - -.. warning:: - - The sorting API is substantially changed in 0.17.0, see :ref:`here ` for these changes. - In particular, all sorting methods now return a new object by default, and **DO NOT** operate in-place (except by passing ``inplace=True``). - -There are two obvious kinds of sorting that you may be interested in: sorting -by label and sorting by actual values. - -By Index -~~~~~~~~ - -The primary method for sorting axis -labels (indexes) are the ``Series.sort_index()`` and the ``DataFrame.sort_index()`` methods. - -.. ipython:: python - - unsorted_df = df.reindex(index=['a', 'd', 'c', 'b'], - columns=['three', 'two', 'one']) - - # DataFrame - unsorted_df.sort_index() - unsorted_df.sort_index(ascending=False) - unsorted_df.sort_index(axis=1) - - # Series - unsorted_df['three'].sort_index() - -By Values -~~~~~~~~~ - -The :meth:`Series.sort_values` and :meth:`DataFrame.sort_values` are the entry points for **value** sorting (that is the values in a column or row). -:meth:`DataFrame.sort_values` can accept an optional ``by`` argument for ``axis=0`` -which will use an arbitrary vector or a column name of the DataFrame to -determine the sort order: - -.. ipython:: python - - df1 = pd.DataFrame({'one':[2,1,1,1],'two':[1,3,2,4],'three':[5,4,3,2]}) - df1.sort_values(by='two') - -The ``by`` argument can take a list of column names, e.g.: - -.. 
ipython:: python - - df1[['one', 'two', 'three']].sort_values(by=['one','two']) - -These methods have special treatment of NA values via the ``na_position`` -argument: - -.. ipython:: python - - s[2] = np.nan - s.sort_values() - s.sort_values(na_position='first') - - -.. _basics.searchsorted: - -searchsorted -~~~~~~~~~~~~ - -Series has the :meth:`~Series.searchsorted` method, which works similar to -:meth:`numpy.ndarray.searchsorted`. - -.. ipython:: python - - ser = pd.Series([1, 2, 3]) - ser.searchsorted([0, 3]) - ser.searchsorted([0, 4]) - ser.searchsorted([1, 3], side='right') - ser.searchsorted([1, 3], side='left') - ser = pd.Series([3, 1, 2]) - ser.searchsorted([0, 3], sorter=np.argsort(ser)) - -.. _basics.nsorted: - -smallest / largest values -~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. versionadded:: 0.14.0 - -``Series`` has the :meth:`~Series.nsmallest` and :meth:`~Series.nlargest` methods which return the -smallest or largest :math:`n` values. For a large ``Series`` this can be much -faster than sorting the entire Series and calling ``head(n)`` on the result. - -.. ipython:: python - - s = pd.Series(np.random.permutation(10)) - s - s.sort_values() - s.nsmallest(3) - s.nlargest(3) - -.. versionadded:: 0.17.0 - -``DataFrame`` also has the ``nlargest`` and ``nsmallest`` methods. - -.. ipython:: python - - df = pd.DataFrame({'a': [-2, -1, 1, 10, 8, 11, -1], - 'b': list('abdceff'), - 'c': [1.0, 2.0, 4.0, 3.2, np.nan, 3.0, 4.0]}) - df.nlargest(3, 'a') - df.nlargest(5, ['a', 'c']) - df.nsmallest(3, 'a') - df.nsmallest(5, ['a', 'c']) - - -.. _basics.multi-index_sorting: - -Sorting by a multi-index column -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You must be explicit about sorting when the column is a multi-index, and fully specify -all levels to ``by``. - -.. 
ipython:: python - - df1.columns = pd.MultiIndex.from_tuples([('a','one'),('a','two'),('b','three')]) - df1.sort_values(by=('a','two')) - - -Copying -------- - -The :meth:`~DataFrame.copy` method on pandas objects copies the underlying data (though not -the axis indexes, since they are immutable) and returns a new object. Note that -**it is seldom necessary to copy objects**. For example, there are only a -handful of ways to alter a DataFrame *in-place*: - - * Inserting, deleting, or modifying a column - * Assigning to the ``index`` or ``columns`` attributes - * For homogeneous data, directly modifying the values via the ``values`` - attribute or advanced indexing - -To be clear, no pandas methods have the side effect of modifying your data; -almost all methods return new objects, leaving the original object -untouched. If data is modified, it is because you did so explicitly. - -.. _basics.dtypes: - -dtypes ------- - -The main types stored in pandas objects are ``float``, ``int``, ``bool``, -``datetime64[ns]`` and ``datetime64[ns, tz]`` (in >= 0.17.0), ``timedelta[ns]``, ``category`` (in >= 0.15.0), and ``object``. In addition these dtypes -have item sizes, e.g. ``int64`` and ``int32``. See :ref:`Series with TZ ` for more detail on ``datetime64[ns, tz]`` dtypes. - -A convenient :attr:`~DataFrame.dtypes` attribute for DataFrames returns a Series with the data type of each column. - -.. ipython:: python - - dft = pd.DataFrame(dict(A = np.random.rand(3), - B = 1, - C = 'foo', - D = pd.Timestamp('20010102'), - E = pd.Series([1.0]*3).astype('float32'), - F = False, - G = pd.Series([1]*3,dtype='int8'))) - dft - dft.dtypes - -On a ``Series`` use the :attr:`~Series.dtype` attribute. - -.. ipython:: python - - dft['A'].dtype - -If a pandas object contains data multiple dtypes *IN A SINGLE COLUMN*, the dtype of the -column will be chosen to accommodate all of the data types (``object`` is the most -general). - -.. 
ipython:: python - - # these ints are coerced to floats - pd.Series([1, 2, 3, 4, 5, 6.]) - - # string data forces an ``object`` dtype - pd.Series([1, 2, 3, 6., 'foo']) - -The method :meth:`~DataFrame.get_dtype_counts` will return the number of columns of -each type in a ``DataFrame``: - -.. ipython:: python - - dft.get_dtype_counts() - -Numeric dtypes will propagate and can coexist in DataFrames (starting in v0.11.0). -If a dtype is passed (either directly via the ``dtype`` keyword, a passed ``ndarray``, -or a passed ``Series``, then it will be preserved in DataFrame operations. Furthermore, -different numeric dtypes will **NOT** be combined. The following example will give you a taste. - -.. ipython:: python - - df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float32') - df1 - df1.dtypes - df2 = pd.DataFrame(dict( A = pd.Series(np.random.randn(8), dtype='float16'), - B = pd.Series(np.random.randn(8)), - C = pd.Series(np.array(np.random.randn(8), dtype='uint8')) )) - df2 - df2.dtypes - -defaults -~~~~~~~~ - -By default integer types are ``int64`` and float types are ``float64``, -*REGARDLESS* of platform (32-bit or 64-bit). The following will all result in ``int64`` dtypes. - -.. ipython:: python - - pd.DataFrame([1, 2], columns=['a']).dtypes - pd.DataFrame({'a': [1, 2]}).dtypes - pd.DataFrame({'a': 1 }, index=list(range(2))).dtypes - -Numpy, however will choose *platform-dependent* types when creating arrays. -The following **WILL** result in ``int32`` on 32-bit platform. - -.. ipython:: python - - frame = pd.DataFrame(np.array([1, 2])) - - -upcasting -~~~~~~~~~ - -Types can potentially be *upcasted* when combined with other types, meaning they are promoted -from the current type (say ``int`` to ``float``) - -.. 
ipython:: python - - df3 = df1.reindex_like(df2).fillna(value=0.0) + df2 - df3 - df3.dtypes - -The ``values`` attribute on a DataFrame return the *lower-common-denominator* of the dtypes, meaning -the dtype that can accommodate **ALL** of the types in the resulting homogeneous dtyped numpy array. This can -force some *upcasting*. - -.. ipython:: python - - df3.values.dtype - -astype -~~~~~~ - -.. _basics.cast: - -You can use the :meth:`~DataFrame.astype` method to explicitly convert dtypes from one to another. These will by default return a copy, -even if the dtype was unchanged (pass ``copy=False`` to change this behavior). In addition, they will raise an -exception if the astype operation is invalid. - -Upcasting is always according to the **numpy** rules. If two different dtypes are involved in an operation, -then the more *general* one will be used as the result of the operation. - -.. ipython:: python - - df3 - df3.dtypes - - # conversion of dtypes - df3.astype('float32').dtypes - - -Convert a subset of columns to a specified type using :meth:`~DataFrame.astype` - -.. ipython:: python - - dft = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6], 'c': [7, 8, 9]}) - dft[['a','b']] = dft[['a','b']].astype(np.uint8) - dft - dft.dtypes - -.. versionadded:: 0.19.0 - -Convert certain columns to a specific dtype by passing a dict to :meth:`~DataFrame.astype` - -.. ipython:: python - - dft1 = pd.DataFrame({'a': [1,0,1], 'b': [4,5,6], 'c': [7, 8, 9]}) - dft1 = dft1.astype({'a': np.bool, 'c': np.float64}) - dft1 - dft1.dtypes - -.. note:: - - When trying to convert a subset of columns to a specified type using :meth:`~DataFrame.astype` and :meth:`~DataFrame.loc`, upcasting occurs. - - :meth:`~DataFrame.loc` tries to fit in what we are assigning to the current dtypes, while ``[]`` will overwrite them taking the dtype from the right hand side. Therefore the following piece of code produces the unintended result. - - .. 
ipython:: python - - dft = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6], 'c': [7, 8, 9]}) - dft.loc[:, ['a', 'b']].astype(np.uint8).dtypes - dft.loc[:, ['a', 'b']] = dft.loc[:, ['a', 'b']].astype(np.uint8) - dft.dtypes - -.. _basics.object_conversion: - -object conversion -~~~~~~~~~~~~~~~~~ - -pandas offers various functions to try to force conversion of types from the ``object`` dtype to other types. -The following functions are available for one dimensional object arrays or scalars: - -- :meth:`~pandas.to_numeric` (conversion to numeric dtypes) - - .. ipython:: python - - m = ['1.1', 2, 3] - pd.to_numeric(m) - -- :meth:`~pandas.to_datetime` (conversion to datetime objects) - - .. ipython:: python - - import datetime - m = ['2016-07-09', datetime.datetime(2016, 3, 2)] - pd.to_datetime(m) - -- :meth:`~pandas.to_timedelta` (conversion to timedelta objects) - - .. ipython:: python - - m = ['5us', pd.Timedelta('1day')] - pd.to_timedelta(m) - -To force a conversion, we can pass in an ``errors`` argument, which specifies how pandas should deal with elements -that cannot be converted to desired dtype or object. By default, ``errors='raise'``, meaning that any errors encountered -will be raised during the conversion process. However, if ``errors='coerce'``, these errors will be ignored and pandas -will convert problematic elements to ``pd.NaT`` (for datetime and timedelta) or ``np.nan`` (for numeric). This might be -useful if you are reading in data which is mostly of the desired dtype (e.g. numeric, datetime), but occasionally has -non-conforming elements intermixed that you want to represent as missing: - -.. 
ipython:: python - - import datetime - m = ['apple', datetime.datetime(2016, 3, 2)] - pd.to_datetime(m, errors='coerce') - - m = ['apple', 2, 3] - pd.to_numeric(m, errors='coerce') - - m = ['apple', pd.Timedelta('1day')] - pd.to_timedelta(m, errors='coerce') - -The ``errors`` parameter has a third option of ``errors='ignore'``, which will simply return the passed in data if it -encounters any errors with the conversion to a desired data type: - -.. ipython:: python - - import datetime - m = ['apple', datetime.datetime(2016, 3, 2)] - pd.to_datetime(m, errors='ignore') - - m = ['apple', 2, 3] - pd.to_numeric(m, errors='ignore') - - m = ['apple', pd.Timedelta('1day')] - pd.to_timedelta(m, errors='ignore') - -In addition to object conversion, :meth:`~pandas.to_numeric` provides another argument ``downcast``, which gives the -option of downcasting the newly (or already) numeric data to a smaller dtype, which can conserve memory: - -.. ipython:: python - - m = ['1', 2, 3] - pd.to_numeric(m, downcast='integer') # smallest signed int dtype - pd.to_numeric(m, downcast='signed') # same as 'integer' - pd.to_numeric(m, downcast='unsigned') # smallest unsigned int dtype - pd.to_numeric(m, downcast='float') # smallest float dtype - -As these methods apply only to one-dimensional arrays, lists or scalars; they cannot be used directly on multi-dimensional objects such -as DataFrames. However, with :meth:`~pandas.DataFrame.apply`, we can "apply" the function over each column efficiently: - -.. ipython:: python - - import datetime - df = pd.DataFrame([['2016-07-09', datetime.datetime(2016, 3, 2)]] * 2, dtype='O') - df - df.apply(pd.to_datetime) - - df = pd.DataFrame([['1.1', 2, 3]] * 2, dtype='O') - df - df.apply(pd.to_numeric) - - df = pd.DataFrame([['5us', pd.Timedelta('1day')]] * 2, dtype='O') - df - df.apply(pd.to_timedelta) - -gotchas -~~~~~~~ - -Performing selection operations on ``integer`` type data can easily upcast the data to ``floating``. 
-The dtype of the input data will be preserved in cases where ``nans`` are not introduced (starting in 0.11.0) -See also :ref:`integer na gotchas ` - -.. ipython:: python - - dfi = df3.astype('int32') - dfi['E'] = 1 - dfi - dfi.dtypes - - casted = dfi[dfi>0] - casted - casted.dtypes - -While float dtypes are unchanged. - -.. ipython:: python - - dfa = df3.copy() - dfa['A'] = dfa['A'].astype('float32') - dfa.dtypes - - casted = dfa[df2>0] - casted - casted.dtypes - -Selecting columns based on ``dtype`` ------------------------------------- - -.. _basics.selectdtypes: - -.. versionadded:: 0.14.1 - -The :meth:`~DataFrame.select_dtypes` method implements subsetting of columns -based on their ``dtype``. - -First, let's create a :class:`DataFrame` with a slew of different -dtypes: - -.. ipython:: python - - df = pd.DataFrame({'string': list('abc'), - 'int64': list(range(1, 4)), - 'uint8': np.arange(3, 6).astype('u1'), - 'float64': np.arange(4.0, 7.0), - 'bool1': [True, False, True], - 'bool2': [False, True, False], - 'dates': pd.date_range('now', periods=3).values, - 'category': pd.Series(list("ABC")).astype('category')}) - df['tdeltas'] = df.dates.diff() - df['uint64'] = np.arange(3, 6).astype('u8') - df['other_dates'] = pd.date_range('20130101', periods=3).values - df['tz_aware_dates'] = pd.date_range('20130101', periods=3, tz='US/Eastern') - df - -And the dtypes - -.. ipython:: python - - df.dtypes - -:meth:`~DataFrame.select_dtypes` has two parameters ``include`` and ``exclude`` that allow you to -say "give me the columns WITH these dtypes" (``include``) and/or "give the -columns WITHOUT these dtypes" (``exclude``). - -For example, to select ``bool`` columns - -.. ipython:: python - - df.select_dtypes(include=[bool]) - -You can also pass the name of a dtype in the `numpy dtype hierarchy -`__: - -.. ipython:: python - - df.select_dtypes(include=['bool']) - -:meth:`~pandas.DataFrame.select_dtypes` also works with generic dtypes as well. 
- -For example, to select all numeric and boolean columns while excluding unsigned -integers - -.. ipython:: python - - df.select_dtypes(include=['number', 'bool'], exclude=['unsignedinteger']) - -To select string columns you must use the ``object`` dtype: - -.. ipython:: python - - df.select_dtypes(include=['object']) - -To see all the child dtypes of a generic ``dtype`` like ``numpy.number`` you -can define a function that returns a tree of child dtypes: - -.. ipython:: python - - def subdtypes(dtype): - subs = dtype.__subclasses__() - if not subs: - return dtype - return [dtype, [subdtypes(dt) for dt in subs]] - -All numpy dtypes are subclasses of ``numpy.generic``: - -.. ipython:: python - - subdtypes(np.generic) - -.. note:: - - Pandas also defines the types ``category`` and ``datetime64[ns, tz]``, which are not integrated into the normal - numpy hierarchy and won't show up with the above function. - -.. note:: - - The ``include`` and ``exclude`` parameters must be non-string sequences. From ef5ffe2545c872f1138cb07432e6be748f0da787 Mon Sep 17 00:00:00 2001 From: Michael Date: Fri, 20 Jan 2017 12:28:03 +0000 Subject: [PATCH 6/6] DEPR: relocate exceptions from pandas.core.common #14800 Move exceptions/warnings from pandas.core.common to pandas.api.exceptions The exceptions/warnings can still be imported from pandas.core.common however a DeprecationWarning will be issued when they are raised. 
--- doc/source/whatsnew/v0.20.0.txt | 19 +++++- pandas/api/exceptions.py | 46 ++++++++++++++ pandas/api/tests/test_api.py | 49 ++++++++++++++- pandas/compat/numpy/function.py | 2 +- pandas/core/base.py | 2 +- pandas/core/common.py | 63 +++++-------------- pandas/core/frame.py | 4 +- pandas/core/generic.py | 8 +-- pandas/core/groupby.py | 4 +- pandas/core/ops.py | 4 +- pandas/core/panel.py | 4 +- pandas/core/series.py | 3 +- pandas/indexes/multi.py | 5 +- pandas/io/common.py | 2 +- pandas/io/gbq.py | 2 +- pandas/io/html.py | 2 +- pandas/io/json.py | 2 +- pandas/io/packers.py | 2 +- pandas/io/parsers.py | 2 +- pandas/io/pytables.py | 3 +- pandas/io/tests/parser/test_parsers.py | 2 +- pandas/io/tests/test_packers.py | 2 +- pandas/tests/formats/test_format.py | 1 - .../tests/frame/test_axis_select_reindex.py | 2 +- pandas/tests/frame/test_constructors.py | 8 +-- pandas/tests/frame/test_indexing.py | 10 +-- pandas/tests/groupby/test_groupby.py | 5 +- pandas/tests/indexes/test_multi.py | 2 +- pandas/tests/indexing/test_indexing.py | 30 ++++----- pandas/tests/series/test_datetime_values.py | 4 +- pandas/tests/test_multilevel.py | 8 +-- pandas/tests/test_window.py | 2 +- pandas/tools/plotting.py | 4 +- pandas/tseries/base.py | 2 +- pandas/tseries/index.py | 4 +- pandas/tseries/offsets.py | 2 +- pandas/tseries/resample.py | 3 +- pandas/tseries/tests/test_daterange.py | 4 +- pandas/tseries/tests/test_resample.py | 2 +- pandas/tseries/tests/test_timeseries.py | 2 +- pandas/util/depr_module.py | 46 +++++++++++++- 41 files changed, 249 insertions(+), 124 deletions(-) create mode 100644 pandas/api/exceptions.py diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 2a825edd0e98a..65b5952206e36 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -265,6 +265,23 @@ Deprecations - ``DataFrame.astype()`` has deprecated the ``raise_on_error`` parameter in favor of ``errors`` (:issue:`14878`) - ``Series.sortlevel`` and 
``DataFrame.sortlevel`` have been deprecated in favor of ``Series.sort_index`` and ``DataFrame.sort_index`` (:issue:`15099`) +.. _whatsnew_0200.moved: + +Moved +^^^^^ + +The following exceptions have been relocated from ``pandas.core.common`` to ``pandas.api.exceptions``: + +- ``AbstractMethodError`` +- ``AmbiguousIndexError`` +- ``PandasError`` +- ``PerformanceWarning`` +- ``SettingWithCopyError`` +- ``SettingWithCopyWarning`` +- ``UnsupportedFunctionCall`` +- ``UnsortedIndexError`` + +Raising any of the above exceptions, imported from ``pandas.core.common``, will show a ``DeprecationWarning`` (:issue:`14800`) .. _whatsnew_0200.prior_deprecations: @@ -369,4 +386,4 @@ Bug Fixes - Bug in ``Series`` constructor when both ``copy=True`` and ``dtype`` arguments are provided (:issue:`15125`) - Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`) -- Bug in ``Series.dt.round`` inconsistent behaviour on NAT's with different arguments (:issue:`14940`) \ No newline at end of file +- Bug in ``Series.dt.round`` inconsistent behaviour on NAT's with different arguments (:issue:`14940`) diff --git a/pandas/api/exceptions.py b/pandas/api/exceptions.py new file mode 100644 index 0000000000000..d3cf9ad419ac3 --- /dev/null +++ b/pandas/api/exceptions.py @@ -0,0 +1,46 @@ + +class PandasError(Exception): + pass + + +class PerformanceWarning(Warning): + pass + + +class SettingWithCopyError(ValueError): + pass + + +class SettingWithCopyWarning(Warning): + pass + + +class AmbiguousIndexError(PandasError, KeyError): + pass + + +class UnsupportedFunctionCall(ValueError): + pass + + +class UnsortedIndexError(KeyError): + """ Error raised when attempting to get a slice of a MultiIndex + and the index has not been lexsorted. Subclass of `KeyError`. + + ..
versionadded:: 0.20.0 + + """ + pass + + +class AbstractMethodError(NotImplementedError): + """Raise this error instead of NotImplementedError for abstract methods + while keeping compatibility with Python 2 and Python 3. + """ + + def __init__(self, class_instance): + self.class_instance = class_instance + + def __str__(self): + return ("This method must be defined in the concrete class of %s" % + self.class_instance.__class__.__name__) diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py index b13b4d7de60ca..099e181d9377a 100644 --- a/pandas/api/tests/test_api.py +++ b/pandas/api/tests/test_api.py @@ -3,6 +3,8 @@ import numpy as np import pandas as pd +import pandas.core.common +from pandas.api import exceptions from pandas.core import common as com from pandas import api from pandas.api import types @@ -135,7 +137,7 @@ def test_api(self): class TestApi(Base, tm.TestCase): - allowed = ['tests', 'types'] + allowed = ['exceptions', 'tests', 'types'] def test_api(self): @@ -215,6 +217,50 @@ def test_removed_from_core_common(self): 'ensure_float']: self.assertRaises(AttributeError, lambda: getattr(com, t)) + def test_exceptions_deprecated_in_commom_core(self): + # see issue #14800. 
Exceptions deprecated & moved from + # pandas.core.common to pandas.api.exceptions + + class _ConcreteClass: + pass + + moved_exceptions = ('AmbiguousIndexError', 'PandasError', + 'PerformanceWarning', 'SettingWithCopyError', + 'SettingWithCopyWarning', + 'UnsupportedFunctionCall', + 'UnsortedIndexError') + + for moved_exception in moved_exceptions: + with tm.assert_produces_warning(DeprecationWarning): + getattr(pandas.core.common, moved_exception)() + + with tm.assert_produces_warning(DeprecationWarning): + pandas.core.common.AbstractMethodError(_ConcreteClass()) + + with self.assertRaises(exceptions.AbstractMethodError): + raise exceptions.AbstractMethodError(_ConcreteClass()) + + with self.assertRaises(exceptions.AmbiguousIndexError): + raise exceptions.AmbiguousIndexError() + + with self.assertRaises(exceptions.PandasError): + raise exceptions.PandasError() + + with self.assertRaises(exceptions.PerformanceWarning): + raise exceptions.PerformanceWarning() + + with self.assertRaises(exceptions.SettingWithCopyError): + raise exceptions.SettingWithCopyError() + + with self.assertRaises(exceptions.SettingWithCopyWarning): + raise exceptions.SettingWithCopyWarning() + + with self.assertRaises(exceptions.UnsupportedFunctionCall): + raise exceptions.UnsupportedFunctionCall() + + with self.assertRaises(exceptions.UnsortedIndexError): + raise exceptions.UnsortedIndexError() + class TestDatetools(tm.TestCase): @@ -228,6 +274,7 @@ def test_deprecation_access_obj(self): check_stacklevel=False): pd.datetools.monthEnd + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index adc17c7514832..3f78260e82d3c 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -21,7 +21,7 @@ from numpy import ndarray from pandas.util.validators import (validate_args, validate_kwargs, validate_args_and_kwargs) -from
pandas.core.common import UnsupportedFunctionCall +from pandas.api.exceptions import UnsupportedFunctionCall from pandas.types.common import is_integer, is_bool from pandas.compat import OrderedDict diff --git a/pandas/core/base.py b/pandas/core/base.py index 77272f7721b32..72267e81f8d9c 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -16,7 +16,7 @@ from pandas.compat.numpy import function as nv from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) -from pandas.core.common import AbstractMethodError +from pandas.api.exceptions import AbstractMethodError from pandas.formats.printing import pprint_thing _shared_docs = dict() diff --git a/pandas/core/common.py b/pandas/core/common.py index fddac1f29d454..d984ebd9a9594 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -20,6 +20,8 @@ from pandas.api import types from pandas.types import common +from pandas.util.depr_module import _add_proxies + # back-compat of public API # deprecate these functions m = sys.modules['pandas.core.common'] @@ -63,6 +65,20 @@ def wrapper(*args, **kwargs): setattr(m, t, outer(t)) +# Relocate exceptions, see #14800 +_moved_exceptions = ('AbstractMethodError', + 'AmbiguousIndexError', + 'PandasError', + 'PerformanceWarning', + 'SettingWithCopyError', + 'SettingWithCopyWarning', + 'UnsupportedFunctionCall', + 'UnsortedIndexError') + +_add_proxies(old_mod_name='pandas.core.common', + new_mod_name='pandas.api.exceptions', + entities=_moved_exceptions) + # deprecate array_equivalent @@ -73,53 +89,6 @@ def array_equivalent(*args, **kwargs): return missing.array_equivalent(*args, **kwargs) -class PandasError(Exception): - pass - - -class PerformanceWarning(Warning): - pass - - -class SettingWithCopyError(ValueError): - pass - - -class SettingWithCopyWarning(Warning): - pass - - -class AmbiguousIndexError(PandasError, KeyError): - pass - - -class UnsupportedFunctionCall(ValueError): - pass - - -class UnsortedIndexError(KeyError): - 
""" Error raised when attempting to get a slice of a MultiIndex - and the index has not been lexsorted. Subclass of `KeyError`. - - .. versionadded:: 0.20.0 - - """ - pass - - -class AbstractMethodError(NotImplementedError): - """Raise this error instead of NotImplementedError for abstract methods - while keeping compatibility with Python 2 and Python 3. - """ - - def __init__(self, class_instance): - self.class_instance = class_instance - - def __str__(self): - return ("This method must be defined in the concrete class of %s" % - self.class_instance.__class__.__name__) - - def flatten(l): """Flatten an arbitrarily nested sequence. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4288e03c2cc49..e8308e043192e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -56,7 +56,9 @@ is_named_tuple) from pandas.types.missing import isnull, notnull -from pandas.core.common import (PandasError, _try_sort, +from pandas.api.exceptions import PandasError + +from pandas.core.common import (_try_sort, _default_index, _values_from_object, _maybe_box_datetimelike, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0b5767da74cad..88c6489c5f8a9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -27,11 +27,11 @@ from pandas.types.cast import _maybe_promote, _maybe_upcast_putmask from pandas.types.missing import isnull, notnull from pandas.types.generic import ABCSeries, ABCPanel - +from pandas.api.exceptions import SettingWithCopyError, AbstractMethodError from pandas.core.common import (_values_from_object, - _maybe_box_datetimelike, - SettingWithCopyError, SettingWithCopyWarning, - AbstractMethodError) + _maybe_box_datetimelike) + +from pandas.api.exceptions import SettingWithCopyWarning from pandas.core.base import PandasObject from pandas.core.index import (Index, MultiIndex, _ensure_index, diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 700e279cb0030..1d5a8a84e011c 100644 --- a/pandas/core/groupby.py +++ 
b/pandas/core/groupby.py @@ -32,8 +32,8 @@ _ensure_float) from pandas.types.cast import _possibly_downcast_to_dtype from pandas.types.missing import isnull, notnull, _maybe_fill - -from pandas.core.common import _values_from_object, AbstractMethodError +from pandas.api.exceptions import AbstractMethodError +from pandas.core.common import _values_from_object from pandas.core.base import (PandasObject, SelectionMixin, GroupByError, DataError, SpecificationError) from pandas.core.categorical import Categorical diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 396b0e048bc49..df0e9d38f8cec 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -19,8 +19,8 @@ from pandas.compat import bind_method import pandas.core.missing as missing import pandas.algos as _algos -from pandas.core.common import (_values_from_object, _maybe_match_name, - PerformanceWarning) +from pandas.api.exceptions import PerformanceWarning +from pandas.core.common import _values_from_object, _maybe_match_name from pandas.types.missing import notnull, isnull from pandas.types.common import (needs_i8_conversion, is_datetimelike_v_numeric, diff --git a/pandas/core/panel.py b/pandas/core/panel.py index f708774dd84ff..b4fc13a86ca90 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -14,6 +14,8 @@ is_string_like, is_scalar) from pandas.types.missing import notnull +from pandas.api.exceptions import PandasError + import pandas.computation.expressions as expressions import pandas.core.common as com import pandas.core.ops as ops @@ -21,7 +23,7 @@ from pandas import compat from pandas.compat import (map, zip, range, u, OrderedDict, OrderedDefaultdict) from pandas.compat.numpy import function as nv -from pandas.core.common import PandasError, _try_sort, _default_index +from pandas.core.common import _try_sort, _default_index from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, _ensure_index, diff 
--git a/pandas/core/series.py b/pandas/core/series.py index d967e2d02d41f..2bc48d36ced36 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -32,14 +32,13 @@ _possibly_convert_platform, _possibly_cast_to_datetime, _possibly_castable) from pandas.types.missing import isnull, notnull - +from pandas.api.exceptions import SettingWithCopyError from pandas.core.common import (is_bool_indexer, _default_index, _asarray_tuplesafe, _values_from_object, _try_sort, _maybe_match_name, - SettingWithCopyError, _maybe_box_datetimelike, _dict_compat) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 2afafaeb544d1..c8f7249939b42 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -22,11 +22,10 @@ is_list_like, is_scalar) from pandas.types.missing import isnull, array_equivalent +from pandas.api.exceptions import PerformanceWarning, UnsortedIndexError from pandas.core.common import (_values_from_object, is_bool_indexer, - is_null_slice, - PerformanceWarning, - UnsortedIndexError) + is_null_slice) from pandas.core.base import FrozenList diff --git a/pandas/io/common.py b/pandas/io/common.py index 6817c824ad786..2e2dad216dbf3 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -9,7 +9,7 @@ from pandas.compat import StringIO, BytesIO, string_types, text_type from pandas import compat from pandas.formats.printing import pprint_thing -from pandas.core.common import AbstractMethodError +from pandas.api.exceptions import AbstractMethodError from pandas.types.common import is_number try: diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 966f53e9d75ef..a3a40b751c6f0 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -13,7 +13,7 @@ from pandas import compat from pandas.core.api import DataFrame from pandas.tools.merge import concat -from pandas.core.common import PandasError +from pandas.api.exceptions import PandasError from pandas.compat import lzip, 
bytes_to_str diff --git a/pandas/io/html.py b/pandas/io/html.py index 3c38dae91eb89..141704d1dde35 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -19,7 +19,7 @@ from pandas.compat import (lrange, lmap, u, string_types, iteritems, raise_with_traceback, binary_type) from pandas import Series -from pandas.core.common import AbstractMethodError +from pandas.api.exceptions import AbstractMethodError from pandas.formats.printing import pprint_thing _IMPORTS = False diff --git a/pandas/io/json.py b/pandas/io/json.py index 0a6b8af179e12..716137133f9ca 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -10,8 +10,8 @@ from pandas.compat import StringIO, long, u from pandas import compat, isnull from pandas import Series, DataFrame, to_datetime +from pandas.api.exceptions import AbstractMethodError from pandas.io.common import get_filepath_or_buffer, _get_handle -from pandas.core.common import AbstractMethodError from pandas.formats.printing import pprint_thing loads = _json.loads diff --git a/pandas/io/packers.py b/pandas/io/packers.py index ab44e46c96b77..70ba089aeeb7e 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -59,7 +59,7 @@ from pandas.sparse.api import SparseSeries, SparseDataFrame from pandas.sparse.array import BlockIndex, IntIndex from pandas.core.generic import NDFrame -from pandas.core.common import PerformanceWarning +from pandas.api.exceptions import PerformanceWarning from pandas.io.common import get_filepath_or_buffer from pandas.core.internals import BlockManager, make_block, _safe_reshape import pandas.core.internals as internals diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index fdf26fdef6b25..e3d0ce095dff0 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -22,11 +22,11 @@ is_scalar, is_categorical_dtype) from pandas.types.missing import isnull from pandas.types.cast import _astype_nansafe +from pandas.api.exceptions import AbstractMethodError from pandas.core.index import Index, MultiIndex, 
RangeIndex from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.categorical import Categorical -from pandas.core.common import AbstractMethodError from pandas.io.date_converters import generic_parser from pandas.io.common import (get_filepath_or_buffer, _validate_header_arg, _get_handle, UnicodeReader, UTF8Recoder, diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index e474aeab1f6ca..e37761e485bfb 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -35,7 +35,8 @@ from pandas.tseries.tdi import TimedeltaIndex from pandas.core.base import StringMixin from pandas.formats.printing import adjoin, pprint_thing -from pandas.core.common import _asarray_tuplesafe, PerformanceWarning +from pandas.api.exceptions import PerformanceWarning +from pandas.core.common import _asarray_tuplesafe from pandas.core.algorithms import match, unique from pandas.core.categorical import Categorical, _factorize_from_iterables from pandas.core.internals import (BlockManager, make_block, diff --git a/pandas/io/tests/parser/test_parsers.py b/pandas/io/tests/parser/test_parsers.py index a90f546d37fc8..733bd1a841290 100644 --- a/pandas/io/tests/parser/test_parsers.py +++ b/pandas/io/tests/parser/test_parsers.py @@ -6,7 +6,7 @@ import pandas.util.testing as tm from pandas import read_csv, read_table -from pandas.core.common import AbstractMethodError +from pandas.api.exceptions import AbstractMethodError from .common import ParserTests from .header import HeaderTests diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index 63c2ffc629ca6..219ac044f02fd 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -10,7 +10,7 @@ from pandas.compat import u, PY3 from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range, date_range, period_range, Index, Categorical) -from pandas.core.common import PerformanceWarning +from pandas.api.exceptions import PerformanceWarning from 
pandas.io.packers import to_msgpack, read_msgpack import pandas.util.testing as tm from pandas.util.testing import (ensure_clean, diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 00e5e002ca48d..27ed7098492b9 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -36,7 +36,6 @@ import pandas.formats.format as fmt import pandas.util.testing as tm -import pandas.core.common as com import pandas.formats.printing as printing from pandas.util.terminal import get_terminal_size import pandas as pd diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index ecce17f96a672..2668d48fc5d30 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -16,7 +16,7 @@ assert_frame_equal, assertRaisesRegexp) -from pandas.core.common import PerformanceWarning +from pandas.api.exceptions import PerformanceWarning import pandas.util.testing as tm from pandas.tests.frame.common import TestData diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index bf0fabaf3e402..36bed779f9ae6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -21,9 +21,9 @@ from pandas import (DataFrame, Index, Series, isnull, MultiIndex, Timedelta, Timestamp, date_range) -from pandas.core.common import PandasError +from pandas.api.exceptions import PandasError import pandas as pd -import pandas.core.common as com +import pandas.api.exceptions as excp import pandas.lib as lib import pandas.util.testing as tm @@ -1245,8 +1245,8 @@ def test_constructor_single_value(self): dtype=object), index=[1, 2], columns=['a', 'c'])) - self.assertRaises(com.PandasError, DataFrame, 'a', [1, 2]) - self.assertRaises(com.PandasError, DataFrame, 'a', columns=['a', 'c']) + self.assertRaises(excp.PandasError, DataFrame, 'a', [1, 2]) + 
self.assertRaises(excp.PandasError, DataFrame, 'a', columns=['a', 'c']) with tm.assertRaisesRegexp(TypeError, 'incompatible data and dtype'): DataFrame('a', [1, 2], ['a', 'c'], float) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index abe40f7be1d90..b51a5a52492c6 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -11,7 +11,7 @@ from numpy.random import randn import numpy as np -import pandas.core.common as com +import pandas.api.exceptions as excp from pandas import (DataFrame, Index, Series, notnull, isnull, MultiIndex, DatetimeIndex, Timestamp, date_range) @@ -446,7 +446,7 @@ def test_setitem(self): def f(): smaller['col10'] = ['1', '2'] - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) self.assertEqual(smaller['col10'].dtype, np.object_) self.assertTrue((smaller['col10'] == ['1', '2']).all()) @@ -913,7 +913,7 @@ def test_fancy_getitem_slice_mixed(self): def f(): sliced['C'] = 4. - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) self.assertTrue((self.frame['C'] == 4).all()) def test_fancy_setitem_int_labels(self): @@ -1714,7 +1714,7 @@ def test_irow(self): # setting it makes it raise/warn def f(): result[2] = 0. - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) exp_col = df[2].copy() exp_col[4:8] = 0. assert_series_equal(df[2], exp_col) @@ -1749,7 +1749,7 @@ def test_icol(self): # and that we are setting a copy def f(): result[8] = 0. 
- self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) self.assertTrue((df[8] == 0).all()) # list of integers diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 873c63ca257c4..79b19614602dc 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -7,9 +7,9 @@ from pandas.types.common import _ensure_platform_int from pandas import date_range, bdate_range, Timestamp, isnull +from pandas.api.exceptions import UnsupportedFunctionCall from pandas.core.index import Index, MultiIndex, CategoricalIndex from pandas.core.api import Categorical, DataFrame -from pandas.core.common import UnsupportedFunctionCall from pandas.core.groupby import (SpecificationError, DataError, _nargsort, _lexsort_indexer) from pandas.core.series import Series @@ -24,6 +24,7 @@ from pandas.tools.merge import concat from collections import defaultdict from functools import partial +import pandas.api.exceptions as excp import pandas.core.common as com import numpy as np @@ -4319,7 +4320,7 @@ def test_groupby_multiindex_not_lexsorted(self): tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) expected = lexsorted_df.groupby('a').mean() - with tm.assert_produces_warning(com.PerformanceWarning): + with tm.assert_produces_warning(excp.PerformanceWarning): result = not_lexsorted_df.groupby('a').mean() tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 2861a1f56b24b..b9fcaf5510e63 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -15,7 +15,7 @@ from pandas import (CategoricalIndex, DataFrame, Index, MultiIndex, compat, date_range, period_range) from pandas.compat import PY3, long, lrange, lzip, range, u -from pandas.core.common import PerformanceWarning, UnsortedIndexError +from pandas.api.exceptions import PerformanceWarning, UnsortedIndexError from 
pandas.indexes.base import InvalidIndexError from pandas.lib import Timestamp diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 6fc24e41ee914..7c0490c31efd2 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -16,14 +16,14 @@ import numpy as np import pandas as pd -import pandas.core.common as com +import pandas.api.exceptions as excp from pandas import option_context from pandas.core.indexing import _non_reducing_slice, _maybe_numeric_slice from pandas.core.api import (DataFrame, Index, Series, Panel, isnull, MultiIndex, Timestamp, Timedelta) from pandas.formats.printing import pprint_thing from pandas import concat -from pandas.core.common import PerformanceWarning, UnsortedIndexError +from pandas.api.exceptions import PerformanceWarning, UnsortedIndexError import pandas.util.testing as tm from pandas import date_range @@ -4636,12 +4636,12 @@ def test_detect_chained_assignment(self): def f(): df['A'][0] = -5 - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) def f(): df['A'][1] = np.nan - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) self.assertIsNone(df['A'].is_copy) # using a copy (the chain), fails @@ -4651,7 +4651,7 @@ def f(): def f(): df.loc[0]['A'] = -5 - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) # doc example df = DataFrame({'a': ['one', 'one', 'two', 'three', @@ -4666,7 +4666,7 @@ def f(): indexer = df.a.str.startswith('o') df[indexer]['c'] = 42 - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]}) df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) @@ -4674,12 +4674,12 @@ def f(): def f(): df['A'][0] = 111 - self.assertRaises(com.SettingWithCopyError, f) + 
self.assertRaises(excp.SettingWithCopyError, f) def f(): df.loc[0]['A'] = 111 - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) df.loc[0, 'A'] = 111 tm.assert_frame_equal(df, expected) @@ -4768,7 +4768,7 @@ def random_text(nobs=100): def f(): zed['eyes']['right'].fillna(value=555, inplace=True) - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) df = DataFrame(np.random.randn(10, 4)) s = df.iloc[:, 0].sort_values() @@ -4793,7 +4793,7 @@ def f(): def f(): df.iloc[0:5]['group'] = 'a' - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) # mixed type setting # same dtype & changing dtype @@ -4805,17 +4805,17 @@ def f(): def f(): df.ix[2]['D'] = 'foo' - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) def f(): df.ix[2]['C'] = 'foo' - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) def f(): df['C'][2] = 'foo' - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) def test_setting_with_copy_bug(self): @@ -4828,7 +4828,7 @@ def test_setting_with_copy_bug(self): def f(): df[['c']][mask] = df[['b']][mask] - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) # invalid warning as we are returning a new object # GH 8730 @@ -4845,7 +4845,7 @@ def test_detect_chained_assignment_warnings(self): with option_context('chained_assignment', 'warn'): df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) with tm.assert_produces_warning( - expected_warning=com.SettingWithCopyWarning): + expected_warning=excp.SettingWithCopyWarning): df.loc[0]['A'] = 111 def test_float64index_slicing_bug(self): diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index b9f999a6c6ffe..ea7d97785227d 100644 --- 
a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -12,7 +12,7 @@ from pandas.tseries.period import PeriodIndex from pandas.tseries.index import Timestamp, DatetimeIndex from pandas.tseries.tdi import TimedeltaIndex -import pandas.core.common as com +import pandas.api.exceptions as excp from pandas.util.testing import assert_series_equal import pandas.util.testing as tm @@ -270,7 +270,7 @@ def get_dir(s): def f(): s.dt.hour[0] = 5 - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) def test_dt_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 59d9e1e094d9d..15ec56d85f2e5 100755 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -13,7 +13,7 @@ from pandas.types.common import is_float_dtype, is_integer_dtype from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assertRaisesRegexp) -import pandas.core.common as com +import pandas.api.exceptions as excp import pandas.util.testing as tm from pandas.compat import (range, lrange, StringIO, lzip, u, product as cart_product, zip) @@ -544,7 +544,7 @@ def test_xs_level(self): def f(x): x[:] = 10 - self.assertRaises(com.SettingWithCopyError, f, result) + self.assertRaises(excp.SettingWithCopyError, f, result) def test_xs_level_multiple(self): from pandas import read_table @@ -568,7 +568,7 @@ def test_xs_level_multiple(self): def f(x): x[:] = 10 - self.assertRaises(com.SettingWithCopyError, f, result) + self.assertRaises(excp.SettingWithCopyError, f, result) # GH2107 dates = lrange(20111201, 20111205) @@ -1428,7 +1428,7 @@ def f(): df['foo']['one'] = 2 return df - self.assertRaises(com.SettingWithCopyError, f) + self.assertRaises(excp.SettingWithCopyError, f) try: df = f() diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 
929ff43bfaaad..3f2ee10370670 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -16,7 +16,7 @@ import pandas.core.window as rwindow import pandas.tseries.offsets as offsets from pandas.core.base import SpecificationError -from pandas.core.common import UnsupportedFunctionCall +from pandas.api.exceptions import UnsupportedFunctionCall import pandas.util.testing as tm from pandas.compat import range, zip, PY3 diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index bd9933b12b580..e3d6b33961614 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -20,8 +20,8 @@ from pandas.util.decorators import cache_readonly, deprecate_kwarg from pandas.core.base import PandasObject - -from pandas.core.common import AbstractMethodError, _try_sort +from pandas.api.exceptions import AbstractMethodError +from pandas.core.common import _try_sort from pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import Index, MultiIndex from pandas.core.series import Series, remove_na diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index a8dd2238c2063..ffffa032b5b38 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -17,7 +17,7 @@ from pandas.types.missing import isnull from pandas.core import common as com, algorithms from pandas.core.algorithms import checked_add_with_arr -from pandas.core.common import AbstractMethodError +from pandas.api.exceptions import AbstractMethodError import pandas.formats.printing as printing import pandas.tslib as tslib diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index aca962c8178d3..9e6ff1fe8145a 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -25,8 +25,8 @@ from pandas.types.missing import isnull import pandas.types.concat as _concat -from pandas.core.common import (_values_from_object, _maybe_box, - PerformanceWarning) +from pandas.api.exceptions import PerformanceWarning +from pandas.core.common import 
_values_from_object, _maybe_box from pandas.core.index import Index, Int64Index, Float64Index from pandas.indexes.base import _index_shared_docs diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 370dd00762896..e467ab84fb1d6 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -5,7 +5,7 @@ from pandas.types.generic import ABCSeries, ABCDatetimeIndex, ABCPeriod from pandas.tseries.tools import to_datetime, normalize_date -from pandas.core.common import AbstractMethodError +from pandas.api.exceptions import AbstractMethodError # import after tools, dateutil check from dateutil.relativedelta import relativedelta, weekday diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index e6d500144fa44..723bce1ebc17a 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -14,6 +14,7 @@ from pandas.tseries.tdi import TimedeltaIndex from pandas.tseries.offsets import DateOffset, Tick, Day, _delta_to_nanoseconds from pandas.tseries.period import PeriodIndex, period_range +import pandas.api.exceptions as excp import pandas.core.common as com import pandas.core.algorithms as algos @@ -197,7 +198,7 @@ def __setattr__(self, attr, value): def __getitem__(self, key): try: return super(Resampler, self).__getitem__(key) - except (KeyError, com.AbstractMethodError): + except (KeyError, excp.AbstractMethodError): # compat for deprecated if isinstance(self.obj, com.ABCSeries): diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py index 87f9f55e0189c..f915e3864c990 100644 --- a/pandas/tseries/tests/test_daterange.py +++ b/pandas/tseries/tests/test_daterange.py @@ -11,7 +11,7 @@ generate_range, DateOffset, Minute) from pandas.tseries.index import cdate_range, bdate_range, date_range -from pandas.core import common as com +from pandas.api import exceptions from pandas.util.testing import assertRaisesRegexp import pandas.util.testing as tm @@ -669,7 +669,7 @@ def 
test_shift(self): self.assertEqual(shifted[0], self.rng[0]) self.assertEqual(shifted.offset, self.rng.offset) - with tm.assert_produces_warning(com.PerformanceWarning): + with tm.assert_produces_warning(exceptions.PerformanceWarning): rng = date_range(START, END, freq=BMonthEnd()) shifted = rng.shift(1, freq=CDay()) self.assertEqual(shifted[0], rng[0] + CDay()) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 26c311b4a72f8..39e755cd2d230 100755 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -15,7 +15,7 @@ from pandas.types.generic import ABCSeries, ABCDataFrame from pandas.compat import range, lrange, zip, product, OrderedDict from pandas.core.base import SpecificationError -from pandas.core.common import UnsupportedFunctionCall +from pandas.api.exceptions import UnsupportedFunctionCall from pandas.core.groupby import DataError from pandas.tseries.frequencies import MONTHS, DAYS from pandas.tseries.frequencies import to_offset diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 58a4457777ea0..231cff8568f32 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -26,7 +26,7 @@ NaT, timedelta_range, Timedelta, _np_version_under1p8, concat) from pandas.compat import range, long, StringIO, lrange, lmap, zip, product from pandas.compat.numpy import np_datetime64_compat -from pandas.core.common import PerformanceWarning +from pandas.api.exceptions import PerformanceWarning from pandas.tslib import iNaT from pandas.util.testing import ( assert_frame_equal, assert_series_equal, assert_almost_equal, diff --git a/pandas/util/depr_module.py b/pandas/util/depr_module.py index 736d2cdaab31c..26ba2ca1340ec 100644 --- a/pandas/util/depr_module.py +++ b/pandas/util/depr_module.py @@ -1,8 +1,9 @@ """ -This module houses a utility class for mocking deprecated modules. 
-It is for internal use only and should not be used beyond this purpose. +This module houses utilities for mocking deprecated entities. +They are for internal use only and should not be used beyond this purpose. """ +import sys import warnings import importlib @@ -62,3 +63,44 @@ def _import_deprmod(self): warnings.filterwarnings('ignore', category=FutureWarning) deprmodule = importlib.import_module(self.deprmod) return deprmodule + + +def _add_proxies(old_mod_name, new_mod_name, entities): + """ Mock entities moved between modules + + Parameters + ---------- + old_mod_name : module that used to contain the entity implementation + new_mod_name : module that contains the implementations of the entities + entities : iterable of the names of the mocked entities + + The mechanics are as follows: + + 1. Physically move the entity from 'old_mod_name' to 'new_mod_name' + 2. Add the name of the above entity to the 'entities' iterable + 3. Repeat steps (1-2) for each entity you want to move + + Invoking the moved entity from 'old_mod_name' will act as a proxy to the + actual entity in 'new_mod_name'. If warnings are enabled a deprecation + warning will be issued. + """ + + def create_proxy(entity): + + def wrapper(*args, **kwargs): + warnings.warn("{old}.{entity} has been deprecated. Use " + "{new}.{entity} instead.".format(entity=entity, + old=old_mod_name, + new=new_mod_name), + DeprecationWarning, stacklevel=2) + + return getattr(new_mod, entity)(*args, **kwargs) + + return wrapper + + importlib.import_module(new_mod_name) + old_mod = sys.modules[old_mod_name] + new_mod = sys.modules[new_mod_name] + + for entity in entities: + setattr(old_mod, entity, create_proxy(entity))