diff --git a/doc/source/io.rst b/doc/source/io.rst index 4d97c43e85de8..5856b4e293259 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1878,14 +1878,14 @@ to be parsed. .. code-block:: python - read_excel('path_to_file.xls', 'Sheet1', parse_cols=2, index_col=None, na_values=['NA']) + read_excel('path_to_file.xls', 'Sheet1', parse_cols=2) If `parse_cols` is a list of integers, then it is assumed to be the file column indices to be parsed. .. code-block:: python - read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3], index_col=None, na_values=['NA']) + read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3]) To write a DataFrame object to a sheet of an Excel file, you can use the ``to_excel`` instance method. The arguments are largely the same as ``to_csv`` diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 274a2341c1a9f..b872c8a60e34e 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -199,9 +199,9 @@ the right thing: Reshaping by Melt ----------------- -The ``melt`` function found in ``pandas.core.reshape`` is useful to massage a +The :func:`~pandas.melt` function is useful to massage a DataFrame into a format where one or more columns are identifier variables, -while all other columns, considered measured variables, are "pivoted" to the +while all other columns, considered measured variables, are "unpivoted" to the row axis, leaving just two non-identifier columns, "variable" and "value". The names of those columns can be customized by supplying the ``var_name`` and ``value_name`` parameters. diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 10236f6b2e191..d05ae4b72c2f1 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -261,7 +261,7 @@ bar plot: .. ipython:: python :suppress: - plt.figure(); + plt.figure() .. ipython:: python @@ -275,7 +275,7 @@ To produce a stacked bar plot, pass ``stacked=True``: .. 
ipython:: python :suppress: - plt.figure(); + plt.figure() .. ipython:: python @@ -287,7 +287,7 @@ To get horizontal bar plots, pass ``kind='barh'``: .. ipython:: python :suppress: - plt.figure(); + plt.figure() .. ipython:: python @@ -320,7 +320,7 @@ New since 0.10.0, the ``by`` keyword can be specified to plot grouped histograms .. ipython:: python :suppress: - plt.figure(); + plt.figure() .. ipython:: python @@ -434,12 +434,12 @@ Scatter plot matrix .. _visualization.kde: *New in 0.8.0* You can create density plots using the Series/DataFrame.plot and -setting `kind='kde'`: +setting ``kind='kde'``: .. ipython:: python :suppress: - plt.figure(); + plt.figure() .. ipython:: python @@ -460,7 +460,7 @@ too dense to plot each point individually. .. ipython:: python :suppress: - plt.figure(); + plt.figure() .. ipython:: python @@ -486,7 +486,7 @@ given by column ``z``. The bins are aggregated with numpy's ``max`` function. .. ipython:: python :suppress: - plt.figure(); + plt.figure() .. ipython:: python diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index f5ca96e2d827e..0d06e9253ce1f 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -617,16 +617,34 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, value_name='value', col_level=None): """ "Unpivots" a DataFrame from wide format to long format, optionally leaving - id variables set + identifier variables set. + + This function is useful to massage a DataFrame into a format where one + or more columns are identifier variables (`id_vars`), while all other + columns, considered measured variables (`value_vars`), are "unpivoted" to + the row axis, leaving just two non-identifier columns, 'variable' and + 'value'. Parameters ---------- frame : DataFrame - id_vars : tuple, list, or ndarray - value_vars : tuple, list, or ndarray - var_name : scalar, if None uses frame.column.name or 'variable' + id_vars : tuple, list, or ndarray, optional + Column(s) to use as identifier variables. 
+ value_vars : tuple, list, or ndarray, optional + Column(s) to unpivot. If not specified, uses all columns that + are not set as `id_vars`. + var_name : scalar + Name to use for the 'variable' column. If None it uses + ``frame.columns.name`` or 'variable'. value_name : scalar, default 'value' - col_level : scalar, if columns are a MultiIndex then use this level to melt + Name to use for the 'value' column. + col_level : int or string, optional + If columns are a MultiIndex then use this level to melt. + + See Also + -------- + pivot_table + DataFrame.pivot Examples -------- @@ -634,35 +652,53 @@ def melt(frame, id_vars=None, value_vars=None, >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, ... 'B': {0: 1, 1: 3, 2: 5}, ... 'C': {0: 2, 1: 4, 2: 6}}) - >>> df A B C 0 a 1 2 1 b 3 4 2 c 5 6 - >>> melt(df, id_vars=['A'], value_vars=['B']) + >>> pd.melt(df, id_vars=['A'], value_vars=['B']) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + + >>> pd.melt(df, id_vars=['A'], value_vars=['B', 'C']) A variable value 0 a B 1 1 b B 3 2 c B 5 + 3 a C 2 + 4 b C 4 + 5 c C 6 + + The names of 'variable' and 'value' columns can be customized: - >>> melt(df, id_vars=['A'], value_vars=['B'], - ... var_name='myVarname', value_name='myValname') + >>> pd.melt(df, id_vars=['A'], value_vars=['B'], + ... var_name='myVarname', value_name='myValname') A myVarname myValname 0 a B 1 1 b B 3 2 c B 5 + If you have multi-index columns: + >>> df.columns = [list('ABC'), list('DEF')] + >>> df + A B C + D E F + 0 a 1 2 + 1 b 3 4 + 2 c 5 6 - >>> melt(df, col_level=0, id_vars=['A'], value_vars=['B']) + >>> pd.melt(df, col_level=0, id_vars=['A'], value_vars=['B']) A variable value 0 a B 1 1 b B 3 2 c B 5 - >>> melt(df, id_vars=[('A', 'D')], value_vars=[('B', 'E')]) + >>> pd.melt(df, id_vars=[('A', 'D')], value_vars=[('B', 'E')]) (A, D) variable_0 variable_1 value 0 a B E 1 1 b B E 3