diff --git a/doc/source/getting_started/comparison/comparison_with_r.rst b/doc/source/getting_started/comparison/comparison_with_r.rst index f91f4218c3429..dad6edb25219d 100644 --- a/doc/source/getting_started/comparison/comparison_with_r.rst +++ b/doc/source/getting_started/comparison/comparison_with_r.rst @@ -133,8 +133,8 @@ with a combination of the ``iloc`` indexer attribute and ``numpy.r_``. named = list("abcdefg") n = 30 - columns = named + np.arange(len(named), n).tolist() - df = pd.DataFrame(np.random.randn(n, n), columns=columns) + column_schema = named + np.arange(len(named), n).tolist() + df = pd.DataFrame(np.random.randn(n, n), columns=column_schema) df.iloc[:, np.r_[:10, 24:30]] diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index 0105cf99193dd..b36ff6518957d 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -626,12 +626,12 @@ even if some categories are not present in the data: .. ipython:: python - columns = pd.Categorical( + column_schema = pd.Categorical( ["One", "One", "Two"], categories=["One", "Two", "Three"], ordered=True ) df = pd.DataFrame( data=[[1, 2, 3], [4, 5, 6]], - columns=pd.MultiIndex.from_arrays([["A", "B", "B"], columns]), + columns=pd.MultiIndex.from_arrays([["A", "B", "B"], column_schema]), ) df.groupby(axis=1, level=1).sum() diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index daf5a0e481b8e..02056b8d4d792 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -1171,9 +1171,9 @@ Option 2: read column names and then data .. ipython:: python pd.read_csv(StringIO(data), sep=";", header=10, nrows=10).columns - columns = pd.read_csv(StringIO(data), sep=";", header=10, nrows=10).columns + column_schema = pd.read_csv(StringIO(data), sep=";", header=10, nrows=10).columns pd.read_csv( - StringIO(data), sep=";", index_col=0, header=12, parse_dates=True, names=columns + StringIO(data), sep=";", index_col=0, header=12, parse_dates=True, names=column_schema ) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index ba3fb17cc8764..5d8ef7ce02097 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -839,10 +839,10 @@ Alternatively, the built-in methods could be used to produce the same outputs. .. ipython:: python - max = ts.groupby(lambda x: x.year).transform("max") - min = ts.groupby(lambda x: x.year).transform("min") + max_ts = ts.groupby(lambda x: x.year).transform("max") + min_ts = ts.groupby(lambda x: x.year).transform("min") - max - min + max_ts - min_ts Another common data transform is to replace missing data with the group mean. diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index f939945fc6cda..61c9dedc56f0f 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -1581,8 +1581,8 @@ The name, if set, will be shown in the console display: .. ipython:: python index = pd.Index(list(range(5)), name='rows') - columns = pd.Index(['A', 'B', 'C'], name='cols') - df = pd.DataFrame(np.random.randn(5, 3), index=index, columns=columns) + column_schema = pd.Index(['A', 'B', 'C'], name='cols') + df = pd.DataFrame(np.random.randn(5, 3), index=index, columns=column_schema) df df['A'] diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index adca9de6c130a..cbe509e158aa3 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -179,7 +179,7 @@ processed individually. .. ipython:: python - columns = pd.MultiIndex.from_tuples( + column_schema = pd.MultiIndex.from_tuples( [ ("A", "cat", "long"), ("B", "cat", "long"), @@ -188,7 +188,7 @@ processed individually. ], names=["exp", "animal", "hair_length"], ) - df = pd.DataFrame(np.random.randn(4, 4), columns=columns) + df = pd.DataFrame(np.random.randn(4, 4), columns=column_schema) df df.stack(level=["animal", "hair_length"]) @@ -212,7 +212,7 @@ calling :meth:`~DataFrame.sort_index`, of course). Here is a more complex exampl .. ipython:: python - columns = pd.MultiIndex.from_tuples( + column_schema = pd.MultiIndex.from_tuples( [ ("A", "cat"), ("B", "dog"), @@ -224,7 +224,7 @@ calling :meth:`~DataFrame.sort_index`, of course). Here is a more complex exampl index = pd.MultiIndex.from_product( [("bar", "baz", "foo", "qux"), ("one", "two")], names=["first", "second"] ) - df = pd.DataFrame(np.random.randn(8, 4), index=index, columns=columns) + df = pd.DataFrame(np.random.randn(8, 4), index=index, columns=column_schema) df2 = df.iloc[[0, 1, 2, 4, 5, 7]] df2 diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index 129f43dd36930..05b1a328dd769 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -73,15 +73,15 @@ Option 1 loads in all the data and then filters to what we need. .. ipython:: python - columns = ["id_0", "name_0", "x_0", "y_0"] + column_schema = ["id_0", "name_0", "x_0", "y_0"] - pd.read_parquet("timeseries_wide.parquet")[columns] + pd.read_parquet("timeseries_wide.parquet")[column_schema] Option 2 only loads the columns we request. .. ipython:: python - pd.read_parquet("timeseries_wide.parquet", columns=columns) + pd.read_parquet("timeseries_wide.parquet", columns=column_schema) .. ipython:: python :suppress: