|
5 | 5 |
|
6 | 6 | import numpy as np
|
7 | 7 | import pandas as pd
|
| 8 | +
|
8 | 9 | np.set_printoptions(precision=4, suppress=True)
|
9 | 10 | pd.options.display.max_rows = 15
|
10 | 11 |
|
@@ -173,8 +174,9 @@ Furthermore you can align a level of a MultiIndexed DataFrame with a Series.
|
173 | 174 | .. ipython:: python
|
174 | 175 |
|
175 | 176 | dfmi = df.copy()
|
176 |
| - dfmi.index = pd.MultiIndex.from_tuples([ |
177 |
| - (1, 'a'), (1, 'b'), (1, 'c'), (2, 'a')], names=['first', 'second']) |
| 177 | + dfmi.index = pd.MultiIndex.from_tuples([(1, 'a'), (1, 'b'), |
| 178 | + (1, 'c'), (2, 'a')], |
| 179 | + names=['first', 'second']) |
178 | 180 | dfmi.sub(column, axis=0, level='second')
|
179 | 181 |
|
180 | 182 | With Panel, describing the matching behavior is a bit more difficult, so
|
@@ -565,8 +567,8 @@ course):
|
565 | 567 | series = pd.Series(np.random.randn(1000))
|
566 | 568 | series[::2] = np.nan
|
567 | 569 | series.describe()
|
568 |
| - frame = pd.DataFrame( |
569 |
| - np.random.randn(1000, 5), columns=['a', 'b', 'c', 'd', 'e']) |
| 570 | + frame = pd.DataFrame(np.random.randn(1000, 5), |
| 571 | + columns=['a', 'b', 'c', 'd', 'e']) |
570 | 572 | frame.iloc[::2] = np.nan
|
571 | 573 | frame.describe()
|
572 | 574 |
|
@@ -1088,8 +1090,10 @@ a single value and returning a single value. For example:
|
1088 | 1090 | .. ipython:: python
|
1089 | 1091 |
|
1090 | 1092 | df4
|
| 1093 | +
|
1091 | 1094 | def f(x):
|
1092 |
| - len(str(x)) |
| 1095 | + return len(str(x)) |
| 1096 | +
|
1093 | 1097 | df4['one'].map(f)
|
1094 | 1098 | df4.applymap(f)
|
1095 | 1099 |
|
@@ -1433,10 +1437,8 @@ Thus, for example, iterating over a DataFrame gives you the column names:
|
1433 | 1437 |
|
1434 | 1438 | .. ipython:: python
|
1435 | 1439 |
|
1436 |
| - df = pd.DataFrame({ |
1437 |
| - 'col1': np.random.randn(3), |
1438 |
| - 'col2': np.random.randn(3)}, |
1439 |
| - index=['a', 'b', 'c']) |
| 1440 | + df = pd.DataFrame({'col1': np.random.randn(3), |
| 1441 | + 'col2': np.random.randn(3)}, index=['a', 'b', 'c']) |
1440 | 1442 |
|
1441 | 1443 | for col in df:
|
1442 | 1444 | print(col)
|
@@ -1556,7 +1558,7 @@ For instance, a contrived way to transpose the DataFrame would be:
|
1556 | 1558 | print(df2)
|
1557 | 1559 | print(df2.T)
|
1558 | 1560 |
|
1559 |
| - df2_t = pd.DataFrame(dict((idx, values) for idx, values in df2.iterrows())) |
| 1561 | + df2_t = pd.DataFrame({idx: values for idx, values in df2.iterrows()}) |
1560 | 1562 | print(df2_t)
|
1561 | 1563 |
|
1562 | 1564 | itertuples
|
@@ -1732,8 +1734,9 @@ to use to determine the sorted order.
|
1732 | 1734 |
|
1733 | 1735 | .. ipython:: python
|
1734 | 1736 |
|
1735 |
| - df1 = pd.DataFrame({ |
1736 |
| - 'one': [2, 1, 1, 1], 'two': [1, 3, 2, 4], 'three': [5, 4, 3, 2]}) |
| 1737 | + df1 = pd.DataFrame({'one': [2, 1, 1, 1], |
| 1738 | + 'two': [1, 3, 2, 4], |
| 1739 | + 'three': [5, 4, 3, 2]}) |
1737 | 1740 | df1.sort_values(by='two')
|
1738 | 1741 |
|
1739 | 1742 | The ``by`` parameter can take a list of column names, e.g.:
|
@@ -1843,8 +1846,9 @@ all levels to ``by``.
|
1843 | 1846 |
|
1844 | 1847 | .. ipython:: python
|
1845 | 1848 |
|
1846 |
| - df1.columns = pd.MultiIndex.from_tuples([ |
1847 |
| - ('a', 'one'), ('a', 'two'), ('b', 'three')]) |
| 1849 | + df1.columns = pd.MultiIndex.from_tuples([('a', 'one'), |
| 1850 | + ('a', 'two'), |
| 1851 | + ('b', 'three')]) |
1848 | 1852 | df1.sort_values(by=('a', 'two'))
|
1849 | 1853 |
|
1850 | 1854 |
|
@@ -1894,13 +1898,13 @@ with the data type of each column.
|
1894 | 1898 |
|
1895 | 1899 | .. ipython:: python
|
1896 | 1900 |
|
1897 |
| - dft = pd.DataFrame(dict(A=np.random.rand(3), |
1898 |
| - B=1, |
1899 |
| - C='foo', |
1900 |
| - D=pd.Timestamp('20010102'), |
1901 |
| - E=pd.Series([1.0] * 3).astype('float32'), |
1902 |
| - F=False, |
1903 |
| - G=pd.Series([1] * 3, dtype='int8'))) |
| 1901 | + dft = pd.DataFrame({'A': np.random.rand(3), |
| 1902 | + 'B': 1, |
| 1903 | + 'C': 'foo', |
| 1904 | + 'D': pd.Timestamp('20010102'), |
| 1905 | + 'E': pd.Series([1.0] * 3).astype('float32'), |
| 1906 | + 'F': False, |
| 1907 | + 'G': pd.Series([1] * 3, dtype='int8')}) |
1904 | 1908 | dft
|
1905 | 1909 | dft.dtypes
|
1906 | 1910 |
|
@@ -1939,10 +1943,10 @@ different numeric dtypes will **NOT** be combined. The following example will gi
|
1939 | 1943 | df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float32')
|
1940 | 1944 | df1
|
1941 | 1945 | df1.dtypes
|
1942 |
| - df2 = pd.DataFrame(dict(A=pd.Series(np.random.randn(8), dtype='float16'), |
1943 |
| - B=pd.Series(np.random.randn(8)), |
1944 |
| - C=pd.Series(np.array(np.random.randn(8), |
1945 |
| - dtype='uint8')))) |
| 1946 | + df2 = pd.DataFrame({'A': pd.Series(np.random.randn(8), dtype='float16'), |
| 1947 | + 'B': pd.Series(np.random.randn(8)), |
| 1948 | + 'C': pd.Series(np.array(np.random.randn(8), |
| 1949 | + dtype='uint8'))}) |
1946 | 1950 | df2
|
1947 | 1951 | df2.dtypes
|
1948 | 1952 |
|
@@ -2057,7 +2061,7 @@ to the correct type.
|
2057 | 2061 | df = pd.DataFrame([[1, 2],
|
2058 | 2062 | ['a', 'b'],
|
2059 | 2063 | [datetime.datetime(2016, 3, 2),
|
2060 |
| - datetime.datetime(2016, 3, 2)]]) |
| 2064 | + datetime.datetime(2016, 3, 2)]]) |
2061 | 2065 | df = df.T
|
2062 | 2066 | df
|
2063 | 2067 | df.dtypes
|
|
0 commit comments