Skip to content

Commit d00258e

Browse files
Merge pull request #10561 from jorisvandenbossche/doc-imports
DOC: consistent imports (GH9886) part IV
2 parents 98961c5 + 3bd9b26 commit d00258e

File tree

7 files changed

+188
-209
lines changed

7 files changed

+188
-209
lines changed

doc/source/groupby.rst

+76-75
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,16 @@
66
77
import numpy as np
88
np.random.seed(123456)
9-
from pandas import *
10-
options.display.max_rows=15
11-
randn = np.random.randn
129
np.set_printoptions(precision=4, suppress=True)
13-
import matplotlib.pyplot as plt
14-
plt.close('all')
10+
import pandas as pd
11+
pd.options.display.max_rows = 15
1512
import matplotlib
1613
try:
1714
matplotlib.style.use('ggplot')
1815
except AttributeError:
19-
options.display.mpl_style = 'default'
20-
from pandas.compat import zip
16+
pd.options.display.mpl_style = 'default'
17+
import matplotlib.pyplot as plt
18+
plt.close('all')
2119
2220
*****************************
2321
Group By: split-apply-combine
@@ -105,11 +103,12 @@ consider the following DataFrame:
105103

106104
.. ipython:: python
107105
108-
df = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
109-
'foo', 'bar', 'foo', 'foo'],
110-
'B' : ['one', 'one', 'two', 'three',
111-
'two', 'two', 'one', 'three'],
112-
'C' : randn(8), 'D' : randn(8)})
106+
df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
107+
'foo', 'bar', 'foo', 'foo'],
108+
'B' : ['one', 'one', 'two', 'three',
109+
'two', 'two', 'one', 'three'],
110+
'C' : np.random.randn(8),
111+
'D' : np.random.randn(8)})
113112
df
114113
115114
We could naturally group by either the ``A`` or ``B`` columns or both:
@@ -142,7 +141,7 @@ output of aggregation functions will only contain unique index values:
142141
143142
lst = [1, 2, 3, 1, 2, 3]
144143
145-
s = Series([1, 2, 3, 10, 20, 30], lst)
144+
s = pd.Series([1, 2, 3, 10, 20, 30], lst)
146145
147146
grouped = s.groupby(level=0)
148147
@@ -189,7 +188,7 @@ however pass ``sort=False`` for potential speedups:
189188

190189
.. ipython:: python
191190
192-
df2 = DataFrame({'X' : ['B', 'B', 'A', 'A'], 'Y' : [1, 2, 3, 4]})
191+
df2 = pd.DataFrame({'X' : ['B', 'B', 'A', 'A'], 'Y' : [1, 2, 3, 4]})
193192
df2.groupby(['X'], sort=True).sum()
194193
df2.groupby(['X'], sort=False).sum()
195194
@@ -203,10 +202,10 @@ however pass ``sort=False`` for potential speedups:
203202
n = 10
204203
weight = np.random.normal(166, 20, size=n)
205204
height = np.random.normal(60, 10, size=n)
206-
time = date_range('1/1/2000', periods=n)
205+
time = pd.date_range('1/1/2000', periods=n)
207206
gender = tm.choice(['male', 'female'], size=n)
208-
df = DataFrame({'height': height, 'weight': weight,
209-
'gender': gender}, index=time)
207+
df = pd.DataFrame({'height': height, 'weight': weight,
208+
'gender': gender}, index=time)
210209
211210
.. ipython:: python
212211
@@ -226,11 +225,12 @@ however pass ``sort=False`` for potential speedups:
226225
.. ipython:: python
227226
:suppress:
228227
229-
df = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
230-
'foo', 'bar', 'foo', 'foo'],
231-
'B' : ['one', 'one', 'two', 'three',
232-
'two', 'two', 'one', 'three'],
233-
'C' : randn(8), 'D' : randn(8)})
228+
df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
229+
'foo', 'bar', 'foo', 'foo'],
230+
'B' : ['one', 'one', 'two', 'three',
231+
'two', 'two', 'one', 'three'],
232+
'C' : np.random.randn(8),
233+
'D' : np.random.randn(8)})
234234
235235
.. _groupby.multiindex:
236236

@@ -248,8 +248,8 @@ natural to group by one of the levels of the hierarchy.
248248
['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
249249
tuples = list(zip(*arrays))
250250
tuples
251-
index = MultiIndex.from_tuples(tuples, names=['first', 'second'])
252-
s = Series(randn(8), index=index)
251+
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
252+
s = pd.Series(np.random.randn(8), index=index)
253253
254254
.. ipython:: python
255255
@@ -281,13 +281,13 @@ Also as of v0.6, grouping with multiple levels is supported.
281281
['doo', 'doo', 'bee', 'bee', 'bop', 'bop', 'bop', 'bop'],
282282
['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
283283
tuples = list(zip(*arrays))
284-
index = MultiIndex.from_tuples(tuples, names=['first', 'second', 'third'])
285-
s = Series(randn(8), index=index)
284+
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second', 'third'])
285+
s = pd.Series(np.random.randn(8), index=index)
286286
287287
.. ipython:: python
288288
289289
s
290-
s.groupby(level=['first','second']).sum()
290+
s.groupby(level=['first', 'second']).sum()
291291
292292
More on the ``sum`` function and aggregation later.
293293

@@ -499,9 +499,9 @@ to standardize the data within each group:
499499

500500
.. ipython:: python
501501
502-
index = date_range('10/1/1999', periods=1100)
503-
ts = Series(np.random.normal(0.5, 2, 1100), index)
504-
ts = rolling_mean(ts, 100, 100).dropna()
502+
index = pd.date_range('10/1/1999', periods=1100)
503+
ts = pd.Series(np.random.normal(0.5, 2, 1100), index)
504+
ts = pd.rolling_mean(ts, 100, 100).dropna()
505505
506506
ts.head()
507507
ts.tail()
@@ -528,7 +528,7 @@ We can also visually compare the original and transformed data sets.
528528

529529
.. ipython:: python
530530
531-
compare = DataFrame({'Original': ts, 'Transformed': transformed})
531+
compare = pd.DataFrame({'Original': ts, 'Transformed': transformed})
532532
533533
@savefig groupby_transform_plot.png
534534
compare.plot()
@@ -539,11 +539,11 @@ Another common data transform is to replace missing data with the group mean.
539539
:suppress:
540540
541541
cols = ['A', 'B', 'C']
542-
values = randn(1000, 3)
542+
values = np.random.randn(1000, 3)
543543
values[np.random.randint(0, 1000, 100), 0] = np.nan
544544
values[np.random.randint(0, 1000, 50), 1] = np.nan
545545
values[np.random.randint(0, 1000, 200), 2] = np.nan
546-
data_df = DataFrame(values, columns=cols)
546+
data_df = pd.DataFrame(values, columns=cols)
547547
548548
.. ipython:: python
549549
@@ -599,7 +599,7 @@ than 2.
599599

600600
.. ipython:: python
601601
602-
sf = Series([1, 1, 2, 3, 3, 3])
602+
sf = pd.Series([1, 1, 2, 3, 3, 3])
603603
sf.groupby(sf).filter(lambda x: x.sum() > 2)
604604
605605
The argument of ``filter`` must be a function that, applied to the group as a
@@ -610,7 +610,7 @@ with only a couple members.
610610

611611
.. ipython:: python
612612
613-
dff = DataFrame({'A': np.arange(8), 'B': list('aabbbbcc')})
613+
dff = pd.DataFrame({'A': np.arange(8), 'B': list('aabbbbcc')})
614614
dff.groupby('B').filter(lambda x: len(x) > 2)
615615
616616
Alternatively, instead of dropping the offending groups, we can return a
@@ -672,9 +672,9 @@ next). This enables some operations to be carried out rather succinctly:
672672

673673
.. ipython:: python
674674
675-
tsdf = DataFrame(randn(1000, 3),
676-
index=date_range('1/1/2000', periods=1000),
677-
columns=['A', 'B', 'C'])
675+
tsdf = pd.DataFrame(np.random.randn(1000, 3),
676+
index=pd.date_range('1/1/2000', periods=1000),
677+
columns=['A', 'B', 'C'])
678678
tsdf.ix[::2] = np.nan
679679
grouped = tsdf.groupby(lambda x: x.year)
680680
grouped.fillna(method='pad')
@@ -689,8 +689,8 @@ The ``nlargest`` and ``nsmallest`` methods work on ``Series`` style groupbys:
689689

690690
.. ipython:: python
691691
692-
s = Series([9, 8, 7, 5, 19, 1, 4.2, 3.3])
693-
g = Series(list('abababab'))
692+
s = pd.Series([9, 8, 7, 5, 19, 1, 4.2, 3.3])
693+
g = pd.Series(list('abababab'))
694694
gb = s.groupby(g)
695695
gb.nlargest(3)
696696
gb.nsmallest(3)
@@ -721,8 +721,8 @@ The dimension of the returned result can also change:
721721
In [8]: grouped = df.groupby('A')['C']
722722

723723
In [10]: def f(group):
724-
....: return DataFrame({'original' : group,
725-
....: 'demeaned' : group - group.mean()})
724+
....: return pd.DataFrame({'original' : group,
725+
....: 'demeaned' : group - group.mean()})
726726
....:
727727

728728
In [11]: grouped.apply(f)
@@ -732,8 +732,8 @@ The dimension of the returned result can also change:
732732
.. ipython:: python
733733
734734
def f(x):
735-
return Series([ x, x**2 ], index = ['x', 'x^s'])
736-
s = Series(np.random.rand(5))
735+
return pd.Series([ x, x**2 ], index = ['x', 'x^2'])
736+
s = pd.Series(np.random.rand(5))
737737
s
738738
s.apply(f)
739739
@@ -754,7 +754,7 @@ The dimension of the returned result can also change:
754754

755755
.. ipython:: python
756756
757-
d = DataFrame({"a":["x", "y"], "b":[1,2]})
757+
d = pd.DataFrame({"a":["x", "y"], "b":[1,2]})
758758
def identity(df):
759759
print df
760760
return df
@@ -802,9 +802,9 @@ can be used as group keys. If so, the order of the levels will be preserved:
802802

803803
.. ipython:: python
804804
805-
data = Series(np.random.randn(100))
805+
data = pd.Series(np.random.randn(100))
806806
807-
factor = qcut(data, [0, .25, .5, .75, 1.])
807+
factor = pd.qcut(data, [0, .25, .5, .75, 1.])
808808
809809
data.groupby(factor).mean()
810810
@@ -813,27 +813,28 @@ can be used as group keys. If so, the order of the levels will be preserved:
813813
Grouping with a Grouper specification
814814
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
815815

816-
Your may need to specify a bit more data to properly group. You can
816+
You may need to specify a bit more data to properly group. You can
817817
use the ``pd.Grouper`` to provide this local control.
818818

819819
.. ipython:: python
820820
821-
import datetime as DT
822-
823-
df = DataFrame({
824-
'Branch' : 'A A A A A A A B'.split(),
825-
'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(),
826-
'Quantity': [1,3,5,1,8,1,9,3],
827-
'Date' : [
828-
DT.datetime(2013,1,1,13,0),
829-
DT.datetime(2013,1,1,13,5),
830-
DT.datetime(2013,10,1,20,0),
831-
DT.datetime(2013,10,2,10,0),
832-
DT.datetime(2013,10,1,20,0),
833-
DT.datetime(2013,10,2,10,0),
834-
DT.datetime(2013,12,2,12,0),
835-
DT.datetime(2013,12,2,14,0),
836-
]})
821+
import datetime
822+
823+
df = pd.DataFrame({
824+
'Branch' : 'A A A A A A A B'.split(),
825+
'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(),
826+
'Quantity': [1,3,5,1,8,1,9,3],
827+
'Date' : [
828+
datetime.datetime(2013,1,1,13,0),
829+
datetime.datetime(2013,1,1,13,5),
830+
datetime.datetime(2013,10,1,20,0),
831+
datetime.datetime(2013,10,2,10,0),
832+
datetime.datetime(2013,10,1,20,0),
833+
datetime.datetime(2013,10,2,10,0),
834+
datetime.datetime(2013,12,2,12,0),
835+
datetime.datetime(2013,12,2,14,0),
836+
]
837+
})
837838
838839
df
839840
@@ -862,7 +863,7 @@ Just like for a DataFrame or Series you can call head and tail on a groupby:
862863

863864
.. ipython:: python
864865
865-
df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
866+
df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
866867
df
867868
868869
g = df.groupby('A')
@@ -894,7 +895,7 @@ To select from a DataFrame or Series the nth item, use the nth method. This is a
894895

895896
.. ipython:: python
896897
897-
df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
898+
df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
898899
g = df.groupby('A')
899900
900901
g.nth(0)
@@ -919,7 +920,7 @@ As with other methods, passing ``as_index=False``, will achieve a filtration, wh
919920

920921
.. ipython:: python
921922
922-
df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
923+
df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
923924
g = df.groupby('A',as_index=False)
924925
925926
g.nth(0)
@@ -929,8 +930,8 @@ You can also select multiple rows from each group by specifying multiple nth val
929930

930931
.. ipython:: python
931932
932-
business_dates = date_range(start='4/1/2014', end='6/30/2014', freq='B')
933-
df = DataFrame(1, index=business_dates, columns=['a', 'b'])
933+
business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', freq='B')
934+
df = pd.DataFrame(1, index=business_dates, columns=['a', 'b'])
934935
# get the first, 4th, and last date index for each month
935936
df.groupby((df.index.year, df.index.month)).nth([0, 3, -1])
936937
@@ -961,7 +962,7 @@ the values in column 1 where the group is "B" are 3 higher on average.
961962
.. ipython:: python
962963
963964
np.random.seed(1234)
964-
df = DataFrame(np.random.randn(50, 2))
965+
df = pd.DataFrame(np.random.randn(50, 2))
965966
df['g'] = np.random.choice(['A', 'B'], size=50)
966967
df.loc[df['g'] == 'B', 1] += 3
967968
@@ -1010,11 +1011,11 @@ column index name will be used as the name of the inserted column:
10101011
.. ipython:: python
10111012
10121013
df = pd.DataFrame({
1013-
'a': [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2],
1014-
'b': [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1],
1015-
'c': [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
1016-
'd': [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1],
1017-
})
1014+
'a': [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2],
1015+
'b': [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1],
1016+
'c': [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
1017+
'd': [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1],
1018+
})
10181019
10191020
def compute_metrics(x):
10201021
result = {'b_sum': x['b'].sum(), 'c_mean': x['c'].mean()}

0 commit comments

Comments
 (0)