
Commit 48ddfbb

DOC: Fix examples in reshape (#32980)
1 parent 13b9e40 commit 48ddfbb

File tree

4 files changed (+104, -63 lines changed):
ci/code_checks.sh
pandas/core/reshape/merge.py
pandas/core/reshape/tile.py
pandas/core/reshape/util.py


ci/code_checks.sh (+2, -8)
@@ -283,14 +283,8 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
     pytest -q --doctest-modules pandas/core/tools/datetimes.py
     RET=$(($RET + $?)) ; echo $MSG "DONE"

-    MSG='Doctests top-level reshaping functions' ; echo $MSG
-    pytest -q --doctest-modules \
-        pandas/core/reshape/concat.py \
-        pandas/core/reshape/pivot.py \
-        pandas/core/reshape/reshape.py \
-        pandas/core/reshape/tile.py \
-        pandas/core/reshape/melt.py \
-        -k"-crosstab -pivot_table -cut"
+    MSG='Doctests reshaping functions' ; echo $MSG
+    pytest -q --doctest-modules pandas/core/reshape/
     RET=$(($RET + $?)) ; echo $MSG "DONE"

     MSG='Doctests interval classes' ; echo $MSG
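
For reference, the consolidated check simply points pytest's doctest collection at the whole pandas/core/reshape/ package instead of listing modules and deselecting the failing examples. A minimal sketch of an equivalent invocation from Python, assuming pytest is installed and the script is run from the root of a pandas development checkout:

# Rough equivalent of the consolidated CI step above; assumes a pandas
# checkout and pytest available in the environment.
import sys

import pytest

# --doctest-modules collects the >>> examples embedded in the docstrings of
# every module under pandas/core/reshape/ and runs them as tests.
exit_code = pytest.main(["-q", "--doctest-modules", "pandas/core/reshape/"])
sys.exit(exit_code)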

pandas/core/reshape/merge.py (+87, -41)
@@ -223,7 +223,14 @@ def merge_ordered(

 Examples
 --------
->>> A
+>>> df1 = pd.DataFrame(
+... {
+... "key": ["a", "c", "e", "a", "c", "e"],
+... "lvalue": [1, 2, 3, 1, 2, 3],
+... "group": ["a", "a", "a", "b", "b", "b"]
+... }
+... )
+>>> df1
 key lvalue group
 0 a 1 a
 1 c 2 a
@@ -232,24 +239,25 @@ def merge_ordered(
 4 c 2 b
 5 e 3 b

->>> B
-Key rvalue
-0 b 1
-1 c 2
-2 d 3
-
->>> merge_ordered(A, B, fill_method='ffill', left_by='group')
-group key lvalue rvalue
-0 a a 1 NaN
-1 a b 1 1.0
-2 a c 2 2.0
-3 a d 2 3.0
-4 a e 3 3.0
-5 b a 1 NaN
-6 b b 1 1.0
-7 b c 2 2.0
-8 b d 2 3.0
-9 b e 3 3.0
+>>> df2 = pd.DataFrame({"key": ["b", "c", "d"], "rvalue": [1, 2, 3]})
+>>> df2
+key rvalue
+0 b 1
+1 c 2
+2 d 3
+
+>>> merge_ordered(df1, df2, fill_method="ffill", left_by="group")
+key lvalue group rvalue
+0 a 1 a NaN
+1 b 1 a 1.0
+2 c 2 a 2.0
+3 d 2 a 3.0
+4 e 3 a 3.0
+5 a 1 b NaN
+6 b 1 b 1.0
+7 c 2 b 2.0
+8 d 2 b 3.0
+9 e 3 b 3.0
 """

 def _merger(x, y):
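
The rewritten merge_ordered example is now self-contained and reproducible outside the docstring. A minimal standalone sketch (only the pandas import is added; df1 and df2 are the frames defined in the diff above):

# Reproduces the corrected merge_ordered doctest as a standalone script.
import pandas as pd

df1 = pd.DataFrame(
    {
        "key": ["a", "c", "e", "a", "c", "e"],
        "lvalue": [1, 2, 3, 1, 2, 3],
        "group": ["a", "a", "a", "b", "b", "b"],
    }
)
df2 = pd.DataFrame({"key": ["b", "c", "d"], "rvalue": [1, 2, 3]})

# fill_method="ffill" forward-fills within each left_by group, so only keys
# that sort before the first df2 key ("b") are left with NaN in rvalue.
print(pd.merge_ordered(df1, df2, fill_method="ffill", left_by="group"))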
@@ -370,15 +378,14 @@ def merge_asof(

 Examples
 --------
->>> left = pd.DataFrame({'a': [1, 5, 10], 'left_val': ['a', 'b', 'c']})
+>>> left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
 >>> left
 a left_val
 0 1 a
 1 5 b
 2 10 c

->>> right = pd.DataFrame({'a': [1, 2, 3, 6, 7],
-... 'right_val': [1, 2, 3, 6, 7]})
+>>> right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})
 >>> right
 a right_val
 0 1 1
@@ -387,41 +394,40 @@ def merge_asof(
 3 6 6
 4 7 7

->>> pd.merge_asof(left, right, on='a')
+>>> pd.merge_asof(left, right, on="a")
 a left_val right_val
 0 1 a 1
 1 5 b 3
 2 10 c 7

->>> pd.merge_asof(left, right, on='a', allow_exact_matches=False)
+>>> pd.merge_asof(left, right, on="a", allow_exact_matches=False)
 a left_val right_val
 0 1 a NaN
 1 5 b 3.0
 2 10 c 7.0

->>> pd.merge_asof(left, right, on='a', direction='forward')
+>>> pd.merge_asof(left, right, on="a", direction="forward")
 a left_val right_val
 0 1 a 1.0
 1 5 b 6.0
 2 10 c NaN

->>> pd.merge_asof(left, right, on='a', direction='nearest')
+>>> pd.merge_asof(left, right, on="a", direction="nearest")
 a left_val right_val
 0 1 a 1
 1 5 b 6
 2 10 c 7

 We can use indexed DataFrames as well.

->>> left = pd.DataFrame({'left_val': ['a', 'b', 'c']}, index=[1, 5, 10])
+>>> left = pd.DataFrame({"left_val": ["a", "b", "c"]}, index=[1, 5, 10])
 >>> left
 left_val
 1 a
 5 b
 10 c

->>> right = pd.DataFrame({'right_val': [1, 2, 3, 6, 7]},
-... index=[1, 2, 3, 6, 7])
+>>> right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7])
 >>> right
 right_val
 1 1
@@ -438,6 +444,32 @@ def merge_asof(

 Here is a real-world times-series example

+>>> quotes = pd.DataFrame(
+... {
+... "time": [
+... pd.Timestamp("2016-05-25 13:30:00.023"),
+... pd.Timestamp("2016-05-25 13:30:00.023"),
+... pd.Timestamp("2016-05-25 13:30:00.030"),
+... pd.Timestamp("2016-05-25 13:30:00.041"),
+... pd.Timestamp("2016-05-25 13:30:00.048"),
+... pd.Timestamp("2016-05-25 13:30:00.049"),
+... pd.Timestamp("2016-05-25 13:30:00.072"),
+... pd.Timestamp("2016-05-25 13:30:00.075")
+... ],
+... "ticker": [
+... "GOOG",
+... "MSFT",
+... "MSFT",
+... "MSFT",
+... "GOOG",
+... "AAPL",
+... "GOOG",
+... "MSFT"
+... ],
+... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
+... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03]
+... }
+... )
 >>> quotes
 time ticker bid ask
 0 2016-05-25 13:30:00.023 GOOG 720.50 720.93
@@ -449,6 +481,20 @@ def merge_asof(
 6 2016-05-25 13:30:00.072 GOOG 720.50 720.88
 7 2016-05-25 13:30:00.075 MSFT 52.01 52.03

+>>> trades = pd.DataFrame(
+... {
+... "time": [
+... pd.Timestamp("2016-05-25 13:30:00.023"),
+... pd.Timestamp("2016-05-25 13:30:00.038"),
+... pd.Timestamp("2016-05-25 13:30:00.048"),
+... pd.Timestamp("2016-05-25 13:30:00.048"),
+... pd.Timestamp("2016-05-25 13:30:00.048")
+... ],
+... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
+... "price": [51.95, 51.95, 720.77, 720.92, 98.0],
+... "quantity": [75, 155, 100, 100, 100]
+... }
+... )
 >>> trades
 time ticker price quantity
 0 2016-05-25 13:30:00.023 MSFT 51.95 75
@@ -459,9 +505,7 @@ def merge_asof(

 By default we are taking the asof of the quotes

->>> pd.merge_asof(trades, quotes,
-... on='time',
-... by='ticker')
+>>> pd.merge_asof(trades, quotes, on="time", by="ticker")
 time ticker price quantity bid ask
 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96
 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98
@@ -471,10 +515,9 @@ def merge_asof(

 We only asof within 2ms between the quote time and the trade time

->>> pd.merge_asof(trades, quotes,
-... on='time',
-... by='ticker',
-... tolerance=pd.Timedelta('2ms'))
+>>> pd.merge_asof(
+... trades, quotes, on="time", by="ticker", tolerance=pd.Timedelta("2ms")
+... )
 time ticker price quantity bid ask
 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96
 1 2016-05-25 13:30:00.038 MSFT 51.95 155 NaN NaN
@@ -486,11 +529,14 @@ def merge_asof(
 and we exclude exact matches on time. However *prior* data will
 propagate forward

->>> pd.merge_asof(trades, quotes,
-... on='time',
-... by='ticker',
-... tolerance=pd.Timedelta('10ms'),
-... allow_exact_matches=False)
+>>> pd.merge_asof(
+... trades,
+... quotes,
+... on="time",
+... by="ticker",
+... tolerance=pd.Timedelta("10ms"),
+... allow_exact_matches=False
+... )
 time ticker price quantity bid ask
 0 2016-05-25 13:30:00.023 MSFT 51.95 75 NaN NaN
 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98
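
Likewise, the merge_asof examples now construct their inputs explicitly. A minimal standalone sketch of the first example from the diff above (only the pandas import is added):

# Standalone version of the basic merge_asof example in the docstring.
import pandas as pd

left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})

# For each row of `left`, take the last `right` row whose "a" is less than
# or equal to the left "a" (the default "backward" direction).
print(pd.merge_asof(left, right, on="a"))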

pandas/core/reshape/tile.py (+14, -12)
@@ -171,24 +171,26 @@ def cut(
 ... index=['a', 'b', 'c', 'd', 'e'])
 >>> pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False)
 ... # doctest: +ELLIPSIS
-(a 0.0
-b 1.0
-c 2.0
-d 3.0
-e 4.0
-dtype: float64, array([0, 2, 4, 6, 8]))
+(a 1.0
+b 2.0
+c 3.0
+d 4.0
+e NaN
+dtype: float64,
+array([ 0, 2, 4, 6, 8, 10]))

 Use `drop` optional when bins is not unique

 >>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True,
 ... right=False, duplicates='drop')
 ... # doctest: +ELLIPSIS
-(a 0.0
-b 1.0
-c 2.0
+(a 1.0
+b 2.0
+c 3.0
 d 3.0
-e 3.0
-dtype: float64, array([0, 2, 4, 6, 8]))
+e NaN
+dtype: float64,
+array([ 0, 2, 4, 6, 10]))

 Passing an IntervalIndex for `bins` results in those categories exactly.
 Notice that values not covered by the IntervalIndex are set to NaN. 0
@@ -197,7 +199,7 @@ def cut(

 >>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)])
 >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins)
-[NaN, (0, 1], NaN, (2, 3], (4, 5]]
+[NaN, (0.0, 1.0], NaN, (2.0, 3.0], (4.0, 5.0]]
 Categories (3, interval[int64]): [(0, 1] < (2, 3] < (4, 5]]
 """
 # NOTE: this binning code is changed a bit from histogram for var(x) == 0
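
The corrected cut output can be sanity-checked with a short script. Note that the definition of s lies above this hunk and only its index is visible in the diff; the values [2, 4, 6, 8, 10] below are inferred from the corrected output, so treat them as an assumption:

# Sanity check for the corrected pd.cut doctest. The Series values are
# assumed from context (the definition of `s` is not part of the hunk).
import numpy as np
import pandas as pd

s = pd.Series(np.array([2, 4, 6, 8, 10]), index=["a", "b", "c", "d", "e"])

# right=False makes the bins half-open on the right, so 10 falls outside
# [8, 10) and maps to NaN; retbins=True also returns the bin edges.
codes, edges = pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False)
print(codes)   # a 1.0, b 2.0, c 3.0, d 4.0, e NaN
print(edges)   # [ 0  2  4  6  8 10]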

pandas/core/reshape/util.py (+1, -2)
@@ -19,8 +19,7 @@ def cartesian_product(X):
 Examples
 --------
 >>> cartesian_product([list('ABC'), [1, 2]])
-[array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'),
-array([1, 2, 1, 2, 1, 2])]
+[array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='<U1'), array([1, 2, 1, 2, 1, 2])]

 See Also
 --------
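
cartesian_product is an internal helper, so the dtype fix ('<U1', NumPy's native unicode strings, instead of the Python 2-era '|S1' byte strings) is easiest to confirm directly. A small sketch, assuming the internal import path used at the time of this commit:

# Confirms the corrected doctest output for the internal helper. The import
# path is internal API and may move in later pandas versions.
from pandas.core.reshape.util import cartesian_product

# Each input is repeated/tiled so that, read position by position, the
# returned arrays enumerate every (letter, number) combination.
letters, numbers = cartesian_product([list("ABC"), [1, 2]])
print(letters)  # ['A' 'A' 'B' 'B' 'C' 'C'], dtype '<U1'
print(numbers)  # [1 2 1 2 1 2]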
