@@ -46,6 +46,7 @@ class providing the base-class of operations.
46
46
ArrayLike ,
47
47
IndexLabel ,
48
48
NDFrameT ,
49
+ PositionalIndexer ,
49
50
RandomState ,
50
51
Scalar ,
51
52
T ,
@@ -65,6 +66,7 @@ class providing the base-class of operations.
65
66
is_bool_dtype ,
66
67
is_datetime64_dtype ,
67
68
is_float_dtype ,
69
+ is_integer ,
68
70
is_integer_dtype ,
69
71
is_numeric_dtype ,
70
72
is_object_dtype ,
@@ -97,6 +99,7 @@ class providing the base-class of operations.
97
99
numba_ ,
98
100
ops ,
99
101
)
102
+ from pandas .core .groupby .indexing import GroupByIndexingMixin
100
103
from pandas .core .indexes .api import (
101
104
CategoricalIndex ,
102
105
Index ,
@@ -555,7 +558,7 @@ def f(self):
555
558
]
556
559
557
560
558
- class BaseGroupBy (PandasObject , SelectionMixin [NDFrameT ]):
561
+ class BaseGroupBy (PandasObject , SelectionMixin [NDFrameT ], GroupByIndexingMixin ):
559
562
_group_selection : IndexLabel | None = None
560
563
_apply_allowlist : frozenset [str ] = frozenset ()
561
564
_hidden_attrs = PandasObject ._hidden_attrs | {
@@ -2445,23 +2448,28 @@ def backfill(self, limit=None):
2445
2448
@Substitution (name = "groupby" )
2446
2449
@Substitution (see_also = _common_see_also )
2447
2450
def nth (
2448
- self , n : int | list [int ], dropna : Literal ["any" , "all" , None ] = None
2451
+ self ,
2452
+ n : PositionalIndexer | tuple ,
2453
+ dropna : Literal ["any" , "all" , None ] = None ,
2449
2454
) -> NDFrameT :
2450
2455
"""
2451
- Take the nth row from each group if n is an int, or a subset of rows
2452
- if n is a list of ints.
2456
+ Take the nth row from each group if n is an int, otherwise a subset of rows.
2453
2457
2454
2458
If dropna, will take the nth non-null row, dropna is either
2455
2459
'all' or 'any'; this is equivalent to calling dropna(how=dropna)
2456
2460
before the groupby.
2457
2461
2458
2462
Parameters
2459
2463
----------
2460
- n : int or list of ints
2461
- A single nth value for the row or a list of nth values.
2464
+ n : int, slice or list of ints and slices
2465
+ A single nth value for the row or a list of nth values or slices.
2466
+
2467
+ .. versionchanged:: 1.4.0
2468
+ Added slice and lists containing slices.
2469
+
2462
2470
dropna : {'any', 'all', None}, default None
2463
2471
Apply the specified dropna operation before counting which row is
2464
- the nth row.
2472
+ the nth row. Only supported if n is an int.
2465
2473
2466
2474
Returns
2467
2475
-------
@@ -2496,6 +2504,12 @@ def nth(
2496
2504
1 2.0
2497
2505
2 3.0
2498
2506
2 5.0
2507
+ >>> g.nth(slice(None, -1))
2508
+ B
2509
+ A
2510
+ 1 NaN
2511
+ 1 2.0
2512
+ 2 3.0
2499
2513
2500
2514
Specifying `dropna` allows counting while ignoring ``NaN``
2501
2515
@@ -2520,33 +2534,16 @@ def nth(
2520
2534
1 1 2.0
2521
2535
4 2 5.0
2522
2536
"""
2523
- valid_containers = (set , list , tuple )
2524
- if not isinstance (n , (valid_containers , int )):
2525
- raise TypeError ("n needs to be an int or a list/set/tuple of ints" )
2526
-
2527
2537
if not dropna :
2528
-
2529
- if isinstance (n , int ):
2530
- nth_values = [n ]
2531
- elif isinstance (n , valid_containers ):
2532
- nth_values = list (set (n ))
2533
-
2534
- nth_array = np .array (nth_values , dtype = np .intp )
2535
2538
with self ._group_selection_context ():
2536
-
2537
- mask_left = np .in1d (self ._cumcount_array (), nth_array )
2538
- mask_right = np .in1d (
2539
- self ._cumcount_array (ascending = False ) + 1 , - nth_array
2540
- )
2541
- mask = mask_left | mask_right
2539
+ mask = self ._make_mask_from_positional_indexer (n )
2542
2540
2543
2541
ids , _ , _ = self .grouper .group_info
2544
2542
2545
2543
# Drop NA values in grouping
2546
2544
mask = mask & (ids != - 1 )
2547
2545
2548
2546
out = self ._mask_selected_obj (mask )
2549
-
2550
2547
if not self .as_index :
2551
2548
return out
2552
2549
@@ -2563,19 +2560,20 @@ def nth(
2563
2560
return out .sort_index (axis = self .axis ) if self .sort else out
2564
2561
2565
2562
# dropna is truthy
2566
- if isinstance ( n , valid_containers ):
2567
- raise ValueError ("dropna option with a list of nth values is not supported " )
2563
+ if not is_integer ( n ):
2564
+ raise ValueError ("dropna option only supported for an integer argument " )
2568
2565
2569
2566
if dropna not in ["any" , "all" ]:
2570
2567
# Note: when agg-ing picker doesn't raise this, just returns NaN
2571
2568
raise ValueError (
2572
- "For a DataFrame groupby, dropna must be "
2569
+ "For a DataFrame or Series groupby.nth , dropna must be "
2573
2570
"either None, 'any' or 'all', "
2574
2571
f"(was passed { dropna } )."
2575
2572
)
2576
2573
2577
2574
# old behaviour, but with all and any support for DataFrames.
2578
2575
# modified in GH 7559 to have better perf
2576
+ n = cast (int , n )
2579
2577
max_len = n if n >= 0 else - 1 - n
2580
2578
dropped = self .obj .dropna (how = dropna , axis = self .axis )
2581
2579
@@ -3301,11 +3299,16 @@ def head(self, n=5):
3301
3299
from the original DataFrame with original index and order preserved
3302
3300
(``as_index`` flag is ignored).
3303
3301
3304
- Does not work for negative values of `n`.
3302
+ Parameters
3303
+ ----------
3304
+ n : int
3305
+ If positive: number of entries to include from start of each group.
3306
+ If negative: number of entries to exclude from end of each group.
3305
3307
3306
3308
Returns
3307
3309
-------
3308
3310
Series or DataFrame
3311
+ Subset of original Series or DataFrame as determined by n.
3309
3312
%(see_also)s
3310
3313
Examples
3311
3314
--------
@@ -3317,12 +3320,11 @@ def head(self, n=5):
3317
3320
0 1 2
3318
3321
2 5 6
3319
3322
>>> df.groupby('A').head(-1)
3320
- Empty DataFrame
3321
- Columns: [A, B]
3322
- Index: []
3323
+ A B
3324
+ 0 1 2
3323
3325
"""
3324
3326
self ._reset_group_selection ()
3325
- mask = self ._cumcount_array () < n
3327
+ mask = self ._make_mask_from_positional_indexer ( slice ( None , n ))
3326
3328
return self ._mask_selected_obj (mask )
3327
3329
3328
3330
@final
@@ -3336,11 +3338,16 @@ def tail(self, n=5):
3336
3338
from the original DataFrame with original index and order preserved
3337
3339
(``as_index`` flag is ignored).
3338
3340
3339
- Does not work for negative values of `n`.
3341
+ Parameters
3342
+ ----------
3343
+ n : int
3344
+ If positive: number of entries to include from end of each group.
3345
+ If negative: number of entries to exclude from start of each group.
3340
3346
3341
3347
Returns
3342
3348
-------
3343
3349
Series or DataFrame
3350
+ Subset of original Series or DataFrame as determined by n.
3344
3351
%(see_also)s
3345
3352
Examples
3346
3353
--------
@@ -3352,12 +3359,16 @@ def tail(self, n=5):
3352
3359
1 a 2
3353
3360
3 b 2
3354
3361
>>> df.groupby('A').tail(-1)
3355
- Empty DataFrame
3356
- Columns: [A, B]
3357
- Index: []
3362
+ A B
3363
+ 1 a 2
3364
+ 3 b 2
3358
3365
"""
3359
3366
self ._reset_group_selection ()
3360
- mask = self ._cumcount_array (ascending = False ) < n
3367
+ if n :
3368
+ mask = self ._make_mask_from_positional_indexer (slice (- n , None ))
3369
+ else :
3370
+ mask = self ._make_mask_from_positional_indexer ([])
3371
+
3361
3372
return self ._mask_selected_obj (mask )
3362
3373
3363
3374
@final
0 commit comments