6
6
from numpy import nan
7
7
from datetime import datetime
8
8
from itertools import permutations
9
- from pandas import Series , Categorical , CategoricalIndex , Index
9
+ from pandas import (Series , Categorical , CategoricalIndex , Index ,
10
+ Timestamp , DatetimeIndex )
10
11
import pandas as pd
11
12
12
13
from pandas import compat
@@ -34,7 +35,7 @@ def test_ints(self):
34
35
expected = Series (np .array ([0 , 2 , 1 , 1 , 0 , 2 , np .nan , 0 ]))
35
36
tm .assert_series_equal (result , expected )
36
37
37
- s = pd . Series (np .arange (5 ), dtype = np .float32 )
38
+ s = Series (np .arange (5 ), dtype = np .float32 )
38
39
result = algos .match (s , [2 , 4 ])
39
40
expected = np .array ([- 1 , - 1 , 0 , - 1 , 1 ], dtype = np .int64 )
40
41
self .assert_numpy_array_equal (result , expected )
@@ -204,20 +205,20 @@ def test_mixed(self):
204
205
def test_datelike (self ):
205
206
206
207
# M8
207
- v1 = pd . Timestamp ('20130101 09:00:00.00004' )
208
- v2 = pd . Timestamp ('20130101' )
208
+ v1 = Timestamp ('20130101 09:00:00.00004' )
209
+ v2 = Timestamp ('20130101' )
209
210
x = Series ([v1 , v1 , v1 , v2 , v2 , v1 ])
210
211
labels , uniques = algos .factorize (x )
211
212
212
213
exp = np .array ([0 , 0 , 0 , 1 , 1 , 0 ], dtype = np .intp )
213
214
self .assert_numpy_array_equal (labels , exp )
214
- exp = pd . DatetimeIndex ([v1 , v2 ])
215
+ exp = DatetimeIndex ([v1 , v2 ])
215
216
self .assert_index_equal (uniques , exp )
216
217
217
218
labels , uniques = algos .factorize (x , sort = True )
218
219
exp = np .array ([1 , 1 , 1 , 0 , 0 , 1 ], dtype = np .intp )
219
220
self .assert_numpy_array_equal (labels , exp )
220
- exp = pd . DatetimeIndex ([v2 , v1 ])
221
+ exp = DatetimeIndex ([v2 , v1 ])
221
222
self .assert_index_equal (uniques , exp )
222
223
223
224
# period
@@ -350,7 +351,7 @@ def test_datetime64_dtype_array_returned(self):
350
351
tm .assert_numpy_array_equal (result , expected )
351
352
self .assertEqual (result .dtype , expected .dtype )
352
353
353
- s = pd . Series (dt_index )
354
+ s = Series (dt_index )
354
355
result = algos .unique (s )
355
356
tm .assert_numpy_array_equal (result , expected )
356
357
self .assertEqual (result .dtype , expected .dtype )
@@ -369,7 +370,7 @@ def test_timedelta64_dtype_array_returned(self):
369
370
tm .assert_numpy_array_equal (result , expected )
370
371
self .assertEqual (result .dtype , expected .dtype )
371
372
372
- s = pd . Series (td_index )
373
+ s = Series (td_index )
373
374
result = algos .unique (s )
374
375
tm .assert_numpy_array_equal (result , expected )
375
376
self .assertEqual (result .dtype , expected .dtype )
@@ -380,11 +381,12 @@ def test_timedelta64_dtype_array_returned(self):
380
381
self .assertEqual (result .dtype , expected .dtype )
381
382
382
383
def test_uint64_overflow (self ):
383
- s = pd . Series ([1 , 2 , 2 ** 63 , 2 ** 63 ], dtype = np .uint64 )
384
+ s = Series ([1 , 2 , 2 ** 63 , 2 ** 63 ], dtype = np .uint64 )
384
385
exp = np .array ([1 , 2 , 2 ** 63 ], dtype = np .uint64 )
385
386
tm .assert_numpy_array_equal (algos .unique (s ), exp )
386
387
387
388
def test_categorical (self ):
389
+ # GH 15939
388
390
c = pd .Categorical (list ('aabc' ))
389
391
result = c .unique ()
390
392
expected = pd .Categorical (list ('abc' ))
@@ -397,28 +399,56 @@ def test_categorical(self):
397
399
expected = Series (expected , name = 'foo' )
398
400
tm .assert_series_equal (result , expected )
399
401
402
+ def test_datetime64tz_aware (self ):
403
+ # GH 15939
404
+ result = Series (
405
+ pd .Index ([Timestamp ('20160101' , tz = 'US/Eastern' ),
406
+ Timestamp ('20160101' , tz = 'US/Eastern' )])).unique ()
407
+ expected = np .array ([Timestamp ('2016-01-01 00:00:00-0500' ,
408
+ tz = 'US/Eastern' )], dtype = object )
409
+ tm .assert_numpy_array_equal (result , expected )
410
+
411
+ result = pd .Index ([Timestamp ('20160101' , tz = 'US/Eastern' ),
412
+ Timestamp ('20160101' , tz = 'US/Eastern' )]).unique ()
413
+ expected = DatetimeIndex (['2016-01-01 00:00:00' ],
414
+ dtype = 'datetime64[ns, US/Eastern]' , freq = None )
415
+ tm .assert_index_equal (result , expected )
416
+
417
+ result = pd .unique (
418
+ Series (pd .Index ([Timestamp ('20160101' , tz = 'US/Eastern' ),
419
+ Timestamp ('20160101' , tz = 'US/Eastern' )])))
420
+ expected = DatetimeIndex (['2016-01-01 00:00:00' ],
421
+ dtype = 'datetime64[ns, US/Eastern]' , freq = None )
422
+ tm .assert_index_equal (result , expected )
423
+
424
+ result = pd .unique (pd .Index ([Timestamp ('20160101' , tz = 'US/Eastern' ),
425
+ Timestamp ('20160101' , tz = 'US/Eastern' )]))
426
+ expected = DatetimeIndex (['2016-01-01 00:00:00' ],
427
+ dtype = 'datetime64[ns, US/Eastern]' , freq = None )
428
+ tm .assert_index_equal (result , expected )
429
+
400
430
def test_order_of_appearance (self ):
401
431
# 9346
402
432
# light testing of guarantee of order of appearance
403
433
# these also are the doc-examples
404
- result = pd .unique (pd . Series ([2 , 1 , 3 , 3 ]))
434
+ result = pd .unique (Series ([2 , 1 , 3 , 3 ]))
405
435
tm .assert_numpy_array_equal (result , np .array ([2 , 1 , 3 ]))
406
436
407
- result = pd .unique (pd . Series ([2 ] + [1 ] * 5 ))
437
+ result = pd .unique (Series ([2 ] + [1 ] * 5 ))
408
438
tm .assert_numpy_array_equal (result , np .array ([2 , 1 ]))
409
439
410
- result = pd .unique (Series ([pd . Timestamp ('20160101' ),
411
- pd . Timestamp ('20160101' )]))
440
+ result = pd .unique (Series ([Timestamp ('20160101' ),
441
+ Timestamp ('20160101' )]))
412
442
expected = np .array (['2016-01-01T00:00:00.000000000' ],
413
443
dtype = 'datetime64[ns]' )
414
444
tm .assert_numpy_array_equal (result , expected )
415
445
416
446
result = pd .unique (pd .Index (
417
- [pd . Timestamp ('20160101' , tz = 'US/Eastern' ),
418
- pd . Timestamp ('20160101' , tz = 'US/Eastern' )]))
419
- expected = pd . DatetimeIndex (['2016-01-01 00:00:00' ],
420
- dtype = 'datetime64[ns, US/Eastern]' ,
421
- freq = None )
447
+ [Timestamp ('20160101' , tz = 'US/Eastern' ),
448
+ Timestamp ('20160101' , tz = 'US/Eastern' )]))
449
+ expected = DatetimeIndex (['2016-01-01 00:00:00' ],
450
+ dtype = 'datetime64[ns, US/Eastern]' ,
451
+ freq = None )
422
452
tm .assert_index_equal (result , expected )
423
453
424
454
result = pd .unique (list ('aabc' ))
@@ -448,27 +478,27 @@ def test_basic(self):
448
478
expected = np .array ([True , False ])
449
479
tm .assert_numpy_array_equal (result , expected )
450
480
451
- result = algos .isin (pd . Series ([1 , 2 ]), [1 ])
481
+ result = algos .isin (Series ([1 , 2 ]), [1 ])
452
482
expected = np .array ([True , False ])
453
483
tm .assert_numpy_array_equal (result , expected )
454
484
455
- result = algos .isin (pd . Series ([1 , 2 ]), pd . Series ([1 ]))
485
+ result = algos .isin (Series ([1 , 2 ]), Series ([1 ]))
456
486
expected = np .array ([True , False ])
457
487
tm .assert_numpy_array_equal (result , expected )
458
488
459
- result = algos .isin (pd . Series ([1 , 2 ]), set ([1 ]))
489
+ result = algos .isin (Series ([1 , 2 ]), set ([1 ]))
460
490
expected = np .array ([True , False ])
461
491
tm .assert_numpy_array_equal (result , expected )
462
492
463
493
result = algos .isin (['a' , 'b' ], ['a' ])
464
494
expected = np .array ([True , False ])
465
495
tm .assert_numpy_array_equal (result , expected )
466
496
467
- result = algos .isin (pd . Series (['a' , 'b' ]), pd . Series (['a' ]))
497
+ result = algos .isin (Series (['a' , 'b' ]), Series (['a' ]))
468
498
expected = np .array ([True , False ])
469
499
tm .assert_numpy_array_equal (result , expected )
470
500
471
- result = algos .isin (pd . Series (['a' , 'b' ]), set (['a' ]))
501
+ result = algos .isin (Series (['a' , 'b' ]), set (['a' ]))
472
502
expected = np .array ([True , False ])
473
503
tm .assert_numpy_array_equal (result , expected )
474
504
@@ -565,33 +595,33 @@ def test_value_counts_nat(self):
565
595
self .assertEqual (len (vc ), 1 )
566
596
self .assertEqual (len (vc_with_na ), 2 )
567
597
568
- exp_dt = pd . Series ({pd . Timestamp ('2014-01-01 00:00:00' ): 1 })
598
+ exp_dt = Series ({Timestamp ('2014-01-01 00:00:00' ): 1 })
569
599
tm .assert_series_equal (algos .value_counts (dt ), exp_dt )
570
600
# TODO same for (timedelta)
571
601
572
602
def test_value_counts_datetime_outofbounds (self ):
573
603
# GH 13663
574
- s = pd . Series ([datetime (3000 , 1 , 1 ), datetime (5000 , 1 , 1 ),
575
- datetime (5000 , 1 , 1 ), datetime (6000 , 1 , 1 ),
576
- datetime (3000 , 1 , 1 ), datetime (3000 , 1 , 1 )])
604
+ s = Series ([datetime (3000 , 1 , 1 ), datetime (5000 , 1 , 1 ),
605
+ datetime (5000 , 1 , 1 ), datetime (6000 , 1 , 1 ),
606
+ datetime (3000 , 1 , 1 ), datetime (3000 , 1 , 1 )])
577
607
res = s .value_counts ()
578
608
579
609
exp_index = pd .Index ([datetime (3000 , 1 , 1 ), datetime (5000 , 1 , 1 ),
580
610
datetime (6000 , 1 , 1 )], dtype = object )
581
- exp = pd . Series ([3 , 2 , 1 ], index = exp_index )
611
+ exp = Series ([3 , 2 , 1 ], index = exp_index )
582
612
tm .assert_series_equal (res , exp )
583
613
584
614
# GH 12424
585
- res = pd .to_datetime (pd . Series (['2362-01-01' , np .nan ]),
615
+ res = pd .to_datetime (Series (['2362-01-01' , np .nan ]),
586
616
errors = 'ignore' )
587
- exp = pd . Series (['2362-01-01' , np .nan ], dtype = object )
617
+ exp = Series (['2362-01-01' , np .nan ], dtype = object )
588
618
tm .assert_series_equal (res , exp )
589
619
590
620
def test_categorical (self ):
591
621
s = Series (pd .Categorical (list ('aaabbc' )))
592
622
result = s .value_counts ()
593
- expected = pd . Series ([3 , 2 , 1 ],
594
- index = pd .CategoricalIndex (['a' , 'b' , 'c' ]))
623
+ expected = Series ([3 , 2 , 1 ],
624
+ index = pd .CategoricalIndex (['a' , 'b' , 'c' ]))
595
625
tm .assert_series_equal (result , expected , check_index_type = True )
596
626
597
627
# preserve order?
@@ -604,11 +634,11 @@ def test_categorical_nans(self):
604
634
s = Series (pd .Categorical (list ('aaaaabbbcc' ))) # 4,3,2,1 (nan)
605
635
s .iloc [1 ] = np .nan
606
636
result = s .value_counts ()
607
- expected = pd . Series ([4 , 3 , 2 ], index = pd .CategoricalIndex (
637
+ expected = Series ([4 , 3 , 2 ], index = pd .CategoricalIndex (
608
638
['a' , 'b' , 'c' ], categories = ['a' , 'b' , 'c' ]))
609
639
tm .assert_series_equal (result , expected , check_index_type = True )
610
640
result = s .value_counts (dropna = False )
611
- expected = pd . Series ([
641
+ expected = Series ([
612
642
4 , 3 , 2 , 1
613
643
], index = pd .CategoricalIndex (['a' , 'b' , 'c' , np .nan ]))
614
644
tm .assert_series_equal (result , expected , check_index_type = True )
@@ -618,12 +648,12 @@ def test_categorical_nans(self):
618
648
list ('aaaaabbbcc' ), ordered = True , categories = ['b' , 'a' , 'c' ]))
619
649
s .iloc [1 ] = np .nan
620
650
result = s .value_counts ()
621
- expected = pd . Series ([4 , 3 , 2 ], index = pd .CategoricalIndex (
651
+ expected = Series ([4 , 3 , 2 ], index = pd .CategoricalIndex (
622
652
['a' , 'b' , 'c' ], categories = ['b' , 'a' , 'c' ], ordered = True ))
623
653
tm .assert_series_equal (result , expected , check_index_type = True )
624
654
625
655
result = s .value_counts (dropna = False )
626
- expected = pd . Series ([4 , 3 , 2 , 1 ], index = pd .CategoricalIndex (
656
+ expected = Series ([4 , 3 , 2 , 1 ], index = pd .CategoricalIndex (
627
657
['a' , 'b' , 'c' , np .nan ], categories = ['b' , 'a' , 'c' ], ordered = True ))
628
658
tm .assert_series_equal (result , expected , check_index_type = True )
629
659
@@ -640,33 +670,33 @@ def test_dropna(self):
640
670
# https://github.com/pandas-dev/pandas/issues/9443#issuecomment-73719328
641
671
642
672
tm .assert_series_equal (
643
- pd . Series ([True , True , False ]).value_counts (dropna = True ),
644
- pd . Series ([2 , 1 ], index = [True , False ]))
673
+ Series ([True , True , False ]).value_counts (dropna = True ),
674
+ Series ([2 , 1 ], index = [True , False ]))
645
675
tm .assert_series_equal (
646
- pd . Series ([True , True , False ]).value_counts (dropna = False ),
647
- pd . Series ([2 , 1 ], index = [True , False ]))
676
+ Series ([True , True , False ]).value_counts (dropna = False ),
677
+ Series ([2 , 1 ], index = [True , False ]))
648
678
649
679
tm .assert_series_equal (
650
- pd . Series ([True , True , False , None ]).value_counts (dropna = True ),
651
- pd . Series ([2 , 1 ], index = [True , False ]))
680
+ Series ([True , True , False , None ]).value_counts (dropna = True ),
681
+ Series ([2 , 1 ], index = [True , False ]))
652
682
tm .assert_series_equal (
653
- pd . Series ([True , True , False , None ]).value_counts (dropna = False ),
654
- pd . Series ([2 , 1 , 1 ], index = [True , False , np .nan ]))
683
+ Series ([True , True , False , None ]).value_counts (dropna = False ),
684
+ Series ([2 , 1 , 1 ], index = [True , False , np .nan ]))
655
685
tm .assert_series_equal (
656
- pd . Series ([10.3 , 5. , 5. ]).value_counts (dropna = True ),
657
- pd . Series ([2 , 1 ], index = [5. , 10.3 ]))
686
+ Series ([10.3 , 5. , 5. ]).value_counts (dropna = True ),
687
+ Series ([2 , 1 ], index = [5. , 10.3 ]))
658
688
tm .assert_series_equal (
659
- pd . Series ([10.3 , 5. , 5. ]).value_counts (dropna = False ),
660
- pd . Series ([2 , 1 ], index = [5. , 10.3 ]))
689
+ Series ([10.3 , 5. , 5. ]).value_counts (dropna = False ),
690
+ Series ([2 , 1 ], index = [5. , 10.3 ]))
661
691
662
692
tm .assert_series_equal (
663
- pd . Series ([10.3 , 5. , 5. , None ]).value_counts (dropna = True ),
664
- pd . Series ([2 , 1 ], index = [5. , 10.3 ]))
693
+ Series ([10.3 , 5. , 5. , None ]).value_counts (dropna = True ),
694
+ Series ([2 , 1 ], index = [5. , 10.3 ]))
665
695
666
696
# 32-bit linux has a different ordering
667
697
if not compat .is_platform_32bit ():
668
- result = pd . Series ([10.3 , 5. , 5. , None ]).value_counts (dropna = False )
669
- expected = pd . Series ([2 , 1 , 1 ], index = [5. , 10.3 , np .nan ])
698
+ result = Series ([10.3 , 5. , 5. , None ]).value_counts (dropna = False )
699
+ expected = Series ([2 , 1 , 1 ], index = [5. , 10.3 , np .nan ])
670
700
tm .assert_series_equal (result , expected )
671
701
672
702
def test_value_counts_normalized (self ):
@@ -781,15 +811,15 @@ def test_numeric_object_likes(self):
781
811
tm .assert_numpy_array_equal (res_false , exp_false )
782
812
783
813
# series
784
- for s in [pd . Series (case ), pd . Series (case , dtype = 'category' )]:
814
+ for s in [Series (case ), Series (case , dtype = 'category' )]:
785
815
res_first = s .duplicated (keep = 'first' )
786
- tm .assert_series_equal (res_first , pd . Series (exp_first ))
816
+ tm .assert_series_equal (res_first , Series (exp_first ))
787
817
788
818
res_last = s .duplicated (keep = 'last' )
789
- tm .assert_series_equal (res_last , pd . Series (exp_last ))
819
+ tm .assert_series_equal (res_last , Series (exp_last ))
790
820
791
821
res_false = s .duplicated (keep = False )
792
- tm .assert_series_equal (res_false , pd . Series (exp_false ))
822
+ tm .assert_series_equal (res_false , Series (exp_false ))
793
823
794
824
def test_datetime_likes (self ):
795
825
@@ -798,8 +828,8 @@ def test_datetime_likes(self):
798
828
td = ['1 days' , '2 days' , '1 days' , 'NaT' , '3 days' ,
799
829
'2 days' , '4 days' , '1 days' , 'NaT' , '6 days' ]
800
830
801
- cases = [np .array ([pd . Timestamp (d ) for d in dt ]),
802
- np .array ([pd . Timestamp (d , tz = 'US/Eastern' ) for d in dt ]),
831
+ cases = [np .array ([Timestamp (d ) for d in dt ]),
832
+ np .array ([Timestamp (d , tz = 'US/Eastern' ) for d in dt ]),
803
833
np .array ([pd .Period (d , freq = 'D' ) for d in dt ]),
804
834
np .array ([np .datetime64 (d ) for d in dt ]),
805
835
np .array ([pd .Timedelta (d ) for d in td ])]
@@ -833,16 +863,16 @@ def test_datetime_likes(self):
833
863
tm .assert_numpy_array_equal (res_false , exp_false )
834
864
835
865
# series
836
- for s in [pd . Series (case ), pd . Series (case , dtype = 'category' ),
837
- pd . Series (case , dtype = object )]:
866
+ for s in [Series (case ), Series (case , dtype = 'category' ),
867
+ Series (case , dtype = object )]:
838
868
res_first = s .duplicated (keep = 'first' )
839
- tm .assert_series_equal (res_first , pd . Series (exp_first ))
869
+ tm .assert_series_equal (res_first , Series (exp_first ))
840
870
841
871
res_last = s .duplicated (keep = 'last' )
842
- tm .assert_series_equal (res_last , pd . Series (exp_last ))
872
+ tm .assert_series_equal (res_last , Series (exp_last ))
843
873
844
874
res_false = s .duplicated (keep = False )
845
- tm .assert_series_equal (res_false , pd . Series (exp_false ))
875
+ tm .assert_series_equal (res_false , Series (exp_false ))
846
876
847
877
def test_unique_index (self ):
848
878
cases = [pd .Index ([1 , 2 , 3 ]), pd .RangeIndex (0 , 3 )]
@@ -984,7 +1014,7 @@ def test_lookup_overflow(self):
984
1014
np .arange (len (xs ), dtype = np .int64 ))
985
1015
986
1016
def test_get_unique (self ):
987
- s = pd . Series ([1 , 2 , 2 ** 63 , 2 ** 63 ], dtype = np .uint64 )
1017
+ s = Series ([1 , 2 , 2 ** 63 , 2 ** 63 ], dtype = np .uint64 )
988
1018
exp = np .array ([1 , 2 , 2 ** 63 ], dtype = np .uint64 )
989
1019
self .assert_numpy_array_equal (s .unique (), exp )
990
1020
0 commit comments