forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathasserters.py
1380 lines (1189 loc) · 43.9 KB
/
asserters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
from __future__ import annotations
import operator
from typing import (
TYPE_CHECKING,
Literal,
cast,
)
import numpy as np
from pandas._libs.missing import is_matching_na
from pandas._libs.sparse import SparseIndex
import pandas._libs.testing as _testing
from pandas._libs.tslibs.np_datetime import compare_mismatched_resolutions
from pandas.core.dtypes.common import (
is_bool,
is_integer_dtype,
is_number,
is_numeric_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
DatetimeTZDtype,
ExtensionDtype,
NumpyEADtype,
)
from pandas.core.dtypes.missing import array_equivalent
import pandas as pd
from pandas import (
Categorical,
DataFrame,
DatetimeIndex,
Index,
IntervalDtype,
IntervalIndex,
MultiIndex,
PeriodIndex,
RangeIndex,
Series,
TimedeltaIndex,
)
from pandas.core.arrays import (
DatetimeArray,
ExtensionArray,
IntervalArray,
PeriodArray,
TimedeltaArray,
)
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
from pandas.core.arrays.string_ import StringDtype
from pandas.core.indexes.api import safe_sort_index
from pandas.io.formats.printing import pprint_thing
if TYPE_CHECKING:
from pandas._typing import DtypeObj
def assert_almost_equal(
    left,
    right,
    check_dtype: bool | Literal["equiv"] = "equiv",
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    **kwargs,
) -> None:
    """
    Assert that two objects are equal within a numeric tolerance.

    ``Index``, ``Series`` and ``DataFrame`` inputs (decided by the type of
    `left`) are dispatched to the matching pandas asserter with
    ``check_exact=False``. Anything else is compared by
    ``pandas._libs.testing.assert_almost_equal``.

    Parameters
    ----------
    left : object
    right : object
    check_dtype : bool or {'equiv'}, default 'equiv'
        Whether to check the dtype / class of `left` and `right`. If 'equiv'
        is passed in, then `RangeIndex` and `Index` with int64 dtype are also
        considered equivalent when doing type checking.
    rtol : float, default 1e-5
        Relative tolerance.
    atol : float, default 1e-8
        Absolute tolerance.
    **kwargs
        Forwarded unchanged to whichever asserter ends up being called.
    """
    if isinstance(left, Index):
        assert_index_equal(
            left,
            right,
            check_exact=False,
            exact=check_dtype,
            rtol=rtol,
            atol=atol,
            **kwargs,
        )
        return

    if isinstance(left, Series):
        assert_series_equal(
            left,
            right,
            check_exact=False,
            check_dtype=check_dtype,
            rtol=rtol,
            atol=atol,
            **kwargs,
        )
        return

    if isinstance(left, DataFrame):
        assert_frame_equal(
            left,
            right,
            check_exact=False,
            check_dtype=check_dtype,
            rtol=rtol,
            atol=atol,
            **kwargs,
        )
        return

    # Other sequences.
    if check_dtype:
        # Numeric classes (e.g. np.float64 vs float) and bool classes
        # (e.g. np.bool_ vs bool) are deliberately not compared by class.
        scalar_pair = (is_number(left) and is_number(right)) or (
            is_bool(left) and is_bool(right)
        )
        if not scalar_pair:
            has_ndarray = isinstance(left, np.ndarray) or isinstance(
                right, np.ndarray
            )
            assert_class_equal(
                left, right, obj="numpy array" if has_ndarray else "Input"
            )

    # if we have "equiv", this becomes True
    _testing.assert_almost_equal(
        left, right, check_dtype=bool(check_dtype), rtol=rtol, atol=atol, **kwargs
    )
def _check_isinstance(left, right, cls):
    """
    Verify that both compared objects are instances of `cls`.

    Used by the assert_* functions to validate their inputs before the
    actual comparison starts. `left` is checked before `right`.

    Parameters
    ----------
    left : The first object being compared.
    right : The second object being compared.
    cls : The class type to check against.

    Raises
    ------
    AssertionError : Either `left` or `right` is not an instance of `cls`.
    """
    cls_name = cls.__name__

    for candidate in (left, right):
        if isinstance(candidate, cls):
            continue
        raise AssertionError(
            f"{cls_name} Expected type {cls}, found {type(candidate)} instead"
        )
def assert_dict_equal(left, right, compare_keys: bool = True) -> None:
    """
    Assert that two dicts are equal.

    Both arguments must be ``dict`` instances; the comparison itself is
    delegated to ``pandas._libs.testing.assert_dict_equal``.

    Parameters
    ----------
    left : dict
    right : dict
    compare_keys : bool, default True
        Forwarded unchanged to ``pandas._libs.testing.assert_dict_equal``.
    """
    _check_isinstance(left, right, cls=dict)
    _testing.assert_dict_equal(left, right, compare_keys=compare_keys)
def assert_index_equal(
    left: Index,
    right: Index,
    exact: bool | str = "equiv",
    check_names: bool = True,
    check_exact: bool = True,
    check_categorical: bool = True,
    check_order: bool = True,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str = "Index",
) -> None:
    """
    Assert that two Index objects are equal.

    Parameters
    ----------
    left : Index
    right : Index
    exact : bool or {'equiv'}, default 'equiv'
        Whether to check the Index class, dtype and inferred_type
        are identical. If 'equiv', then RangeIndex can be substituted for
        Index with an int64 dtype as well.
    check_names : bool, default True
        Whether to check the names attribute.
    check_exact : bool, default True
        Whether to compare numbers exactly.
    check_categorical : bool, default True
        Whether to compare internal Categorical exactly.
    check_order : bool, default True
        Whether to compare the order of index entries as well as their values.
        If True, both indexes must contain the same elements, in the same order.
        If False, both indexes must contain the same elements, but in any order.

        .. versionadded:: 1.2.0
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'Index'
        Specify object name being compared, internally used to show appropriate
        assertion message.

    Examples
    --------
    >>> from pandas import testing as tm
    >>> a = pd.Index([1, 2, 3])
    >>> b = pd.Index([1, 2, 3])
    >>> tm.assert_index_equal(a, b)
    """
    __tracebackhide__ = True

    def _verify_types(lhs, rhs, obj: str = "Index") -> None:
        # Class, inferred_type and dtype checks; a no-op when `exact` is falsy.
        if not exact:
            return

        assert_class_equal(lhs, rhs, exact=exact, obj=obj)
        assert_attr_equal("inferred_type", lhs, rhs, obj=obj)

        both_categorical = isinstance(lhs.dtype, CategoricalDtype) and isinstance(
            rhs.dtype, CategoricalDtype
        )
        if not both_categorical:
            assert_attr_equal("dtype", lhs, rhs, obj=obj)
            return

        # Skip exact dtype checking when `check_categorical` is False
        if check_categorical:
            assert_attr_equal("dtype", lhs, rhs, obj=obj)
            assert_index_equal(lhs.categories, rhs.categories, exact=exact)

    # instance validation
    _check_isinstance(left, right, Index)

    # class / dtype comparison
    _verify_types(left, right, obj=obj)

    # level comparison
    if left.nlevels != right.nlevels:
        raise_assert_detail(
            obj,
            f"{obj} levels are different",
            f"{left.nlevels}, {left}",
            f"{right.nlevels}, {right}",
        )

    # length comparison
    if len(left) != len(right):
        raise_assert_detail(
            obj,
            f"{obj} length are different",
            f"{len(left)}, {left}",
            f"{len(right)}, {right}",
        )

    # If order doesn't matter then sort the index entries
    if not check_order:
        left = safe_sort_index(left)
        right = safe_sort_index(right)

    if isinstance(left, MultiIndex):
        # MultiIndex is compared level by level for friendlier error messages
        right = cast(MultiIndex, right)
        shared_kwargs = {
            "exact": exact,
            "check_names": check_names,
            "check_exact": check_exact,
            "check_categorical": check_categorical,
            "rtol": rtol,
            "atol": atol,
        }

        for level in range(left.nlevels):
            level_label = f"MultiIndex level [{level}]"
            try:
                # try comparison on levels/codes to avoid densifying MultiIndex
                assert_index_equal(
                    left.levels[level],
                    right.levels[level],
                    obj=level_label,
                    **shared_kwargs,
                )
                assert_numpy_array_equal(left.codes[level], right.codes[level])
            except AssertionError:
                # Fall back to comparing the densified level values.
                assert_index_equal(
                    left.get_level_values(level),
                    right.get_level_values(level),
                    obj=level_label,
                    **shared_kwargs,
                )
            # get_level_values may change dtype
            _verify_types(left.levels[level], right.levels[level], obj=obj)

    # skip exact index checking when `check_categorical` is False
    elif check_exact and check_categorical:
        if not left.equals(right):
            unequal = left._values != right._values

            if not isinstance(unequal, np.ndarray):
                # Extension-array result: count missing comparisons as mismatches
                unequal = cast("ExtensionArray", unequal).fillna(True)

            pct_diff = np.sum(unequal.astype(int)) * 100.0 / len(left)
            raise_assert_detail(
                obj,
                f"{obj} values are different ({np.round(pct_diff, 5)} %)",
                left,
                right,
            )
    else:
        # if we have "equiv", this becomes True
        _testing.assert_almost_equal(
            left.values,
            right.values,
            rtol=rtol,
            atol=atol,
            check_dtype=bool(exact),
            obj=obj,
            lobj=left,
            robj=right,
        )

    # metadata comparison
    if check_names:
        assert_attr_equal("names", left, right, obj=obj)

    if any(isinstance(idx, PeriodIndex) for idx in (left, right)):
        assert_attr_equal("dtype", left, right, obj=obj)

    if any(isinstance(idx, IntervalIndex) for idx in (left, right)):
        assert_interval_array_equal(left._values, right._values)

    if check_categorical and (
        isinstance(left.dtype, CategoricalDtype)
        or isinstance(right.dtype, CategoricalDtype)
    ):
        assert_categorical_equal(left._values, right._values, obj=f"{obj} category")
def assert_class_equal(
    left, right, exact: bool | str = True, obj: str = "Input"
) -> None:
    """
    Assert that `left` and `right` have the same class.

    Parameters
    ----------
    left : object
    right : object
    exact : bool or {'equiv'}, default True
        If 'equiv', a `RangeIndex` (or subclass) and an object whose type is
        exactly `Index` are also accepted as matching classes.
    obj : str, default 'Input'
        Name of the compared object, used in the assertion message.
    """
    __tracebackhide__ = True

    def _describe(value):
        # An Index is returned as it is so its values show up in the message.
        return value if isinstance(value, Index) else type(value).__name__

    def _is_plain_or_range(idx: Index) -> bool:
        """RangeIndex (sub-)instances or objects that are exactly an `Index`.

        This only checks class equivalence. There is a separate check that the
        dtype is int64.
        """
        return isinstance(idx, RangeIndex) or type(idx) is Index

    if type(left) == type(right):
        return

    if exact == "equiv" and _is_plain_or_range(left) and _is_plain_or_range(right):
        return

    raise_assert_detail(
        obj, f"{obj} classes are different", _describe(left), _describe(right)
    )
def assert_attr_equal(attr: str, left, right, obj: str = "Attributes") -> None:
    """
    Assert that an attribute has equal values on both objects.

    Both objects must have the attribute.

    Parameters
    ----------
    attr : str
        Attribute name being compared.
    left : object
    right : object
    obj : str, default 'Attributes'
        Specify object name being compared, internally used to show appropriate
        assertion message
    """
    __tracebackhide__ = True

    lhs_value = getattr(left, attr)
    rhs_value = getattr(right, attr)

    if lhs_value is rhs_value or is_matching_na(lhs_value, rhs_value):
        # e.g. both np.nan, both NaT, both pd.NA, ...
        return None

    try:
        matches = lhs_value == rhs_value
    except TypeError:
        # datetimetz on rhs may raise TypeError
        matches = False

    if (lhs_value is pd.NA) != (rhs_value is pd.NA):
        # Exactly one side is pd.NA, so the attributes cannot match.
        matches = False
    elif not isinstance(matches, bool):
        # Array-like comparison result: every element has to agree.
        matches = matches.all()

    if matches:
        return None

    raise_assert_detail(
        obj, f'Attribute "{attr}" are different', lhs_value, rhs_value
    )
    return None
def assert_is_valid_plot_return_object(objs) -> None:
    """
    Assert that `objs` looks like a valid return value of a plotting call.

    Parameters
    ----------
    objs : Series, numpy.ndarray, matplotlib Artist, tuple or dict
        A Series or ndarray is flattened and every element must be a
        matplotlib ``Axes`` or a ``dict``. Any other input must itself be an
        ``Artist``, ``tuple`` or ``dict``.

    Raises
    ------
    AssertionError
        If `objs` (or one of its elements) has an unexpected type.
    """
    # Imported lazily so matplotlib is only needed when this helper is called.
    from matplotlib.artist import Artist
    from matplotlib.axes import Axes

    if isinstance(objs, (Series, np.ndarray)):
        if isinstance(objs, Series):
            objs = objs._values

        for el in objs.ravel():
            msg = (
                "one of 'objs' is not a matplotlib Axes instance, "
                f"type encountered {repr(type(el).__name__)}"
            )
            assert isinstance(el, (Axes, dict)), msg
    else:
        # The message previously read "ArtistArtist instance" (duplicated word).
        msg = (
            "objs is neither an ndarray of Artist instances nor a single "
            "Artist instance, tuple, or dict, 'objs' is a "
            f"{repr(type(objs).__name__)}"
        )
        assert isinstance(objs, (Artist, tuple, dict)), msg
def assert_is_sorted(seq) -> None:
    """
    Assert that the sequence is sorted.

    An ``Index`` or ``Series`` is unwrapped via ``.values`` first. ndarrays
    are compared against ``np.sort`` of themselves; anything else is compared
    against itself reordered by its own ``argsort``.
    """
    values = seq.values if isinstance(seq, (Index, Series)) else seq

    if not isinstance(values, np.ndarray):
        assert_extension_array_equal(values, values[values.argsort()])
        return

    # sorting does not change precisions
    assert_numpy_array_equal(values, np.sort(np.array(values)))
def assert_categorical_equal(
    left,
    right,
    check_dtype: bool = True,
    check_category_order: bool = True,
    obj: str = "Categorical",
) -> None:
    """
    Assert that two Categoricals are equivalent.

    Parameters
    ----------
    left : Categorical
    right : Categorical
    check_dtype : bool, default True
        Check that integer dtype of the codes are the same.
    check_category_order : bool, default True
        Whether the order of the categories should be compared, which
        implies identical integer codes. If False, only the resulting
        values are compared. The ordered attribute is
        checked regardless.
    obj : str, default 'Categorical'
        Specify object name being compared, internally used to show appropriate
        assertion message.
    """
    _check_isinstance(left, right, Categorical)

    left_cats = left.categories
    right_cats = right.categories

    # A RangeIndex on either side relaxes the class check to "equiv";
    # otherwise we still want to require exact matches for Index.
    exact: bool | str
    exact = (
        "equiv"
        if isinstance(left_cats, RangeIndex) or isinstance(right_cats, RangeIndex)
        else True
    )

    if check_category_order:
        assert_index_equal(
            left_cats, right_cats, obj=f"{obj}.categories", exact=exact
        )
        assert_numpy_array_equal(
            left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes"
        )
    else:
        try:
            sorted_left = left_cats.sort_values()
            sorted_right = right_cats.sort_values()
        except TypeError:
            # e.g. '<' not supported between instances of 'int' and 'str'
            sorted_left, sorted_right = left_cats, right_cats

        assert_index_equal(
            sorted_left, sorted_right, obj=f"{obj}.categories", exact=exact
        )
        # Compare the materialised values instead of the raw codes.
        assert_index_equal(
            left_cats.take(left.codes),
            right_cats.take(right.codes),
            obj=f"{obj}.values",
            exact=exact,
        )

    assert_attr_equal("ordered", left, right, obj=obj)
def assert_interval_array_equal(
    left, right, exact: bool | Literal["equiv"] = "equiv", obj: str = "IntervalArray"
) -> None:
    """
    Test that two IntervalArrays are equivalent.

    The left endpoints, the right endpoints and the ``closed`` attribute are
    compared in that order.

    Parameters
    ----------
    left, right : IntervalArray
        The IntervalArrays to compare.
    exact : bool or {'equiv'}, default 'equiv'
        Whether to check the Index class, dtype and inferred_type
        are identical. If 'equiv', then RangeIndex can be substituted for
        Index with an int64 dtype as well.
        NOTE(review): this argument is accepted but not used by the body.
    obj : str, default 'IntervalArray'
        Specify object name being compared, internally used to show appropriate
        assertion message

    Raises
    ------
    AssertionError
        If either input is not an IntervalArray or the arrays differ.
    """
    _check_isinstance(left, right, IntervalArray)

    kwargs = {}
    if left._left.dtype.kind in "mM":
        # We have a DatetimeArray or TimedeltaArray
        kwargs["check_freq"] = False

    assert_equal(left._left, right._left, obj=f"{obj}.left", **kwargs)
    # The right endpoints get their own label so a failure names the correct
    # side (this call previously reused the ".left" label).
    assert_equal(left._right, right._right, obj=f"{obj}.right", **kwargs)

    assert_attr_equal("closed", left, right, obj=obj)
def assert_period_array_equal(left, right, obj: str = "PeriodArray") -> None:
    """
    Assert that two PeriodArrays are equal.

    Compares the backing ``_ndarray`` of both arrays and then their dtype.

    Parameters
    ----------
    left, right : PeriodArray
    obj : str, default 'PeriodArray'
        Name of the compared object, used in the assertion message.
    """
    _check_isinstance(left, right, PeriodArray)

    ndarray_label = f"{obj}._ndarray"
    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=ndarray_label)
    assert_attr_equal("dtype", left, right, obj=obj)
def assert_datetime_array_equal(
    left, right, obj: str = "DatetimeArray", check_freq: bool = True
) -> None:
    """
    Assert that two DatetimeArrays are equal.

    Compares the backing ``_ndarray``, optionally ``freq``, and always ``tz``.

    Parameters
    ----------
    left, right : DatetimeArray
    obj : str, default 'DatetimeArray'
        Name of the compared object, used in the assertion message.
    check_freq : bool, default True
        Whether to compare the ``freq`` attribute.
    """
    __tracebackhide__ = True
    _check_isinstance(left, right, DatetimeArray)

    ndarray_label = f"{obj}._ndarray"
    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=ndarray_label)

    if check_freq:
        assert_attr_equal("freq", left, right, obj=obj)

    assert_attr_equal("tz", left, right, obj=obj)
def assert_timedelta_array_equal(
    left, right, obj: str = "TimedeltaArray", check_freq: bool = True
) -> None:
    """
    Assert that two TimedeltaArrays are equal.

    Compares the backing ``_ndarray`` and, optionally, ``freq``.

    Parameters
    ----------
    left, right : TimedeltaArray
    obj : str, default 'TimedeltaArray'
        Name of the compared object, used in the assertion message.
    check_freq : bool, default True
        Whether to compare the ``freq`` attribute.
    """
    __tracebackhide__ = True
    _check_isinstance(left, right, TimedeltaArray)

    ndarray_label = f"{obj}._ndarray"
    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=ndarray_label)

    if not check_freq:
        return
    assert_attr_equal("freq", left, right, obj=obj)
def raise_assert_detail(
obj, message, left, right, diff=None, first_diff=None, index_values=None
):
__tracebackhide__ = True
msg = f"""{obj} are different
{message}"""
if isinstance(index_values, Index):
index_values = np.asarray(index_values)
if isinstance(index_values, np.ndarray):
msg += f"\n[index]: {pprint_thing(index_values)}"
if isinstance(left, np.ndarray):
left = pprint_thing(left)
elif isinstance(left, (CategoricalDtype, NumpyEADtype, StringDtype)):
left = repr(left)
if isinstance(right, np.ndarray):
right = pprint_thing(right)
elif isinstance(right, (CategoricalDtype, NumpyEADtype, StringDtype)):
right = repr(right)
msg += f"""
[left]: {left}
[right]: {right}"""
if diff is not None:
msg += f"\n[diff]: {diff}"
if first_diff is not None:
msg += f"\n{first_diff}"
raise AssertionError(msg)
def assert_numpy_array_equal(
    left,
    right,
    strict_nan: bool = False,
    check_dtype: bool | Literal["equiv"] = True,
    err_msg=None,
    check_same=None,
    obj: str = "numpy array",
    index_values=None,
) -> None:
    """
    Check that 'np.ndarray' is equivalent.

    Parameters
    ----------
    left, right : numpy.ndarray or iterable
        The two arrays to be compared.
    strict_nan : bool, default False
        If True, consider NaN and None to be different.
    check_dtype : bool, default True
        Check dtype if both left and right are np.ndarray.
    err_msg : str, default None
        If provided, used as assertion message.
    check_same : None|'copy'|'same', default None
        Ensure left and right refer/do not refer to the same memory area.
    obj : str, default 'numpy array'
        Specify object name being compared, internally used to show appropriate
        assertion message.
    index_values : Index | numpy.ndarray, default None
        optional index (shared by both left and right), used in output.

    Raises
    ------
    AssertionError
        If the inputs are not both ndarrays of the same class, violate
        `check_same`, differ in shape or values, or (with `check_dtype`)
        differ in dtype.
    """
    __tracebackhide__ = True

    # instance validation
    # Show a detailed error message when classes are different
    assert_class_equal(left, right, obj=obj)
    # both classes must be an np.ndarray
    _check_isinstance(left, right, np.ndarray)

    def _get_base(obj):
        # Use the `base` attribute when it is set, otherwise the array itself.
        return obj.base if getattr(obj, "base", None) is not None else obj

    left_base = _get_base(left)
    right_base = _get_base(right)

    if check_same == "same":
        if left_base is not right_base:
            raise AssertionError(f"{repr(left_base)} is not {repr(right_base)}")
    elif check_same == "copy":
        if left_base is right_base:
            raise AssertionError(f"{repr(left_base)} is {repr(right_base)}")

    def _raise(left, right, err_msg):
        if err_msg is None:
            if left.shape != right.shape:
                raise_assert_detail(
                    obj, f"{obj} shapes are different", left.shape, right.shape
                )

            diff = 0
            # 0-d arrays cannot be iterated; np.atleast_1d views them as
            # one-element arrays so they produce an AssertionError below
            # instead of a TypeError. Arrays with ndim >= 1 pass through as-is.
            for left_arr, right_arr in zip(np.atleast_1d(left), np.atleast_1d(right)):
                # count up differences
                if not array_equivalent(left_arr, right_arr, strict_nan=strict_nan):
                    diff += 1

            diff = diff * 100.0 / left.size
            msg = f"{obj} values are different ({np.round(diff, 5)} %)"
            raise_assert_detail(obj, msg, left, right, index_values=index_values)

        raise AssertionError(err_msg)

    # compare shape and values
    if not array_equivalent(left, right, strict_nan=strict_nan):
        _raise(left, right, err_msg)

    # _check_isinstance above already guarantees both inputs are ndarrays.
    if check_dtype:
        assert_attr_equal("dtype", left, right, obj=obj)
def assert_extension_array_equal(
    left,
    right,
    check_dtype: bool | Literal["equiv"] = True,
    index_values=None,
    check_exact: bool = False,
    rtol: float = 1.0e-5,
    atol: float = 1.0e-8,
    obj: str = "ExtensionArray",
) -> None:
    """
    Assert that two ExtensionArrays are equal.

    Parameters
    ----------
    left, right : ExtensionArray
        The two arrays to compare.
    check_dtype : bool, default True
        Whether to check if the ExtensionArray dtypes are identical.
    index_values : Index | numpy.ndarray, default None
        Optional index (shared by both left and right), used in output.
    check_exact : bool, default False
        Whether to compare numbers exactly.
    rtol : float, default 1e-5
        Relative tolerance. Only used when check_exact is False.
    atol : float, default 1e-8
        Absolute tolerance. Only used when check_exact is False.
    obj : str, default 'ExtensionArray'
        Specify object name being compared, internally used to show appropriate
        assertion message.

        .. versionadded:: 2.0.0

    Notes
    -----
    Missing values are checked separately from valid values.
    A mask of missing values is computed for each and checked to match.
    The remaining all-valid values are cast to object dtype and checked.

    Examples
    --------
    >>> from pandas import testing as tm
    >>> a = pd.Series([1, 2, 3, 4])
    >>> b, c = a.array, a.array
    >>> tm.assert_extension_array_equal(b, c)
    """
    assert isinstance(left, ExtensionArray), "left is not an ExtensionArray"
    assert isinstance(right, ExtensionArray), "right is not an ExtensionArray"
    if check_dtype:
        assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")

    def _resolution_unit(dtype):
        # NumPy dtypes expose their unit through np.datetime_data; any other
        # dtype is treated as a DatetimeTZDtype and read via `.unit`.
        if isinstance(dtype, np.dtype):
            return np.datetime_data(dtype)[0]
        return cast(DatetimeTZDtype, dtype).unit

    same_datetimelike_class = (
        isinstance(left, DatetimeLikeArrayMixin)
        and isinstance(right, DatetimeLikeArrayMixin)
        and type(right) == type(left)
    )
    if same_datetimelike_class:
        # GH 52449
        if not check_dtype and left.dtype.kind in "mM":
            l_unit = _resolution_unit(left.dtype)
            r_unit = _resolution_unit(right.dtype)
            if (
                l_unit != r_unit
                and compare_mismatched_resolutions(
                    left._ndarray, right._ndarray, operator.eq
                ).all()
            ):
                # Different resolutions, yet every element compares equal.
                return

        # Avoid slow object-dtype comparisons
        # np.asarray for case where we have a np.MaskedArray
        assert_numpy_array_equal(
            np.asarray(left.asi8),
            np.asarray(right.asi8),
            index_values=index_values,
            obj=obj,
        )
        return

    # The missing-value masks must agree before valid values are compared.
    left_na = np.asarray(left.isna())
    right_na = np.asarray(right.isna())
    assert_numpy_array_equal(
        left_na, right_na, obj=f"{obj} NA mask", index_values=index_values
    )

    left_valid = left[~left_na].to_numpy(dtype=object)
    right_valid = right[~right_na].to_numpy(dtype=object)

    if not check_exact:
        _testing.assert_almost_equal(
            left_valid,
            right_valid,
            check_dtype=bool(check_dtype),
            rtol=rtol,
            atol=atol,
            obj=obj,
            index_values=index_values,
        )
        return

    assert_numpy_array_equal(
        left_valid, right_valid, obj=obj, index_values=index_values
    )
# This could be refactored to use the NDFrame.equals method
def assert_series_equal(
left,
right,
check_dtype: bool | Literal["equiv"] = True,
check_index_type: bool | Literal["equiv"] = "equiv",
check_series_type: bool = True,
check_names: bool = True,
check_exact: bool = False,
check_datetimelike_compat: bool = False,
check_categorical: bool = True,
check_category_order: bool = True,
check_freq: bool = True,
check_flags: bool = True,
rtol: float = 1.0e-5,
atol: float = 1.0e-8,
obj: str = "Series",
*,
check_index: bool = True,
check_like: bool = False,
) -> None:
"""
Check that left and right Series are equal.
Parameters
----------
left : Series
right : Series
check_dtype : bool, default True
Whether to check the Series dtype is identical.
check_index_type : bool or {'equiv'}, default 'equiv'
Whether to check the Index class, dtype and inferred_type
are identical.
check_series_type : bool, default True
Whether to check the Series class is identical.
check_names : bool, default True
Whether to check the Series and Index names attribute.
check_exact : bool, default False
Whether to compare number exactly.
check_datetimelike_compat : bool, default False
Compare datetime-like which is comparable ignoring dtype.
check_categorical : bool, default True
Whether to compare internal Categorical exactly.
check_category_order : bool, default True
Whether to compare category order of internal Categoricals.
check_freq : bool, default True
Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex.
check_flags : bool, default True
Whether to check the `flags` attribute.
.. versionadded:: 1.2.0
rtol : float, default 1e-5
Relative tolerance. Only used when check_exact is False.
atol : float, default 1e-8
Absolute tolerance. Only used when check_exact is False.
obj : str, default 'Series'
Specify object name being compared, internally used to show appropriate
assertion message.
check_index : bool, default True
Whether to check index equivalence. If False, then compare only values.
.. versionadded:: 1.3.0
check_like : bool, default False
If True, ignore the order of the index. Must be False if check_index is False.
Note: same labels must be with the same data.
.. versionadded:: 1.5.0
Examples
--------
>>> from pandas import testing as tm
>>> a = pd.Series([1, 2, 3, 4])
>>> b = pd.Series([1, 2, 3, 4])
>>> tm.assert_series_equal(a, b)
"""
__tracebackhide__ = True
if not check_index and check_like:
raise ValueError("check_like must be False if check_index is False")
# instance validation
_check_isinstance(left, right, Series)
if check_series_type:
assert_class_equal(left, right, obj=obj)
# length comparison
if len(left) != len(right):
msg1 = f"{len(left)}, {left.index}"
msg2 = f"{len(right)}, {right.index}"
raise_assert_detail(obj, "Series length are different", msg1, msg2)
if check_flags:
assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}"
if check_index:
# GH #38183
assert_index_equal(
left.index,
right.index,
exact=check_index_type,
check_names=check_names,
check_exact=check_exact,
check_categorical=check_categorical,
check_order=not check_like,
rtol=rtol,
atol=atol,
obj=f"{obj}.index",
)
if check_like:
left = left.reindex_like(right)
if check_freq and isinstance(left.index, (DatetimeIndex, TimedeltaIndex)):
lidx = left.index
ridx = right.index
assert lidx.freq == ridx.freq, (lidx.freq, ridx.freq)
if check_dtype:
# We want to skip exact dtype checking when `check_categorical`
# is False. We'll still raise if only one is a `Categorical`,
# regardless of `check_categorical`
if (
isinstance(left.dtype, CategoricalDtype)
and isinstance(right.dtype, CategoricalDtype)
and not check_categorical
):
pass
else:
assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
if check_exact and is_numeric_dtype(left.dtype) and is_numeric_dtype(right.dtype):
left_values = left._values
right_values = right._values
# Only check exact if dtype is numeric
if isinstance(left_values, ExtensionArray) and isinstance(
right_values, ExtensionArray
):
assert_extension_array_equal(
left_values,
right_values,
check_dtype=check_dtype,
index_values=left.index,
obj=str(obj),
)
else:
assert_numpy_array_equal(
left_values,
right_values,
check_dtype=check_dtype,
obj=str(obj),
index_values=left.index,
)
elif check_datetimelike_compat and (
needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype)
):
# we want to check only if we have compat dtypes
# e.g. integer and M|m are NOT compat, but we can simply check
# the values in that case
# datetimelike may have different objects (e.g. datetime.datetime
# vs Timestamp) but will compare equal
if not Index(left._values).equals(Index(right._values)):
msg = (
f"[datetimelike_compat=True] {left._values} "
f"is not equal to {right._values}."
)
raise AssertionError(msg)
elif isinstance(left.dtype, IntervalDtype) and isinstance(
right.dtype, IntervalDtype
):
assert_interval_array_equal(left.array, right.array)
elif isinstance(left.dtype, CategoricalDtype) or isinstance(
right.dtype, CategoricalDtype
):
_testing.assert_almost_equal(
left._values,
right._values,
rtol=rtol,
atol=atol,
check_dtype=bool(check_dtype),
obj=str(obj),
index_values=left.index,
)
elif isinstance(left.dtype, ExtensionDtype) and isinstance(
right.dtype, ExtensionDtype
):
assert_extension_array_equal(
left._values,
right._values,
rtol=rtol,
atol=atol,
check_dtype=check_dtype,
index_values=left.index,
obj=str(obj),
)
elif is_extension_array_dtype_and_needs_i8_conversion(