forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathapi.py
163 lines (132 loc) · 5.27 KB
/
api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import textwrap
import warnings
from pandas.core.indexes.base import (Index,
_new_Index,
_ensure_index,
_ensure_index_from_sequences,
InvalidIndexError) # noqa
from pandas.core.indexes.category import CategoricalIndex # noqa
from pandas.core.indexes.multi import MultiIndex # noqa
from pandas.core.indexes.interval import IntervalIndex # noqa
from pandas.core.indexes.numeric import (NumericIndex, Float64Index, # noqa
Int64Index, UInt64Index)
from pandas.core.indexes.range import RangeIndex # noqa
from pandas.core.indexes.timedeltas import TimedeltaIndex
from pandas.core.indexes.period import PeriodIndex
from pandas.core.indexes.datetimes import DatetimeIndex
import pandas.core.common as com
from pandas._libs import lib
from pandas._libs.tslib import NaT
_sort_msg = textwrap.dedent("""\
Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.
To accept the future behavior, pass 'sort=False'.
To retain the current behavior and silence the warning, pass 'sort=True'.
""")
# TODO: there are many places that rely on these private methods existing in
# pandas.core.index
__all__ = ['Index', 'MultiIndex', 'NumericIndex', 'Float64Index', 'Int64Index',
'CategoricalIndex', 'IntervalIndex', 'RangeIndex', 'UInt64Index',
'InvalidIndexError', 'TimedeltaIndex',
'PeriodIndex', 'DatetimeIndex',
'_new_Index', 'NaT',
'_ensure_index', '_ensure_index_from_sequences',
'_get_combined_index',
'_get_objs_combined_axis', '_union_indexes',
'_get_consensus_names',
'_all_indexes_same']
def _get_objs_combined_axis(objs, intersect=False, axis=0, sort=True):
# Extract combined index: return intersection or union (depending on the
# value of "intersect") of indexes on given axis, or None if all objects
# lack indexes (e.g. they are numpy arrays)
obs_idxes = [obj._get_axis(axis) for obj in objs
if hasattr(obj, '_get_axis')]
if obs_idxes:
return _get_combined_index(obs_idxes, intersect=intersect, sort=sort)
def _get_combined_index(indexes, intersect=False, sort=False):
# TODO: handle index names!
indexes = com._get_distinct_objs(indexes)
if len(indexes) == 0:
index = Index([])
elif len(indexes) == 1:
index = indexes[0]
elif intersect:
index = indexes[0]
for other in indexes[1:]:
index = index.intersection(other)
else:
index = _union_indexes(indexes, sort=sort)
index = _ensure_index(index)
if sort:
try:
index = index.sort_values()
except TypeError:
pass
return index
def _union_indexes(indexes, sort=True):
if len(indexes) == 0:
raise AssertionError('Must have at least 1 Index to union')
if len(indexes) == 1:
result = indexes[0]
if isinstance(result, list):
result = Index(sorted(result))
return result
indexes, kind = _sanitize_and_check(indexes)
def _unique_indices(inds):
def conv(i):
if isinstance(i, Index):
i = i.tolist()
return i
return Index(
lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort))
if kind == 'special':
result = indexes[0]
if hasattr(result, 'union_many'):
return result.union_many(indexes[1:])
else:
for other in indexes[1:]:
result = result.union(other)
return result
elif kind == 'array':
index = indexes[0]
for other in indexes[1:]:
if not index.equals(other):
if sort is None:
# TODO: remove once pd.concat sort default changes
warnings.warn(_sort_msg, FutureWarning, stacklevel=8)
sort = True
return _unique_indices(indexes)
name = _get_consensus_names(indexes)[0]
if name != index.name:
index = index._shallow_copy(name=name)
return index
else: # kind='list'
return _unique_indices(indexes)
def _sanitize_and_check(indexes):
kinds = list({type(index) for index in indexes})
if list in kinds:
if len(kinds) > 1:
indexes = [Index(com._try_sort(x))
if not isinstance(x, Index) else
x for x in indexes]
kinds.remove(list)
else:
return indexes, 'list'
if len(kinds) > 1 or Index not in kinds:
return indexes, 'special'
else:
return indexes, 'array'
def _get_consensus_names(indexes):
# find the non-none names, need to tupleify to make
# the set hashable, then reverse on return
consensus_names = set(tuple(i.names) for i in indexes
if com._any_not_none(*i.names))
if len(consensus_names) == 1:
return list(list(consensus_names)[0])
return [None] * indexes[0].nlevels
def _all_indexes_same(indexes):
first = indexes[0]
for index in indexes[1:]:
if not first.equals(index):
return False
return True