|
123 | 123 | )
|
124 | 124 |
|
125 | 125 |
|
126 |
| -class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine): |
127 |
| - """ |
128 |
| - This class manages a MultiIndex by mapping label combinations to positive |
129 |
| - integers. |
| 126 | +class MultiIndexUInt64Engine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine): |
| 127 | + """Manages a MultiIndex by mapping label combinations to positive integers. |
| 128 | +
|
| 129 | + The number of possible label combinations must not overflow a 64-bit unsigned integer.
130 | 130 | """
|
131 | 131 |
|
132 | 132 | _base = libindex.UInt64Engine
|
| 133 | + _codes_dtype = "uint64" |
133 | 134 |
|
134 |
| - def _codes_to_ints(self, codes): |
135 |
| - """ |
136 |
| - Transform combination(s) of uint64 in one uint64 (each), in a strictly |
137 |
| - monotonic way (i.e. respecting the lexicographic order of integer |
138 |
| - combinations): see BaseMultiIndexCodesEngine documentation. |
139 | 135 |
|
140 |
| - Parameters |
141 |
| - ---------- |
142 |
| - codes : 1- or 2-dimensional array of dtype uint64 |
143 |
| - Combinations of integers (one per row) |
| 136 | +class MultiIndexUInt32Engine(libindex.BaseMultiIndexCodesEngine, libindex.UInt32Engine): |
| 137 | + """Manages a MultiIndex by mapping label combinations to positive integers. |
144 | 138 |
|
145 |
| - Returns |
146 |
| - ------- |
147 |
| - scalar or 1-dimensional array, of dtype uint64 |
148 |
| - Integer(s) representing one combination (each). |
149 |
| - """ |
150 |
| - # Shift the representation of each level by the pre-calculated number |
151 |
| - # of bits: |
152 |
| - codes <<= self.offsets |
| 139 | + The number of possible label combinations must not overflow a 32-bit unsigned integer. |
| 140 | + """ |
153 | 141 |
|
154 |
| - # Now sum and OR are in fact interchangeable. This is a simple |
155 |
| - # composition of the (disjunct) significant bits of each level (i.e. |
156 |
| - # each column in "codes") in a single positive integer: |
157 |
| - if codes.ndim == 1: |
158 |
| - # Single key |
159 |
| - return np.bitwise_or.reduce(codes) |
| 142 | + _base = libindex.UInt32Engine |
| 143 | + _codes_dtype = "uint32" |
160 | 144 |
|
161 |
| - # Multiple keys |
162 |
| - return np.bitwise_or.reduce(codes, axis=1) |
163 | 145 |
|
| 146 | +class MultiIndexUInt16Engine(libindex.BaseMultiIndexCodesEngine, libindex.UInt16Engine): |
| 147 | + """Manages a MultiIndex by mapping label combinations to positive integers. |
164 | 148 |
|
165 |
| -class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine): |
166 |
| - """ |
167 |
| - This class manages those (extreme) cases in which the number of possible |
168 |
| - label combinations overflows the 64 bits integers, and uses an ObjectEngine |
169 |
| - containing Python integers. |
| 149 | + The number of possible label combinations must not overflow a 16-bit unsigned integer. |
170 | 150 | """
|
171 | 151 |
|
172 |
| - _base = libindex.ObjectEngine |
| 152 | + _base = libindex.UInt16Engine |
| 153 | + _codes_dtype = "uint16" |
173 | 154 |
|
174 |
| - def _codes_to_ints(self, codes): |
175 |
| - """ |
176 |
| - Transform combination(s) of uint64 in one Python integer (each), in a |
177 |
| - strictly monotonic way (i.e. respecting the lexicographic order of |
178 |
| - integer combinations): see BaseMultiIndexCodesEngine documentation. |
179 | 155 |
|
180 |
| - Parameters |
181 |
| - ---------- |
182 |
| - codes : 1- or 2-dimensional array of dtype uint64 |
183 |
| - Combinations of integers (one per row) |
| 156 | +class MultiIndexUInt8Engine(libindex.BaseMultiIndexCodesEngine, libindex.UInt8Engine): |
| 157 | + """Manages a MultiIndex by mapping label combinations to positive integers. |
184 | 158 |
|
185 |
| - Returns |
186 |
| - ------- |
187 |
| - int, or 1-dimensional array of dtype object |
188 |
| - Integer(s) representing one combination (each). |
189 |
| - """ |
190 |
| - # Shift the representation of each level by the pre-calculated number |
191 |
| - # of bits. Since this can overflow uint64, first make sure we are |
192 |
| - # working with Python integers: |
193 |
| - codes = codes.astype("object") << self.offsets |
| 159 | + The number of possible label combinations must not overflow an 8-bit unsigned integer. |
| 160 | + """ |
194 | 161 |
|
195 |
| - # Now sum and OR are in fact interchangeable. This is a simple |
196 |
| - # composition of the (disjunct) significant bits of each level (i.e. |
197 |
| - # each column in "codes") in a single positive integer (per row): |
198 |
| - if codes.ndim == 1: |
199 |
| - # Single key |
200 |
| - return np.bitwise_or.reduce(codes) |
| 162 | + _base = libindex.UInt8Engine |
| 163 | + _codes_dtype = "uint8" |
201 | 164 |
|
202 |
| - # Multiple keys |
203 |
| - return np.bitwise_or.reduce(codes, axis=1) |
| 165 | + |
| 166 | +class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine): |
| 167 | + """Manages a MultiIndex by mapping label combinations to positive integers. |
| 168 | +
|
| 169 | + This class manages those (extreme) cases in which the number of possible |
| 170 | + label combinations overflows the 64 bits integers, and uses an ObjectEngine |
| 171 | + containing Python integers. |
| 172 | + """ |
| 173 | + |
| 174 | + _base = libindex.ObjectEngine |
| 175 | + _codes_dtype = "object" |
204 | 176 |
|
205 | 177 |
|
206 | 178 | def names_compat(meth: F) -> F:
|
@@ -1229,13 +1201,25 @@ def _engine(self):
|
1229 | 1201 | # equivalent to sorting lexicographically the codes themselves. Notice
|
1230 | 1202 | # that each level needs to be shifted by the number of bits needed to
|
1231 | 1203 | # represent the _previous_ ones:
|
1232 |
| - offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64") |
| 1204 | + offsets = np.concatenate([lev_bits[1:], [0]]) |
| 1205 | + # Downcast the type if possible, to prevent upcasting when shifting codes: |
| 1206 | + offsets = offsets.astype(np.min_scalar_type(int(offsets[0]))) |
1233 | 1207 |
|
1234 | 1208 | # Check the total number of bits needed for our representation:
|
1235 | 1209 | if lev_bits[0] > 64:
|
1236 | 1210 | # The levels would overflow a 64 bit uint - use Python integers:
|
1237 | 1211 | return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
|
1238 |
| - return MultiIndexUIntEngine(self.levels, self.codes, offsets) |
| 1212 | + if lev_bits[0] > 32: |
| 1213 | + # The levels would overflow a 32 bit uint - use uint64 |
| 1214 | + return MultiIndexUInt64Engine(self.levels, self.codes, offsets) |
| 1215 | + if lev_bits[0] > 16: |
| 1216 | + # The levels would overflow a 16 bit uint - use uint32 |
| 1217 | + return MultiIndexUInt32Engine(self.levels, self.codes, offsets) |
| 1218 | + if lev_bits[0] > 8: |
| 1219 | + # The levels would overflow an 8 bit uint - use uint16 |
| 1220 | + return MultiIndexUInt16Engine(self.levels, self.codes, offsets) |
| 1221 | + # The levels fit in an 8 bit uint - use uint8 |
| 1222 | + return MultiIndexUInt8Engine(self.levels, self.codes, offsets) |
1239 | 1223 |
|
1240 | 1224 | # Return type "Callable[..., MultiIndex]" of "_constructor" incompatible with return
|
1241 | 1225 | # type "Type[MultiIndex]" in supertype "Index"
|
|
0 commit comments