|
2 | 2 |
|
3 | 3 | import pytest
|
4 | 4 | import numpy as np
|
5 |
| -from datetime import timedelta |
6 | 5 | from pandas import (
|
7 | 6 | Interval, IntervalIndex, Index, isna, notna, interval_range, Timestamp,
|
8 |
| - Timedelta, compat, date_range, timedelta_range, DateOffset) |
| 7 | + Timedelta, date_range, timedelta_range) |
9 | 8 | from pandas.compat import lzip
|
10 | 9 | from pandas.core.common import _asarray_tuplesafe
|
11 |
| -from pandas.tseries.offsets import Day |
12 |
| -from pandas._libs.interval import IntervalTree |
13 | 10 | from pandas.tests.indexes.common import Base
|
14 | 11 | import pandas.util.testing as tm
|
15 | 12 | import pandas as pd
|
@@ -1158,367 +1155,3 @@ def test_to_tuples_na(self, tuples, na_tuple):
|
1158 | 1155 | assert all(isna(x) for x in result_na)
|
1159 | 1156 | else:
|
1160 | 1157 | assert isna(result_na)
|
1161 |
| - |
1162 |
| - |
1163 |
| -class TestIntervalRange(object): |
1164 |
| - |
1165 |
| - def test_construction_from_numeric(self, closed, name): |
1166 |
| - # combinations of start/end/periods without freq |
1167 |
| - expected = IntervalIndex.from_breaks( |
1168 |
| - np.arange(0, 6), name=name, closed=closed) |
1169 |
| - |
1170 |
| - result = interval_range(start=0, end=5, name=name, closed=closed) |
1171 |
| - tm.assert_index_equal(result, expected) |
1172 |
| - |
1173 |
| - result = interval_range(start=0, periods=5, name=name, closed=closed) |
1174 |
| - tm.assert_index_equal(result, expected) |
1175 |
| - |
1176 |
| - result = interval_range(end=5, periods=5, name=name, closed=closed) |
1177 |
| - tm.assert_index_equal(result, expected) |
1178 |
| - |
1179 |
| - # combinations of start/end/periods with freq |
1180 |
| - expected = IntervalIndex.from_tuples([(0, 2), (2, 4), (4, 6)], |
1181 |
| - name=name, closed=closed) |
1182 |
| - |
1183 |
| - result = interval_range(start=0, end=6, freq=2, name=name, |
1184 |
| - closed=closed) |
1185 |
| - tm.assert_index_equal(result, expected) |
1186 |
| - |
1187 |
| - result = interval_range(start=0, periods=3, freq=2, name=name, |
1188 |
| - closed=closed) |
1189 |
| - tm.assert_index_equal(result, expected) |
1190 |
| - |
1191 |
| - result = interval_range(end=6, periods=3, freq=2, name=name, |
1192 |
| - closed=closed) |
1193 |
| - tm.assert_index_equal(result, expected) |
1194 |
| - |
1195 |
| - # output truncates early if freq causes end to be skipped. |
1196 |
| - expected = IntervalIndex.from_tuples([(0.0, 1.5), (1.5, 3.0)], |
1197 |
| - name=name, closed=closed) |
1198 |
| - result = interval_range(start=0, end=4, freq=1.5, name=name, |
1199 |
| - closed=closed) |
1200 |
| - tm.assert_index_equal(result, expected) |
1201 |
| - |
1202 |
| - @pytest.mark.parametrize('tz', [None, 'US/Eastern']) |
1203 |
| - def test_construction_from_timestamp(self, closed, name, tz): |
1204 |
| - # combinations of start/end/periods without freq |
1205 |
| - start = Timestamp('2017-01-01', tz=tz) |
1206 |
| - end = Timestamp('2017-01-06', tz=tz) |
1207 |
| - breaks = date_range(start=start, end=end) |
1208 |
| - expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) |
1209 |
| - |
1210 |
| - result = interval_range(start=start, end=end, name=name, |
1211 |
| - closed=closed) |
1212 |
| - tm.assert_index_equal(result, expected) |
1213 |
| - |
1214 |
| - result = interval_range(start=start, periods=5, name=name, |
1215 |
| - closed=closed) |
1216 |
| - tm.assert_index_equal(result, expected) |
1217 |
| - |
1218 |
| - result = interval_range(end=end, periods=5, name=name, |
1219 |
| - closed=closed) |
1220 |
| - tm.assert_index_equal(result, expected) |
1221 |
| - |
1222 |
| - # combinations of start/end/periods with fixed freq |
1223 |
| - freq = '2D' |
1224 |
| - start = Timestamp('2017-01-01', tz=tz) |
1225 |
| - end = Timestamp('2017-01-07', tz=tz) |
1226 |
| - breaks = date_range(start=start, end=end, freq=freq) |
1227 |
| - expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) |
1228 |
| - |
1229 |
| - result = interval_range(start=start, end=end, freq=freq, name=name, |
1230 |
| - closed=closed) |
1231 |
| - tm.assert_index_equal(result, expected) |
1232 |
| - |
1233 |
| - result = interval_range(start=start, periods=3, freq=freq, name=name, |
1234 |
| - closed=closed) |
1235 |
| - tm.assert_index_equal(result, expected) |
1236 |
| - |
1237 |
| - result = interval_range(end=end, periods=3, freq=freq, name=name, |
1238 |
| - closed=closed) |
1239 |
| - tm.assert_index_equal(result, expected) |
1240 |
| - |
1241 |
| - # output truncates early if freq causes end to be skipped. |
1242 |
| - end = Timestamp('2017-01-08', tz=tz) |
1243 |
| - result = interval_range(start=start, end=end, freq=freq, name=name, |
1244 |
| - closed=closed) |
1245 |
| - tm.assert_index_equal(result, expected) |
1246 |
| - |
1247 |
| - # combinations of start/end/periods with non-fixed freq |
1248 |
| - freq = 'M' |
1249 |
| - start = Timestamp('2017-01-01', tz=tz) |
1250 |
| - end = Timestamp('2017-12-31', tz=tz) |
1251 |
| - breaks = date_range(start=start, end=end, freq=freq) |
1252 |
| - expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) |
1253 |
| - |
1254 |
| - result = interval_range(start=start, end=end, freq=freq, name=name, |
1255 |
| - closed=closed) |
1256 |
| - tm.assert_index_equal(result, expected) |
1257 |
| - |
1258 |
| - result = interval_range(start=start, periods=11, freq=freq, name=name, |
1259 |
| - closed=closed) |
1260 |
| - tm.assert_index_equal(result, expected) |
1261 |
| - |
1262 |
| - result = interval_range(end=end, periods=11, freq=freq, name=name, |
1263 |
| - closed=closed) |
1264 |
| - tm.assert_index_equal(result, expected) |
1265 |
| - |
1266 |
| - # output truncates early if freq causes end to be skipped. |
1267 |
| - end = Timestamp('2018-01-15', tz=tz) |
1268 |
| - result = interval_range(start=start, end=end, freq=freq, name=name, |
1269 |
| - closed=closed) |
1270 |
| - tm.assert_index_equal(result, expected) |
1271 |
| - |
1272 |
| - def test_construction_from_timedelta(self, closed, name): |
1273 |
| - # combinations of start/end/periods without freq |
1274 |
| - start, end = Timedelta('1 day'), Timedelta('6 days') |
1275 |
| - breaks = timedelta_range(start=start, end=end) |
1276 |
| - expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) |
1277 |
| - |
1278 |
| - result = interval_range(start=start, end=end, name=name, |
1279 |
| - closed=closed) |
1280 |
| - tm.assert_index_equal(result, expected) |
1281 |
| - |
1282 |
| - result = interval_range(start=start, periods=5, name=name, |
1283 |
| - closed=closed) |
1284 |
| - tm.assert_index_equal(result, expected) |
1285 |
| - |
1286 |
| - result = interval_range(end=end, periods=5, name=name, |
1287 |
| - closed=closed) |
1288 |
| - tm.assert_index_equal(result, expected) |
1289 |
| - |
1290 |
| - # combinations of start/end/periods with fixed freq |
1291 |
| - freq = '2D' |
1292 |
| - start, end = Timedelta('1 day'), Timedelta('7 days') |
1293 |
| - breaks = timedelta_range(start=start, end=end, freq=freq) |
1294 |
| - expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) |
1295 |
| - |
1296 |
| - result = interval_range(start=start, end=end, freq=freq, name=name, |
1297 |
| - closed=closed) |
1298 |
| - tm.assert_index_equal(result, expected) |
1299 |
| - |
1300 |
| - result = interval_range(start=start, periods=3, freq=freq, name=name, |
1301 |
| - closed=closed) |
1302 |
| - tm.assert_index_equal(result, expected) |
1303 |
| - |
1304 |
| - result = interval_range(end=end, periods=3, freq=freq, name=name, |
1305 |
| - closed=closed) |
1306 |
| - tm.assert_index_equal(result, expected) |
1307 |
| - |
1308 |
| - # output truncates early if freq causes end to be skipped. |
1309 |
| - end = Timedelta('7 days 1 hour') |
1310 |
| - result = interval_range(start=start, end=end, freq=freq, name=name, |
1311 |
| - closed=closed) |
1312 |
| - tm.assert_index_equal(result, expected) |
1313 |
| - |
1314 |
| - def test_constructor_coverage(self): |
1315 |
| - # float value for periods |
1316 |
| - expected = pd.interval_range(start=0, periods=10) |
1317 |
| - result = pd.interval_range(start=0, periods=10.5) |
1318 |
| - tm.assert_index_equal(result, expected) |
1319 |
| - |
1320 |
| - # equivalent timestamp-like start/end |
1321 |
| - start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15') |
1322 |
| - expected = pd.interval_range(start=start, end=end) |
1323 |
| - |
1324 |
| - result = pd.interval_range(start=start.to_pydatetime(), |
1325 |
| - end=end.to_pydatetime()) |
1326 |
| - tm.assert_index_equal(result, expected) |
1327 |
| - |
1328 |
| - result = pd.interval_range(start=start.asm8, end=end.asm8) |
1329 |
| - tm.assert_index_equal(result, expected) |
1330 |
| - |
1331 |
| - # equivalent freq with timestamp |
1332 |
| - equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1), |
1333 |
| - DateOffset(days=1)] |
1334 |
| - for freq in equiv_freq: |
1335 |
| - result = pd.interval_range(start=start, end=end, freq=freq) |
1336 |
| - tm.assert_index_equal(result, expected) |
1337 |
| - |
1338 |
| - # equivalent timedelta-like start/end |
1339 |
| - start, end = Timedelta(days=1), Timedelta(days=10) |
1340 |
| - expected = pd.interval_range(start=start, end=end) |
1341 |
| - |
1342 |
| - result = pd.interval_range(start=start.to_pytimedelta(), |
1343 |
| - end=end.to_pytimedelta()) |
1344 |
| - tm.assert_index_equal(result, expected) |
1345 |
| - |
1346 |
| - result = pd.interval_range(start=start.asm8, end=end.asm8) |
1347 |
| - tm.assert_index_equal(result, expected) |
1348 |
| - |
1349 |
| - # equivalent freq with timedelta |
1350 |
| - equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1)] |
1351 |
| - for freq in equiv_freq: |
1352 |
| - result = pd.interval_range(start=start, end=end, freq=freq) |
1353 |
| - tm.assert_index_equal(result, expected) |
1354 |
| - |
1355 |
| - def test_errors(self): |
1356 |
| - # not enough params |
1357 |
| - msg = ('Of the three parameters: start, end, and periods, ' |
1358 |
| - 'exactly two must be specified') |
1359 |
| - |
1360 |
| - with tm.assert_raises_regex(ValueError, msg): |
1361 |
| - interval_range(start=0) |
1362 |
| - |
1363 |
| - with tm.assert_raises_regex(ValueError, msg): |
1364 |
| - interval_range(end=5) |
1365 |
| - |
1366 |
| - with tm.assert_raises_regex(ValueError, msg): |
1367 |
| - interval_range(periods=2) |
1368 |
| - |
1369 |
| - with tm.assert_raises_regex(ValueError, msg): |
1370 |
| - interval_range() |
1371 |
| - |
1372 |
| - # too many params |
1373 |
| - with tm.assert_raises_regex(ValueError, msg): |
1374 |
| - interval_range(start=0, end=5, periods=6) |
1375 |
| - |
1376 |
| - # mixed units |
1377 |
| - msg = 'start, end, freq need to be type compatible' |
1378 |
| - with tm.assert_raises_regex(TypeError, msg): |
1379 |
| - interval_range(start=0, end=Timestamp('20130101'), freq=2) |
1380 |
| - |
1381 |
| - with tm.assert_raises_regex(TypeError, msg): |
1382 |
| - interval_range(start=0, end=Timedelta('1 day'), freq=2) |
1383 |
| - |
1384 |
| - with tm.assert_raises_regex(TypeError, msg): |
1385 |
| - interval_range(start=0, end=10, freq='D') |
1386 |
| - |
1387 |
| - with tm.assert_raises_regex(TypeError, msg): |
1388 |
| - interval_range(start=Timestamp('20130101'), end=10, freq='D') |
1389 |
| - |
1390 |
| - with tm.assert_raises_regex(TypeError, msg): |
1391 |
| - interval_range(start=Timestamp('20130101'), |
1392 |
| - end=Timedelta('1 day'), freq='D') |
1393 |
| - |
1394 |
| - with tm.assert_raises_regex(TypeError, msg): |
1395 |
| - interval_range(start=Timestamp('20130101'), |
1396 |
| - end=Timestamp('20130110'), freq=2) |
1397 |
| - |
1398 |
| - with tm.assert_raises_regex(TypeError, msg): |
1399 |
| - interval_range(start=Timedelta('1 day'), end=10, freq='D') |
1400 |
| - |
1401 |
| - with tm.assert_raises_regex(TypeError, msg): |
1402 |
| - interval_range(start=Timedelta('1 day'), |
1403 |
| - end=Timestamp('20130110'), freq='D') |
1404 |
| - |
1405 |
| - with tm.assert_raises_regex(TypeError, msg): |
1406 |
| - interval_range(start=Timedelta('1 day'), |
1407 |
| - end=Timedelta('10 days'), freq=2) |
1408 |
| - |
1409 |
| - # invalid periods |
1410 |
| - msg = 'periods must be a number, got foo' |
1411 |
| - with tm.assert_raises_regex(TypeError, msg): |
1412 |
| - interval_range(start=0, periods='foo') |
1413 |
| - |
1414 |
| - # invalid start |
1415 |
| - msg = 'start must be numeric or datetime-like, got foo' |
1416 |
| - with tm.assert_raises_regex(ValueError, msg): |
1417 |
| - interval_range(start='foo', periods=10) |
1418 |
| - |
1419 |
| - # invalid end |
1420 |
| - msg = r'end must be numeric or datetime-like, got \(0, 1\]' |
1421 |
| - with tm.assert_raises_regex(ValueError, msg): |
1422 |
| - interval_range(end=Interval(0, 1), periods=10) |
1423 |
| - |
1424 |
| - # invalid freq for datetime-like |
1425 |
| - msg = 'freq must be numeric or convertible to DateOffset, got foo' |
1426 |
| - with tm.assert_raises_regex(ValueError, msg): |
1427 |
| - interval_range(start=0, end=10, freq='foo') |
1428 |
| - |
1429 |
| - with tm.assert_raises_regex(ValueError, msg): |
1430 |
| - interval_range(start=Timestamp('20130101'), periods=10, freq='foo') |
1431 |
| - |
1432 |
| - with tm.assert_raises_regex(ValueError, msg): |
1433 |
| - interval_range(end=Timedelta('1 day'), periods=10, freq='foo') |
1434 |
| - |
1435 |
| - # mixed tz |
1436 |
| - start = Timestamp('2017-01-01', tz='US/Eastern') |
1437 |
| - end = Timestamp('2017-01-07', tz='US/Pacific') |
1438 |
| - msg = 'Start and end cannot both be tz-aware with different timezones' |
1439 |
| - with tm.assert_raises_regex(TypeError, msg): |
1440 |
| - interval_range(start=start, end=end) |
1441 |
| - |
1442 |
| - |
1443 |
| -class TestIntervalTree(object): |
1444 |
| - def setup_method(self, method): |
1445 |
| - gentree = lambda dtype: IntervalTree(np.arange(5, dtype=dtype), |
1446 |
| - np.arange(5, dtype=dtype) + 2) |
1447 |
| - self.tree = gentree('int64') |
1448 |
| - self.trees = {dtype: gentree(dtype) |
1449 |
| - for dtype in ['int32', 'int64', 'float32', 'float64']} |
1450 |
| - |
1451 |
| - def test_get_loc(self): |
1452 |
| - for dtype, tree in self.trees.items(): |
1453 |
| - tm.assert_numpy_array_equal(tree.get_loc(1), |
1454 |
| - np.array([0], dtype='int64')) |
1455 |
| - tm.assert_numpy_array_equal(np.sort(tree.get_loc(2)), |
1456 |
| - np.array([0, 1], dtype='int64')) |
1457 |
| - with pytest.raises(KeyError): |
1458 |
| - tree.get_loc(-1) |
1459 |
| - |
1460 |
| - def test_get_indexer(self): |
1461 |
| - for dtype, tree in self.trees.items(): |
1462 |
| - tm.assert_numpy_array_equal( |
1463 |
| - tree.get_indexer(np.array([1.0, 5.5, 6.5])), |
1464 |
| - np.array([0, 4, -1], dtype='int64')) |
1465 |
| - with pytest.raises(KeyError): |
1466 |
| - tree.get_indexer(np.array([3.0])) |
1467 |
| - |
1468 |
| - def test_get_indexer_non_unique(self): |
1469 |
| - indexer, missing = self.tree.get_indexer_non_unique( |
1470 |
| - np.array([1.0, 2.0, 6.5])) |
1471 |
| - tm.assert_numpy_array_equal(indexer[:1], |
1472 |
| - np.array([0], dtype='int64')) |
1473 |
| - tm.assert_numpy_array_equal(np.sort(indexer[1:3]), |
1474 |
| - np.array([0, 1], dtype='int64')) |
1475 |
| - tm.assert_numpy_array_equal(np.sort(indexer[3:]), |
1476 |
| - np.array([-1], dtype='int64')) |
1477 |
| - tm.assert_numpy_array_equal(missing, np.array([2], dtype='int64')) |
1478 |
| - |
1479 |
| - def test_duplicates(self): |
1480 |
| - tree = IntervalTree([0, 0, 0], [1, 1, 1]) |
1481 |
| - tm.assert_numpy_array_equal(np.sort(tree.get_loc(0.5)), |
1482 |
| - np.array([0, 1, 2], dtype='int64')) |
1483 |
| - |
1484 |
| - with pytest.raises(KeyError): |
1485 |
| - tree.get_indexer(np.array([0.5])) |
1486 |
| - |
1487 |
| - indexer, missing = tree.get_indexer_non_unique(np.array([0.5])) |
1488 |
| - tm.assert_numpy_array_equal(np.sort(indexer), |
1489 |
| - np.array([0, 1, 2], dtype='int64')) |
1490 |
| - tm.assert_numpy_array_equal(missing, np.array([], dtype='int64')) |
1491 |
| - |
1492 |
| - def test_get_loc_closed(self): |
1493 |
| - for closed in ['left', 'right', 'both', 'neither']: |
1494 |
| - tree = IntervalTree([0], [1], closed=closed) |
1495 |
| - for p, errors in [(0, tree.open_left), |
1496 |
| - (1, tree.open_right)]: |
1497 |
| - if errors: |
1498 |
| - with pytest.raises(KeyError): |
1499 |
| - tree.get_loc(p) |
1500 |
| - else: |
1501 |
| - tm.assert_numpy_array_equal(tree.get_loc(p), |
1502 |
| - np.array([0], dtype='int64')) |
1503 |
| - |
1504 |
| - @pytest.mark.skipif(compat.is_platform_32bit(), |
1505 |
| - reason="int type mismatch on 32bit") |
1506 |
| - def test_get_indexer_closed(self): |
1507 |
| - x = np.arange(1000, dtype='float64') |
1508 |
| - found = x.astype('intp') |
1509 |
| - not_found = (-1 * np.ones(1000)).astype('intp') |
1510 |
| - |
1511 |
| - for leaf_size in [1, 10, 100, 10000]: |
1512 |
| - for closed in ['left', 'right', 'both', 'neither']: |
1513 |
| - tree = IntervalTree(x, x + 0.5, closed=closed, |
1514 |
| - leaf_size=leaf_size) |
1515 |
| - tm.assert_numpy_array_equal(found, |
1516 |
| - tree.get_indexer(x + 0.25)) |
1517 |
| - |
1518 |
| - expected = found if tree.closed_left else not_found |
1519 |
| - tm.assert_numpy_array_equal(expected, |
1520 |
| - tree.get_indexer(x + 0.0)) |
1521 |
| - |
1522 |
| - expected = found if tree.closed_right else not_found |
1523 |
| - tm.assert_numpy_array_equal(expected, |
1524 |
| - tree.get_indexer(x + 0.5)) |
0 commit comments