|
2 | 2 |
|
3 | 3 | from __future__ import print_function
|
4 | 4 |
|
| 5 | +from distutils.version import LooseVersion |
5 | 6 | from numpy import nan, random
|
6 | 7 | import numpy as np
|
7 | 8 |
|
|
18 | 19 | from pandas.tests.frame.common import TestData, _check_mixed_float
|
19 | 20 |
|
20 | 21 |
|
| 22 | +def _skip_if_no_pchip(): |
| 23 | + try: |
| 24 | + from scipy.interpolate import pchip_interpolate # noqa |
| 25 | + except ImportError: |
| 26 | + raise nose.SkipTest('scipy.interpolate.pchip missing') |
| 27 | + |
| 28 | + |
21 | 29 | class TestDataFrameMissingData(tm.TestCase, TestData):
|
22 | 30 |
|
23 | 31 | _multiprocess_can_split_ = True
|
@@ -436,6 +444,218 @@ def test_fill_value_when_combine_const(self):
|
436 | 444 | assert_frame_equal(res, exp)
|
437 | 445 |
|
438 | 446 |
|
class TestDataFrameInterpolate(tm.TestCase, TestData):
    """Tests exercising ``interpolate`` on DataFrames and their columns."""

    def test_interp_basic(self):
        # Default interpolation on a frame mixing float, int and object
        # columns.
        df = DataFrame({'A': [1, 2, np.nan, 4],
                        'B': [1, 4, 9, np.nan],
                        'C': [1, 2, 3, 5],
                        'D': list('abcd')})
        expected = DataFrame({'A': [1., 2., 3., 4.],
                              'B': [1., 4., 9., 9.],
                              'C': [1, 2, 3, 5],
                              'D': list('abcd')})
        result = df.interpolate()
        assert_frame_equal(result, expected)

        # Same data, but with column 'C' used as the index.
        result = df.set_index('C').interpolate()
        expected = df.set_index('C')
        expected.loc[3, 'A'] = 3
        expected.loc[5, 'B'] = 9
        assert_frame_equal(result, expected)

    def test_interp_bad_method(self):
        # An unknown method name is expected to raise ValueError.
        df = DataFrame({'A': [1, 2, np.nan, 4],
                        'B': [1, 4, 9, np.nan],
                        'C': [1, 2, 3, 5],
                        'D': list('abcd')})
        with tm.assertRaises(ValueError):
            df.interpolate(method='not_a_method')

    def test_interp_combo(self):
        # Interpolating a single column pulled out of a mixed-dtype frame.
        df = DataFrame({'A': [1., 2., np.nan, 4.],
                        'B': [1, 4, 9, np.nan],
                        'C': [1, 2, 3, 5],
                        'D': list('abcd')})

        result = df['A'].interpolate()
        expected = Series([1., 2., 3., 4.], name='A')
        assert_series_equal(result, expected)

        # downcast='infer' is expected to yield an integer Series here.
        result = df['A'].interpolate(downcast='infer')
        expected = Series([1, 2, 3, 4], name='A')
        assert_series_equal(result, expected)

    def test_interp_nan_idx(self):
        # method='values' with a NaN in the index is expected to raise
        # NotImplementedError.
        df = DataFrame({'A': [1, 2, np.nan, 4], 'B': [np.nan, 2, 3, 4]})
        df = df.set_index('A')
        with tm.assertRaises(NotImplementedError):
            df.interpolate(method='values')

    def test_interp_various(self):
        tm._skip_if_no_scipy()
        df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7],
                        'C': [1, 2, 3, 5, 8, 13, 21]})
        df = df.set_index('C')
        expected = df.copy()

        # (method, extra kwargs, value at index 3, value at index 13,
        #  check_dtype)
        cases = [
            ('polynomial', {'order': 1}, 2.66666667, 5.76923076, True),
            ('cubic', {}, 2.81621174, 5.64146581, True),
            ('nearest', {}, 2, 5, False),
            ('quadratic', {}, 2.82533638, 6.02817974, True),
            ('slinear', {}, 2.66666667, 5.76923077, True),
            ('zero', {}, 2., 5, False),
        ]
        for method, kwargs, at_3, at_13, check_dtype in cases:
            result = df.interpolate(method=method, **kwargs)
            # Write through a single .loc call; chained indexing
            # (``expected.A.loc[...] = ...``) may assign into a copy.
            expected.loc[3, 'A'] = at_3
            expected.loc[13, 'A'] = at_13
            assert_frame_equal(result, expected, check_dtype=check_dtype)

    def test_interp_alt_scipy(self):
        tm._skip_if_no_scipy()
        df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7],
                        'C': [1, 2, 3, 5, 8, 13, 21]})
        result = df.interpolate(method='barycentric')
        expected = df.copy()
        expected.loc[2, 'A'] = 3
        expected.loc[5, 'A'] = 6
        assert_frame_equal(result, expected)

        result = df.interpolate(method='barycentric', downcast='infer')
        assert_frame_equal(result, expected.astype(np.int64))

        result = df.interpolate(method='krogh')
        expectedk = df.copy()
        expectedk['A'] = expected['A']
        assert_frame_equal(result, expectedk)

        _skip_if_no_pchip()
        import scipy
        result = df.interpolate(method='pchip')
        expected.loc[2, 'A'] = 3

        # The expected pchip value at row 5 depends on the scipy version.
        if LooseVersion(scipy.__version__) >= LooseVersion('0.17.0'):
            expected.loc[5, 'A'] = 6.0
        else:
            expected.loc[5, 'A'] = 6.125

        assert_frame_equal(result, expected)

    def test_interp_rowwise(self):
        df = DataFrame({0: [1, 2, np.nan, 4],
                        1: [2, 3, 4, np.nan],
                        2: [np.nan, 4, 5, 6],
                        3: [4, np.nan, 6, 7],
                        4: [1, 2, 3, 4]})
        result = df.interpolate(axis=1)
        expected = df.copy()
        expected.loc[3, 1] = 5
        expected.loc[0, 2] = 3
        expected.loc[1, 3] = 3
        # Column 4 is expected to come back as float64 after the row-wise
        # interpolation.
        expected[4] = expected[4].astype(np.float64)
        assert_frame_equal(result, expected)

        # scipy route
        tm._skip_if_no_scipy()
        result = df.interpolate(axis=1, method='values')
        assert_frame_equal(result, expected)

        # axis=0 must match the default call.
        result = df.interpolate(axis=0)
        expected = df.interpolate()
        assert_frame_equal(result, expected)

    def test_rowwise_alt(self):
        # No assertion on the result: this only checks that the call
        # completes (presumably a smoke test -- confirm intent).
        df = DataFrame({0: [0, .5, 1., np.nan, 4, 8, np.nan, np.nan, 64],
                        1: [1, 2, 3, 4, 3, 2, 1, 0, -1]})
        df.interpolate(axis=0)

    def test_interp_leading_nans(self):
        # Leading NaNs are expected to stay NaN; only the interior gap in
        # 'B' is filled.
        df = DataFrame({"A": [np.nan, np.nan, .5, .25, 0],
                        "B": [np.nan, -3, -3.5, np.nan, -4]})
        result = df.interpolate()
        expected = df.copy()
        # Single .loc write instead of chained ``expected['B'].loc[3]``,
        # which may assign into a temporary copy.
        expected.loc[3, 'B'] = -3.75
        assert_frame_equal(result, expected)

        tm._skip_if_no_scipy()
        result = df.interpolate(method='polynomial', order=1)
        assert_frame_equal(result, expected)

    def test_interp_raise_on_only_mixed(self):
        # Row-wise interpolation over a frame that includes a string column
        # is expected to raise TypeError.
        df = DataFrame({'A': [1, 2, np.nan, 4],
                        'B': ['a', 'b', 'c', 'd'],
                        'C': [np.nan, 2, 5, 7],
                        'D': [np.nan, np.nan, 9, 9],
                        'E': [1, 2, 3, 4]})
        with tm.assertRaises(TypeError):
            df.interpolate(axis=1)

    def test_interp_inplace(self):
        # In-place interpolation of a column must be visible in the frame.
        df = DataFrame({'a': [1., 2., np.nan, 4.]})
        expected = DataFrame({'a': [1., 2., 3., 4.]})
        result = df.copy()
        result['a'].interpolate(inplace=True)
        assert_frame_equal(result, expected)

        result = df.copy()
        result['a'].interpolate(inplace=True, downcast='infer')
        assert_frame_equal(result, expected.astype('int64'))

    def test_interp_inplace_row(self):
        # GH 10395
        # inplace=True along axis=1 must match the inplace=False result.
        result = DataFrame({'a': [1., 2., 3., 4.],
                            'b': [np.nan, 2., 3., 4.],
                            'c': [3, 2, 2, 2]})
        expected = result.interpolate(method='linear', axis=1, inplace=False)
        result.interpolate(method='linear', axis=1, inplace=True)
        assert_frame_equal(result, expected)

    def test_interp_ignore_all_good(self):
        # GH (issue number not recorded)
        df = DataFrame({'A': [1, 2, np.nan, 4],
                        'B': [1, 2, 3, 4],
                        'C': [1., 2., np.nan, 4.],
                        'D': [1., 2., 3., 4.]})
        expected = DataFrame({'A': np.array(
            [1, 2, 3, 4], dtype='float64'),
            'B': np.array(
            [1, 2, 3, 4], dtype='int64'),
            'C': np.array(
            [1., 2., 3, 4.], dtype='float64'),
            'D': np.array(
            [1., 2., 3., 4.], dtype='float64')})

        # Columns without NaNs ('B', 'D') are expected to keep their dtype.
        result = df.interpolate(downcast=None)
        assert_frame_equal(result, expected)

        # all good
        result = df[['B', 'D']].interpolate(downcast=None)
        assert_frame_equal(result, df[['B', 'D']])
| 657 | + |
| 658 | + |
439 | 659 | if __name__ == '__main__':
|
440 | 660 | import nose
|
441 | 661 | nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
|
|
0 commit comments