|
11 | 11 | from numpy import nan
|
12 | 12 | import numpy as np
|
13 | 13 |
|
14 |
| -from pandas import DataFrame, Index |
| 14 | +from pandas import DataFrame, Index, isnull |
15 | 15 | from pandas.io.parsers import read_csv, read_table, ExcelFile, TextParser
|
16 | 16 | from pandas.util.testing import assert_almost_equal, assert_frame_equal
|
17 | 17 | import pandas._tseries as lib
|
@@ -584,6 +584,61 @@ def test_read_csv_parse_simple_list(self):
|
584 | 584 | 'foo', 'bar']})
|
585 | 585 | assert_frame_equal(df, expected)
|
586 | 586 |
|
def test_converters_corner_with_nas(self):
    """Check that converter output interacts correctly with ``na_values``.

    The same CSV text is parsed twice.  In the first pass the 'days'
    converter maps an empty cell straight to NaN; in the second it maps
    an empty cell to the sentinel -1, which is also listed in
    ``na_values``.  The test asserts that the empty 'days' cell in row 1
    is null and that both parses produce equal frames.
    """
    import StringIO
    import numpy as np
    import pandas
    csv = """id,score,days
1,2,12
2,2-5,
3,,14+
4,6-12,2"""

    def make_days_converter(missing):
        # Build a 'days' converter that returns `missing` for blank cells.
        def to_days(raw):
            text = raw.strip()
            if not text:
                return missing
            # A trailing '+' (e.g. "14+") is read as the number plus one.
            if text.endswith('+'):
                return int(text[:-1]) + 1
            return int(text)
        return to_days

    convert_days = make_days_converter(np.nan)
    convert_days_sentinel = make_days_converter(-1)

    def convert_score(raw):
        text = raw.strip()
        if not text:
            return np.nan
        # A '-' at index 0 is a sign, not a range separator, hence `> 0`.
        if text.find('-') > 0:
            low, high = [int(part) for part in text.split('-')]
            return 0.5 * (low + high)
        return float(text)

    def parse_with(days_converter):
        # Each parse gets a fresh in-memory handle over the same text.
        handle = StringIO.StringIO(csv)
        return pandas.read_csv(handle,
                               converters={'score': convert_score,
                                           'days': days_converter},
                               na_values=[-1, '', None])

    result = parse_with(convert_days)
    self.assert_(isnull(result['days'][1]))

    result2 = parse_with(convert_days_sentinel)
    assert_frame_equal(result, result2)
| 641 | + |
587 | 642 | class TestParseSQL(unittest.TestCase):
|
588 | 643 |
|
589 | 644 | def test_convert_sql_column_floats(self):
|
|
0 commit comments