|
6 | 6 | """
|
7 | 7 |
|
8 | 8 | from distutils.version import LooseVersion
|
9 |
| -from datetime import datetime |
| 9 | +from datetime import datetime, date |
10 | 10 |
|
11 | 11 | import pytest
|
12 | 12 | import numpy as np
|
|
19 | 19 | import pandas.util.testing as tm
|
20 | 20 |
|
21 | 21 | import pandas.io.date_converters as conv
|
22 |
| -from pandas import DataFrame, Series, Index, DatetimeIndex |
| 22 | +from pandas import DataFrame, Series, Index, DatetimeIndex, MultiIndex |
23 | 23 | from pandas import compat
|
24 | 24 | from pandas.compat import parse_date, StringIO, lrange
|
| 25 | +from pandas.compat.numpy import np_array_datetime64_compat |
25 | 26 | from pandas.tseries.index import date_range
|
26 | 27 |
|
27 | 28 |
|
@@ -510,3 +511,146 @@ def test_parse_date_time_multi_level_column_name(self):
|
510 | 511 | expected = DataFrame(expected_data,
|
511 | 512 | columns=['date_time', ('A', 'a'), ('B', 'b')])
|
512 | 513 | tm.assert_frame_equal(result, expected)
|
| 514 | + |
def test_parse_date_time(self):
    """Check ``conv.parse_date_time`` directly and as a ``date_parser``.

    Three scenarios are exercised: the converter called on parallel
    date/time arrays, ``read_csv`` merging two columns into ``date_time``,
    and ``read_csv`` building two combined columns from ``date_spec``.
    """
    dates = np.array(['2007/1/3', '2008/2/4'], dtype=object)
    times = np.array(['05:07:09', '06:08:00'], dtype=object)
    expected = np.array([datetime(2007, 1, 3, 5, 7, 9),
                         datetime(2008, 2, 4, 6, 8, 0)])

    result = conv.parse_date_time(dates, times)
    self.assertTrue((result == expected).all())

    data = """\
date, time, a, b
2001-01-05, 10:00:00, 0.0, 10.
2001-01-05, 00:00:00, 1., 11.
"""
    datecols = {'date_time': [0, 1]}
    df = self.read_csv(StringIO(data), sep=',', header=0,
                       parse_dates=datecols,
                       date_parser=conv.parse_date_time)
    self.assertIn('date_time', df)
    self.assertEqual(df.date_time.loc[0], datetime(2001, 1, 5, 10, 0, 0))

    data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n"
            "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n"
            "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n"
            "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n"
            "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n"
            "KORD,19990127, 23:00:00, 22:56:00, -0.5900")

    # Column 1 (the date) is reused by both combined columns.
    date_spec = {'nominal': [1, 2], 'actual': [1, 3]}
    df = self.read_csv(StringIO(data), header=None, parse_dates=date_spec,
                       date_parser=conv.parse_date_time)
    # Every key of date_spec should come back as a combined column, so the
    # parsed frame is checked rather than discarded.
    self.assertIn('nominal', df)
    self.assertIn('actual', df)
| 546 | + |
def test_parse_date_fields(self):
    """Exercise ``conv.parse_date_fields`` on arrays and via ``read_csv``."""
    # Direct call with parallel year / month / day arrays.
    year_arr = np.array([2007, 2008])
    month_arr = np.array([1, 2])
    day_arr = np.array([3, 4])
    parsed = conv.parse_date_fields(year_arr, month_arr, day_arr)
    wanted = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)])
    matches = parsed == wanted
    self.assertTrue(matches.all())

    # The same converter used as the date_parser for three CSV columns.
    csv_text = ("year, month, day, a\n 2001 , 01 , 10 , 10.\n"
                "2001 , 02 , 1 , 11.")
    frame = self.read_csv(StringIO(csv_text), sep=',', header=0,
                          parse_dates={'ymd': [0, 1, 2]},
                          date_parser=conv.parse_date_fields)
    self.assertIn('ymd', frame)
    first_ymd = frame.ymd.loc[0]
    self.assertEqual(first_ymd, datetime(2001, 1, 10))
| 563 | + |
def test_datetime_six_col(self):
    """Exercise ``conv.parse_all_fields`` with six date/time components."""
    # Direct call: one array per component, year through second.
    year_arr = np.array([2007, 2008])
    month_arr = np.array([1, 2])
    day_arr = np.array([3, 4])
    hour_arr = np.array([5, 6])
    minute_arr = np.array([7, 8])
    second_arr = np.array([9, 0])
    wanted = np.array([datetime(2007, 1, 3, 5, 7, 9),
                       datetime(2008, 2, 4, 6, 8, 0)])

    parsed = conv.parse_all_fields(year_arr, month_arr, day_arr,
                                   hour_arr, minute_arr, second_arr)
    matches = parsed == wanted
    self.assertTrue(matches.all())

    # Through read_csv: the first six columns are merged into 'ymdHMS'.
    csv_text = """\
year, month, day, hour, minute, second, a, b
2001, 01, 05, 10, 00, 0, 0.0, 10.
2001, 01, 5, 10, 0, 00, 1., 11.
"""
    frame = self.read_csv(StringIO(csv_text), sep=',', header=0,
                          parse_dates={'ymdHMS': [0, 1, 2, 3, 4, 5]},
                          date_parser=conv.parse_all_fields)
    self.assertIn('ymdHMS', frame)
    first_stamp = frame.ymdHMS.loc[0]
    self.assertEqual(first_stamp, datetime(2001, 1, 5, 10, 0, 0))
| 590 | + |
def test_datetime_fractional_seconds(self):
    """Check ``conv.parse_all_fields`` when the seconds column is fractional."""
    csv_text = """\
year, month, day, hour, minute, second, a, b
2001, 01, 05, 10, 00, 0.123456, 0.0, 10.
2001, 01, 5, 10, 0, 0.500000, 1., 11.
"""
    frame = self.read_csv(StringIO(csv_text), sep=',', header=0,
                          parse_dates={'ymdHMS': [0, 1, 2, 3, 4, 5]},
                          date_parser=conv.parse_all_fields)
    self.assertIn('ymdHMS', frame)

    # The fractional part of each seconds value is expected as microseconds.
    stamps = frame.ymdHMS
    first_expected = datetime(2001, 1, 5, 10, 0, 0, microsecond=123456)
    second_expected = datetime(2001, 1, 5, 10, 0, 0, microsecond=500000)
    self.assertEqual(stamps.loc[0], first_expected)
    self.assertEqual(stamps.loc[1], second_expected)
| 606 | + |
def test_generic(self):
    """Use a plain Python callable as ``date_parser`` for two columns."""
    def first_of_month(y, m):
        # Combine the year and month fields into a date on day 1.
        return date(year=int(y), month=int(m), day=1)

    csv_text = "year, month, day, a\n 2001, 01, 10, 10.\n 2001, 02, 1, 11."
    frame = self.read_csv(StringIO(csv_text), sep=',', header=0,
                          parse_dates={'ym': [0, 1]},
                          date_parser=first_of_month)
    self.assertIn('ym', frame)
    first_ym = frame.ym.loc[0]
    self.assertEqual(first_ym, date(2001, 1, 1))
| 616 | + |
def test_dateparser_resolution_if_not_ns(self):
    """Parse dates with a ``date_parser`` returning ``datetime64[s]`` values."""
    # GH 10245
    csv_text = """\
date,time,prn,rxstatus
2013-11-03,19:00:00,126,00E80000
2013-11-03,19:00:00,23,00E80000
2013-11-03,19:00:00,13,00E80000
"""

    def to_second_resolution(date_part, time_part):
        # Join the two pieces into '<date>T<time>Z' before converting.
        stamp_text = date_part + 'T' + time_part + 'Z'
        return np_array_datetime64_compat(stamp_text,
                                          dtype='datetime64[s]')

    frame = self.read_csv(StringIO(csv_text),
                          date_parser=to_second_resolution,
                          parse_dates={'datetime': ['date', 'time']},
                          index_col=['datetime', 'prn'])

    # Build the expected frame: three identical timestamps paired with
    # the prn values from the CSV, used as a two-level index.
    stamps = np_array_datetime64_compat(['2013-11-03T19:00:00Z'] * 3,
                                        dtype='datetime64[s]')
    index_pairs = list(zip(stamps, [126, 23, 13]))
    expected_index = MultiIndex.from_tuples(index_pairs,
                                            names=['datetime', 'prn'])
    expected_frame = DataFrame(data={'rxstatus': ['00E80000'] * 3},
                               index=expected_index)
    tm.assert_frame_equal(frame, expected_frame)
| 644 | + |
def test_parse_date_column_with_empty_string(self):
    """Read a date column whose last row holds only a single space."""
    # GH 6428
    # The continuation lines of the CSV text carry leading whitespace, and
    # the final row ends with a lone space in the 'opdate' field.
    csv_text = """case,opdate
              7,10/18/2006
              7,10/18/2008
              621, """
    parsed = self.read_csv(StringIO(csv_text), parse_dates=['opdate'])

    # The column is expected back as the original strings, blank included.
    rows = [[7, '10/18/2006'],
            [7, '10/18/2008'],
            [621, ' ']]
    wanted = DataFrame(rows, columns=['case', 'opdate'])
    tm.assert_frame_equal(parsed, wanted)
0 commit comments