|
1 | 1 | # pylint: disable=E1101
|
2 | 2 |
|
3 | 3 | from pandas.compat import StringIO, BytesIO, PY3, u, range, map
|
4 |
| -from datetime import datetime |
| 4 | +#from datetime import datetime |
5 | 5 | from os.path import split as psplit
|
6 | 6 | import csv
|
7 | 7 | import os
|
|
14 | 14 | from numpy import nan
|
15 | 15 | import numpy as np
|
16 | 16 |
|
17 |
| -from pandas import DataFrame, Series, Index, MultiIndex, DatetimeIndex |
| 17 | +from pandas import DataFrame, Series, Index, MultiIndex, DatetimeIndex, datetime |
18 | 18 | import pandas.io.parsers as parsers
|
19 | 19 | from pandas.io.parsers import (read_csv, read_table, read_fwf,
|
20 | 20 | TextParser, TextFileReader)
|
@@ -66,6 +66,78 @@ def _skip_if_no_excelsuite():
|
66 | 66 | _skip_if_no_openpyxl()
|
67 | 67 |
|
68 | 68 |
|
def _skip_if_no_mpl():
    '''Skip the calling test when matplotlib cannot be imported.

    Needed because pandas.tseries.converter imports matplotlib.

    Raises
    ------
    nose.SkipTest
        if ``import matplotlib`` fails with ImportError
    '''
    try:
        # only importability matters; the module itself is not used here
        import matplotlib
    except ImportError:
        raise nose.SkipTest('matplotlib not installed, skipping')
| 75 | + |
| 76 | + |
def _offset_time(value, offset=-10):
    '''Apply a corrective time offset, expressed in minutes.

    Background: an Excel time such as '23.07.2013 24:00' actually means
    '23.07.2013 23:59' in Python terms, so the value must be shifted.

    Parameters
    ----------
    value : datetime.time
    offset : integer value in minutes (default -10)

    Returns
    -------
    whatever ``offset_datetime(value, minutes=offset)`` yields; since the
    input is a time, the corrected time is handed back as-is
    '''
    _skip_if_no_mpl()
    from pandas.io.date_converters import offset_datetime

    # TODO: it is actually very strange that pandas considers an index of
    # datetime.time to be an index of objects and not of times
    return offset_datetime(value, minutes=offset)
| 100 | + |
| 101 | + |
def _correct_date_time(value):
    '''Convert a value from the Excel test file into a corrected Python time.

    An Excel time like '24:00' arrives as '23.07.2013 00:00'. Only the time
    component is wanted here, since all inputs shall be equal, so the value
    is first passed through ``dt2ti`` and then shifted by the default
    offset of ``_offset_time``.
    '''
    _skip_if_no_xlrd()
    _skip_if_no_mpl()
    from pandas.io.date_converters import dt2ti

    # keep the time component, then apply the offset
    return _offset_time(dt2ti(value))
| 118 | + |
| 119 | + |
def read_excel_cell(filename, sheetname='min', start_row=12, end_row=155,
                    col=1, datemode=1):
    '''Read two Excel date cells with xlrd and return them as times.

    Parameters
    ----------
    filename : path of the workbook to open
    sheetname : name of the sheet to read (default 'min')
    start_row : row index of the first time stamp (default 12)
    end_row : row index of the last time stamp (default 155)
    col : column index of the cells within each row (default 1)
    datemode : datemode passed to ``xlrd.xldate_as_tuple`` (default 1)

    Returns
    -------
    (ti_start, ti_end) : tuple of datetime.time
        built from the fields at index 3 onward of each converted tuple;
        the leading date fields are discarded

    Raises
    ------
    nose.SkipTest (via ``_skip_if_no_xlrd``) -- presumably when xlrd is
    missing; confirm against that helper
    '''
    _skip_if_no_xlrd()
    # imported locally; the original note here reads
    # "NameError: global name 'xlrd' is not defined"
    from xlrd import open_workbook, xldate_as_tuple
    import datetime as dt

    sheet = open_workbook(filename).sheet_by_name(sheetname)

    def _cell_time(rowx):
        # convert the Excel serial date, then keep only the time fields
        stamp = xldate_as_tuple(sheet.row(rowx)[col].value, datemode)
        return dt.time(*stamp[3:])

    # TODO: the start row is: 12
    return (_cell_time(start_row), _cell_time(end_row))
| 139 | + |
| 140 | + |
69 | 141 | _seriesd = tm.getSeriesData()
|
70 | 142 | _tsd = tm.getTimeSeriesData()
|
71 | 143 | _frame = DataFrame(_seriesd)[:10]
|
@@ -295,6 +367,75 @@ def test_xlsx_table(self):
|
295 | 367 | tm.assert_frame_equal(df4, df.ix[:-1])
|
296 | 368 | tm.assert_frame_equal(df4, df5)
|
297 | 369 |
|
def test_xlsx_table_hours(self):
    '''Check that hours in an Excel time index are read correctly.

    For both a 1900-datemode and a 1904-datemode workbook, the first and
    last index labels produced by ``ExcelFile.parse`` (using
    ``_correct_date_time`` as date parser) must equal the corresponding
    cells read directly with xlrd and shifted by ``_offset_time``.
    '''
    _skip_if_no_xlrd()
    _skip_if_no_openpyxl()
    _skip_if_no_mpl()
    import datetime as dt

    filename = 'example_file_2013-07-25.xlsx'  # 1900 datemode file
    filename_1904 = 'example_file_2013-07-25_1904-dates.xlsx'

    for fname in (filename, filename_1904):
        pth = os.path.join(self.dirpath, fname)
        xlsx = ExcelFile(pth)
        # parse_dates=False is necessary to obtain right sorting of rows
        # in df
        # TODO: this must actually be skiprows=11, header=10
        df = xlsx.parse('min', skiprows=12, header=10, index_col=1,
                        parse_dates=False, date_parser=_correct_date_time)

        # reference values: the same cells read in directly by xlrd
        excel_cells = read_excel_cell(pth)
        xl_start = _offset_time(excel_cells[0])
        xl_end = _offset_time(excel_cells[1])

        self.assertEqual(df.index[0], xl_start)
        # compare the last label itself; a [-1:] slice would yield a
        # one-element index rather than a scalar
        self.assertEqual(df.index[-1], xl_end)

    # The day is encoded in the last 10 characters of the file stem.
    daydt_str = filename.split('.')[0][-10:]
    daydt = dt.datetime.strptime(daydt_str, '%Y-%m-%d')

    # df is the frame parsed last, i.e. from the 1904 datemode file
    df['date'] = daydt
    df['time'] = df.index

    # TODO review this: combine the 'date' and 'time' columns into a
    # datetime index and assert that its last entry equals
    # dt.datetime.combine(daydt, excel_cells[1]) as produced via xlrd
| 438 | + |
298 | 439 | def test_specify_kind_xls(self):
|
299 | 440 | _skip_if_no_xlrd()
|
300 | 441 | xlsx_file = os.path.join(self.dirpath, 'test.xlsx')
|
|
0 commit comments