From 4bf2f2eaaaac9e82068e2b050632d1465c735633 Mon Sep 17 00:00:00 2001 From: John Owens Date: Wed, 26 Jul 2017 16:40:43 -0700 Subject: [PATCH 001/141] ENH: GH17054: read_html() handles rowspan/colspan and infers headers --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/html.py | 354 ++++++++++++++++++++++---------- pandas/tests/io/test_html.py | 127 +++++++++++- 3 files changed, 365 insertions(+), 117 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index aed00ca578984..4c9c8a402505e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -78,6 +78,7 @@ Other Enhancements - :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`) - :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`) - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`) +- :func:`read_html` handles colspan and rowspan arguments and attempts to infer a header if the header is not explicitly specified (:issue:`17054`) .. _whatsnew_0210.api_breaking: diff --git a/pandas/io/html.py b/pandas/io/html.py index a4acb26af5259..b2561c519f71f 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -17,10 +17,10 @@ from pandas.io.common import (_is_url, urlopen, parse_url, _validate_header_arg) from pandas.io.parsers import TextParser -from pandas.compat import (lrange, lmap, u, string_types, iteritems, +from pandas.compat import (lrange, lmap, lfilter, u, string_types, iteritems, raise_with_traceback, binary_type) from pandas import Series -from pandas.core.common import AbstractMethodError +from pandas.core.common import (AbstractMethodError, flatten) from pandas.io.formats.printing import pprint_thing _IMPORTS = False @@ -176,13 +176,15 @@ class _HtmlFrameParser(object): ----- To subclass this class effectively you must override the following methods: * :func:`_build_doc` - * :func:`_text_getter` - * :func:`_parse_td` * :func:`_parse_tables` - * :func:`_parse_tr` - * :func:`_parse_thead` - * :func:`_parse_tbody` - * :func:`_parse_tfoot` + * :func:`_text_getter` + * :func:`_equals_tag` + * :func:`_has_tag` + * :func:`_extract_td` + * :func:`_extract_tr` + * :func:`_extract_thead` + * :func:`_extract_tbody` + * :func:`_extract_tfoot` See each method's respective documentation for details on their functionality. """ @@ -197,29 +199,32 @@ def parse_tables(self): tables = self._parse_tables(self._build_doc(), self.match, self.attrs) return (self._build_table(table) for table in tables) - def _parse_raw_data(self, rows): - """Parse the raw data into a list of lists. + def _parse_tables(self, doc, match, attrs): + """Return all tables from the parsed DOM. Parameters ---------- - rows : iterable of node-like - A list of row elements. + doc : tree-like + The DOM from which to parse the table element. - text_getter : callable - A callable that gets the text from an individual node. This must be - defined by subclasses. + match : str or regular expression + The text to search for in the DOM tree. - column_finder : callable - A callable that takes a row node as input and returns a list of the - column node in that row. This must be defined by subclasses. + attrs : dict + A dictionary of table attributes that can be used to disambiguate + mutliple tables on a page. + + Raises + ------ + ValueError + * If `match` does not match any text in the document. 
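(An aside on the contract described here: `match` and `attrs` flow straight
through from the public `read_html` call. A minimal sketch of that contract --
the HTML snippet and the class name below are invented for illustration:)

```python
import pandas as pd

html = """<table class="stats">
<tr><th>team</th><th>wins</th></tr>
<tr><td>reds</td><td>3</td></tr>
</table>"""

# Only tables whose text matches the regex *and* whose attributes match
# `attrs` are parsed; if nothing matches, read_html raises ValueError.
dfs = pd.read_html(html, match="team", attrs={"class": "stats"})
print(dfs[0])
```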
Returns ------- - data : list of list of strings + tables : list of node-like + A list of elements to be parsed into raw data. """ - data = [[_remove_whitespace(self._text_getter(col)) for col in - self._parse_td(row)] for row in rows] - return data + raise AbstractMethodError(self) def _text_getter(self, obj): """Return the text of an individual DOM node. @@ -236,48 +241,58 @@ def _text_getter(self, obj): """ raise AbstractMethodError(self) - def _parse_td(self, obj): - """Return the td elements from a row element. + def _equals_tag(self, obj, tag): + """Returns whether an individual DOM node matches a tag Parameters ---------- obj : node-like + A DOM node. + + tag : string + Tag to be checked for equality Returns ------- - columns : list of node-like - These are the elements of each row, i.e., the columns. + boolean + Does the object match tag 'tag'? """ raise AbstractMethodError(self) - def _parse_tables(self, doc, match, attrs): - """Return all tables from the parsed DOM. + def _contains_tag(self, obj, tag): + """Returns whether an individual DOM node has a particular tag + contained within it Parameters ---------- - doc : tree-like - The DOM from which to parse the table element. + obj : node-like + A DOM node. - match : str or regular expression - The text to search for in the DOM tree. + tag : string + Tag to be found in this DOM - attrs : dict - A dictionary of table attributes that can be used to disambiguate - mutliple tables on a page. + Returns + ------- + boolean + Does the object contain tag 'tag'? + """ + raise AbstractMethodError(self) - Raises - ------ - ValueError - * If `match` does not match any text in the document. + def _extract_td(self, obj): + """Return the td elements from a row element. + + Parameters + ---------- + obj : node-like Returns ------- - tables : list of node-like - A list of
elements to be parsed into raw data. + columns : list of node-like + These are the elements of each row, i.e., the columns. """ raise AbstractMethodError(self) - def _parse_tr(self, table): + def _extract_tr(self, table): """Return the list of row elements from the parsed table element. Parameters @@ -292,7 +307,7 @@ def _parse_tr(self, table): """ raise AbstractMethodError(self) - def _parse_thead(self, table): + def _extract_thead(self, table): """Return the header of a table. Parameters @@ -307,7 +322,7 @@ def _parse_thead(self, table): """ raise AbstractMethodError(self) - def _parse_tbody(self, table): + def _extract_tbody(self, table): """Return the body of the table. Parameters @@ -322,7 +337,7 @@ def _parse_tbody(self, table): """ raise AbstractMethodError(self) - def _parse_tfoot(self, table): + def _extract_tfoot(self, table): """Return the footer of the table if any. Parameters @@ -346,40 +361,149 @@ def _build_doc(self): """ raise AbstractMethodError(self) - def _build_table(self, table): - header = self._parse_raw_thead(table) - body = self._parse_raw_tbody(table) - footer = self._parse_raw_tfoot(table) + def _build_table(self, table_html): + header, body, footer = self._parse_raw_thead_tbody_tfoot(table_html) + # the above "footer" actually produces a footer. The below "footer" + # rarely does. The below "footer" is the legacy behavior and so I'm + # leaving it for the time being. + footer = self._parse_raw_tfoot(table_html) return header, body, footer - def _parse_raw_thead(self, table): - thead = self._parse_thead(table) + def _parse_raw_thead_tbody_tfoot(self, table_html): + """Given a table, return parsed header, body, and foot. + Header and body are lists-of-lists. Top level list is a list of + rows. Each row is a list of parsed elements. + + Logic: Use , , elements to identify + header, body, and footer, otherwise: + - Put all rows into body + - Move rows from top of body to header only if + all elements inside row are
+ - Move rows from bottom of body to footer only if + all elements inside row are + + Parameters + ---------- + table_html : node-like + A single table element + + Returns + ------- + header, body, footer + header : list of list of node-like + List of rows, each of which is a list of parsed header elements + body : list of list of node-like + List of rows, each of which is a list of parsed body elements + footer : list of list of node-like + List of rows, each of which is a list of parsed footer elements + """ + header_rows = [] + body_rows = [] + footer_rows = [] + # first, are there thead and tbody elements in the table? + if (self._contains_tag(table_html, 'thead') and + self._contains_tag(table_html, 'tbody')): + header_rows = self._extract_tr(self._extract_thead(table_html)[0]) + body_rows = self._extract_tr(self._extract_tbody(table_html)[0]) + if self._contains_tag(table_html, 'tfoot'): + footer_rows = self._extract_tr( + self._extract_tfoot(table_html)[0]) + else: + # otherwise we need to split the body into header/body/foot + body_rows = self._extract_tr(table_html) + if body_rows == []: + # empty table, just return nothing + return [], [], [] + # splitting criterion: if all tags within a row are th, it's part + # of the header/footer + while all(self._equals_tag(t, 'th') for t in + self._extract_td(body_rows[0])): + # this row should be a header row, move it from body to header + header_rows.append(body_rows.pop(0)) + while all(self._equals_tag(t, 'th') for t in + self._extract_td(body_rows[-1])): + # this row should be a footer row, move it from body to footer + footer_rows.insert(0, body_rows.pop()) + header = self._expand_colspan_rowspan(header_rows, fill_rowspan=False) + body = self._expand_colspan_rowspan(body_rows, fill_rowspan=True) + footer = self._expand_colspan_rowspan(footer_rows, fill_rowspan=False) + # the below line is lifted from _parse_raw_tfoot. Not sure what it + # does. + footer = np.atleast_1d(np.array(footer).squeeze( + )) if footer and len(footer) == 1 else footer + return header, body, footer + + def _expand_colspan_rowspan(self, rows, fill_rowspan=True): + """Given a list of rows, return a list of rows that properly handle + colspan/rowspan + + Discussion on behavior of fill_rowspan in #17073 + + Parameters + ---------- + rows : list of list of node-like + List of rows, each of which is a list of nodes + + fill_rowspan : boolean + Should a rowspan fill every item in the rowspan (True) or only the + bottommost element (False)? 
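(A worked illustration of the fill_rowspan rule described above, as a
standalone sketch -- `expand` here is a hypothetical simplification that
takes (text, rowspan) pairs instead of DOM nodes and ignores colspan:)

```python
def expand(rows, fill_rowspan=True):
    """rows: list of rows, each a list of (text, rowspan) pairs."""
    res, pending = [], []  # pending: (col, text, rows_still_spanned)
    for row in rows:
        cells = list(row)
        # splice in cells carried over from earlier rows' rowspans
        for col, text, left in pending:
            cells.insert(col, (text, left))
        # a cell spanning n rows leaves n-1 future copies behind; the
        # copies keep their text only when fill_rowspan is True
        pending = [(col, text if fill_rowspan else '', left - 1)
                   for col, (text, left) in enumerate(cells) if left > 1]
        res.append([text for text, _ in cells])
    return res

rows = [[('a', 2), ('b', 1)], [('c', 1)]]
print(expand(rows, fill_rowspan=True))   # [['a', 'b'], ['a', 'c']]
print(expand(rows, fill_rowspan=False))  # [['a', 'b'], ['', 'c']]
```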
+ + Returns + ------- + res : list of list of node-like + List of rows, each of which is a list of nodes, respecting + colspan/rowspan + """ res = [] - if thead: - trs = self._parse_tr(thead[0]) - for tr in trs: - cols = lmap(self._text_getter, self._parse_td(tr)) - if any([col != '' for col in cols]): - res.append(cols) + saved_span = [] + for row in rows: + extracted_row = self._extract_td(row) + cols_text = [_remove_whitespace( + self._text_getter(col)) for col in extracted_row] + col_colspans = [int(col.get('colspan', 1)) + for col in extracted_row] + col_rowspans = [int(col.get('rowspan', 1)) + for col in extracted_row] + # expand cols using col_colspans + # maybe this can be done with a list comprehension, dunno + cols = zip( + list(flatten( + lmap(lambda (text, nc): [text] * nc, + zip(cols_text, col_colspans)))), + list(flatten( + lmap(lambda (nc, nr): [nr] * nc, + zip(col_colspans, col_rowspans)))) + ) + # cols is now a list of (text, number of rows) + # now insert any previous rowspans + for (col, (text, nr)) in saved_span: + cols.insert(col, (text, nr)) + + # save next saved_span + def advance_item_to_next_row(item): + (col, (text, nr)) = item + if nr == 1: + return None + else: + # only keep the text around if fill_rowspan is set + return (col, (text if fill_rowspan else '', nr - 1)) + saved_span = lfilter(lambda i: i is not None, + lmap(advance_item_to_next_row, + list(enumerate(cols)))) + cols = [text for (text, nr) in cols] + # generate cols with text only + if any([col != '' for col in cols]): + res.append(cols) return res - def _parse_raw_tfoot(self, table): - tfoot = self._parse_tfoot(table) + def _parse_raw_tfoot(self, table_html): + tfoot = self._extract_tfoot(table_html) res = [] if tfoot: - res = lmap(self._text_getter, self._parse_td(tfoot[0])) + res = lmap(self._text_getter, self._extract_td(tfoot[0])) return np.atleast_1d( np.array(res).squeeze()) if res and len(res) == 1 else res - def _parse_raw_tbody(self, table): - tbody = self._parse_tbody(table) - - try: - res = self._parse_tr(tbody[0]) - except IndexError: - res = self._parse_tr(table) - return self._parse_raw_data(res) - class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser): """HTML to DataFrame parser that uses BeautifulSoup under the hood. 
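(End-to-end, the expansion logic in the hunk above behaves like this; a
hedged sketch against this revision -- the table is invented, and the body
rowspan is filled downward because fill_rowspan=True for body rows:)

```python
import pandas as pd

html = """
<table>
  <tr><th>a</th><th>b</th><th>c</th></tr>
  <tr><td rowspan="2">x</td><td>1</td><td>2</td></tr>
  <tr><td colspan="2">3</td></tr>
</table>
"""
# The lone all-<th> row is inferred as the header; "x" spans two body
# rows and "3" spans two columns, so the parsed frame should be:
#    a  b  c
# 0  x  1  2
# 1  x  3  3
df = pd.read_html(html)[0]
print(df)
```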
@@ -401,27 +525,6 @@ def __init__(self, *args, **kwargs): from bs4 import SoupStrainer self._strainer = SoupStrainer('table') - def _text_getter(self, obj): - return obj.text - - def _parse_td(self, row): - return row.find_all(('td', 'th')) - - def _parse_tr(self, element): - return element.find_all('tr') - - def _parse_th(self, element): - return element.find_all('th') - - def _parse_thead(self, table): - return table.find_all('thead') - - def _parse_tbody(self, table): - return table.find_all('tbody') - - def _parse_tfoot(self, table): - return table.find_all('tfoot') - def _parse_tables(self, doc, match, attrs): element_name = self._strainer.name tables = doc.find_all(element_name, attrs=attrs) @@ -443,6 +546,33 @@ def _parse_tables(self, doc, match, attrs): match.pattern) return result + def _text_getter(self, obj): + return obj.text + + def _equals_tag(self, obj, tag): + return obj.name == tag + + def _contains_tag(self, obj, tag): + return obj.find(tag) is not None + + def _extract_td(self, row): + return row.find_all(('td', 'th')) + + def _extract_tr(self, element): + return element.find_all('tr') + + def _extract_th(self, element): + return element.find_all('th') + + def _extract_thead(self, table): + return table.find_all('thead') + + def _extract_tbody(self, table): + return table.find_all('tbody') + + def _extract_tfoot(self, table): + return table.find_all('tfoot') + def _setup_build_doc(self): raw_text = _read(self.io) if not raw_text: @@ -502,16 +632,6 @@ class _LxmlFrameParser(_HtmlFrameParser): def __init__(self, *args, **kwargs): super(_LxmlFrameParser, self).__init__(*args, **kwargs) - def _text_getter(self, obj): - return obj.text_content() - - def _parse_td(self, row): - return row.xpath('.//td|.//th') - - def _parse_tr(self, table): - expr = './/tr[normalize-space()]' - return table.xpath(expr) - def _parse_tables(self, doc, match, kwargs): pattern = match.pattern @@ -531,6 +651,22 @@ def _parse_tables(self, doc, match, kwargs): raise ValueError("No tables found matching regex %r" % pattern) return tables + def _equals_tag(self, obj, tag): + return obj.tag == tag + + def _contains_tag(self, obj, tag): + return obj.find(tag) is not None + + def _text_getter(self, obj): + return obj.text_content() + + def _extract_td(self, row): + return row.xpath('.//td|.//th') + + def _extract_tr(self, table): + expr = './/tr[normalize-space()]' + return table.xpath(expr) + def _build_doc(self): """ Raises @@ -585,13 +721,13 @@ def _build_doc(self): raise XMLSyntaxError("no text parsed from document", 0, 0, 0) return r - def _parse_tbody(self, table): + def _extract_tbody(self, table): return table.xpath('.//tbody') - def _parse_thead(self, table): + def _extract_thead(self, table): return table.xpath('.//thead') - def _parse_tfoot(self, table): + def _extract_tfoot(self, table): return table.xpath('.//tfoot') def _parse_raw_thead(self, table): @@ -599,10 +735,10 @@ def _parse_raw_thead(self, table): thead = table.xpath(expr) res = [] if thead: - trs = self._parse_tr(thead[0]) + trs = self._extract_tr(thead[0]) for tr in trs: cols = [_remove_whitespace(x.text_content()) for x in - self._parse_td(tr)] + self._extract_td(tr)] if any([col != '' for col in cols]): res.append(cols) return res @@ -873,7 +1009,13 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None, This function searches for ```` elements and only for ```` and ```` or ```` argument, it is used to construct + the header, otherwise the function attempts to find the header within + the body (by putting rows 
with only ``<th>`` elements into the header). This function
+    attempts to properly handle ``colspan`` and ``rowspan`` attributes.
+
+    .. versionadded:: 0.21.0
 
     Similar to :func:`~pandas.read_csv` the `header` argument is applied
     **after** `skiprows` is applied.
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index 6fc080c8d9090..c016106dfcc80 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -385,7 +385,7 @@ def test_thousands_macau_stats(self):
                                  attrs={'class': 'style1'})
         df = dfs[all_non_nan_table_index]
 
-        assert not any(s.isna().any() for _, s in df.iteritems())
+        assert not any(s.isnull().any() for _, s in df.iteritems())
 
     @pytest.mark.slow
     def test_thousands_macau_index_col(self):
@@ -394,7 +394,7 @@ def test_thousands_macau_index_col(self):
         dfs = self.read_html(macau_data, index_col=0, header=0)
         df = dfs[all_non_nan_table_index]
 
-        assert not any(s.isna().any() for _, s in df.iteritems())
+        assert not any(s.isnull().any() for _, s in df.iteritems())
 
     def test_empty_tables(self):
         """
@@ -640,6 +640,118 @@ def test_different_number_of_rows(self):
         res = self.read_html(out, index_col=0)[0]
         tm.assert_frame_equal(expected, res)
 
+    def test_colspan_rowspan_are_1(self):
+        expected = """
+            <table>
+                <tr>
+                    <th>X</th>
+                    <th>Y</th>
+                    <th>Z</th>
+                    <th>W</th>
+                </tr>
+            </table>
""" + out = """ + + + + + + + + + + +
XYZW
""" + expected = self.read_html(expected)[0] + res = self.read_html(out)[0] + tm.assert_frame_equal(expected, res) + + def test_colspan_rowspan_are_more_than_1(self): + expected = """ + + + + + + + + + + + + + + + + + + +
XXYZW
1223
""" + out = """ + + + + + + + + + + + + + + + +
XYZW
123
""" + expected = self.read_html(expected)[0] + res = self.read_html(out)[0] + tm.assert_frame_equal(expected, res) + + def test_header_should_be_inferred_from_th_elements(self): + expected = """ + + + + + + + + + + + + + + + + + +
XXYZW
12345
""" + out = """ + + + + + + + + + + + + + +
XXYZW
12345
""" + expected = self.read_html(expected)[0] # header is explicit + res = self.read_html(out)[0] # infer header + tm.assert_frame_equal(expected, res) + res2 = self.read_html(out, header=0)[0] # manually set header + tm.assert_frame_equal(expected, res2) + def test_parse_dates_list(self): df = DataFrame({'date': date_range('1/1/2001', periods=10)}) expected = df.to_html() @@ -657,14 +769,6 @@ def test_parse_dates_combine(self): newdf = DataFrame({'datetime': raw_dates}) tm.assert_frame_equal(newdf, res[0]) - def test_computer_sales_page(self): - data = os.path.join(DATA_PATH, 'computer_sales_page.html') - with tm.assert_raises_regex(ParserError, - r"Passed header=\[0,1\] are " - r"too many rows for this " - r"multi_index of columns"): - self.read_html(data, header=[0, 1]) - def test_wikipedia_states_table(self): data = os.path.join(DATA_PATH, 'wikipedia_states.html') assert os.path.isfile(data), '%r is not a file' % data @@ -891,7 +995,7 @@ def test_computer_sales_page(self): def test_invalid_flavor(): url = 'google.com' with pytest.raises(ValueError): - read_html(url, 'google', flavor='not a* valid**++ flaver') + read_html(url, 'google', flavor='not a* valid**++ flavor') def get_elements_from_file(url, element='table'): @@ -939,6 +1043,7 @@ def test_same_ordering(): class ErrorThread(threading.Thread): + def run(self): try: super(ErrorThread, self).run() From 80d9c2b3bcb444cc51126f163cac90074263f19d Mon Sep 17 00:00:00 2001 From: John Owens Date: Thu, 27 Jul 2017 08:29:43 -0700 Subject: [PATCH 002/141] in python 3, lambdas no longer take tuples as args. thanks pep 3113. --- pandas/io/html.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index b2561c519f71f..38da7bb7ef4ce 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -468,10 +468,10 @@ def _expand_colspan_rowspan(self, rows, fill_rowspan=True): # maybe this can be done with a list comprehension, dunno cols = zip( list(flatten( - lmap(lambda (text, nc): [text] * nc, + lmap(lambda text_nc: [text_nc[0]] * text_nc[1], zip(cols_text, col_colspans)))), list(flatten( - lmap(lambda (nc, nr): [nr] * nc, + lmap(lambda nc_nr: [nc_nr[1]] * nc_nr[0], zip(col_colspans, col_rowspans)))) ) # cols is now a list of (text, number of rows) From 26d1f6a13bfdac65b118c2d5ca8174138fea8829 Mon Sep 17 00:00:00 2001 From: John Owens Date: Thu, 27 Jul 2017 09:52:41 -0700 Subject: [PATCH 003/141] fixing lint error --- pandas/tests/io/test_html.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index c016106dfcc80..66dc353ce7065 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -28,7 +28,6 @@ from pandas.io.common import URLError, urlopen, file_path_to_url import pandas.io.html from pandas.io.html import read_html -from pandas._libs.parsers import ParserError import pandas.util.testing as tm from pandas.util.testing import makeCustomDataframe as mkdf, network From 37af4eadbb5ccd3579664374afe32d729897f704 Mon Sep 17 00:00:00 2001 From: John Owens Date: Thu, 27 Jul 2017 10:08:43 -0700 Subject: [PATCH 004/141] in python3, zip does not return a list, so list(zip(...)) --- pandas/io/html.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 38da7bb7ef4ce..9467a32fe670e 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -466,13 +466,13 @@ def _expand_colspan_rowspan(self, rows, fill_rowspan=True): for col in extracted_row] # expand cols using 
col_colspans # maybe this can be done with a list comprehension, dunno - cols = zip( + cols = list(zip( list(flatten( lmap(lambda text_nc: [text_nc[0]] * text_nc[1], - zip(cols_text, col_colspans)))), + list(zip(cols_text, col_colspans))))), list(flatten( lmap(lambda nc_nr: [nc_nr[1]] * nc_nr[0], - zip(col_colspans, col_rowspans)))) + list(zip(col_colspans, col_rowspans)))))) ) # cols is now a list of (text, number of rows) # now insert any previous rowspans From f064562baff3d1d34725dcd20c048e618df31892 Mon Sep 17 00:00:00 2001 From: John Owens Date: Wed, 6 Sep 2017 14:57:23 -0700 Subject: [PATCH 005/141] documentation changes only --- pandas/io/html.py | 60 +++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 9467a32fe670e..9a2a5ec153db6 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -204,7 +204,7 @@ def _parse_tables(self, doc, match, attrs): Parameters ---------- - doc : tree-like + doc : parser object The DOM from which to parse the table element. match : str or regular expression @@ -221,7 +221,7 @@ def _parse_tables(self, doc, match, attrs): Returns ------- - tables : list of node-like + tables : list of HTML table elements A list of elements to be parsed into raw data. """ raise AbstractMethodError(self) @@ -231,7 +231,7 @@ def _text_getter(self, obj): Parameters ---------- - obj : node-like + obj : an HTML element A DOM node. Returns @@ -246,16 +246,16 @@ def _equals_tag(self, obj, tag): Parameters ---------- - obj : node-like + obj : an HTML element A DOM node. - tag : string + tag : str Tag to be checked for equality Returns ------- boolean - Does the object match tag 'tag'? + boolean indicating if the object is equal to tag 'tag' """ raise AbstractMethodError(self) @@ -265,16 +265,16 @@ def _contains_tag(self, obj, tag): Parameters ---------- - obj : node-like + obj : an HTML element A DOM node. - tag : string + tag : str Tag to be found in this DOM Returns ------- boolean - Does the object contain tag 'tag'? + boolean indicating if the object contains tag 'tag' """ raise AbstractMethodError(self) @@ -283,11 +283,11 @@ def _extract_td(self, obj): Parameters ---------- - obj : node-like + obj : an HTML row element Returns ------- - columns : list of node-like + columns : list of HTML td elements These are the elements of each row, i.e., the columns. """ raise AbstractMethodError(self) @@ -297,13 +297,13 @@ def _extract_tr(self, table): Parameters ---------- - table : node-like + table : an HTML table element A table element that contains row elements. Returns ------- - rows : list of node-like - A list row elements of a table, usually or or ... element. """ raise AbstractMethodError(self) @@ -327,12 +327,12 @@ def _extract_tbody(self, table): Parameters ---------- - table : node-like + table : an HTML table element A table element that contains row elements. Returns ------- - tbody : node-like + tbody : an HTML tbody element A ... element. """ raise AbstractMethodError(self) @@ -342,12 +342,12 @@ def _extract_tfoot(self, table): Parameters ---------- - table : node-like + table : an HTML table element A table element that contains row elements. Returns ------- - tfoot : node-like + tfoot : an HTML tfoot element A ... element. 
""" raise AbstractMethodError(self) @@ -384,17 +384,17 @@ def _parse_raw_thead_tbody_tfoot(self, table_html): Parameters ---------- - table_html : node-like + table_html : an HTML table element A single table element Returns ------- - header, body, footer - header : list of list of node-like + tuple of (header, body, footer) + header : list of list of HTML header elements List of rows, each of which is a list of parsed header elements - body : list of list of node-like + body : list of list of HTML body elements List of rows, each of which is a list of parsed body elements - footer : list of list of node-like + footer : list of list of HTML footer elements List of rows, each of which is a list of parsed footer elements """ header_rows = [] @@ -441,8 +441,8 @@ def _expand_colspan_rowspan(self, rows, fill_rowspan=True): Parameters ---------- - rows : list of list of node-like - List of rows, each of which is a list of nodes + rows : list of list of HTML td elements + List of rows, each of which is a list of elements in that row fill_rowspan : boolean Should a rowspan fill every item in the rowspan (True) or only the @@ -450,9 +450,9 @@ def _expand_colspan_rowspan(self, rows, fill_rowspan=True): Returns ------- - res : list of list of node-like - List of rows, each of which is a list of nodes, respecting - colspan/rowspan + res : list of list of HTML td elements + List of rows, each of which is a list of elements in that row, + respecting colspan/rowspan """ res = [] saved_span = [] From 5a38278f1858452fa9a3b8953199b7e7c1018441 Mon Sep 17 00:00:00 2001 From: John Owens Date: Thu, 7 Sep 2017 09:00:13 -0700 Subject: [PATCH 006/141] documentation changes only --- pandas/io/html.py | 61 ++++++++++++++++------------------------------- 1 file changed, 21 insertions(+), 40 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 9a2a5ec153db6..d49058ec67d94 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -204,8 +204,7 @@ def _parse_tables(self, doc, match, attrs): Parameters ---------- - doc : parser object - The DOM from which to parse the table element. + doc : the DOM from which to parse the table element. match : str or regular expression The text to search for in the DOM tree. @@ -231,8 +230,7 @@ def _text_getter(self, obj): Parameters ---------- - obj : an HTML element - A DOM node. + obj : a DOM node. Returns ------- @@ -246,8 +244,7 @@ def _equals_tag(self, obj, tag): Parameters ---------- - obj : an HTML element - A DOM node. + obj : a DOM node. tag : str Tag to be checked for equality @@ -265,8 +262,7 @@ def _contains_tag(self, obj, tag): Parameters ---------- - obj : an HTML element - A DOM node. + obj : a DOM node. tag : str Tag to be found in this DOM @@ -287,8 +283,7 @@ def _extract_td(self, obj): Returns ------- - columns : list of HTML td elements - These are the elements of each row, i.e., the columns. + columns : list of HTML td elements (i.e., the columns in the row) """ raise AbstractMethodError(self) @@ -297,13 +292,11 @@ def _extract_tr(self, table): Parameters ---------- - table : an HTML table element - A table element that contains row elements. + table : a table element that contains row elements. Returns ------- - rows : list of HTML row elements - A list of row elements of a table, usually or or ... element. + thead : an HTML ... element. """ raise AbstractMethodError(self) @@ -327,13 +318,11 @@ def _extract_tbody(self, table): Parameters ---------- - table : an HTML table element - A table element that contains row elements. 
+ table : a table element that contains row elements. Returns ------- - tbody : an HTML tbody element - A ... element. + tbody : an HTML ... element. """ raise AbstractMethodError(self) @@ -342,13 +331,11 @@ def _extract_tfoot(self, table): Parameters ---------- - table : an HTML table element - A table element that contains row elements. + table : a table element that contains row elements. Returns ------- - tfoot : an HTML tfoot element - A ... element. + tfoot : an HTML ... element. """ raise AbstractMethodError(self) @@ -357,7 +344,7 @@ def _build_doc(self): Returns ------- - obj : tree-like + obj : the DOM from which to parse the table element. """ raise AbstractMethodError(self) @@ -384,18 +371,14 @@ def _parse_raw_thead_tbody_tfoot(self, table_html): Parameters ---------- - table_html : an HTML table element - A single table element + table_html : a single HTML table element. Returns ------- tuple of (header, body, footer) - header : list of list of HTML header elements - List of rows, each of which is a list of parsed header elements - body : list of list of HTML body elements - List of rows, each of which is a list of parsed body elements - footer : list of list of HTML footer elements - List of rows, each of which is a list of parsed footer elements + header : list of rows, each of which is a list of parsed header elements + body : list of rows, each of which is a list of parsed body elements + footer : list of rows, each of which is a list of parsed footer elements """ header_rows = [] body_rows = [] @@ -441,17 +424,15 @@ def _expand_colspan_rowspan(self, rows, fill_rowspan=True): Parameters ---------- - rows : list of list of HTML td elements - List of rows, each of which is a list of elements in that row + rows : list of rows, each of which is a list of elements in that row fill_rowspan : boolean Should a rowspan fill every item in the rowspan (True) or only the - bottommost element (False)? + bottommost element (False)? Default is True. Returns ------- - res : list of list of HTML td elements - List of rows, each of which is a list of elements in that row, + res : list of rows, each of which is a list of elements in that row, respecting colspan/rowspan """ res = [] From 39f7814011f2c66a29efe432eaae73445023d1c2 Mon Sep 17 00:00:00 2001 From: John Owens Date: Thu, 7 Sep 2017 09:02:02 -0700 Subject: [PATCH 007/141] documentation changes only, limited to 80 cols --- pandas/io/html.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index d49058ec67d94..849042d073603 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -296,7 +296,8 @@ def _extract_tr(self, table): Returns ------- - rows : a list of row elements of a table, usually or or
elements. + rows : list of HTML row elements + A list of row elements of a table, usually
elements. """ raise AbstractMethodError(self) @@ -312,12 +312,12 @@ def _extract_thead(self, table): Parameters ---------- - table : node-like + table : an HTML table element A table element that contains row elements. Returns ------- - thead : node-like + thead : an HTML thead element A
elements. + rows : a list of row elements of a table, usually
elements. """ raise AbstractMethodError(self) @@ -312,13 +305,11 @@ def _extract_thead(self, table): Parameters ---------- - table : an HTML table element - A table element that contains row elements. + table : a table element that contains row elements. Returns ------- - thead : an HTML thead element - A
elements. + rows : a list of row elements of a table, usually
+ elements. """ raise AbstractMethodError(self) @@ -376,9 +377,11 @@ def _parse_raw_thead_tbody_tfoot(self, table_html): Returns ------- tuple of (header, body, footer) - header : list of rows, each of which is a list of parsed header elements + header : list of rows, each of which is a list of parsed header + elements body : list of rows, each of which is a list of parsed body elements - footer : list of rows, each of which is a list of parsed footer elements + footer : list of rows, each of which is a list of parsed footer + elements """ header_rows = [] body_rows = [] From 531863fb99e667c8439cf5e807b341e54ec8ccde Mon Sep 17 00:00:00 2001 From: John Owens Date: Fri, 8 Sep 2017 15:02:04 -0700 Subject: [PATCH 008/141] more documentation edits --- pandas/io/html.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 849042d073603..79f720a6a102f 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -220,8 +220,7 @@ def _parse_tables(self, doc, match, attrs): Returns ------- - tables : list of HTML table elements - A list of elements to be parsed into raw data. + tables : A list of HTML
elements to be parsed into raw data. """ raise AbstractMethodError(self) @@ -251,7 +250,7 @@ def _equals_tag(self, obj, tag): Returns ------- - boolean + is_tag_equal : boolean boolean indicating if the object is equal to tag 'tag' """ raise AbstractMethodError(self) @@ -377,11 +376,11 @@ def _parse_raw_thead_tbody_tfoot(self, table_html): Returns ------- tuple of (header, body, footer) - header : list of rows, each of which is a list of parsed header - elements + header : list of rows, each of which is a list of parsed + header elements body : list of rows, each of which is a list of parsed body elements - footer : list of rows, each of which is a list of parsed footer - elements + footer : list of rows, each of which is a list of parsed + footer elements """ header_rows = [] body_rows = [] @@ -395,7 +394,7 @@ def _parse_raw_thead_tbody_tfoot(self, table_html): footer_rows = self._extract_tr( self._extract_tfoot(table_html)[0]) else: - # otherwise we need to split the body into header/body/foot + # Otherwise we need to split the body into header/body/foot. body_rows = self._extract_tr(table_html) if body_rows == []: # empty table, just return nothing @@ -413,8 +412,9 @@ def _parse_raw_thead_tbody_tfoot(self, table_html): header = self._expand_colspan_rowspan(header_rows, fill_rowspan=False) body = self._expand_colspan_rowspan(body_rows, fill_rowspan=True) footer = self._expand_colspan_rowspan(footer_rows, fill_rowspan=False) - # the below line is lifted from _parse_raw_tfoot. Not sure what it - # does. + + # The below line is lifted from _parse_raw_tfoot. Not sure what + # it does. footer = np.atleast_1d(np.array(footer).squeeze( )) if footer and len(footer) == 1 else footer return header, body, footer From 818d394614450fb9c0cad81c8ee8dfd74dae8aac Mon Sep 17 00:00:00 2001 From: John Owens Date: Sat, 9 Sep 2017 07:46:46 -0700 Subject: [PATCH 009/141] minor documentation edits --- pandas/io/html.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 79f720a6a102f..543d82195306a 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -220,7 +220,7 @@ def _parse_tables(self, doc, match, attrs): Returns ------- - tables : A list of HTML
elements to be parsed into raw data. + tables : list of HTML
elements to be parsed into raw data. """ raise AbstractMethodError(self) @@ -295,7 +295,7 @@ def _extract_tr(self, table): Returns ------- - rows : a list of row elements of a table, usually or or
+ rows : list of row elements of a table, usually
elements. """ raise AbstractMethodError(self) From f3a6aa33ceaf62914e3e264d9949b95de6ed41ab Mon Sep 17 00:00:00 2001 From: John Owens Date: Sat, 9 Sep 2017 12:28:17 -0700 Subject: [PATCH 010/141] better return type explanation in code, added issue number to tests --- pandas/io/html.py | 2 +- pandas/tests/io/test_html.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 543d82195306a..23d1dddb94fd9 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -268,7 +268,7 @@ def _contains_tag(self, obj, tag): Returns ------- - boolean + does_tag_contain : boolean boolean indicating if the object contains tag 'tag' """ raise AbstractMethodError(self) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 66dc353ce7065..ce12df60c565a 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -640,6 +640,7 @@ def test_different_number_of_rows(self): tm.assert_frame_equal(expected, res) def test_colspan_rowspan_are_1(self): + # GH17054 expected = """ @@ -669,6 +670,7 @@ def test_colspan_rowspan_are_1(self): tm.assert_frame_equal(expected, res) def test_colspan_rowspan_are_more_than_1(self): + # GH17054 expected = """
@@ -711,6 +713,7 @@ def test_colspan_rowspan_are_more_than_1(self): tm.assert_frame_equal(expected, res) def test_header_should_be_inferred_from_th_elements(self): + # GH17054 expected = """
From 2f904b25658a160abd0cbe599fdf835b6565f245 Mon Sep 17 00:00:00 2001 From: John Owens Date: Mon, 18 Sep 2017 15:31:20 -0700 Subject: [PATCH 011/141] cleaning up legacy documentation issues --- pandas/io/html.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 23d1dddb94fd9..c151962379acd 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -196,6 +196,12 @@ def __init__(self, io, match, attrs, encoding): self.encoding = encoding def parse_tables(self): + """Parse and return all tables from the DOM. + + Returns + ------- + tables : list of parsed (header, body, footer) tuples from tables + """ tables = self._parse_tables(self._build_doc(), self.match, self.attrs) return (self._build_table(table) for table in tables) @@ -211,12 +217,11 @@ def _parse_tables(self, doc, match, attrs): attrs : dict A dictionary of table attributes that can be used to disambiguate - mutliple tables on a page. + multiple tables on a page. Raises ------ - ValueError - * If `match` does not match any text in the document. + ValueError : If `match` does not match any text in the document. Returns ------- From f4e7592e71f04107a17195c76a9a8a5dc4f09e70 Mon Sep 17 00:00:00 2001 From: John Owens Date: Mon, 18 Sep 2017 15:33:23 -0700 Subject: [PATCH 012/141] remove 'if' --- pandas/io/html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index c151962379acd..225fa91f90a1e 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -221,7 +221,7 @@ def _parse_tables(self, doc, match, attrs): Raises ------ - ValueError : If `match` does not match any text in the document. + ValueError : `match` does not match any text in the document. Returns ------- From 293d9e4c62292446266d27ffdb5ca818ea9e45e7 Mon Sep 17 00:00:00 2001 From: John Owens Date: Mon, 18 Sep 2017 15:37:02 -0700 Subject: [PATCH 013/141] newlines for clarity --- pandas/io/html.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/io/html.py b/pandas/io/html.py index 225fa91f90a1e..d0021e9cc93d0 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -387,14 +387,17 @@ def _parse_raw_thead_tbody_tfoot(self, table_html): footer : list of rows, each of which is a list of parsed footer elements """ + header_rows = [] body_rows = [] footer_rows = [] + # first, are there thead and tbody elements in the table? 
if (self._contains_tag(table_html, 'thead') and self._contains_tag(table_html, 'tbody')): header_rows = self._extract_tr(self._extract_thead(table_html)[0]) body_rows = self._extract_tr(self._extract_tbody(table_html)[0]) + if self._contains_tag(table_html, 'tfoot'): footer_rows = self._extract_tr( self._extract_tfoot(table_html)[0]) @@ -414,6 +417,7 @@ def _parse_raw_thead_tbody_tfoot(self, table_html): self._extract_td(body_rows[-1])): # this row should be a footer row, move it from body to footer footer_rows.insert(0, body_rows.pop()) + header = self._expand_colspan_rowspan(header_rows, fill_rowspan=False) body = self._expand_colspan_rowspan(body_rows, fill_rowspan=True) footer = self._expand_colspan_rowspan(footer_rows, fill_rowspan=False) @@ -443,6 +447,7 @@ def _expand_colspan_rowspan(self, rows, fill_rowspan=True): res : list of rows, each of which is a list of elements in that row, respecting colspan/rowspan """ + res = [] saved_span = [] for row in rows: From efabae46a22c46c91048c1fad5462b6ecae7ca67 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 26 Jul 2017 19:38:42 -0400 Subject: [PATCH 014/141] DOC: whatsnew typos --- doc/source/whatsnew/v0.21.0.txt | 70 ++++++++++++++++----------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index aed00ca578984..5a6a556c9886d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -134,72 +134,70 @@ Dtype Conversions ^^^^^^^^^^^^^^^^^ - Previously assignments, ``.where()`` and ``.fillna()`` with a ``bool`` assignment, would coerce to - same type (e.g. int / float), or raise for datetimelikes. These will now preseve the bools with ``object`` dtypes. (:issue:`16821`). + same the type (e.g. int / float), or raise for datetimelikes. These will now preseve the bools with ``object`` dtypes. (:issue:`16821`). - .. ipython:: python + .. ipython:: python - s = Series([1, 2, 3]) + s = Series([1, 2, 3]) - .. code-block:: python + .. code-block:: python - In [5]: s[1] = True + In [5]: s[1] = True - In [6]: s - Out[6]: - 0 1 - 1 1 - 2 3 - dtype: int64 + In [6]: s + Out[6]: + 0 1 + 1 1 + 2 3 + dtype: int64 - New Behavior + New Behavior - .. ipython:: python + .. ipython:: python - s[1] = True - s + s[1] = True + s -- Previously as assignment to a datetimelike with a non-datetimelike would coerce the +- Previously, as assignment to a datetimelike with a non-datetimelike would coerce the non-datetime-like item being assigned (:issue:`14145`). - .. ipython:: python + .. ipython:: python - s = pd.Series([pd.Timestamp('2011-01-01'), pd.Timestamp('2012-01-01')]) + s = pd.Series([pd.Timestamp('2011-01-01'), pd.Timestamp('2012-01-01')]) - .. code-block:: python + .. code-block:: python - In [1]: s[1] = 1 + In [1]: s[1] = 1 - In [2]: s - Out[2]: - 0 2011-01-01 00:00:00.000000000 - 1 1970-01-01 00:00:00.000000001 - dtype: datetime64[ns] + In [2]: s + Out[2]: + 0 2011-01-01 00:00:00.000000000 + 1 1970-01-01 00:00:00.000000001 + dtype: datetime64[ns] - These now coerce to ``object`` dtype. + These now coerce to ``object`` dtype. - .. ipython:: python + .. ipython:: python - s[1] = 1 - s + s[1] = 1 + s -- Additional bug fixes w.r.t. dtype conversions. 
- - - Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`) - - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) +- Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`) +- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) .. _whatsnew_0210.api.na_changes: NA naming Changes ^^^^^^^^^^^^^^^^^ -In orde to promote more consistency among the pandas API, we have added additional top-level +In order to promote more consistency among the pandas API, we have added additional top-level functions :func:`isna` and :func:`notna` that are aliases for :func:`isnull` and :func:`notnull`. The naming scheme is now more consistent with methods like ``.dropna()`` and ``.fillna()``. Furthermore in all cases where ``.isnull()`` and ``.notnull()`` methods are defined, these have additional methods named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical``, ``Index``, ``Series``, and ``DataFrame``. (:issue:`15001`). -The configuration option ``mode.use_inf_as_null``is deprecated, and ``mode.use_inf_as_na`` is added as a replacement. +The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement. .. _whatsnew_0210.api: @@ -281,7 +279,7 @@ I/O - Bug in :func:`read_csv` in which columns were not being thoroughly de-duplicated (:issue:`17060`) - Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) -- Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696, :issue:`16798`). +- Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696`, :issue:`16798`). - Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`). - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) From 552677f3bb5d9f269b738bbf3084a82332ffda24 Mon Sep 17 00:00:00 2001 From: John Owens Date: Wed, 26 Jul 2017 16:40:43 -0700 Subject: [PATCH 015/141] ENH: GH17054: read_html() handles rowspan/colspan and infers headers --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/html.py | 354 ++++++++++++++++++++++---------- pandas/tests/io/test_html.py | 127 +++++++++++- 3 files changed, 365 insertions(+), 117 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5a6a556c9886d..4b2dad30bdd05 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -78,6 +78,7 @@ Other Enhancements - :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. 
(:issue:`16855`) - :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`) - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`) +- :func:`read_html` handles colspan and rowspan arguments and attempts to infer a header if the header is not explicitly specified (:issue:`17054`) .. _whatsnew_0210.api_breaking: diff --git a/pandas/io/html.py b/pandas/io/html.py index a4acb26af5259..b2561c519f71f 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -17,10 +17,10 @@ from pandas.io.common import (_is_url, urlopen, parse_url, _validate_header_arg) from pandas.io.parsers import TextParser -from pandas.compat import (lrange, lmap, u, string_types, iteritems, +from pandas.compat import (lrange, lmap, lfilter, u, string_types, iteritems, raise_with_traceback, binary_type) from pandas import Series -from pandas.core.common import AbstractMethodError +from pandas.core.common import (AbstractMethodError, flatten) from pandas.io.formats.printing import pprint_thing _IMPORTS = False @@ -176,13 +176,15 @@ class _HtmlFrameParser(object): ----- To subclass this class effectively you must override the following methods: * :func:`_build_doc` - * :func:`_text_getter` - * :func:`_parse_td` * :func:`_parse_tables` - * :func:`_parse_tr` - * :func:`_parse_thead` - * :func:`_parse_tbody` - * :func:`_parse_tfoot` + * :func:`_text_getter` + * :func:`_equals_tag` + * :func:`_has_tag` + * :func:`_extract_td` + * :func:`_extract_tr` + * :func:`_extract_thead` + * :func:`_extract_tbody` + * :func:`_extract_tfoot` See each method's respective documentation for details on their functionality. """ @@ -197,29 +199,32 @@ def parse_tables(self): tables = self._parse_tables(self._build_doc(), self.match, self.attrs) return (self._build_table(table) for table in tables) - def _parse_raw_data(self, rows): - """Parse the raw data into a list of lists. + def _parse_tables(self, doc, match, attrs): + """Return all tables from the parsed DOM. Parameters ---------- - rows : iterable of node-like - A list of row elements. + doc : tree-like + The DOM from which to parse the table element. - text_getter : callable - A callable that gets the text from an individual node. This must be - defined by subclasses. + match : str or regular expression + The text to search for in the DOM tree. - column_finder : callable - A callable that takes a row node as input and returns a list of the - column node in that row. This must be defined by subclasses. + attrs : dict + A dictionary of table attributes that can be used to disambiguate + mutliple tables on a page. + + Raises + ------ + ValueError + * If `match` does not match any text in the document. Returns ------- - data : list of list of strings + tables : list of node-like + A list of
elements to be parsed into raw data. """ - data = [[_remove_whitespace(self._text_getter(col)) for col in - self._parse_td(row)] for row in rows] - return data + raise AbstractMethodError(self) def _text_getter(self, obj): """Return the text of an individual DOM node. @@ -236,48 +241,58 @@ def _text_getter(self, obj): """ raise AbstractMethodError(self) - def _parse_td(self, obj): - """Return the td elements from a row element. + def _equals_tag(self, obj, tag): + """Returns whether an individual DOM node matches a tag Parameters ---------- obj : node-like + A DOM node. + + tag : string + Tag to be checked for equality Returns ------- - columns : list of node-like - These are the elements of each row, i.e., the columns. + boolean + Does the object match tag 'tag'? """ raise AbstractMethodError(self) - def _parse_tables(self, doc, match, attrs): - """Return all tables from the parsed DOM. + def _contains_tag(self, obj, tag): + """Returns whether an individual DOM node has a particular tag + contained within it Parameters ---------- - doc : tree-like - The DOM from which to parse the table element. + obj : node-like + A DOM node. - match : str or regular expression - The text to search for in the DOM tree. + tag : string + Tag to be found in this DOM - attrs : dict - A dictionary of table attributes that can be used to disambiguate - mutliple tables on a page. + Returns + ------- + boolean + Does the object contain tag 'tag'? + """ + raise AbstractMethodError(self) - Raises - ------ - ValueError - * If `match` does not match any text in the document. + def _extract_td(self, obj): + """Return the td elements from a row element. + + Parameters + ---------- + obj : node-like Returns ------- - tables : list of node-like - A list of
elements to be parsed into raw data. + columns : list of node-like + These are the elements of each row, i.e., the columns. """ raise AbstractMethodError(self) - def _parse_tr(self, table): + def _extract_tr(self, table): """Return the list of row elements from the parsed table element. Parameters @@ -292,7 +307,7 @@ def _parse_tr(self, table): """ raise AbstractMethodError(self) - def _parse_thead(self, table): + def _extract_thead(self, table): """Return the header of a table. Parameters @@ -307,7 +322,7 @@ def _parse_thead(self, table): """ raise AbstractMethodError(self) - def _parse_tbody(self, table): + def _extract_tbody(self, table): """Return the body of the table. Parameters @@ -322,7 +337,7 @@ def _parse_tbody(self, table): """ raise AbstractMethodError(self) - def _parse_tfoot(self, table): + def _extract_tfoot(self, table): """Return the footer of the table if any. Parameters @@ -346,40 +361,149 @@ def _build_doc(self): """ raise AbstractMethodError(self) - def _build_table(self, table): - header = self._parse_raw_thead(table) - body = self._parse_raw_tbody(table) - footer = self._parse_raw_tfoot(table) + def _build_table(self, table_html): + header, body, footer = self._parse_raw_thead_tbody_tfoot(table_html) + # the above "footer" actually produces a footer. The below "footer" + # rarely does. The below "footer" is the legacy behavior and so I'm + # leaving it for the time being. + footer = self._parse_raw_tfoot(table_html) return header, body, footer - def _parse_raw_thead(self, table): - thead = self._parse_thead(table) + def _parse_raw_thead_tbody_tfoot(self, table_html): + """Given a table, return parsed header, body, and foot. + Header and body are lists-of-lists. Top level list is a list of + rows. Each row is a list of parsed elements. + + Logic: Use , , elements to identify + header, body, and footer, otherwise: + - Put all rows into body + - Move rows from top of body to header only if + all elements inside row are ' % str(i)) + table += (''.format(i=i)) table += '
+ - Move rows from bottom of body to footer only if + all elements inside row are + + Parameters + ---------- + table_html : node-like + A single table element + + Returns + ------- + header, body, footer + header : list of list of node-like + List of rows, each of which is a list of parsed header elements + body : list of list of node-like + List of rows, each of which is a list of parsed body elements + footer : list of list of node-like + List of rows, each of which is a list of parsed footer elements + """ + header_rows = [] + body_rows = [] + footer_rows = [] + # first, are there thead and tbody elements in the table? + if (self._contains_tag(table_html, 'thead') and + self._contains_tag(table_html, 'tbody')): + header_rows = self._extract_tr(self._extract_thead(table_html)[0]) + body_rows = self._extract_tr(self._extract_tbody(table_html)[0]) + if self._contains_tag(table_html, 'tfoot'): + footer_rows = self._extract_tr( + self._extract_tfoot(table_html)[0]) + else: + # otherwise we need to split the body into header/body/foot + body_rows = self._extract_tr(table_html) + if body_rows == []: + # empty table, just return nothing + return [], [], [] + # splitting criterion: if all tags within a row are th, it's part + # of the header/footer + while all(self._equals_tag(t, 'th') for t in + self._extract_td(body_rows[0])): + # this row should be a header row, move it from body to header + header_rows.append(body_rows.pop(0)) + while all(self._equals_tag(t, 'th') for t in + self._extract_td(body_rows[-1])): + # this row should be a footer row, move it from body to footer + footer_rows.insert(0, body_rows.pop()) + header = self._expand_colspan_rowspan(header_rows, fill_rowspan=False) + body = self._expand_colspan_rowspan(body_rows, fill_rowspan=True) + footer = self._expand_colspan_rowspan(footer_rows, fill_rowspan=False) + # the below line is lifted from _parse_raw_tfoot. Not sure what it + # does. + footer = np.atleast_1d(np.array(footer).squeeze( + )) if footer and len(footer) == 1 else footer + return header, body, footer + + def _expand_colspan_rowspan(self, rows, fill_rowspan=True): + """Given a list of rows, return a list of rows that properly handle + colspan/rowspan + + Discussion on behavior of fill_rowspan in #17073 + + Parameters + ---------- + rows : list of list of node-like + List of rows, each of which is a list of nodes + + fill_rowspan : boolean + Should a rowspan fill every item in the rowspan (True) or only the + bottommost element (False)? 
+ + Returns + ------- + res : list of list of node-like + List of rows, each of which is a list of nodes, respecting + colspan/rowspan + """ res = [] - if thead: - trs = self._parse_tr(thead[0]) - for tr in trs: - cols = lmap(self._text_getter, self._parse_td(tr)) - if any([col != '' for col in cols]): - res.append(cols) + saved_span = [] + for row in rows: + extracted_row = self._extract_td(row) + cols_text = [_remove_whitespace( + self._text_getter(col)) for col in extracted_row] + col_colspans = [int(col.get('colspan', 1)) + for col in extracted_row] + col_rowspans = [int(col.get('rowspan', 1)) + for col in extracted_row] + # expand cols using col_colspans + # maybe this can be done with a list comprehension, dunno + cols = zip( + list(flatten( + lmap(lambda (text, nc): [text] * nc, + zip(cols_text, col_colspans)))), + list(flatten( + lmap(lambda (nc, nr): [nr] * nc, + zip(col_colspans, col_rowspans)))) + ) + # cols is now a list of (text, number of rows) + # now insert any previous rowspans + for (col, (text, nr)) in saved_span: + cols.insert(col, (text, nr)) + + # save next saved_span + def advance_item_to_next_row(item): + (col, (text, nr)) = item + if nr == 1: + return None + else: + # only keep the text around if fill_rowspan is set + return (col, (text if fill_rowspan else '', nr - 1)) + saved_span = lfilter(lambda i: i is not None, + lmap(advance_item_to_next_row, + list(enumerate(cols)))) + cols = [text for (text, nr) in cols] + # generate cols with text only + if any([col != '' for col in cols]): + res.append(cols) return res - def _parse_raw_tfoot(self, table): - tfoot = self._parse_tfoot(table) + def _parse_raw_tfoot(self, table_html): + tfoot = self._extract_tfoot(table_html) res = [] if tfoot: - res = lmap(self._text_getter, self._parse_td(tfoot[0])) + res = lmap(self._text_getter, self._extract_td(tfoot[0])) return np.atleast_1d( np.array(res).squeeze()) if res and len(res) == 1 else res - def _parse_raw_tbody(self, table): - tbody = self._parse_tbody(table) - - try: - res = self._parse_tr(tbody[0]) - except IndexError: - res = self._parse_tr(table) - return self._parse_raw_data(res) - class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser): """HTML to DataFrame parser that uses BeautifulSoup under the hood. 
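(Note the tuple-parameter lambdas in the hunk above -- `lambda (text, nc):
...` -- are Python 2 only; PEP 3113 removed tuple unpacking from parameter
lists, which is exactly what patch 002 earlier in this series fixes. A quick
contrast, with made-up data:)

```python
pairs = [('a', 2), ('b', 1)]

# Python 2 only (SyntaxError on Python 3):
#   map(lambda (text, nc): [text] * nc, pairs)

# Portable: index into the single tuple argument, or unpack inside a
# comprehension / named function instead.
print(list(map(lambda p: [p[0]] * p[1], pairs)))  # [['a', 'a'], ['b']]
print([[text] * nc for text, nc in pairs])        # same result
```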
@@ -401,27 +525,6 @@ def __init__(self, *args, **kwargs): from bs4 import SoupStrainer self._strainer = SoupStrainer('table') - def _text_getter(self, obj): - return obj.text - - def _parse_td(self, row): - return row.find_all(('td', 'th')) - - def _parse_tr(self, element): - return element.find_all('tr') - - def _parse_th(self, element): - return element.find_all('th') - - def _parse_thead(self, table): - return table.find_all('thead') - - def _parse_tbody(self, table): - return table.find_all('tbody') - - def _parse_tfoot(self, table): - return table.find_all('tfoot') - def _parse_tables(self, doc, match, attrs): element_name = self._strainer.name tables = doc.find_all(element_name, attrs=attrs) @@ -443,6 +546,33 @@ def _parse_tables(self, doc, match, attrs): match.pattern) return result + def _text_getter(self, obj): + return obj.text + + def _equals_tag(self, obj, tag): + return obj.name == tag + + def _contains_tag(self, obj, tag): + return obj.find(tag) is not None + + def _extract_td(self, row): + return row.find_all(('td', 'th')) + + def _extract_tr(self, element): + return element.find_all('tr') + + def _extract_th(self, element): + return element.find_all('th') + + def _extract_thead(self, table): + return table.find_all('thead') + + def _extract_tbody(self, table): + return table.find_all('tbody') + + def _extract_tfoot(self, table): + return table.find_all('tfoot') + def _setup_build_doc(self): raw_text = _read(self.io) if not raw_text: @@ -502,16 +632,6 @@ class _LxmlFrameParser(_HtmlFrameParser): def __init__(self, *args, **kwargs): super(_LxmlFrameParser, self).__init__(*args, **kwargs) - def _text_getter(self, obj): - return obj.text_content() - - def _parse_td(self, row): - return row.xpath('.//td|.//th') - - def _parse_tr(self, table): - expr = './/tr[normalize-space()]' - return table.xpath(expr) - def _parse_tables(self, doc, match, kwargs): pattern = match.pattern @@ -531,6 +651,22 @@ def _parse_tables(self, doc, match, kwargs): raise ValueError("No tables found matching regex %r" % pattern) return tables + def _equals_tag(self, obj, tag): + return obj.tag == tag + + def _contains_tag(self, obj, tag): + return obj.find(tag) is not None + + def _text_getter(self, obj): + return obj.text_content() + + def _extract_td(self, row): + return row.xpath('.//td|.//th') + + def _extract_tr(self, table): + expr = './/tr[normalize-space()]' + return table.xpath(expr) + def _build_doc(self): """ Raises @@ -585,13 +721,13 @@ def _build_doc(self): raise XMLSyntaxError("no text parsed from document", 0, 0, 0) return r - def _parse_tbody(self, table): + def _extract_tbody(self, table): return table.xpath('.//tbody') - def _parse_thead(self, table): + def _extract_thead(self, table): return table.xpath('.//thead') - def _parse_tfoot(self, table): + def _extract_tfoot(self, table): return table.xpath('.//tfoot') def _parse_raw_thead(self, table): @@ -599,10 +735,10 @@ def _parse_raw_thead(self, table): thead = table.xpath(expr) res = [] if thead: - trs = self._parse_tr(thead[0]) + trs = self._extract_tr(thead[0]) for tr in trs: cols = [_remove_whitespace(x.text_content()) for x in - self._parse_td(tr)] + self._extract_td(tr)] if any([col != '' for col in cols]): res.append(cols) return res @@ -873,7 +1009,13 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None, This function searches for ```` elements and only for ```` and ```` or ```` argument, it is used to construct + the header, otherwise the function attempts to find the header within + the body (by putting rows 
with only `` @@ -61,7 +61,7 @@ @@ -123,31 +123,31 @@ Here are just a few of the things that pandas does well: moving window linear regressions, date shifting and lagging, etc. - [missing-data]: http://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data - [insertion-deletion]: http://pandas.pydata.org/pandas-docs/stable/dsintro.html#column-selection-addition-deletion - [alignment]: http://pandas.pydata.org/pandas-docs/stable/dsintro.html?highlight=alignment#intro-to-data-structures - [groupby]: http://pandas.pydata.org/pandas-docs/stable/groupby.html#group-by-split-apply-combine - [conversion]: http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe - [slicing]: http://pandas.pydata.org/pandas-docs/stable/indexing.html#slicing-ranges - [fancy-indexing]: http://pandas.pydata.org/pandas-docs/stable/indexing.html#advanced-indexing-with-ix - [subsetting]: http://pandas.pydata.org/pandas-docs/stable/indexing.html#boolean-indexing - [merging]: http://pandas.pydata.org/pandas-docs/stable/merging.html#database-style-dataframe-joining-merging - [joining]: http://pandas.pydata.org/pandas-docs/stable/merging.html#joining-on-index - [reshape]: http://pandas.pydata.org/pandas-docs/stable/reshaping.html#reshaping-and-pivot-tables - [pivot-table]: http://pandas.pydata.org/pandas-docs/stable/reshaping.html#pivot-tables-and-cross-tabulations - [mi]: http://pandas.pydata.org/pandas-docs/stable/indexing.html#hierarchical-indexing-multiindex - [flat-files]: http://pandas.pydata.org/pandas-docs/stable/io.html#csv-text-files - [excel]: http://pandas.pydata.org/pandas-docs/stable/io.html#excel-files - [db]: http://pandas.pydata.org/pandas-docs/stable/io.html#sql-queries - [hdfstore]: http://pandas.pydata.org/pandas-docs/stable/io.html#hdf5-pytables - [timeseries]: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#time-series-date-functionality + [missing-data]: https://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data + [insertion-deletion]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html#column-selection-addition-deletion + [alignment]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html?highlight=alignment#intro-to-data-structures + [groupby]: https://pandas.pydata.org/pandas-docs/stable/groupby.html#group-by-split-apply-combine + [conversion]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe + [slicing]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#slicing-ranges + [fancy-indexing]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#advanced-indexing-with-ix + [subsetting]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#boolean-indexing + [merging]: https://pandas.pydata.org/pandas-docs/stable/merging.html#database-style-dataframe-joining-merging + [joining]: https://pandas.pydata.org/pandas-docs/stable/merging.html#joining-on-index + [reshape]: https://pandas.pydata.org/pandas-docs/stable/reshaping.html#reshaping-and-pivot-tables + [pivot-table]: https://pandas.pydata.org/pandas-docs/stable/reshaping.html#pivot-tables-and-cross-tabulations + [mi]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#hierarchical-indexing-multiindex + [flat-files]: https://pandas.pydata.org/pandas-docs/stable/io.html#csv-text-files + [excel]: https://pandas.pydata.org/pandas-docs/stable/io.html#excel-files + [db]: https://pandas.pydata.org/pandas-docs/stable/io.html#sql-queries + [hdfstore]: https://pandas.pydata.org/pandas-docs/stable/io.html#hdf5-pytables + [timeseries]: 
https://pandas.pydata.org/pandas-docs/stable/timeseries.html#time-series-date-functionality ## Where to get it The source code is currently hosted on GitHub at: -http://github.com/pandas-dev/pandas +https://github.com/pandas-dev/pandas Binary installers for the latest released version are available at the [Python -package index](http://pypi.python.org/pypi/pandas/) and on conda. +package index](https://pypi.python.org/pypi/pandas) and on conda. ```sh # conda @@ -161,11 +161,11 @@ pip install pandas ## Dependencies - [NumPy](http://www.numpy.org): 1.7.0 or higher -- [python-dateutil](http://labix.org/python-dateutil): 1.5 or higher -- [pytz](http://pytz.sourceforge.net) +- [python-dateutil](https://labix.org/python-dateutil): 1.5 or higher +- [pytz](https://pythonhosted.org/pytz) - Needed for time zone support with ``pandas.date_range`` -See the [full installation instructions](http://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) +See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) for recommended and optional dependencies. ## Installation from sources @@ -197,13 +197,13 @@ mode](https://pip.pypa.io/en/latest/reference/pip_install.html#editable-installs pip install -e . ``` -See the full instructions for [installing from source](http://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source). +See the full instructions for [installing from source](https://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source). ## License -BSD +[BSD 3](LICENSE) ## Documentation -The official documentation is hosted on PyData.org: http://pandas.pydata.org/pandas-docs/stable/ +The official documentation is hosted on PyData.org: https://pandas.pydata.org/pandas-docs/stable The Sphinx documentation should provide a good starting point for learning how to use the library. Expect the docs to continue to expand as time goes on. @@ -223,7 +223,7 @@ Most development discussion is taking place on github in this repo. Further, the ## Contributing to pandas All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome. -A detailed overview on how to contribute can be found in the **[contributing guide.](http://pandas.pydata.org/pandas-docs/stable/contributing.html)** +A detailed overview on how to contribute can be found in the **[contributing guide.](https://pandas.pydata.org/pandas-docs/stable/contributing.html)** If you are simply looking to start working with the pandas codebase, navigate to the [GitHub “issues” tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [Difficulty Novice](https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+is%3Aissue+label%3A%22Difficulty+Novice%22) where you could start out. diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index a3a90f514f142..a3062b4086673 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -144,7 +144,7 @@ To evaluate single-element pandas objects in a boolean context, use the method ` Bitwise boolean ~~~~~~~~~~~~~~~ -Bitwise boolean operators like ``==`` and ``!=`` will return a boolean ``Series``, +Bitwise boolean operators like ``==`` and ``!=`` return a boolean ``Series``, which is almost always what you want anyways. .. 
code-block:: python @@ -194,7 +194,7 @@ For lack of ``NA`` (missing) support from the ground up in NumPy and Python in general, we were given the difficult choice between either - A *masked array* solution: an array of data and an array of boolean values - indicating whether a value + indicating whether a value is there or is missing - Using a special sentinel value, bit pattern, or set of sentinel values to denote ``NA`` across the dtypes @@ -247,16 +247,16 @@ dtype in order to store the NAs. These are summarized by this table: ``integer``, cast to ``float64`` ``boolean``, cast to ``object`` -While this may seem like a heavy trade-off, I have found very few -cases where this is an issue in practice. Some explanation for the motivation -here in the next section. +While this may seem like a heavy trade-off, I have found very few cases where +this is an issue in practice i.e. storing values greater than 2**53. Some +explanation for the motivation is in the next section. Why not make NumPy like R? ~~~~~~~~~~~~~~~~~~~~~~~~~~ Many people have suggested that NumPy should simply emulate the ``NA`` support present in the more domain-specific statistical programming language `R -`__. Part of the reason is the NumPy type hierarchy: +`__. Part of the reason is the NumPy type hierarchy: .. csv-table:: :header: "Typeclass","Dtypes" @@ -305,7 +305,7 @@ the ``DataFrame.copy`` method. If you are doing a lot of copying of DataFrame objects shared among threads, we recommend holding locks inside the threads where the data copying occurs. -See `this link `__ +See `this link `__ for more information. @@ -332,5 +332,5 @@ using something similar to the following: s = pd.Series(newx) See `the NumPy documentation on byte order -`__ for more +`__ for more details. From 5ce00e1d2a3fdfc24e9ca36bc36120aeb1ef7ed9 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Aug 2017 05:47:57 -0400 Subject: [PATCH 034/141] ENH: add to/from_parquet with pyarrow & fastparquet (#15838) --- ci/install_travis.sh | 1 + ci/requirements-2.7.sh | 2 +- ci/requirements-3.5.sh | 4 +- ci/requirements-3.5_OSX.sh | 2 +- ci/requirements-3.6.pip | 1 + ci/requirements-3.6.run | 2 + ci/requirements-3.6_DOC.sh | 2 +- ci/requirements-3.6_WIN.run | 2 + doc/source/install.rst | 1 + doc/source/io.rst | 82 ++++++- doc/source/options.rst | 3 + doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/config_init.py | 12 + pandas/core/frame.py | 24 ++ pandas/io/api.py | 1 + pandas/io/feather_format.py | 4 +- pandas/io/parquet.py | 194 +++++++++++++++++ pandas/tests/api/test_api.py | 2 +- pandas/tests/io/test_parquet.py | 374 ++++++++++++++++++++++++++++++++ pandas/util/_print_versions.py | 1 + 20 files changed, 703 insertions(+), 12 deletions(-) create mode 100644 pandas/io/parquet.py create mode 100644 pandas/tests/io/test_parquet.py diff --git a/ci/install_travis.sh b/ci/install_travis.sh index dcc1656ce3dd7..df6969c7cc659 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -153,6 +153,7 @@ fi echo echo "[removing installed pandas]" conda remove pandas -y --force +pip uninstall -y pandas if [ "$BUILD_TEST" ]; then diff --git a/ci/requirements-2.7.sh b/ci/requirements-2.7.sh index 5b20617f55759..e3bd5e46026c5 100644 --- a/ci/requirements-2.7.sh +++ b/ci/requirements-2.7.sh @@ -4,4 +4,4 @@ source activate pandas echo "install 27" -conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 +conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 fastparquet diff --git a/ci/requirements-3.5.sh b/ci/requirements-3.5.sh index 
3b8fe793a413d..33db9c28c78a9 100644 --- a/ci/requirements-3.5.sh +++ b/ci/requirements-3.5.sh @@ -4,8 +4,8 @@ source activate pandas echo "install 35" -conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 - # pip install python-dateutil to get latest conda remove -n pandas python-dateutil --force pip install python-dateutil + +conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 diff --git a/ci/requirements-3.5_OSX.sh b/ci/requirements-3.5_OSX.sh index 39ea1a0cf67bf..c2978b175968c 100644 --- a/ci/requirements-3.5_OSX.sh +++ b/ci/requirements-3.5_OSX.sh @@ -4,4 +4,4 @@ source activate pandas echo "install 35_OSX" -conda install -n pandas -c conda-forge feather-format==0.3.1 +conda install -n pandas -c conda-forge feather-format==0.3.1 fastparquet diff --git a/ci/requirements-3.6.pip b/ci/requirements-3.6.pip index e69de29bb2d1d..753a60d6c119a 100644 --- a/ci/requirements-3.6.pip +++ b/ci/requirements-3.6.pip @@ -0,0 +1 @@ +brotlipy diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 00db27d3f2704..822144a80bc9a 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -17,6 +17,8 @@ pymysql feather-format pyarrow psycopg2 +python-snappy +fastparquet beautifulsoup4 s3fs xarray diff --git a/ci/requirements-3.6_DOC.sh b/ci/requirements-3.6_DOC.sh index 8c10a794a13b9..aec0f62148622 100644 --- a/ci/requirements-3.6_DOC.sh +++ b/ci/requirements-3.6_DOC.sh @@ -6,6 +6,6 @@ echo "[install DOC_BUILD deps]" pip install pandas-gbq -conda install -n pandas -c conda-forge feather-format pyarrow nbsphinx pandoc +conda install -n pandas -c conda-forge feather-format pyarrow nbsphinx pandoc fastparquet conda install -n pandas -c r r rpy2 --yes diff --git a/ci/requirements-3.6_WIN.run b/ci/requirements-3.6_WIN.run index 22aae8944d731..226caa458f6ee 100644 --- a/ci/requirements-3.6_WIN.run +++ b/ci/requirements-3.6_WIN.run @@ -13,3 +13,5 @@ numexpr pytables matplotlib blosc +fastparquet +pyarrow diff --git a/doc/source/install.rst b/doc/source/install.rst index c185a7cf4b875..01a01b1b58b4c 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -237,6 +237,7 @@ Optional Dependencies * `xarray `__: pandas like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended. * `PyTables `__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended. * `Feather Format `__: necessary for feather-based storage, version 0.3.1 or higher. +* ``Apache Parquet Format``, either `pyarrow `__ (>= 0.4.1) or `fastparquet `__ (>= 0.0.6) for parquet-based storage. The `snappy `__ and `brotli `__ are available for compression support. * `SQLAlchemy `__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. Some common drivers are: * `psycopg2 `__: for PostgreSQL diff --git a/doc/source/io.rst b/doc/source/io.rst index bf68a0cae1d27..0b97264abfcd7 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -43,6 +43,7 @@ object. 
The corresponding ``writer`` functions are object methods that are acces binary;`MS Excel `__;:ref:`read_excel`;:ref:`to_excel` binary;`HDF5 Format `__;:ref:`read_hdf`;:ref:`to_hdf` binary;`Feather Format `__;:ref:`read_feather`;:ref:`to_feather` + binary;`Parquet Format `__;:ref:`read_parquet`;:ref:`to_parquet` binary;`Msgpack `__;:ref:`read_msgpack`;:ref:`to_msgpack` binary;`Stata `__;:ref:`read_stata`;:ref:`to_stata` binary;`SAS `__;:ref:`read_sas`; @@ -209,7 +210,7 @@ buffer_lines : int, default None .. deprecated:: 0.19.0 Argument removed because its value is not respected by the parser - + compact_ints : boolean, default False .. deprecated:: 0.19.0 @@ -4087,7 +4088,7 @@ control compression: ``complevel`` and ``complib``. ``complevel`` specifies if and how hard data is to be compressed. ``complevel=0`` and ``complevel=None`` disables compression and ``0`_: Fast compression and decompression. .. versionadded:: 0.20.2 - + Support for alternative blosc compressors: - + - `blosc:blosclz `_ This is the default compressor for ``blosc`` - `blosc:lz4 @@ -4545,6 +4546,79 @@ Read from a feather file. import os os.remove('example.feather') + +.. _io.parquet: + +Parquet +------- + +.. versionadded:: 0.21.0 + +`Parquet `__ + +.. note:: + + These engines are very similar and should read/write nearly identical parquet format files. + These libraries differ by having different underlying dependencies (``fastparquet`` by using ``numba``, while ``pyarrow`` uses a c-library). + +.. ipython:: python + + df = pd.DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.date_range('20130101', periods=3), + 'g': pd.date_range('20130101', periods=3, tz='US/Eastern'), + 'h': pd.date_range('20130101', periods=3, freq='ns')}) + + df + df.dtypes + +Write to a parquet file. + +.. ipython:: python + + df.to_parquet('example_pa.parquet', engine='pyarrow') + df.to_parquet('example_fp.parquet', engine='fastparquet') + +Read from a parquet file. + +.. ipython:: python + + result = pd.read_parquet('example_pa.parquet', engine='pyarrow') + result = pd.read_parquet('example_fp.parquet', engine='fastparquet') + + result.dtypes + +.. ipython:: python + :suppress: + + import os + os.remove('example_pa.parquet') + os.remove('example_fp.parquet') + .. _io.sql: SQL Queries diff --git a/doc/source/options.rst b/doc/source/options.rst index 83b08acac5720..51d02bc89692a 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -414,6 +414,9 @@ io.hdf.default_format None default format writing format, 'table' io.hdf.dropna_table True drop ALL nan rows when appending to a table +io.parquet.engine None The engine to use as a default for + parquet reading and writing. 
If None
+                                      then try 'pyarrow' and 'fastparquet'
 mode.chained_assignment              warn           Raise an exception, warn, or no
                                                     action if trying to use chained
                                                     assignment, The default is warn
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index aadb82b8a4973..9f0588fc4475e 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -79,6 +79,7 @@ Other Enhancements
 - :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`)
 - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`)
 - :func:`read_html` handles colspan and rowspan arguments and attempts to infer a header if the header is not explicitly specified (:issue:`17054`)
+- Integration with Apache Parquet, including a new top-level ``pd.read_parquet()`` and ``DataFrame.to_parquet()`` method, see :ref:`here `.
 
 .. _whatsnew_0210.api_breaking:
 
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 875ab8249f953..ea5c213dbe057 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -465,3 +465,15 @@ def _register_xlsx(engine, other):
 except ImportError:
     # fallback
     _register_xlsx('openpyxl', 'xlsxwriter')
+
+# Set up the io.parquet specific configuration.
+parquet_engine_doc = """
+: string
+    The default parquet reader/writer engine. Available options:
+    'auto', 'pyarrow', 'fastparquet', the default is 'auto'
+"""
+
+with cf.config_prefix('io.parquet'):
+    cf.register_option(
+        'engine', 'auto', parquet_engine_doc,
+        validator=is_one_of_factory(['auto', 'pyarrow', 'fastparquet']))
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e546e96f253c7..9d63bd2e120aa 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1598,6 +1598,30 @@ def to_feather(self, fname):
         from pandas.io.feather_format import to_feather
         to_feather(self, fname)
 
+    def to_parquet(self, fname, engine='auto', compression='snappy',
+                   **kwargs):
+        """
+        Write a DataFrame to the binary parquet format.
+
+        .. versionadded:: 0.21.0
+
+        Parameters
+        ----------
+        fname : str
+            string file path
+        engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
+            Parquet library to use. If 'auto', then the option
+            ``io.parquet.engine`` is used; if that is also 'auto', then
+            the first of 'pyarrow' and 'fastparquet' that is installed
+            is used.
+        compression : str, optional, default 'snappy'
+            compression method, includes {'gzip', 'snappy', 'brotli'}
+        kwargs
+            Additional keyword arguments passed to the engine
+        """
+        from pandas.io.parquet import to_parquet
+        to_parquet(self, fname, engine,
+                   compression=compression, **kwargs)
+
     @Substitution(header='Write out column names. If a list of string is given, \
it is assumed to be aliases for the column names')
     @Appender(fmt.docstring_to_string, indents=1)
diff --git a/pandas/io/api.py b/pandas/io/api.py
index a4a25b78942db..f542a8176dce7 100644
--- a/pandas/io/api.py
+++ b/pandas/io/api.py
@@ -13,6 +13,7 @@
 from pandas.io.sql import read_sql, read_sql_table, read_sql_query
 from pandas.io.sas import read_sas
 from pandas.io.feather_format import read_feather
+from pandas.io.parquet import read_parquet
 from pandas.io.stata import read_stata
 from pandas.io.pickle import read_pickle, to_pickle
 from pandas.io.packers import read_msgpack, to_msgpack
diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py
index 86d58caa5e816..87a4931421d7d 100644
--- a/pandas/io/feather_format.py
+++ b/pandas/io/feather_format.py
@@ -19,7 +19,7 @@ def _try_import():
                           "you can install via conda\n"
                           "conda install feather-format -c conda-forge\n"
                           "or via pip\n"
-                          "pip install feather-format\n")
+                          "pip install -U feather-format\n")
 
     try:
         feather.__version__ >= LooseVersion('0.3.1')
@@ -29,7 +29,7 @@ def _try_import():
                           "you can install via conda\n"
                           "conda install feather-format -c conda-forge"
                           "or via pip\n"
-                          "pip install feather-format\n")
+                          "pip install -U feather-format\n")
 
     return feather
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
new file mode 100644
index 0000000000000..0a4426b55b323
--- /dev/null
+++ b/pandas/io/parquet.py
@@ -0,0 +1,194 @@
+""" parquet compat """
+
+from warnings import catch_warnings
+from distutils.version import LooseVersion
+from pandas import DataFrame, RangeIndex, Int64Index, get_option
+from pandas.compat import range
+from pandas.io.common import get_filepath_or_buffer
+
+
+def get_engine(engine):
+    """ return our implementation """
+
+    # compare with ==, not 'is': identity of interned strings is an
+    # implementation detail and must not be relied on
+    if engine == 'auto':
+        engine = get_option('io.parquet.engine')
+
+    if engine == 'auto':
+        # try engines in this order
+        try:
+            return PyArrowImpl()
+        except ImportError:
+            pass
+
+        try:
+            return FastParquetImpl()
+        except ImportError:
+            pass
+
+        raise ImportError("unable to find a usable engine; "
+                          "you must install one of 'pyarrow' "
+                          "or 'fastparquet'")
+
+    if engine not in ['pyarrow', 'fastparquet']:
+        raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")
+
+    if engine == 'pyarrow':
+        return PyArrowImpl()
+    elif engine == 'fastparquet':
+        return FastParquetImpl()
+
+
+class PyArrowImpl(object):
+
+    def __init__(self):
+        # since pandas is a dependency of pyarrow
+        # we need to import on first use
+
+        try:
+            import pyarrow
+            import pyarrow.parquet
+        except ImportError:
+            raise ImportError("pyarrow is required for parquet support\n\n"
+                              "you can install via conda\n"
+                              "conda install pyarrow -c conda-forge\n"
+                              "\nor via pip\n"
+                              "pip install -U pyarrow\n")
+
+        if LooseVersion(pyarrow.__version__) < '0.4.1':
+            raise ImportError("pyarrow >= 0.4.1 is required for parquet "
+                              "support\n\n"
+                              "you can install via conda\n"
+                              "conda install pyarrow -c conda-forge\n"
+                              "\nor via pip\n"
+                              "pip install -U pyarrow\n")
+
+        self.api = pyarrow
+
+    def write(self, df, path, compression='snappy', **kwargs):
+        path, _, _ = get_filepath_or_buffer(path)
+        table = self.api.Table.from_pandas(df, timestamps_to_ms=True)
+        self.api.parquet.write_table(
+            table, path, compression=compression, **kwargs)
+
+    def read(self, path):
+        path, _, _ = get_filepath_or_buffer(path)
+        return self.api.parquet.read_table(path).to_pandas()
+
+
+class FastParquetImpl(object):
+
+    def __init__(self):
+        # since pandas is a dependency of fastparquet
+        # we need to import on first use
+
+        try:
+            import fastparquet
+        except ImportError:
+            raise ImportError("fastparquet is required for parquet "
+                              "support\n\n"
+                              "you can install via conda\n"
+                              "conda install fastparquet -c conda-forge\n"
+                              "\nor via pip\n"
+                              "pip install -U fastparquet")
+
+        if LooseVersion(fastparquet.__version__) < '0.1.0':
+            raise ImportError("fastparquet >= 0.1.0 is required for parquet "
+                              "support\n\n"
+                              "you can install via conda\n"
+                              "conda install fastparquet -c conda-forge\n"
+                              "\nor via pip\n"
+                              "pip install -U fastparquet")
+
+        self.api = fastparquet
+
+    def write(self, df, path, compression='snappy', **kwargs):
+        # thriftpy/protocol/compact.py:339:
+        # DeprecationWarning: tostring() is deprecated.
+        # Use tobytes() instead.
+        path, _, _ = get_filepath_or_buffer(path)
+        with catch_warnings(record=True):
+            self.api.write(path, df,
+                           compression=compression, **kwargs)
+
+    def read(self, path):
+        path, _, _ = get_filepath_or_buffer(path)
+        return self.api.ParquetFile(path).to_pandas()
+
+
+def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
+    """
+    Write a DataFrame to the parquet format.
+
+    Parameters
+    ----------
+    df : DataFrame
+    path : string
+        File path
+    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
+        Parquet library to use. If 'auto', then the option
+        ``io.parquet.engine`` is used; if that is also 'auto', then
+        the first of 'pyarrow' and 'fastparquet' that is installed
+        is used.
+    compression : str, optional, default 'snappy'
+        compression method, includes {'gzip', 'snappy', 'brotli'}
+    kwargs
+        Additional keyword arguments passed to the engine
+    """
+
+    impl = get_engine(engine)
+
+    if not isinstance(df, DataFrame):
+        raise ValueError("to_parquet only supports IO with DataFrames")
+
+    valid_types = {'string', 'unicode'}
+
+    # validate index
+    # --------------
+
+    # validate that we have only a default index
+    # raise on anything else as we don't serialize the index
+
+    if not isinstance(df.index, Int64Index):
+        raise ValueError("parquet does not support serializing {} "
+                         "for the index; you can .reset_index() "
+                         "to make the index into column(s)".format(
+                             type(df.index)))
+
+    if not df.index.equals(RangeIndex.from_range(range(len(df)))):
+        raise ValueError("parquet does not support serializing a "
+                         "non-default index for the index; you "
+                         "can .reset_index() to make the index "
+                         "into column(s)")
+
+    if df.index.name is not None:
+        raise ValueError("parquet does not serialize index meta-data on a "
+                         "default index")
+
+    # validate columns
+    # ----------------
+
+    # must have value column names (strings only)
+    if df.columns.inferred_type not in valid_types:
+        raise ValueError("parquet must have string column names")
+
+    # forward any engine-specific keyword arguments as documented above
+    return impl.write(df, path, compression=compression, **kwargs)
+
+
+def read_parquet(path, engine='auto', **kwargs):
+    """
+    Load a parquet object from the file path, returning a DataFrame.
+
+    .. versionadded:: 0.21.0
+
+    Parameters
+    ----------
+    path : string
+        File path
+    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
+        Parquet reader library to use. If 'auto', then the option
+        ``io.parquet.engine`` is used; if that is also 'auto', then
+        the first of 'pyarrow' and 'fastparquet' that is installed
+        is used.
+ kwargs are passed to the engine + + Returns + ------- + DataFrame + + """ + + impl = get_engine(engine) + return impl.read(path) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 433ed7e517b1c..09cccd54b74f8 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -82,7 +82,7 @@ class TestPDApi(Base): 'read_gbq', 'read_hdf', 'read_html', 'read_json', 'read_msgpack', 'read_pickle', 'read_sas', 'read_sql', 'read_sql_query', 'read_sql_table', 'read_stata', - 'read_table', 'read_feather'] + 'read_table', 'read_feather', 'read_parquet'] # top-level to_* funcs funcs_to = ['to_datetime', 'to_msgpack', diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py new file mode 100644 index 0000000000000..ff0935c7dcc6f --- /dev/null +++ b/pandas/tests/io/test_parquet.py @@ -0,0 +1,374 @@ +""" test parquet compat """ + +import pytest +import datetime +from warnings import catch_warnings + +import numpy as np +import pandas as pd +from pandas.compat import PY3, is_platform_windows +from pandas.io.parquet import (to_parquet, read_parquet, get_engine, + PyArrowImpl, FastParquetImpl) +from pandas.util import testing as tm + +try: + import pyarrow # noqa + _HAVE_PYARROW = True +except ImportError: + _HAVE_PYARROW = False + +try: + import fastparquet # noqa + _HAVE_FASTPARQUET = True +except ImportError: + _HAVE_FASTPARQUET = False + + +# setup engines & skips +@pytest.fixture(params=[ + pytest.mark.skipif(not _HAVE_FASTPARQUET, + reason='fastparquet is not installed')('fastparquet'), + pytest.mark.skipif(not _HAVE_PYARROW, + reason='pyarrow is not installed')('pyarrow')]) +def engine(request): + return request.param + + +@pytest.fixture +def pa(): + if not _HAVE_PYARROW: + pytest.skip("pyarrow is not installed") + if is_platform_windows(): + pytest.skip("pyarrow-parquet not building on windows") + return 'pyarrow' + + +@pytest.fixture +def fp(): + if not _HAVE_FASTPARQUET: + pytest.skip("fastparquet is not installed") + return 'fastparquet' + + +@pytest.fixture +def df_compat(): + return pd.DataFrame({'A': [1, 2, 3], 'B': 'foo'}) + + +@pytest.fixture +def df_cross_compat(): + df = pd.DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.date_range('20130101', periods=3), + 'g': pd.date_range('20130101', periods=3, + tz='US/Eastern'), + 'h': pd.date_range('20130101', periods=3, freq='ns')}) + return df + + +def test_invalid_engine(df_compat): + + with pytest.raises(ValueError): + df_compat.to_parquet('foo', 'bar') + + +def test_options_py(df_compat, pa): + # use the set option + + df = df_compat + with tm.ensure_clean() as path: + + with pd.option_context('io.parquet.engine', 'pyarrow'): + df.to_parquet(path) + + result = read_parquet(path, compression=None) + tm.assert_frame_equal(result, df) + + +def test_options_fp(df_compat, fp): + # use the set option + + df = df_compat + with tm.ensure_clean() as path: + + with pd.option_context('io.parquet.engine', 'fastparquet'): + df.to_parquet(path, compression=None) + + result = read_parquet(path, compression=None) + tm.assert_frame_equal(result, df) + + +def test_options_auto(df_compat, fp, pa): + + df = df_compat + with tm.ensure_clean() as path: + + with pd.option_context('io.parquet.engine', 'auto'): + df.to_parquet(path) + + result = read_parquet(path, compression=None) + tm.assert_frame_equal(result, df) + + +def test_options_get_engine(fp, pa): + assert 
isinstance(get_engine('pyarrow'), PyArrowImpl) + assert isinstance(get_engine('fastparquet'), FastParquetImpl) + + with pd.option_context('io.parquet.engine', 'pyarrow'): + assert isinstance(get_engine('auto'), PyArrowImpl) + assert isinstance(get_engine('pyarrow'), PyArrowImpl) + assert isinstance(get_engine('fastparquet'), FastParquetImpl) + + with pd.option_context('io.parquet.engine', 'fastparquet'): + assert isinstance(get_engine('auto'), FastParquetImpl) + assert isinstance(get_engine('pyarrow'), PyArrowImpl) + assert isinstance(get_engine('fastparquet'), FastParquetImpl) + + with pd.option_context('io.parquet.engine', 'auto'): + assert isinstance(get_engine('auto'), PyArrowImpl) + assert isinstance(get_engine('pyarrow'), PyArrowImpl) + assert isinstance(get_engine('fastparquet'), FastParquetImpl) + + +@pytest.mark.xfail(reason="fp does not ignore pa index __index_level_0__") +def test_cross_engine_pa_fp(df_cross_compat, pa, fp): + # cross-compat with differing reading/writing engines + + df = df_cross_compat + with tm.ensure_clean() as path: + df.to_parquet(path, engine=pa, compression=None) + + result = read_parquet(path, engine=fp, compression=None) + tm.assert_frame_equal(result, df) + + +@pytest.mark.xfail(reason="pyarrow reading fp in some cases") +def test_cross_engine_fp_pa(df_cross_compat, pa, fp): + # cross-compat with differing reading/writing engines + + df = df_cross_compat + with tm.ensure_clean() as path: + df.to_parquet(path, engine=fp, compression=None) + + result = read_parquet(path, engine=pa, compression=None) + tm.assert_frame_equal(result, df) + + +class Base(object): + + def check_error_on_write(self, df, engine, exc): + # check that we are raising the exception + # on writing + + with pytest.raises(exc): + with tm.ensure_clean() as path: + to_parquet(df, path, engine, compression=None) + + def check_round_trip(self, df, engine, expected=None, **kwargs): + + with tm.ensure_clean() as path: + df.to_parquet(path, engine, **kwargs) + result = read_parquet(path, engine) + + if expected is None: + expected = df + tm.assert_frame_equal(result, expected) + + # repeat + to_parquet(df, path, engine, **kwargs) + result = pd.read_parquet(path, engine) + + if expected is None: + expected = df + tm.assert_frame_equal(result, expected) + + +class TestBasic(Base): + + def test_error(self, engine): + + for obj in [pd.Series([1, 2, 3]), 1, 'foo', pd.Timestamp('20130101'), + np.array([1, 2, 3])]: + self.check_error_on_write(obj, engine, ValueError) + + def test_columns_dtypes(self, engine): + + df = pd.DataFrame({'string': list('abc'), + 'int': list(range(1, 4))}) + + # unicode + df.columns = [u'foo', u'bar'] + self.check_round_trip(df, engine, compression=None) + + def test_columns_dtypes_invalid(self, engine): + + df = pd.DataFrame({'string': list('abc'), + 'int': list(range(1, 4))}) + + # numeric + df.columns = [0, 1] + self.check_error_on_write(df, engine, ValueError) + + if PY3: + # bytes on PY3, on PY2 these are str + df.columns = [b'foo', b'bar'] + self.check_error_on_write(df, engine, ValueError) + + # python object + df.columns = [datetime.datetime(2011, 1, 1, 0, 0), + datetime.datetime(2011, 1, 1, 1, 1)] + self.check_error_on_write(df, engine, ValueError) + + def test_write_with_index(self, engine): + + df = pd.DataFrame({'A': [1, 2, 3]}) + self.check_round_trip(df, engine, compression=None) + + # non-default index + for index in [[2, 3, 4], + pd.date_range('20130101', periods=3), + list('abc'), + [1, 3, 4], + pd.MultiIndex.from_tuples([('a', 1), ('a', 2), + ('b', 
1)]), + ]: + + df.index = index + self.check_error_on_write(df, engine, ValueError) + + # index with meta-data + df.index = [0, 1, 2] + df.index.name = 'foo' + self.check_error_on_write(df, engine, ValueError) + + # column multi-index + df.index = [0, 1, 2] + df.columns = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)]), + self.check_error_on_write(df, engine, ValueError) + + @pytest.mark.parametrize('compression', [None, 'gzip', 'snappy', 'brotli']) + def test_compression(self, engine, compression): + + if compression == 'snappy': + pytest.importorskip('snappy') + + elif compression == 'brotli': + pytest.importorskip('brotli') + + df = pd.DataFrame({'A': [1, 2, 3]}) + self.check_round_trip(df, engine, compression=compression) + + +class TestParquetPyArrow(Base): + + def test_basic(self, pa): + + df = pd.DataFrame({'string': list('abc'), + 'string_with_nan': ['a', np.nan, 'c'], + 'string_with_none': ['a', None, 'c'], + 'bytes': [b'foo', b'bar', b'baz'], + 'unicode': [u'foo', u'bar', u'baz'], + 'int': list(range(1, 4)), + 'uint': np.arange(3, 6).astype('u1'), + 'float': np.arange(4.0, 7.0, dtype='float64'), + 'float_with_nan': [2., np.nan, 3.], + 'bool': [True, False, True], + 'bool_with_none': [True, None, True], + 'datetime_ns': pd.date_range('20130101', periods=3), + 'datetime_with_nat': [pd.Timestamp('20130101'), + pd.NaT, + pd.Timestamp('20130103')] + }) + + self.check_round_trip(df, pa) + + def test_duplicate_columns(self, pa): + + # not currently able to handle duplicate columns + df = pd.DataFrame(np.arange(12).reshape(4, 3), + columns=list('aaa')).copy() + self.check_error_on_write(df, pa, ValueError) + + def test_unsupported(self, pa): + + # period + df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) + self.check_error_on_write(df, pa, ValueError) + + # categorical + df = pd.DataFrame({'a': pd.Categorical(list('abc'))}) + self.check_error_on_write(df, pa, NotImplementedError) + + # timedelta + df = pd.DataFrame({'a': pd.timedelta_range('1 day', + periods=3)}) + self.check_error_on_write(df, pa, NotImplementedError) + + # mixed python objects + df = pd.DataFrame({'a': ['a', 1, 2.0]}) + self.check_error_on_write(df, pa, ValueError) + + +class TestParquetFastParquet(Base): + + def test_basic(self, fp): + + df = pd.DataFrame( + {'string': list('abc'), + 'string_with_nan': ['a', np.nan, 'c'], + 'string_with_none': ['a', None, 'c'], + 'bytes': [b'foo', b'bar', b'baz'], + 'unicode': [u'foo', u'bar', u'baz'], + 'int': list(range(1, 4)), + 'uint': np.arange(3, 6).astype('u1'), + 'float': np.arange(4.0, 7.0, dtype='float64'), + 'float_with_nan': [2., np.nan, 3.], + 'bool': [True, False, True], + 'datetime': pd.date_range('20130101', periods=3), + 'datetime_with_nat': [pd.Timestamp('20130101'), + pd.NaT, + pd.Timestamp('20130103')], + 'timedelta': pd.timedelta_range('1 day', periods=3), + }) + + self.check_round_trip(df, fp, compression=None) + + @pytest.mark.skip(reason="not supported") + def test_duplicate_columns(self, fp): + + # not currently able to handle duplicate columns + df = pd.DataFrame(np.arange(12).reshape(4, 3), + columns=list('aaa')).copy() + self.check_error_on_write(df, fp, ValueError) + + def test_bool_with_none(self, fp): + df = pd.DataFrame({'a': [True, None, False]}) + expected = pd.DataFrame({'a': [1.0, np.nan, 0.0]}, dtype='float16') + self.check_round_trip(df, fp, expected=expected, compression=None) + + def test_unsupported(self, fp): + + # period + df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) + 
self.check_error_on_write(df, fp, ValueError) + + # mixed + df = pd.DataFrame({'a': ['a', 1, 2.0]}) + self.check_error_on_write(df, fp, ValueError) + + def test_categorical(self, fp): + df = pd.DataFrame({'a': pd.Categorical(list('abc'))}) + self.check_round_trip(df, fp, compression=None) + + def test_datetime_tz(self, fp): + # doesn't preserve tz + df = pd.DataFrame({'a': pd.date_range('20130101', periods=3, + tz='US/Eastern')}) + + # warns on the coercion + with catch_warnings(record=True): + self.check_round_trip(df, fp, df.astype('datetime64[ns]'), + compression=None) diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py index 48b19b02e297e..9ecd4b10365c8 100644 --- a/pandas/util/_print_versions.py +++ b/pandas/util/_print_versions.py @@ -94,6 +94,7 @@ def show_versions(as_json=False): ("psycopg2", lambda mod: mod.__version__), ("jinja2", lambda mod: mod.__version__), ("s3fs", lambda mod: mod.__version__), + ("fastparquet", lambda mod: mod.__version__), ("pandas_gbq", lambda mod: mod.__version__), ("pandas_datareader", lambda mod: mod.__version__), ] From 9aadb64a6f8506816112390e9a1ff3d35b6ba9c1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Aug 2017 07:22:17 -0400 Subject: [PATCH 035/141] DOC: doc typos, xref #15838 --- ci/requirements-3.6_DOC.run | 1 + doc/source/install.rst | 2 +- doc/source/io.rst | 4 ++-- doc/source/whatsnew/v0.21.0.txt | 4 +++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/ci/requirements-3.6_DOC.run b/ci/requirements-3.6_DOC.run index f87760b507357..6c45e3371e9cf 100644 --- a/ci/requirements-3.6_DOC.run +++ b/ci/requirements-3.6_DOC.run @@ -12,6 +12,7 @@ lxml beautifulsoup4 html5lib pytables +python-snappy openpyxl xlrd xlwt diff --git a/doc/source/install.rst b/doc/source/install.rst index 01a01b1b58b4c..99d299b75b59b 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -237,7 +237,7 @@ Optional Dependencies * `xarray `__: pandas like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended. * `PyTables `__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended. * `Feather Format `__: necessary for feather-based storage, version 0.3.1 or higher. -* ``Apache Parquet Format``, either `pyarrow `__ (>= 0.4.1) or `fastparquet `__ (>= 0.0.6) for parquet-based storage. The `snappy `__ and `brotli `__ are available for compression support. +* `Apache Parquet `__, either `pyarrow `__ (>= 0.4.1) or `fastparquet `__ (>= 0.0.6) for parquet-based storage. The `snappy `__ and `brotli `__ are available for compression support. * `SQLAlchemy `__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. Some common drivers are: * `psycopg2 `__: for PostgreSQL diff --git a/doc/source/io.rst b/doc/source/io.rst index 0b97264abfcd7..e6b51b7e2f45c 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4554,7 +4554,7 @@ Parquet .. versionadded:: 0.21.0 -`Parquet `__ provides a partitioned binary columnar serialization for data frames. It is designed to make reading and writing data frames efficient, and to make sharing data across data analysis languages easy. Parquet can use a variety of compression techniques to shrink the file size as much as possible while still maintaining good read performance. 
+`Apache Parquet `__ provides a partitioned binary columnar serialization for data frames. It is designed to make reading and writing data frames efficient, and to make sharing data across data analysis languages easy. Parquet can use a variety of compression techniques to shrink the file size as much as possible while still maintaining good read performance.
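A minimal round-trip sketch of the API this series adds (it assumes a pandas
build containing these patches plus at least one of pyarrow/fastparquet;
'example.parquet' is just an illustrative path):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2, 3], 'b': list('xyz')})

    # engine='auto' consults the io.parquet.engine option, trying pyarrow
    # first and falling back to fastparquet; compression=None sidesteps the
    # optional python-snappy dependency for this sketch
    df.to_parquet('example.parquet', engine='auto', compression=None)
    result = pd.read_parquet('example.parquet', engine='auto')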
@@ -4575,7 +4575,7 @@ You can specifiy an ``engine`` to direct the serialization. This can be one of ` If the engine is NOT specified, then the ``pd.options.io.parquet.engine`` option is checked; if this is also ``auto``, then then ``pyarrow`` is tried, and falling back to ``fastparquet``. -See the documentation for `pyarrow `__ +See the documentation for `pyarrow `__ and `fastparquet `__ .. note:: diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9f0588fc4475e..8a8c758517469 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -9,6 +9,8 @@ users upgrade to this version. Highlights include: +- Integration with `Apache Parquet `__, including a new top-level :func:`pd.read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. + Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. .. contents:: What's new in v0.21.0 @@ -79,7 +81,7 @@ Other Enhancements - :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`) - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`) - :func:`read_html` handles colspan and rowspan arguments and attempts to infer a header if the header is not explicitly specified (:issue:`17054`) -- Integration with Apache Parquet, including a new top-level ``pd.read_parquet()`` and ``DataFrame.to_parquet()`` method, see :ref:`here `. +- Integration with `Apache Parquet `__, including a new top-level :func:`pd.read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. .. _whatsnew_0210.api_breaking: From 89fa421a4355f92be95bea8f204796f2eb9d1f5b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Aug 2017 21:02:12 -0400 Subject: [PATCH 036/141] TST: test for categorical index monotonicity (#17152) * correctly determine bottleneck version * tests for categorical index monotonicity * fix Index.is_monotonic to point to Index.is_monotonic_increasing directly --- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/category.py | 9 ++++++++ pandas/core/nanops.py | 3 ++- pandas/tests/indexes/test_category.py | 32 +++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 411428e001c81..4aecc75d95971 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1195,7 +1195,7 @@ def _mpl_repr(self): @property def is_monotonic(self): """ alias for is_monotonic_increasing (deprecated) """ - return self._engine.is_monotonic_increasing + return self.is_monotonic_increasing @property def is_monotonic_increasing(self): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index e8427f847dd2d..ac4698b570d17 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -316,10 +316,19 @@ def _engine(self): # we are going to look things up with the codes themselves return self._engine_type(lambda: self.codes.astype('i8'), len(self)) + # introspection @cache_readonly def is_unique(self): return not self.duplicated().any() + @property + def is_monotonic_increasing(self): + return Index(self.codes).is_monotonic_increasing + + @property + def is_monotonic_decreasing(self): + return Index(self.codes).is_monotonic_decreasing + @Appender(base._shared_docs['unique'] % _index_doc_kwargs) def unique(self): result = base.IndexOpsMixin.unique(self) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index e2777cb56374e..2f4e437c0ae61 
100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -28,7 +28,8 @@ try: import bottleneck as bn ver = bn.__version__ - _BOTTLENCK_INSTALLED = ver >= LooseVersion(_MIN_BOTTLENECK_VERSION) + _BOTTLENECK_INSTALLED = (LooseVersion(ver) >= + LooseVersion(_MIN_BOTTLENECK_VERSION)) if not _BOTTLENECK_INSTALLED: warnings.warn( diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index a3d72fdb88239..64bd6df361aeb 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -427,6 +427,38 @@ def test_reindex_empty_index(self): tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp)) + def test_is_monotonic(self): + c = CategoricalIndex([1, 2, 3]) + assert c.is_monotonic_increasing + assert not c.is_monotonic_decreasing + + c = CategoricalIndex([1, 2, 3], ordered=True) + assert c.is_monotonic_increasing + assert not c.is_monotonic_decreasing + + c = CategoricalIndex([1, 2, 3], categories=[3, 2, 1]) + assert not c.is_monotonic_increasing + assert c.is_monotonic_decreasing + + c = CategoricalIndex([1, 3, 2], categories=[3, 2, 1]) + assert not c.is_monotonic_increasing + assert not c.is_monotonic_decreasing + + c = CategoricalIndex([1, 2, 3], categories=[3, 2, 1], ordered=True) + assert not c.is_monotonic_increasing + assert c.is_monotonic_decreasing + + # non lexsorted categories + categories = [9, 0, 1, 2, 3] + + c = CategoricalIndex([9, 0], categories=categories) + assert c.is_monotonic_increasing + assert not c.is_monotonic_decreasing + + c = CategoricalIndex([0, 1], categories=categories) + assert c.is_monotonic_increasing + assert not c.is_monotonic_decreasing + def test_duplicates(self): idx = CategoricalIndex([0, 0, 0], name='foo') From ccdae3687583555c60d8467f94592716e4f18330 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 2 Aug 2017 18:03:07 -0700 Subject: [PATCH 037/141] MAINT: Remove non-standard and inconsistently-used imports (#17085) --- pandas/core/frame.py | 35 +++++++++++++++++------------------ pandas/core/generic.py | 7 ++++--- pandas/core/indexing.py | 14 +++++++------- pandas/core/series.py | 8 +++----- 4 files changed, 31 insertions(+), 33 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9d63bd2e120aa..027a427555253 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -20,7 +20,6 @@ import warnings from textwrap import dedent -from numpy import nan as NA import numpy as np import numpy.ma as ma @@ -436,7 +435,7 @@ def _init_dict(self, data, index, columns, dtype=None): else: v = np.empty(len(index), dtype=dtype) - v.fill(NA) + v.fill(np.nan) else: v = data[k] data_names.append(k) @@ -1437,8 +1436,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, columns : sequence, optional Columns to write header : boolean or list of string, default True - Write out column names. If a list of string is given it is assumed - to be aliases for the column names + Write out the column names. If a list of strings is given it is + assumed to be aliases for the column names index : boolean, default True Write row names (index) index_label : string or sequence, or False, default None @@ -1622,8 +1621,9 @@ def to_parquet(self, fname, engine='auto', compression='snappy', to_parquet(self, fname, engine, compression=compression, **kwargs) - @Substitution(header='Write out column names. If a list of string is given, \ -it is assumed to be aliases for the column names') + @Substitution(header='Write out the column names. 
If a list of strings ' + 'is given, it is assumed to be aliases for the ' + 'column names') @Appender(fmt.docstring_to_string, indents=1) def to_string(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, @@ -2805,7 +2805,7 @@ def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, return frame - def _reindex_index(self, new_index, method, copy, level, fill_value=NA, + def _reindex_index(self, new_index, method, copy, level, fill_value=np.nan, limit=None, tolerance=None): new_index, indexer = self.index.reindex(new_index, method=method, level=level, limit=limit, @@ -2814,8 +2814,8 @@ def _reindex_index(self, new_index, method, copy, level, fill_value=NA, copy=copy, fill_value=fill_value, allow_dups=False) - def _reindex_columns(self, new_columns, method, copy, level, fill_value=NA, - limit=None, tolerance=None): + def _reindex_columns(self, new_columns, method, copy, level, + fill_value=np.nan, limit=None, tolerance=None): new_columns, indexer = self.columns.reindex(new_columns, method=method, level=level, limit=limit, tolerance=tolerance) @@ -3794,7 +3794,7 @@ def _combine_series(self, other, func, fill_value=None, axis=None, def _combine_series_infer(self, other, func, level=None, fill_value=None, try_cast=True): if len(other) == 0: - return self * NA + return self * np.nan if len(self) == 0: # Ambiguous case, use _series so works with DataFrame @@ -3948,7 +3948,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): if do_fill: arr = _ensure_float(arr) - arr[this_mask & other_mask] = NA + arr[this_mask & other_mask] = np.nan # try to downcast back to the original dtype if needs_i8_conversion_i: @@ -4567,7 +4567,7 @@ def _apply_empty_result(self, func, axis, reduce, *args, **kwds): pass if reduce: - return Series(NA, index=self._get_agg_axis(axis)) + return Series(np.nan, index=self._get_agg_axis(axis)) else: return self.copy() @@ -5185,7 +5185,7 @@ def corr(self, method='pearson', min_periods=1): valid = mask[i] & mask[j] if valid.sum() < min_periods: - c = NA + c = np.nan elif i == j: c = 1. 
elif not valid.all(): @@ -5509,7 +5509,7 @@ def idxmin(self, axis=0, skipna=True): axis = self._get_axis_number(axis) indices = nanops.nanargmin(self.values, axis=axis, skipna=skipna) index = self._get_axis(axis) - result = [index[i] if i >= 0 else NA for i in indices] + result = [index[i] if i >= 0 else np.nan for i in indices] return Series(result, index=self._get_agg_axis(axis)) def idxmax(self, axis=0, skipna=True): @@ -5540,7 +5540,7 @@ def idxmax(self, axis=0, skipna=True): axis = self._get_axis_number(axis) indices = nanops.nanargmax(self.values, axis=axis, skipna=skipna) index = self._get_axis(axis) - result = [index[i] if i >= 0 else NA for i in indices] + result = [index[i] if i >= 0 else np.nan for i in indices] return Series(result, index=self._get_agg_axis(axis)) def _get_agg_axis(self, axis_num): @@ -5778,9 +5778,8 @@ def isin(self, values): 2 True True """ if isinstance(values, dict): - from collections import defaultdict from pandas.core.reshape.concat import concat - values = defaultdict(list, values) + values = collections.defaultdict(list, values) return concat((self.iloc[:, [i]].isin(values[col]) for i, col in enumerate(self.columns)), axis=1) elif isinstance(values, Series): @@ -6143,7 +6142,7 @@ def _homogenize(data, index, dtype=None): v = _dict_compat(v) else: v = dict(v) - v = lib.fast_multiget(v, oindex.values, default=NA) + v = lib.fast_multiget(v, oindex.values, default=np.nan) v = _sanitize_array(v, index, dtype=dtype, copy=False, raise_cast_failure=False) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ec44dce0da9bc..442ec93d94023 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1207,7 +1207,7 @@ def _repr_latex_(self): columns : sequence, optional Columns to write header : boolean or list of string, default True - Write out column names. If a list of string is given it is + Write out the column names. If a list of strings is given it is assumed to be aliases for the column names index : boolean, default True Write row names (index) @@ -1702,8 +1702,9 @@ def to_xarray(self): .. versionadded:: 0.20.0 """ - @Substitution(header='Write out column names. If a list of string is given, \ -it is assumed to be aliases for the column names.') + @Substitution(header='Write out the column names. If a list of strings ' + 'is given, it is assumed to be aliases for the ' + 'column names.') @Appender(_shared_docs['to_latex'] % _shared_doc_kwargs) def to_latex(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 8f6b00fd204cc..109183827de4e 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1,5 +1,5 @@ # pylint: disable=W0223 - +import textwrap import warnings import numpy as np from pandas.compat import range, zip @@ -1288,13 +1288,13 @@ class _IXIndexer(_NDFrameIndexer): def __init__(self, obj, name): - _ix_deprecation_warning = """ -.ix is deprecated. Please use -.loc for label based indexing or -.iloc for positional indexing + _ix_deprecation_warning = textwrap.dedent(""" + .ix is deprecated. 
Please use + .loc for label based indexing or + .iloc for positional indexing -See the documentation here: -http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated""" # noqa + See the documentation here: + http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated""") # noqa warnings.warn(_ix_deprecation_warning, DeprecationWarning, stacklevel=3) diff --git a/pandas/core/series.py b/pandas/core/series.py index 60d268c89a9d7..996b483ff6092 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -10,7 +10,6 @@ import warnings from textwrap import dedent -from numpy import nan, ndarray import numpy as np import numpy.ma as ma @@ -210,13 +209,13 @@ def __init__(self, data=None, index=None, dtype=None, name=None, data = np.nan # GH #12169 elif isinstance(index, (PeriodIndex, TimedeltaIndex)): - data = ([data.get(i, nan) for i in index] + data = ([data.get(i, np.nan) for i in index] if data else np.nan) else: data = lib.fast_multiget(data, index.values, default=np.nan) except TypeError: - data = ([data.get(i, nan) for i in index] + data = ([data.get(i, np.nan) for i in index] if data else np.nan) elif isinstance(data, SingleBlockManager): @@ -1686,7 +1685,7 @@ def _binop(self, other, func, level=None, fill_value=None): result.name = None return result - def combine(self, other, func, fill_value=nan): + def combine(self, other, func, fill_value=np.nan): """ Perform elementwise binary operation on two Series using given function with optional fill value when an index is missing from one Series or @@ -2952,7 +2951,6 @@ def _dir_additions(self): Series._add_numeric_operations() Series._add_series_only_operations() Series._add_series_or_dataframe_operations() -_INDEX_TYPES = ndarray, Index, list, tuple # ----------------------------------------------------------------------------- # Supplementary functions From 5b42bdfc638cf536ac05354f6a82be11f337d56d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Aug 2017 21:05:07 -0400 Subject: [PATCH 038/141] DOC: typos in whatsnew --- doc/source/whatsnew/v0.21.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 8a8c758517469..dbce31ad168c9 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -9,7 +9,7 @@ users upgrade to this version. Highlights include: -- Integration with `Apache Parquet `__, including a new top-level :func:`pd.read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. +- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. From 56957cf6210e9ca5d7a9a0f0eff244d707377d08 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 3 Aug 2017 08:07:18 -0400 Subject: [PATCH 039/141] DOC: whatsnew 0.21.0 fixes --- doc/source/api.rst | 8 ++++++++ doc/source/whatsnew/v0.21.0.txt | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 1a4ee68ef52c4..12e6c7ad7f630 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -109,6 +109,14 @@ Feather read_feather +Parquet +~~~~~~~ + +.. 
autosummary:: + :toctree: generated/ + + read_parquet + SAS ~~~ diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index dbce31ad168c9..3cce44c515df8 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -220,7 +220,7 @@ Other API Changes - ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`) - Removed the ``@slow`` decorator from ``pandas.util.testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`) - Moved definition of ``MergeError`` to the ``pandas.errors`` module. -- The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is still supported and causes a ``FutureWarning`` to be emitted (:issue:`14636`) +- The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is deprecated and will show a ``FutureWarning`` (:issue:`14636`) .. _whatsnew_0210.deprecations: @@ -318,7 +318,7 @@ Reshaping - Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`) - Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) - Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`) -- Bug in ``pd.crosstab()`` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`) +- Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`) Numeric ^^^^^^^ From d2e21c3ef8d2babe89725d2c4cf149d49d55085f Mon Sep 17 00:00:00 2001 From: Carter Green Date: Thu, 3 Aug 2017 11:37:26 -0500 Subject: [PATCH 040/141] BUG: Fix CSV parsing of singleton list header (#17090) Closes gh-7757. --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/_libs/parsers.pyx | 21 ++++++++++++--------- pandas/io/parsers.py | 7 ++++--- pandas/tests/io/parser/header.py | 7 +++++++ 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 3cce44c515df8..7eeab055ca0a8 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -287,6 +287,7 @@ I/O - Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) - Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696`, :issue:`16798`). - Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`). 
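A minimal sketch of the fixed behavior described in the entry below, assuming a post-fix pandas; the ``StringIO`` import and variable names are illustrative, not part of the patch:

    from io import StringIO
    import pandas as pd

    data = "a,b,c\n0,1,2\n1,2,3"
    # a single-element list no longer takes the MultiIndex code path,
    # so header=[0] parses the same as header=0 instead of all-NaN rows
    df = pd.read_csv(StringIO(data), header=[0])
    print(df)
    #    a  b  c
    # 0  0  1  2
    # 1  1  2  3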
+- Bug in :func:`read_csv` when called with a single-element list ``header`` would return a ``DataFrame`` of all NaN values (:issue:`7757`) - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 077c355e785a3..ae420da2102b2 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -535,23 +535,26 @@ cdef class TextReader: self.parser_start = 0 self.header = [] else: - if isinstance(header, list) and len(header): - # need to artifically skip the final line - # which is still a header line - header = list(header) - header.append(header[-1] + 1) + if isinstance(header, list): + if len(header) > 1: + # need to artifically skip the final line + # which is still a header line + header = list(header) + header.append(header[-1] + 1) + self.parser.header_end = header[-1] + self.has_mi_columns = 1 + else: + self.parser.header_end = header[0] + self.parser_start = header[-1] + 1 self.parser.header_start = header[0] - self.parser.header_end = header[-1] self.parser.header = header[0] - self.parser_start = header[-1] + 1 - self.has_mi_columns = 1 self.header = header else: self.parser.header_start = header self.parser.header_end = header - self.parser.header = header self.parser_start = header + 1 + self.parser.header = header self.header = [ header ] self.names = names diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 41b0cdd6dd250..9c76d3126890c 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2283,10 +2283,11 @@ def _infer_columns(self): if self.header is not None: header = self.header - # we have a mi columns, so read an extra line if isinstance(header, (list, tuple, np.ndarray)): - have_mi_columns = True - header = list(header) + [header[-1] + 1] + have_mi_columns = len(header) > 1 + # we have a mi columns, so read an extra line + if have_mi_columns: + header = list(header) + [header[-1] + 1] else: have_mi_columns = False header = [header] diff --git a/pandas/tests/io/parser/header.py b/pandas/tests/io/parser/header.py index 4935fd2cd910a..50ae4dae541ac 100644 --- a/pandas/tests/io/parser/header.py +++ b/pandas/tests/io/parser/header.py @@ -286,3 +286,10 @@ def test_non_int_header(self): self.read_csv(StringIO(data), sep=',', header=['a', 'b']) with tm.assert_raises_regex(ValueError, msg): self.read_csv(StringIO(data), sep=',', header='string_header') + + def test_singleton_header(self): + # See GH #7757 + data = """a,b,c\n0,1,2\n1,2,3""" + df = self.read_csv(StringIO(data), header=[0]) + expected = DataFrame({"a": [0, 1], "b": [1, 2], "c": [2, 3]}) + tm.assert_frame_equal(df, expected) From 20487bfd77478766d3e05a9b3f9b5bdb09a69e40 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 3 Aug 2017 15:06:05 -0600 Subject: [PATCH 041/141] ENH: Support strings containing '%' in add_prefix/add_suffix (#17151) (#17162) --- doc/source/whatsnew/v0.21.0.txt | 3 ++- pandas/core/internals.py | 5 +++-- pandas/tests/frame/test_api.py | 8 ++++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 7eeab055ca0a8..1f794ad7d9ca2 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -81,7 +81,8 @@ Other Enhancements - :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`) - :func:`date_range` now 
accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`) - :func:`read_html` handles colspan and rowspan arguments and attempts to infer a header if the header is not explicitly specified (:issue:`17054`) -- Integration with `Apache Parquet `__, including a new top-level :func:`pd.read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. +- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. +- :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) .. _whatsnew_0210.api_breaking: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 25c367fcbd968..37fc1c01061ec 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -5,6 +5,7 @@ import operator from datetime import datetime, timedelta, date from collections import defaultdict +from functools import partial import numpy as np @@ -2959,11 +2960,11 @@ def rename_axis(self, mapper, axis, copy=True, level=None): return obj def add_prefix(self, prefix): - f = (str(prefix) + '%s').__mod__ + f = partial('{prefix}{}'.format, prefix=prefix) return self.rename_axis(f, axis=0) def add_suffix(self, suffix): - f = ('%s' + str(suffix)).__mod__ + f = partial('{}{suffix}'.format, suffix=suffix) return self.rename_axis(f, axis=0) @property diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index f63918c97c614..8c4c13b66ffa9 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -68,6 +68,14 @@ def test_add_prefix_suffix(self): expected = pd.Index(['%s#foo' % c for c in self.frame.columns]) tm.assert_index_equal(with_suffix.columns, expected) + with_pct_prefix = self.frame.add_prefix('%') + expected = pd.Index(['%{}'.format(c) for c in self.frame.columns]) + tm.assert_index_equal(with_pct_prefix.columns, expected) + + with_pct_suffix = self.frame.add_suffix('%') + expected = pd.Index(['{}%'.format(c) for c in self.frame.columns]) + tm.assert_index_equal(with_pct_suffix.columns, expected) + class TestDataFrameMisc(SharedWithSparse, TestData): From b4b4c77046322f98d345ce068ec972f65b60c84d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 4 Aug 2017 09:44:53 +0200 Subject: [PATCH 042/141] REF: repr - allow block to override values that get formatted (#17143) --- pandas/core/internals.py | 8 +++++ pandas/core/series.py | 6 ++++ pandas/io/formats/format.py | 6 ++-- pandas/tests/internals/__init__.py | 0 pandas/tests/internals/test_external_block.py | 29 +++++++++++++++++++ .../tests/{ => internals}/test_internals.py | 0 setup.py | 1 + 7 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 pandas/tests/internals/__init__.py create mode 100644 pandas/tests/internals/test_external_block.py rename pandas/tests/{ => internals}/test_internals.py (100%) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 37fc1c01061ec..0f85c4e046e5a 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -160,6 +160,10 @@ def internal_values(self, dtype=None): """ return self.values + def formatting_values(self): + """Return the internal values used by the DataFrame/SeriesFormatter""" + return self.internal_values() + def get_values(self, dtype=None): """ return an internal format, currently just the ndarray @@ -4317,6 +4321,10 @@ def external_values(self): def internal_values(self): return self._block.internal_values() + def 
formatting_values(self): + """Return the internal values used by the DataFrame/SeriesFormatter""" + return self._block.formatting_values() + def get_values(self): """ return a dense type view """ return np.array(self._block.to_dense(), copy=False) diff --git a/pandas/core/series.py b/pandas/core/series.py index 996b483ff6092..e42ba3908a29a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -397,6 +397,12 @@ def _values(self): """ return the internal repr of this data """ return self._data.internal_values() + def _formatting_values(self): + """Return the values that can be formatted (used by SeriesFormatter + and DataFrameFormatter) + """ + return self._data.formatting_values() + def get_values(self): """ same as values (but handles sparseness conversions); is a view """ return self._data.get_values() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 2b322431bd301..733fd3bd39b52 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -237,7 +237,8 @@ def _get_formatted_index(self): return fmt_index, have_header def _get_formatted_values(self): - return format_array(self.tr_series._values, None, + values_to_format = self.tr_series._formatting_values() + return format_array(values_to_format, None, float_format=self.float_format, na_rep=self.na_rep) def to_string(self): @@ -694,7 +695,8 @@ def to_latex(self, column_format=None, longtable=False, encoding=None, def _format_col(self, i): frame = self.tr_frame formatter = self._get_formatter(i) - return format_array(frame.iloc[:, i]._values, formatter, + values_to_format = frame.iloc[:, i]._formatting_values() + return format_array(values_to_format, formatter, float_format=self.float_format, na_rep=self.na_rep, space=self.col_space, decimal=self.decimal) diff --git a/pandas/tests/internals/__init__.py b/pandas/tests/internals/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/internals/test_external_block.py b/pandas/tests/internals/test_external_block.py new file mode 100644 index 0000000000000..cccde76c3e1d9 --- /dev/null +++ b/pandas/tests/internals/test_external_block.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# pylint: disable=W0102 + +import numpy as np + +import pandas as pd +from pandas.core.internals import Block, BlockManager, SingleBlockManager + + +class CustomBlock(Block): + + def formatting_values(self): + return np.array(["Val: {}".format(i) for i in self.values]) + + +def test_custom_repr(): + values = np.arange(3, dtype='int64') + + # series + block = CustomBlock(values, placement=slice(0, 3)) + + s = pd.Series(SingleBlockManager(block, pd.RangeIndex(3))) + assert repr(s) == '0 Val: 0\n1 Val: 1\n2 Val: 2\ndtype: int64' + + # dataframe + block = CustomBlock(values.reshape(1, -1), placement=slice(0, 1)) + blk_mgr = BlockManager([block], [['col'], range(3)]) + df = pd.DataFrame(blk_mgr) + assert repr(df) == ' col\n0 Val: 0\n1 Val: 1\n2 Val: 2' diff --git a/pandas/tests/test_internals.py b/pandas/tests/internals/test_internals.py similarity index 100% rename from pandas/tests/test_internals.py rename to pandas/tests/internals/test_internals.py diff --git a/setup.py b/setup.py index d5791862cfb19..a912b25328954 100755 --- a/setup.py +++ b/setup.py @@ -670,6 +670,7 @@ def pxd(name): 'pandas.tests.indexes.datetimes', 'pandas.tests.indexes.timedeltas', 'pandas.tests.indexes.period', + 'pandas.tests.internals', 'pandas.tests.io', 'pandas.tests.io.json', 'pandas.tests.io.parser', From b720f0d0c4cb1210c84289bc364879f607cbf8b5 Mon Sep 
17 00:00:00 2001 From: gfyoung Date: Sun, 6 Aug 2017 22:49:26 -0700 Subject: [PATCH 043/141] MAINT: Drop unnecessary newlines in issue template --- .github/ISSUE_TEMPLATE.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 6ab03c9907475..237e61487d13a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -8,11 +8,9 @@ [this should explain **why** the current behaviour is a problem and why the expected output is a better solution.] -**Note**: We receive a lot of issues on our GitHub tracker, so it is very possible that your issue has been posted before. -Please check first before submitting so that we do not have to handle and close duplicates! +**Note**: We receive a lot of issues on our GitHub tracker, so it is very possible that your issue has been posted before. Please check first before submitting so that we do not have to handle and close duplicates! -**Note**: Many problems can be resolved by simply upgrading `pandas` to the latest version. Before submitting, please check -if that solution works for you. If possible, you may want to check if `master` addresses this issue, but that is not necessary. +**Note**: Many problems can be resolved by simply upgrading `pandas` to the latest version. Before submitting, please check if that solution works for you. If possible, you may want to check if `master` addresses this issue, but that is not necessary. #### Expected Output From 43dab454f2118c5acc9630230f8a38e3865e995a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 7 Aug 2017 06:44:00 -0400 Subject: [PATCH 044/141] remove direct import of nan Author: Brock Mendel Closes #17185 from jbrockmendel/dont_import_nan and squashes the following commits: ee260b86a [Brock Mendel] remove direct import of nan --- pandas/core/sparse/frame.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index d8c0aa41edac1..f30bd5c36a61b 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -5,7 +5,6 @@ from __future__ import division # pylint: disable=E1101,E1103,W0231,E0202 -from numpy import nan from pandas.compat import lmap from pandas import compat import numpy as np @@ -156,7 +155,7 @@ def _init_dict(self, data, index, columns, dtype=None): v = v.copy() else: if isinstance(v, dict): - v = [v.get(i, nan) for i in index] + v = [v.get(i, np.nan) for i in index] v = sp_maker(v) sdict[k] = v From 94a734a483c6487719a400422ac851856e1c94bc Mon Sep 17 00:00:00 2001 From: Jean Helie Date: Mon, 7 Aug 2017 11:46:06 +0100 Subject: [PATCH 045/141] use == to test String equality (#17171) --- pandas/io/parquet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 0a4426b55b323..09603fd6fdcce 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -10,10 +10,10 @@ def get_engine(engine): """ return our implementation """ - if engine is 'auto': + if engine == 'auto': engine = get_option('io.parquet.engine') - if engine is 'auto': + if engine == 'auto': # try engines in this order try: return PyArrowImpl() From e143ee143d9e37e01a3c39a1fd5db3324604d7f9 Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Mon, 7 Aug 2017 06:05:58 -0500 Subject: [PATCH 046/141] ENH: Add warning when setting into nonexistent attribute (#16951) closes #7175 closes #5904 --- doc/source/indexing.rst | 35 +++++++++++++++++++--- doc/source/whatsnew/v0.21.0.txt | 46 
++++++++++++++++++++++++++++- pandas/core/generic.py | 12 +++++++- pandas/tests/dtypes/test_generic.py | 38 ++++++++++++++++++++++++ pandas/tests/io/test_pytables.py | 4 +-- 5 files changed, 127 insertions(+), 8 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 1659d57b33b84..53a259ad6eb15 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -227,10 +227,6 @@ as an attribute: dfa.A panel.one -You can use attribute access to modify an existing element of a Series or column of a DataFrame, but be careful; -if you try to use attribute access to create a new column, it fails silently, creating a new attribute rather than a -new column. - .. ipython:: python sa.a = 5 @@ -267,6 +263,37 @@ You can also assign a ``dict`` to a row of a ``DataFrame``: x.iloc[1] = dict(x=9, y=99) x +You can use attribute access to modify an existing element of a Series or column of a DataFrame, but be careful; +if you try to use attribute access to create a new column, it creates a new attribute rather than a +new column. In 0.21.0 and later, this will raise a ``UserWarning``: + +.. code-block:: ipython + + In[1]: df = pd.DataFrame({'one': [1., 2., 3.]}) + In[2]: df.two = [4, 5, 6] + UserWarning: Pandas doesn't allow Series to be assigned into nonexistent columns - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute_access + In[3]: df + Out[3]: + one + 0 1.0 + 1 2.0 + 2 3.0 + +Similarly, it is possible to create a column with a name which collides with one of Pandas's +built-in methods or attributes, which can cause confusion later when attempting to access +that column as an attribute. This behavior now warns: + +.. code-block:: ipython + + In[4]: df['sum'] = [5., 7., 9.] + UserWarning: Column name 'sum' collides with a built-in method, which will cause unexpected attribute behavior + In[5]: df.sum + Out[5]: + + Slicing ranges -------------- diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 1f794ad7d9ca2..7acb6ce8c6840 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -29,7 +29,6 @@ New features - Added ``skipna`` parameter to :func:`~pandas.api.types.infer_dtype` to support type inference in the presence of missing values (:issue:`17059`). - .. _whatsnew_0210.enhancements.infer_objects: ``infer_objects`` type conversion @@ -62,6 +61,51 @@ using the :func:`to_numeric` function (or :func:`to_datetime`, :func:`to_timedel df['C'] = pd.to_numeric(df['C'], errors='coerce') df.dtypes +.. _whatsnew_0210.enhancements.attribute_access: + +Improved warnings when attempting to create columns +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +New users are often flummoxed by the relationship between column operations and attribute +access on ``DataFrame`` instances (:issue:`5904` & :issue:`7175`). Two specific instances +of this confusion include attempting to create a new column by setting into an attribute: + +.. code-block:: ipython + + In[1]: df = pd.DataFrame({'one': [1., 2., 3.]}) + In[2]: df.two = [4, 5, 6] + +This does not raise any obvious exceptions, but also does not create a new column: + +.. code-block:: ipython + + In[3]: df + Out[3]: + one + 0 1.0 + 1 2.0 + 2 3.0 + +The second source of confusion is creating a column whose name collides with a method or +attribute already in the instance namespace: + +.. code-block:: ipython + + In[4]: df['sum'] = [5., 7., 9.] + +This does not permit that column to be accessed as an attribute: + +.. 
code-block:: ipython + + In[5]: df.sum + Out[5]: + + +Both of these now raise a ``UserWarning`` about the potential for unexpected behavior. See :ref:`Attribute Access `. + .. _whatsnew_0210.enhancements.other: Other Enhancements diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 442ec93d94023..2d52eed81d22b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -27,7 +27,7 @@ pandas_dtype) from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask from pandas.core.dtypes.missing import isna, notna -from pandas.core.dtypes.generic import ABCSeries, ABCPanel +from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame from pandas.core.common import (_values_from_object, _maybe_box_datetimelike, @@ -1907,6 +1907,10 @@ def _slice(self, slobj, axis=0, kind=None): return result def _set_item(self, key, value): + if isinstance(key, str) and callable(getattr(self, key, None)): + warnings.warn("Column name '{key}' collides with a built-in " + "method, which will cause unexpected attribute " + "behavior".format(key=key), stacklevel=3) self._data.set(key, value) self._clear_item_cache() @@ -3357,6 +3361,12 @@ def __setattr__(self, name, value): else: object.__setattr__(self, name, value) except (AttributeError, TypeError): + if isinstance(self, ABCDataFrame) and (is_list_like(value)): + warnings.warn("Pandas doesn't allow Series to be assigned " + "into nonexistent columns - see " + "https://pandas.pydata.org/pandas-docs/" + "stable/indexing.html#attribute-access", + stacklevel=2) object.__setattr__(self, name, value) # ---------------------------------------------------------------------- diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 653d7d3082c08..ec850cc34e23b 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd from pandas.core.dtypes import generic as gt +from pandas.util import testing as tm class TestABCClasses(object): @@ -38,3 +39,40 @@ def test_abc_types(self): assert isinstance(self.sparse_array, gt.ABCSparseArray) assert isinstance(self.categorical, gt.ABCCategorical) assert isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod) + + +def test_setattr_warnings(): + # GH5904 - Suggestion: Warning for DataFrame colname-methodname clash + # GH7175 - GOTCHA: You can't use dot notation to add a column... 
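A hedged sketch exercising both warnings this patch adds (the ``simplefilter`` call and the final assert are illustrative additions, not part of the patch):

    import warnings
    import pandas as pd

    df = pd.DataFrame({'one': [1., 2., 3.]})

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')
        df.two = [4, 5, 6]        # list-like assigned to a nonexistent column
        df['sum'] = [5., 7., 9.]  # column name collides with DataFrame.sum
    # both operations above should emit a UserWarning on pandas >= 0.21
    assert all(issubclass(x.category, UserWarning) for x in w) and len(w) == 2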
+ d = {'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']), + 'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])} + df = pd.DataFrame(d) + + with catch_warnings(record=True) as w: + # successfully add new column + # this should not raise a warning + df['three'] = df.two + 1 + assert len(w) == 0 + assert df.three.sum() > df.two.sum() + + with catch_warnings(record=True) as w: + # successfully modify column in place + # this should not raise a warning + df.one += 1 + assert len(w) == 0 + assert df.one.iloc[0] == 2 + + with catch_warnings(record=True) as w: + # successfully add an attribute to a series + # this should not raise a warning + df.two.not_an_index = [1, 2] + assert len(w) == 0 + + with tm.assert_produces_warning(UserWarning): + # warn when setting column to nonexistent name + df.four = df.two + 2 + assert df.four.sum() > df.two.sum() + + with tm.assert_produces_warning(UserWarning): + # warn when column has same name as method + df['sum'] = df.two diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index fc17b5f85b68c..f33ba7627101e 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2011,7 +2011,7 @@ def check(obj, comparator): df['string'] = 'foo' df['float322'] = 1. df['float322'] = df['float322'].astype('float32') - df['bool'] = df['float322'] > 0 + df['boolean'] = df['float322'] > 0 df['time1'] = Timestamp('20130101') df['time2'] = Timestamp('20130102') check(df, tm.assert_frame_equal) @@ -2141,7 +2141,7 @@ def test_table_values_dtypes_roundtrip(self): df1['string'] = 'foo' df1['float322'] = 1. df1['float322'] = df1['float322'].astype('float32') - df1['bool'] = df1['float32'] > 0 + df1['boolean'] = df1['float32'] > 0 df1['time1'] = Timestamp('20130101') df1['time2'] = Timestamp('20130102') From 5a523bbc1e01c2b33084e7297dcd3a0d3fe30f52 Mon Sep 17 00:00:00 2001 From: Nathan Ford Date: Mon, 7 Aug 2017 07:56:53 -0500 Subject: [PATCH 047/141] DOC: added string processing comparison with SAS (#16497) --- doc/source/comparison_with_sas.rst | 140 +++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/doc/source/comparison_with_sas.rst b/doc/source/comparison_with_sas.rst index 33a347de0bf5b..1f2424d8a22f3 100644 --- a/doc/source/comparison_with_sas.rst +++ b/doc/source/comparison_with_sas.rst @@ -357,6 +357,146 @@ takes a list of columns to sort by. tips = tips.sort_values(['sex', 'total_bill']) tips.head() + +String Processing +----------------- + +Length +~~~~~~ + +SAS determines the length of a character string with the +`LENGTHN `__ +and `LENGTHC `__ +functions. ``LENGTHN`` excludes trailing blanks and ``LENGTHC`` includes trailing blanks. + +.. code-block:: none + + data _null_; + set tips; + put(LENGTHN(time)); + put(LENGTHC(time)); + run; + +Python determines the length of a character string with the ``len`` function. +``len`` includes trailing blanks. Use ``len`` and ``rstrip`` to exclude +trailing blanks. + +.. ipython:: python + + tips['time'].str.len().head() + tips['time'].str.rstrip().str.len().head() + + +Find +~~~~ + +SAS determines the position of a character in a string with the +`FINDW `__ function. +``FINDW`` takes the string defined by the first argument and searches for the first position of the substring +you supply as the second argument. + +.. code-block:: none + + data _null_; + set tips; + put(FINDW(sex,'ale')); + run; + +Python determines the position of a character in a string with the +``find`` function. 
``find`` searches for the first position of the +substring. If the substring is found, the function returns its +position. Keep in mind that Python indexes are zero-based and +the function will return -1 if it fails to find the substring. + +.. ipython:: python + + tips['sex'].str.find("ale").head() + + +Substring +~~~~~~~~~ + +SAS extracts a substring from a string based on its position with the +`SUBSTR `__ function. + +.. code-block:: none + + data _null_; + set tips; + put(substr(sex,1,1)); + run; + +With pandas you can use ``[]`` notation to extract a substring +from a string by position locations. Keep in mind that Python +indexes are zero-based. + +.. ipython:: python + + tips['sex'].str[0:1].head() + + +Scan +~~~~ + +The SAS `SCAN `__ +function returns the nth word from a string. The first argument is the string you want to parse and the +second argument specifies which word you want to extract. + +.. code-block:: none + + data firstlast; + input String $60.; + First_Name = scan(string, 1); + Last_Name = scan(string, -1); + datalines2; + John Smith; + Jane Cook; + ;;; + run; + +Python extracts a substring from a string based on its text +by using regular expressions. There are much more powerful +approaches, but this just shows a simple approach. + +.. ipython:: python + + firstlast = pd.DataFrame({'String': ['John Smith', 'Jane Cook']}) + firstlast['First_Name'] = firstlast['String'].str.split(" ", expand=True)[0] + firstlast['Last_Name'] = firstlast['String'].str.rsplit(" ", expand=True)[0] + firstlast + + +Upcase, Lowcase, and Propcase +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The SAS `UPCASE `__ +`LOWCASE `__ and +`PROPCASE `__ +functions change the case of the argument. + +.. code-block:: none + + data firstlast; + input String $60.; + string_up = UPCASE(string); + string_low = LOWCASE(string); + string_prop = PROPCASE(string); + datalines2; + John Smith; + Jane Cook; + ;;; + run; + +The equivalent Python functions are ``upper``, ``lower``, and ``title``. + +.. 
ipython:: python + + firstlast = pd.DataFrame({'String': ['John Smith', 'Jane Cook']}) + firstlast['string_up'] = firstlast['String'].str.upper() + firstlast['string_low'] = firstlast['String'].str.lower() + firstlast['string_prop'] = firstlast['String'].str.title() + firstlast + Merging ------- From 0bfad7c9345382b916953e59d41ee8a3a019bc43 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 7 Aug 2017 06:10:36 -0700 Subject: [PATCH 048/141] CLN: remove unused get methods in internals (#17169) * Remove unused get methods that would raise AttributeError if called * Remove unnecessary import --- pandas/core/internals.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 0f85c4e046e5a..b9fca1dfbb7a1 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -327,10 +327,6 @@ def reindex_axis(self, indexer, method=None, axis=1, fill_value=None, fill_value=fill_value, mask_info=mask_info) return self.make_block(new_values, fastpath=True) - def get(self, item): - loc = self.items.get_loc(item) - return self.values[loc] - def iget(self, i): return self.values[i] @@ -1662,13 +1658,6 @@ def set(self, locs, values, check=False): assert locs.tolist() == [0] self.values = values - def get(self, item): - if self.ndim == 1: - loc = self.items.get_loc(item) - return self.values[loc] - else: - return self.values - def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False, mgr=None): """ @@ -4730,8 +4719,6 @@ def _concat_indexes(indexes): def _block2d_to_blocknd(values, placement, shape, labels, ref_items): """ pivot to the labels shape """ - from pandas.core.internals import make_block - panel_shape = (len(placement),) + shape # TODO: lexsort depth needs to be 2!! From a4e49093987e225bb5ecc31a9395071417523aef Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 7 Aug 2017 11:04:07 -0700 Subject: [PATCH 049/141] TST: Partial Boolean DataFrame Indexing (#17186) Closes gh-17170 --- pandas/tests/indexing/test_indexing.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 3ecd1f3029cad..f1f51f26df55c 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -861,6 +861,20 @@ def test_maybe_numeric_slice(self): expected = [1] assert result == expected + def test_partial_boolean_frame_indexing(self): + # GH 17170 + df = pd.DataFrame(np.arange(9.).reshape(3, 3), + index=list('abc'), + columns=list('ABC')) + index_df = pd.DataFrame(1, index=list('ab'), columns=list('AB')) + result = df[index_df.notnull()] + expected = pd.DataFrame(np.array([[0., 1., np.nan], + [3., 4., np.nan], + [np.nan] * 3]), + index=list('abc'), + columns=list('ABC')) + tm.assert_frame_equal(result, expected) + class TestSeriesNoneCoercion(object): EXPECTED_RESULTS = [ From e8fab8a1917c9c9f020cdce3e5e33cae1343b6af Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 7 Aug 2017 15:18:33 -0700 Subject: [PATCH 050/141] CLN: Reformat docstring for IPython fixture --- pandas/conftest.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 763a41ee2e2aa..90e5ac864e96f 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -56,9 +56,12 @@ def spmatrix(request): @pytest.fixture def ip(): - """An instance of IPython.InteractiveShell. + """ + Get an instance of IPython.InteractiveShell. + Will raise a skip if IPython is not installed. 
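A hypothetical usage sketch for this fixture; the test name and cell contents are invented, while ``run_cell`` is standard ``InteractiveShell`` API:

    def test_frame_in_ipython(ip):
        # 'ip' is the InteractiveShell instance provided by the fixture
        ip.run_cell("import pandas as pd; df = pd.DataFrame({'a': [1]})")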
""" + pytest.importorskip('IPython', minversion="6.0.0") from IPython.core.interactiveshell import InteractiveShell return InteractiveShell() From d089d44978ebeaca91508da8ca9122a5e82cedd2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 8 Aug 2017 16:28:42 -0700 Subject: [PATCH 051/141] Define Series.plot and Series.hist in class definition (#17199) --- pandas/core/series.py | 27 +++++++++++++-------------- pandas/plotting/_converter.py | 4 ++-- pandas/plotting/_core.py | 10 +++++++--- pandas/plotting/_tools.py | 7 +++---- 4 files changed, 25 insertions(+), 23 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index e42ba3908a29a..61508c11cae4b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -76,6 +76,8 @@ from pandas._libs import index as libindex, tslib as libts, lib, iNaT from pandas.core.config import get_option +import pandas.plotting._core as gfx + __all__ = ['Series'] _shared_doc_kwargs = dict( @@ -2952,12 +2954,23 @@ def _dir_additions(self): pass return rv + # ---------------------------------------------------------------------- + # Add plotting methods to Series + plot = base.AccessorProperty(gfx.SeriesPlotMethods, + gfx.SeriesPlotMethods) + hist = gfx.hist_series + Series._setup_axes(['index'], info_axis=0, stat_axis=0, aliases={'rows': 0}) Series._add_numeric_operations() Series._add_series_only_operations() Series._add_series_or_dataframe_operations() +# Add arithmetic! +ops.add_flex_arithmetic_methods(Series, **ops.series_flex_funcs) +ops.add_special_arithmetic_methods(Series, **ops.series_special_funcs) + + # ----------------------------------------------------------------------------- # Supplementary functions @@ -3129,17 +3142,3 @@ def create_from_value(value, index, dtype): subarr = np.array(data, dtype=object, copy=copy) return subarr - - -# ---------------------------------------------------------------------- -# Add plotting methods to Series - -import pandas.plotting._core as _gfx # noqa - -Series.plot = base.AccessorProperty(_gfx.SeriesPlotMethods, - _gfx.SeriesPlotMethods) -Series.hist = _gfx.hist_series - -# Add arithmetic! -ops.add_flex_arithmetic_methods(Series, **ops.series_flex_funcs) -ops.add_special_arithmetic_methods(Series, **ops.series_special_funcs) diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index 97295dfa7baf1..47d15195315ba 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -18,6 +18,7 @@ is_period_arraylike, is_nested_list_like ) +from pandas.core.dtypes.generic import ABCSeries from pandas.compat import lrange import pandas.compat as compat @@ -25,7 +26,6 @@ import pandas.core.common as com from pandas.core.index import Index -from pandas.core.series import Series from pandas.core.indexes.datetimes import date_range import pandas.core.tools.datetimes as tools import pandas.tseries.frequencies as frequencies @@ -175,7 +175,7 @@ def _dt_to_float_ordinal(dt): preserving hours, minutes, seconds and microseconds. Return value is a :func:`float`. 
""" - if (isinstance(dt, (np.ndarray, Index, Series) + if (isinstance(dt, (np.ndarray, Index, ABCSeries) ) and is_datetime64_ns_dtype(dt)): base = dates.epoch2num(dt.asi8 / 1.0E9) else: diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index b8d7cebe8a274..e5b9497993172 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -18,10 +18,12 @@ is_number, is_hashable, is_iterator) +from pandas.core.dtypes.generic import ABCSeries + from pandas.core.common import AbstractMethodError, _try_sort from pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import Index, MultiIndex -from pandas.core.series import Series + from pandas.core.indexes.period import PeriodIndex from pandas.compat import range, lrange, map, zip, string_types import pandas.compat as compat @@ -334,7 +336,7 @@ def result(self): def _compute_plot_data(self): data = self.data - if isinstance(data, Series): + if isinstance(data, ABCSeries): label = self.label if label is None and data.name is None: label = 'None' @@ -1575,6 +1577,7 @@ def maybe_color_bp(self, bp): def _make_plot(self): if self.subplots: + from pandas.core.series import Series self._return_obj = Series() for i, (label, y) in enumerate(self._iter_data()): @@ -2338,6 +2341,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, figsize=figsize, layout=layout) axes = _flatten(axes) + from pandas.core.series import Series ret = Series() for (key, group), ax in zip(grouped, axes): d = group.boxplot(ax=ax, column=column, fontsize=fontsize, @@ -2409,7 +2413,6 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None, _axes = _flatten(axes) - result = Series() ax_values = [] for i, col in enumerate(columns): @@ -2422,6 +2425,7 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None, ax_values.append(re_plotf) ax.grid(grid) + from pandas.core.series import Series result = Series(ax_values, index=columns) # Return axes in multiplot case, maybe revisit later # 985 diff --git a/pandas/plotting/_tools.py b/pandas/plotting/_tools.py index 0c2314087525c..389e238ccb96e 100644 --- a/pandas/plotting/_tools.py +++ b/pandas/plotting/_tools.py @@ -8,8 +8,8 @@ import numpy as np from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.generic import ABCSeries from pandas.core.index import Index -from pandas.core.series import Series from pandas.compat import range @@ -25,8 +25,7 @@ def format_date_labels(ax, rot): pass -def table(ax, data, rowLabels=None, colLabels=None, - **kwargs): +def table(ax, data, rowLabels=None, colLabels=None, **kwargs): """ Helper function to convert DataFrame and Series to matplotlib.table @@ -45,7 +44,7 @@ def table(ax, data, rowLabels=None, colLabels=None, matplotlib table object """ from pandas import DataFrame - if isinstance(data, Series): + if isinstance(data, ABCSeries): data = DataFrame(data, columns=[data.name]) elif isinstance(data, DataFrame): pass From b09b274954f39959fd573969aa10784cea8be527 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Wed, 9 Aug 2017 01:30:08 +0200 Subject: [PATCH 052/141] BUG: support pandas objects in iloc with old numpy versions (#17194) closes #17193 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/internals.py | 3 +++ pandas/tests/indexing/test_iloc.py | 13 +++++++++++++ 3 files changed, 17 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 7acb6ce8c6840..1ac5a2174c46d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ 
b/doc/source/whatsnew/v0.21.0.txt @@ -323,6 +323,7 @@ Indexing - Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`) - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) - Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`) +- Avoids ``IndexError`` when passing an Index or Series to ``.iloc`` with older numpy (:issue:`17193`) I/O ^^^ diff --git a/pandas/core/internals.py b/pandas/core/internals.py index b9fca1dfbb7a1..b616270e47aa6 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -857,6 +857,9 @@ def _is_empty_indexer(indexer): # set else: + if _np_version_under1p9: + # Work around GH 6168 to support old numpy + indexer = getattr(indexer, 'values', indexer) values[indexer] = value # coerce and try to infer the dtypes of the result diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 1ba9f3101e7b6..31fee303a41e2 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -282,6 +282,19 @@ def test_iloc_setitem_list(self): index=["A", "B", "C"], columns=["A", "B", "C"]) tm.assert_frame_equal(df, expected) + def test_iloc_setitem_pandas_object(self): + # GH 17193, affecting old numpy (1.7 and 1.8) + s_orig = Series([0, 1, 2, 3]) + expected = Series([0, -1, -2, 3]) + + s = s_orig.copy() + s.iloc[Series([1, 2])] = [-1, -2] + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.iloc[pd.Index([1, 2])] = [-1, -2] + tm.assert_series_equal(s, expected) + def test_iloc_setitem_dups(self): # GH 6766 From cc8c5d77966a5cd6096a495dd1f1b1250ee8b76f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 8 Aug 2017 16:48:59 -0700 Subject: [PATCH 053/141] Implement _make_accessor classmethod for PandasDelegate (#17166) --- pandas/core/base.py | 11 +++++++++-- pandas/core/categorical.py | 7 +++++++ pandas/core/indexes/accessors.py | 8 ++++++++ pandas/core/series.py | 23 +++-------------------- pandas/core/strings.py | 30 +++++++++++++++--------------- 5 files changed, 42 insertions(+), 37 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index eb785b18bd02b..8f21e3125a27e 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -165,6 +165,12 @@ def __setattr__(self, key, value): class PandasDelegate(PandasObject): """ an abstract base class for delegating methods/properties """ + @classmethod + def _make_accessor(cls, data): + raise AbstractMethodError("_make_accessor should be implemented" + "by subclass and return an instance" + "of `cls`.") + def _delegate_property_get(self, name, *args, **kwargs): raise TypeError("You cannot access the " "property {name}".format(name=name)) @@ -231,9 +237,10 @@ class AccessorProperty(object): """Descriptor for implementing accessor properties like Series.str """ - def __init__(self, accessor_cls, construct_accessor): + def __init__(self, accessor_cls, construct_accessor=None): self.accessor_cls = accessor_cls - self.construct_accessor = construct_accessor + self.construct_accessor = (construct_accessor or + accessor_cls._make_accessor) self.__doc__ = accessor_cls.__doc__ def __get__(self, instance, owner=None): diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 1392ad2f011db..230361931125e 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -2061,6 +2061,13 @@ def _delegate_method(self, name, *args, **kwargs): if res is not None: return Series(res, index=self.index) + @classmethod + def 
_make_accessor(cls, data): + if not is_categorical_dtype(data.dtype): + raise AttributeError("Can only use .cat accessor with a " + "'category' dtype") + return CategoricalAccessor(data.values, data.index) + CategoricalAccessor._add_delegate_accessors(delegate=Categorical, accessors=["categories", diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index f1fb9a8ad93a7..ce3143b342cec 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -243,3 +243,11 @@ class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): # the Series.dt class property. For Series objects, .dt will always be one # of the more specific classes above. __doc__ = DatetimeProperties.__doc__ + + @classmethod + def _make_accessor(cls, data): + try: + return maybe_to_datetimelike(data) + except Exception: + raise AttributeError("Can only use .dt accessor with " + "datetimelike values") diff --git a/pandas/core/series.py b/pandas/core/series.py index 61508c11cae4b..c8282450b77a9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -54,8 +54,7 @@ from pandas.core.internals import SingleBlockManager from pandas.core.categorical import Categorical, CategoricalAccessor import pandas.core.strings as strings -from pandas.core.indexes.accessors import ( - maybe_to_datetimelike, CombinedDatetimelikeProperties) +from pandas.core.indexes.accessors import CombinedDatetimelikeProperties from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexes.period import PeriodIndex @@ -2919,27 +2918,11 @@ def to_period(self, freq=None, copy=True): # ------------------------------------------------------------------------- # Datetimelike delegation methods - - def _make_dt_accessor(self): - try: - return maybe_to_datetimelike(self) - except Exception: - raise AttributeError("Can only use .dt accessor with datetimelike " - "values") - - dt = base.AccessorProperty(CombinedDatetimelikeProperties, - _make_dt_accessor) + dt = base.AccessorProperty(CombinedDatetimelikeProperties) # ------------------------------------------------------------------------- # Categorical methods - - def _make_cat_accessor(self): - if not is_categorical_dtype(self.dtype): - raise AttributeError("Can only use .cat accessor with a " - "'category' dtype") - return CategoricalAccessor(self.values, self.index) - - cat = base.AccessorProperty(CategoricalAccessor, _make_cat_accessor) + cat = base.AccessorProperty(CategoricalAccessor) def _dir_deletions(self): return self._accessors diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 30465561a911c..0b1db0277eee3 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1890,18 +1890,14 @@ def rindex(self, sub, start=0, end=None): docstring=_shared_docs['ismethods'] % _shared_docs['isdecimal']) - -class StringAccessorMixin(object): - """ Mixin to add a `.str` acessor to the class.""" - - # string methods - def _make_str_accessor(self): + @classmethod + def _make_accessor(cls, data): from pandas.core.index import Index - if (isinstance(self, ABCSeries) and - not ((is_categorical_dtype(self.dtype) and - is_object_dtype(self.values.categories)) or - (is_object_dtype(self.dtype)))): + if (isinstance(data, ABCSeries) and + not ((is_categorical_dtype(data.dtype) and + is_object_dtype(data.values.categories)) or + (is_object_dtype(data.dtype)))): # it's neither a string series not a categorical series with # strings inside the categories. 
# this really should exclude all series with any non-string values @@ -1910,23 +1906,27 @@ def _make_str_accessor(self): raise AttributeError("Can only use .str accessor with string " "values, which use np.object_ dtype in " "pandas") - elif isinstance(self, Index): + elif isinstance(data, Index): # can't use ABCIndex to exclude non-str # see scc/inferrence.pyx which can contain string values allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer') - if self.inferred_type not in allowed_types: + if data.inferred_type not in allowed_types: message = ("Can only use .str accessor with string values " "(i.e. inferred_type is 'string', 'unicode' or " "'mixed')") raise AttributeError(message) - if self.nlevels > 1: + if data.nlevels > 1: message = ("Can only use .str accessor with Index, not " "MultiIndex") raise AttributeError(message) - return StringMethods(self) + return StringMethods(data) + + +class StringAccessorMixin(object): + """ Mixin to add a `.str` acessor to the class.""" - str = AccessorProperty(StringMethods, _make_str_accessor) + str = AccessorProperty(StringMethods) def _dir_additions(self): return set() From df9710b4710d6de20a09ecfa217c1326d262c656 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 9 Aug 2017 03:27:34 -0700 Subject: [PATCH 054/141] Create ABCDateOffset (#17165) --- pandas/core/dtypes/generic.py | 2 ++ pandas/core/indexes/base.py | 14 +++++++------- pandas/core/ops.py | 10 +++++++--- pandas/core/tools/datetimes.py | 5 ++--- pandas/tests/dtypes/test_generic.py | 6 ++++++ pandas/tseries/offsets.py | 1 + 6 files changed, 25 insertions(+), 13 deletions(-) diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index 90608c18ae503..618bcf6495155 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -52,6 +52,8 @@ def _check(cls, inst): ABCCategorical = create_pandas_abc_type("ABCCategorical", "_typ", ("categorical")) ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period", )) +ABCDateOffset = create_pandas_abc_type("ABCDateOffset", "_typ", + ("dateoffset",)) class _ABCGeneric(type): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4aecc75d95971..de6221987a59a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -13,7 +13,11 @@ from pandas import compat -from pandas.core.dtypes.generic import ABCSeries, ABCMultiIndex, ABCPeriodIndex +from pandas.core.dtypes.generic import ( + ABCSeries, + ABCMultiIndex, + ABCPeriodIndex, + ABCDateOffset) from pandas.core.dtypes.missing import isna, array_equivalent from pandas.core.dtypes.common import ( _ensure_int64, @@ -3814,8 +3818,6 @@ def _validate_for_numeric_binop(self, other, op, opstr): internal method called by ops """ - from pandas.tseries.offsets import DateOffset - # if we are an inheritor of numeric, # but not actually numeric (e.g. 
DatetimeIndex/PeriodInde) if not self._is_numeric_dtype: @@ -3843,7 +3845,7 @@ def _validate_for_numeric_binop(self, other, op, opstr): if other.dtype.kind not in ['f', 'i', 'u']: raise TypeError("cannot evaluate a numeric op " "with a non-numeric dtype") - elif isinstance(other, (DateOffset, np.timedelta64, + elif isinstance(other, (ABCDateOffset, np.timedelta64, Timedelta, datetime.timedelta)): # higher up to handle pass @@ -3862,12 +3864,10 @@ def _add_numeric_methods_binary(cls): def _make_evaluate_binop(op, opstr, reversed=False, constructor=Index): def _evaluate_numeric_binop(self, other): - - from pandas.tseries.offsets import DateOffset other = self._validate_for_numeric_binop(other, op, opstr) # handle time-based others - if isinstance(other, (DateOffset, np.timedelta64, + if isinstance(other, (ABCDateOffset, np.timedelta64, Timedelta, datetime.timedelta)): return self._evaluate_with_timedelta_like(other, op, opstr) elif isinstance(other, (Timestamp, np.datetime64)): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 4e08e1483d617..82101414e4aa6 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -35,7 +35,11 @@ is_scalar, _ensure_object) from pandas.core.dtypes.cast import maybe_upcast_putmask, find_common_type -from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCPeriodIndex +from pandas.core.dtypes.generic import ( + ABCSeries, + ABCIndex, + ABCPeriodIndex, + ABCDateOffset) # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory @@ -605,10 +609,10 @@ def f(x): def _is_offset(self, arr_or_obj): """ check if obj or all elements of list-like is DateOffset """ - if isinstance(arr_or_obj, pd.DateOffset): + if isinstance(arr_or_obj, ABCDateOffset): return True elif is_list_like(arr_or_obj) and len(arr_or_obj): - return all(isinstance(x, pd.DateOffset) for x in arr_or_obj) + return all(isinstance(x, ABCDateOffset) for x in arr_or_obj) return False diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index a1f323aff7c1a..eebf78d7619eb 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -17,7 +17,7 @@ is_numeric_dtype) from pandas.core.dtypes.generic import ( ABCIndexClass, ABCSeries, - ABCDataFrame) + ABCDataFrame, ABCDateOffset) from pandas.core.dtypes.missing import notna from pandas.core import algorithms @@ -720,8 +720,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): if not isinstance(arg, compat.string_types): return arg - from pandas.tseries.offsets import DateOffset - if isinstance(freq, DateOffset): + if isinstance(freq, ABCDateOffset): freq = freq.rule_code if dayfirst is None: diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index ec850cc34e23b..82444d6c94157 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -40,6 +40,12 @@ def test_abc_types(self): assert isinstance(self.categorical, gt.ABCCategorical) assert isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod) + assert isinstance(pd.DateOffset(), gt.ABCDateOffset) + assert isinstance(pd.Period('2012', freq='A-DEC').freq, + gt.ABCDateOffset) + assert not isinstance(pd.Period('2012', freq='A-DEC'), + gt.ABCDateOffset) + def test_setattr_warnings(): # GH5904 - Suggestion: Warning for DataFrame colname-methodname clash diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 2a120a0696836..56ef703e67ca0 100644 --- 
a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -184,6 +184,7 @@ def __add__(date): ) _use_relativedelta = False _adjust_dst = False + _typ = "dateoffset" # default for prior pickles normalize = False From e71e6d74886213b515d28d488be05bec52aacea0 Mon Sep 17 00:00:00 2001 From: Alex Rychyk Date: Wed, 9 Aug 2017 13:37:20 +0300 Subject: [PATCH 055/141] BUG: resample and apply modify the index type for empty Series (#17149) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/resample.py | 7 ++++++- pandas/tests/test_resample.py | 18 ++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 1ac5a2174c46d..ac0e960a348b6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -349,6 +349,7 @@ Groupby/Resample/Rolling - Bug in :func:`infer_freq` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) - Bug in ``.rolling(...).quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) - Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) +- Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`) Sparse ^^^^^^ diff --git a/pandas/core/resample.py b/pandas/core/resample.py index a8a48624fb885..96e7a6a3b3904 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -17,7 +17,7 @@ from pandas.core.indexes.period import PeriodIndex, period_range import pandas.core.common as com import pandas.core.algorithms as algos -from pandas.core.dtypes.generic import ABCDataFrame +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries import pandas.compat as compat from pandas.compat.numpy import function as nv @@ -439,6 +439,11 @@ def _wrap_result(self, result): if isinstance(result, com.ABCSeries) and self._selection is not None: result.name = self._selection + if isinstance(result, ABCSeries) and result.empty: + obj = self.obj + result.index = obj.index._shallow_copy(freq=to_offset(self.freq)) + result.name = getattr(obj, 'name', None) + return result def pad(self, limit=None): diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 08fa7992e8da1..d938d5bf9f3ab 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -852,6 +852,16 @@ def test_resample_loffset_arg_type(self): assert_frame_equal(result_agg, expected) assert_frame_equal(result_how, expected) + def test_apply_to_empty_series(self): + # GH 14313 + series = self.create_series()[:0] + + for freq in ['M', 'D', 'H']: + result = series.resample(freq).apply(lambda x: 1) + expected = series.resample(freq).apply(np.sum) + + assert_series_equal(result, expected, check_dtype=False) + class TestDatetimeIndex(Base): _index_factory = lambda x: date_range @@ -2794,6 +2804,14 @@ def test_evenly_divisible_with_no_extra_bins(self): result = df.resample('7D').sum() assert_frame_equal(result, expected) + def test_apply_to_empty_series(self): + # GH 14313 + series = self.create_series()[:0] + + for freq in ['M', 'D', 'H']: + with pytest.raises(TypeError): + series.resample(freq).apply(lambda x: 1) + class TestTimedeltaIndex(Base): _index_factory = lambda x: timedelta_range From e9c7f29646fa1b1b206eef95e5c7c7403c27227e Mon Sep 17 00:00:00 2001 From: topper-123 Date: Wed, 9 Aug 
2017 12:24:36 +0100 Subject: [PATCH 056/141] DOC: Updated NDFrame.astype docs (#17203) --- pandas/core/generic.py | 50 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2d52eed81d22b..bd3297f66a469 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3610,8 +3610,7 @@ def blocks(self): mapping={True: 'raise', False: 'ignore'}) def astype(self, dtype, copy=True, errors='raise', **kwargs): """ - Cast object to input numpy.dtype - Return a copy when copy = True (be really careful with this!) + Cast a pandas object to a specified dtype ``dtype``. Parameters ---------- @@ -3620,6 +3619,10 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): the same type. Alternatively, use {col: dtype, ...}, where col is a column label and dtype is a numpy.dtype or Python type to cast one or more of the DataFrame's columns to column-specific types. + copy : bool, default True. + Return a copy when ``copy=True`` (be very careful setting + ``copy=False`` as changes to values then may propagate to other + pandas objects). errors : {'raise', 'ignore'}, default 'raise'. Control raising of exceptions on invalid data for provided dtype. @@ -3636,6 +3639,49 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): Returns ------- casted : type of caller + + Examples + -------- + >>> ser = pd.Series([1, 2], dtype='int32') + >>> ser + 0 1 + 1 2 + dtype: int32 + >>> ser.astype('int64') + 0 1 + 1 2 + dtype: int64 + + Convert to categorical type: + + >>> ser.astype('category') + 0 1 + 1 2 + dtype: category + Categories (2, int64): [1, 2] + + Convert to ordered categorical type with custom ordering: + + >>> ser.astype('category', ordered=True, categories=[2, 1]) + 0 1 + 1 2 + dtype: category + Categories (2, int64): [2 < 1] + + Note that using ``copy=False`` and changing data on a new + pandas object may propagate changes: + + >>> s1 = pd.Series([1,2]) + >>> s2 = s1.astype('int', copy=False) + >>> s2[0] = 10 + >>> s1 # note that s1[0] has changed too + 0 10 + 1 2 + dtype: int64 + + See also + -------- + numpy.ndarray.astype : Cast a numpy array to a specified type. """ if is_dict_like(dtype): if self.ndim == 1: # i.e. Series From 38293d3067223c96466edf43818d1eaae10e1509 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Wed, 9 Aug 2017 16:19:27 -0400 Subject: [PATCH 057/141] MAINT: Minor touch-ups to GitHub PULL_REQUEST_TEMPLATE (#17207) Remove leading space from task-list so that tasks aren't nested. 
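For context on why the leading space matters: in GitHub-flavored markdown, an indented ` - [ ] task` can be parsed as a list item nested under the preceding line, while a flush-left `- [ ] task` always renders as a top-level checkbox; the diff below simply removes that indentation. (This rendering note reflects common GitHub behavior rather than anything stated in the patch itself.)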
--- .github/PULL_REQUEST_TEMPLATE.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e8b6ee21ad104..4e1e9ce017408 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,4 @@ - - [ ] closes #xxxx - - [ ] tests added / passed - - [ ] passes ``git diff upstream/master -u -- "*.py" | flake8 --diff`` - - [ ] whatsnew entry +- [ ] closes #xxxx +- [ ] tests added / passed +- [ ] passes `git diff upstream/master -u -- "*.py" | flake8 --diff` +- [ ] whatsnew entry From 7280e6c7bfa9224d0df3615a35de629ce5042a73 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 10 Aug 2017 04:26:58 -0600 Subject: [PATCH 058/141] CLN: replace %s syntax with .format in core.computation (#17209) --- pandas/core/computation/align.py | 11 +++-- pandas/core/computation/engines.py | 5 ++- pandas/core/computation/eval.py | 13 +++--- pandas/core/computation/expr.py | 45 ++++++++++--------- pandas/core/computation/expressions.py | 14 +++--- pandas/core/computation/pytables.py | 61 ++++++++++++++------------ pandas/core/computation/scope.py | 11 +++-- 7 files changed, 87 insertions(+), 73 deletions(-) diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index 1c75301082297..691eaebfd5fc1 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -98,12 +98,11 @@ def _align_core(terms): ordm = np.log10(max(1, abs(reindexer_size - term_axis_size))) if ordm >= 1 and reindexer_size >= 10000: - warnings.warn('Alignment difference on axis {0} is larger ' - 'than an order of magnitude on term {1!r}, ' - 'by more than {2:.4g}; performance may ' - 'suffer'.format(axis, terms[i].name, ordm), - category=PerformanceWarning, - stacklevel=6) + w = ('Alignment difference on axis {axis} is larger ' + 'than an order of magnitude on term {term!r}, by ' + 'more than {ordm:.4g}; performance may suffer' + ).format(axis=axis, term=terms[i].name, ordm=ordm) + warnings.warn(w, category=PerformanceWarning, stacklevel=6) if transpose: f = partial(ti.reindex, index=reindexer, copy=False) diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py index f45d0355e7442..155ff554cf99c 100644 --- a/pandas/core/computation/engines.py +++ b/pandas/core/computation/engines.py @@ -33,8 +33,9 @@ def _check_ne_builtin_clash(expr): if overlap: s = ', '.join(map(repr, overlap)) - raise NumExprClobberingError('Variables in expression "%s" ' - 'overlap with builtins: (%s)' % (expr, s)) + raise NumExprClobberingError('Variables in expression "{expr}" ' + 'overlap with builtins: ({s})' + .format(expr=expr, s=s)) class AbstractEngine(object): diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index ef15e886fd554..d391764794c1c 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -40,8 +40,9 @@ def _check_engine(engine): engine = 'python' if engine not in _engines: - raise KeyError('Invalid engine {0!r} passed, valid engines are' - ' {1}'.format(engine, list(_engines.keys()))) + valid = list(_engines.keys()) + raise KeyError('Invalid engine {engine!r} passed, valid engines are' + ' {valid}'.format(engine=engine, valid=valid)) # TODO: validate this in a more general way (thinking of future engines # that won't necessarily be import-able) @@ -69,8 +70,8 @@ def _check_parser(parser): * If an invalid parser is passed """ if parser not in _parsers: - raise KeyError('Invalid parser {0!r} passed, 
valid parsers are' - ' {1}'.format(parser, _parsers.keys())) + raise KeyError('Invalid parser {parser!r} passed, valid parsers are' + ' {valid}'.format(parser=parser, valid=_parsers.keys())) def _check_resolvers(resolvers): @@ -78,8 +79,8 @@ def _check_resolvers(resolvers): for resolver in resolvers: if not hasattr(resolver, '__getitem__'): name = type(resolver).__name__ - raise TypeError('Resolver of type %r does not implement ' - 'the __getitem__ method' % name) + raise TypeError('Resolver of type {name!r} does not implement ' + 'the __getitem__ method'.format(name=name)) def _check_expression(expr): diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index 73c27f4d772ca..ae956bce11329 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -189,8 +189,8 @@ def _filter_nodes(superclass, all_nodes=_all_nodes): # and we don't want `stmt` and friends in their so get only the class whose # names are capitalized _base_supported_nodes = (_all_node_names - _unsupported_nodes) | _hacked_nodes -_msg = 'cannot both support and not support {0}'.format(_unsupported_nodes & - _base_supported_nodes) +_msg = 'cannot both support and not support {intersection}'.format( + intersection=_unsupported_nodes & _base_supported_nodes) assert not _unsupported_nodes & _base_supported_nodes, _msg @@ -200,8 +200,8 @@ def _node_not_implemented(node_name, cls): """ def f(self, *args, **kwargs): - raise NotImplementedError("{0!r} nodes are not " - "implemented".format(node_name)) + raise NotImplementedError("{name!r} nodes are not " + "implemented".format(name=node_name)) return f @@ -217,7 +217,7 @@ def disallowed(cls): cls.unsupported_nodes = () for node in nodes: new_method = _node_not_implemented(node, cls) - name = 'visit_{0}'.format(node) + name = 'visit_{node}'.format(node=node) cls.unsupported_nodes += (name,) setattr(cls, name, new_method) return cls @@ -251,13 +251,14 @@ def add_ops(op_classes): """Decorator to add default implementation of ops.""" def f(cls): for op_attr_name, op_class in compat.iteritems(op_classes): - ops = getattr(cls, '{0}_ops'.format(op_attr_name)) - ops_map = getattr(cls, '{0}_op_nodes_map'.format(op_attr_name)) + ops = getattr(cls, '{name}_ops'.format(name=op_attr_name)) + ops_map = getattr(cls, '{name}_op_nodes_map'.format( + name=op_attr_name)) for op in ops: op_node = ops_map[op] if op_node is not None: made_op = _op_maker(op_class, op) - setattr(cls, 'visit_{0}'.format(op_node), made_op) + setattr(cls, 'visit_{node}'.format(node=op_node), made_op) return cls return f @@ -388,9 +389,10 @@ def _maybe_evaluate_binop(self, op, op_class, lhs, rhs, res = op(lhs, rhs) if res.has_invalid_return_type: - raise TypeError("unsupported operand type(s) for {0}:" - " '{1}' and '{2}'".format(res.op, lhs.type, - rhs.type)) + raise TypeError("unsupported operand type(s) for {op}:" + " '{lhs}' and '{rhs}'".format(op=res.op, + lhs=lhs.type, + rhs=rhs.type)) if self.engine != 'pytables': if (res.op in _cmp_ops_syms and @@ -527,7 +529,8 @@ def visit_Attribute(self, node, **kwargs): if isinstance(value, ast.Name) and value.id == attr: return resolved - raise ValueError("Invalid Attribute context {0}".format(ctx.__name__)) + raise ValueError("Invalid Attribute context {name}" + .format(name=ctx.__name__)) def visit_Call_35(self, node, side=None, **kwargs): """ in 3.5 the starargs attribute was changed to be more flexible, @@ -549,7 +552,8 @@ def visit_Call_35(self, node, side=None, **kwargs): raise if res is None: - raise ValueError("Invalid 
function call {0}".format(node.func.id)) + raise ValueError("Invalid function call {func}" + .format(func=node.func.id)) if hasattr(res, 'value'): res = res.value @@ -558,8 +562,8 @@ def visit_Call_35(self, node, side=None, **kwargs): new_args = [self.visit(arg) for arg in node.args] if node.keywords: - raise TypeError("Function \"{0}\" does not support keyword " - "arguments".format(res.name)) + raise TypeError("Function \"{name}\" does not support keyword " + "arguments".format(name=res.name)) return res(*new_args, **kwargs) @@ -570,7 +574,7 @@ def visit_Call_35(self, node, side=None, **kwargs): for key in node.keywords: if not isinstance(key, ast.keyword): raise ValueError("keyword error in function call " - "'{0}'".format(node.func.id)) + "'{func}'".format(func=node.func.id)) if key.arg: # TODO: bug? @@ -598,7 +602,8 @@ def visit_Call_legacy(self, node, side=None, **kwargs): raise if res is None: - raise ValueError("Invalid function call {0}".format(node.func.id)) + raise ValueError("Invalid function call {func}" + .format(func=node.func.id)) if hasattr(res, 'value'): res = res.value @@ -609,8 +614,8 @@ def visit_Call_legacy(self, node, side=None, **kwargs): args += self.visit(node.starargs) if node.keywords or node.kwargs: - raise TypeError("Function \"{0}\" does not support keyword " - "arguments".format(res.name)) + raise TypeError("Function \"{name}\" does not support keyword " + "arguments".format(name=res.name)) return res(*args, **kwargs) @@ -623,7 +628,7 @@ def visit_Call_legacy(self, node, side=None, **kwargs): for key in node.keywords: if not isinstance(key, ast.keyword): raise ValueError("keyword error in function call " - "'{0}'".format(node.func.id)) + "'{func}'".format(func=node.func.id)) keywords[key.arg] = self.visit(key.value).value if node.kwargs is not None: keywords.update(self.visit(node.kwargs).value) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 83d02af65cc85..af068bd1f32b3 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -103,7 +103,7 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, truediv=True, a_value = getattr(a, "values", a) b_value = getattr(b, "values", b) - result = ne.evaluate('a_value %s b_value' % op_str, + result = ne.evaluate('a_value {op} b_value'.format(op=op_str), local_dict={'a_value': a_value, 'b_value': b_value}, casting='safe', truediv=truediv, @@ -177,15 +177,15 @@ def _bool_arith_check(op_str, a, b, not_allowed=frozenset(('/', '//', '**')), if _has_bool_dtype(a) and _has_bool_dtype(b): if op_str in unsupported: - warnings.warn("evaluating in Python space because the %r operator" - " is not supported by numexpr for the bool " - "dtype, use %r instead" % (op_str, - unsupported[op_str])) + warnings.warn("evaluating in Python space because the {op!r} " + "operator is not supported by numexpr for " + "the bool dtype, use {alt_op!r} instead" + .format(op=op_str, alt_op=unsupported[op_str])) return False if op_str in not_allowed: - raise NotImplementedError("operator %r not implemented for bool " - "dtypes" % op_str) + raise NotImplementedError("operator {op!r} not implemented for " + "bool dtypes".format(op=op_str)) return True diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 5870090856ff9..4b3c608a88be8 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -41,7 +41,8 @@ def _resolve_name(self): # must be a queryables if self.side == 'left': if 
self.name not in self.env.queryables: - raise NameError('name {0!r} is not defined'.format(self.name)) + raise NameError('name {name!r} is not defined' + .format(name=self.name)) return self.name # resolve the rhs (and allow it to be None) @@ -161,7 +162,7 @@ def metadata(self): def generate(self, v): """ create and return the op string for this TermValue """ val = v.tostring(self.encoding) - return "(%s %s %s)" % (self.lhs, self.op, val) + return "({lhs} {op} {val})".format(lhs=self.lhs, op=self.op, val=val) def convert_value(self, v): """ convert the expression that is in the term to something that is @@ -215,9 +216,8 @@ def stringify(value): # string quoting return TermValue(v, stringify(v), u('string')) else: - raise TypeError(("Cannot compare {v} of type {typ}" - " to {kind} column").format(v=v, typ=type(v), - kind=kind)) + raise TypeError("Cannot compare {v} of type {typ} to {kind} column" + .format(v=v, typ=type(v), kind=kind)) def convert_values(self): pass @@ -226,8 +226,8 @@ def convert_values(self): class FilterBinOp(BinOp): def __unicode__(self): - return pprint_thing("[Filter : [{0}] -> " - "[{1}]".format(self.filter[0], self.filter[1])) + return pprint_thing("[Filter : [{lhs}] -> [{op}]" + .format(lhs=self.filter[0], op=self.filter[1])) def invert(self): """ invert the filter """ @@ -244,7 +244,8 @@ def format(self): def evaluate(self): if not self.is_valid: - raise ValueError("query term is not valid [%s]" % self) + raise ValueError("query term is not valid [{slf}]" + .format(slf=self)) rhs = self.conform(self.rhs) values = [TermValue(v, v, self.kind) for v in rhs] @@ -273,9 +274,8 @@ def evaluate(self): pd.Index([v.value for v in values])) else: - raise TypeError( - "passing a filterable condition to a non-table indexer [%s]" % - self) + raise TypeError("passing a filterable condition to a non-table " + "indexer [{slf}]".format(slf=self)) return self @@ -298,7 +298,8 @@ def evaluate(self): class ConditionBinOp(BinOp): def __unicode__(self): - return pprint_thing("[Condition : [{0}]]".format(self.condition)) + return pprint_thing("[Condition : [{cond}]]" + .format(cond=self.condition)) def invert(self): """ invert the condition """ @@ -315,7 +316,8 @@ def format(self): def evaluate(self): if not self.is_valid: - raise ValueError("query term is not valid [%s]" % self) + raise ValueError("query term is not valid [{slf}]" + .format(slf=self)) # convert values if we are in the table if not self.is_in_table: @@ -330,7 +332,7 @@ def evaluate(self): # too many values to create the expression? 
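        # Illustrative aside, not part of the diff: with the generate()/join
        # logic below, string values ['a', 'b'] against a column lhs named x
        # produce the pieces '(x == "a")' and '(x == "b")', and joining them
        # with ' | ' yields the condition string '((x == "a") | (x == "b"))'.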
if len(values) <= self._max_selectors: vs = [self.generate(v) for v in values] - self.condition = "(%s)" % ' | '.join(vs) + self.condition = "({cond})".format(cond=' | '.join(vs)) # use a filter after reading else: @@ -344,10 +346,9 @@ def evaluate(self): class JointConditionBinOp(ConditionBinOp): def evaluate(self): - self.condition = "(%s %s %s)" % ( - self.lhs.condition, - self.op, - self.rhs.condition) + self.condition = "({lhs} {op} {rhs})".format(lhs=self.lhs.condition, + op=self.op, + rhs=self.rhs.condition) return self @@ -382,7 +383,8 @@ class ExprVisitor(BaseExprVisitor): def __init__(self, env, engine, parser, **kwargs): super(ExprVisitor, self).__init__(env, engine, parser) for bin_op in self.binary_ops: - setattr(self, 'visit_{0}'.format(self.binary_op_nodes_map[bin_op]), + bin_node = self.binary_op_nodes_map[bin_op] + setattr(self, 'visit_{node}'.format(node=bin_node), lambda node, bin_op=bin_op: partial(BinOp, bin_op, **kwargs)) @@ -415,8 +417,8 @@ def visit_Subscript(self, node, **kwargs): try: return self.const_type(value[slobj], self.env) except TypeError: - raise ValueError("cannot subscript {0!r} with " - "{1!r}".format(value, slobj)) + raise ValueError("cannot subscript {value!r} with " + "{slobj!r}".format(value=value, slobj=slobj)) def visit_Attribute(self, node, **kwargs): attr = node.attr @@ -441,7 +443,8 @@ def visit_Attribute(self, node, **kwargs): if isinstance(value, ast.Name) and value.id == attr: return resolved - raise ValueError("Invalid Attribute context {0}".format(ctx.__name__)) + raise ValueError("Invalid Attribute context {name}" + .format(name=ctx.__name__)) def translate_In(self, op): return ast.Eq() if isinstance(op, ast.In) else op @@ -529,7 +532,7 @@ def __init__(self, where, queryables=None, encoding=None, scope_level=0): else: w = _validate_where(w) where[idx] = w - where = ' & ' .join(["(%s)" % w for w in where]) # noqa + where = ' & '.join(map('({})'.format, com.flatten(where))) # noqa self.expr = where self.env = Scope(scope_level + 1, local_dict=local_dict) @@ -552,13 +555,15 @@ def evaluate(self): try: self.condition = self.terms.prune(ConditionBinOp) except AttributeError: - raise ValueError("cannot process expression [{0}], [{1}] is not a " - "valid condition".format(self.expr, self)) + raise ValueError("cannot process expression [{expr}], [{slf}] " + "is not a valid condition".format(expr=self.expr, + slf=self)) try: self.filter = self.terms.prune(FilterBinOp) except AttributeError: - raise ValueError("cannot process expression [{0}], [{1}] is not a " - "valid filter".format(self.expr, self)) + raise ValueError("cannot process expression [{expr}], [{slf}] " + "is not a valid filter".format(expr=self.expr, + slf=self)) return self.condition, self.filter @@ -578,7 +583,7 @@ def tostring(self, encoding): if self.kind == u'string': if encoding is not None: return self.converted - return '"%s"' % self.converted + return '"{converted}"'.format(converted=self.converted) elif self.kind == u'float': # python 2 str(float) is not always # round-trippable so use repr() diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 5a589473f64b7..6a298f5137eb1 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -137,8 +137,10 @@ def __init__(self, level, global_dict=None, local_dict=None, resolvers=(), def __unicode__(self): scope_keys = _get_pretty_string(list(self.scope.keys())) res_keys = _get_pretty_string(list(self.resolvers.keys())) - return '%s(scope=%s, resolvers=%s)' % 
(type(self).__name__, scope_keys, - res_keys) + unicode_str = '{name}(scope={scope_keys}, resolvers={res_keys})' + return unicode_str.format(name=type(self).__name__, + scope_keys=scope_keys, + res_keys=res_keys) @property def has_resolvers(self): @@ -269,8 +271,9 @@ def add_tmp(self, value): name : basestring The name of the temporary variable created. """ - name = '{0}_{1}_{2}'.format(type(value).__name__, self.ntemps, - _raw_hex_id(self)) + name = '{name}_{num}_{hex_id}'.format(name=type(value).__name__, + num=self.ntemps, + hex_id=_raw_hex_id(self)) # add to inner most scope assert name not in self.temps From 421dcf423f3979e82e538dbd1dd9008bef36ddef Mon Sep 17 00:00:00 2001 From: Chris Billington Date: Thu, 10 Aug 2017 06:36:50 -0400 Subject: [PATCH 059/141] Bugfix for multilevel columns with empty strings in Python 2 (#17099) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/frame.py | 12 ++++++++++-- pandas/tests/test_multilevel.py | 21 ++++++++++++++------- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index ac0e960a348b6..2f2089b4f8ad7 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -324,6 +324,7 @@ Indexing - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) - Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`) - Avoids ``IndexError`` when passing an Index or Series to ``.iloc`` with older numpy (:issue:`17193`) +- Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`) I/O ^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 027a427555253..94cce1b4d05b5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2134,10 +2134,18 @@ def _getitem_multilevel(self, key): result = self._constructor(new_values, index=self.index, columns=result_columns) result = result.__finalize__(self) + + # If there is only one column being returned, and its name is + # either an empty string, or a tuple with an empty string as its + # first element, then treat the empty string as a placeholder + # and return the column as if the user had provided that empty + # string in the key. If the result is a Series, exclude the + # implied empty string from its name. if len(result.columns) == 1: top = result.columns[0] - if ((type(top) == str and top == '') or - (type(top) == tuple and top[0] == '')): + if isinstance(top, tuple): + top = top[0] + if top == '': result = result[''] if isinstance(result, Series): result = self._constructor_sliced(result, diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 0b2dc9ba70f03..a765e2c4ca1bf 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1675,24 +1675,31 @@ def test_int_series_slicing(self): expected = self.ymd.reindex(s.index[5:]) tm.assert_frame_equal(result, expected) - def test_mixed_depth_get(self): + @pytest.mark.parametrize('unicode_strings', [True, False]) + def test_mixed_depth_get(self, unicode_strings): + # If unicode_strings is True, the column labels in dataframe + # construction will use unicode strings in Python 2 (pull request + # #17099). 
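        # Illustrative aside, not part of the diff: in Python 2, u'' has
        # type ``unicode`` rather than ``str``, so the old
        # ``type(top) == str`` check in _getitem_multilevel missed unicode
        # placeholders and df['a'] was not collapsed to a Series; the new
        # ``top == ''`` comparison matches both '' and u'', which is what
        # this parametrization exercises.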
+ arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] + if unicode_strings: + arrays = [[u(s) for s in arr] for arr in arrays] + tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(randn(4, 6), columns=index) + df = DataFrame(np.random.randn(4, 6), columns=index) result = df['a'] - expected = df['a', '', ''] - tm.assert_series_equal(result, expected, check_names=False) - assert result.name == 'a' + expected = df['a', '', ''].rename('a') + tm.assert_series_equal(result, expected) result = df['routine1', 'result1'] expected = df['routine1', 'result1', ''] - tm.assert_series_equal(result, expected, check_names=False) - assert result.name == ('routine1', 'result1') + expected = expected.rename(('routine1', 'result1')) + tm.assert_series_equal(result, expected) def test_mixed_depth_insert(self): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], From d5733ee83fe96236d608e9f0045cc304b6690fbb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 10 Aug 2017 12:37:39 +0200 Subject: [PATCH 060/141] CLN/ASV clean-up frame stat ops benchmarks (#17205) --- asv_bench/benchmarks/stat_ops.py | 100 +++++++------------------------ 1 file changed, 22 insertions(+), 78 deletions(-) diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 12fbb2478c2a5..1e1eb167b46bf 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -1,92 +1,36 @@ from .pandas_vb_common import * -class stat_ops_frame_mean_float_axis_0(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) - - def time_stat_ops_frame_mean_float_axis_0(self): - self.df.mean() - - -class stat_ops_frame_mean_float_axis_1(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) - - def time_stat_ops_frame_mean_float_axis_1(self): - self.df.mean(1) - - -class stat_ops_frame_mean_int_axis_0(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) - - def time_stat_ops_frame_mean_int_axis_0(self): - self.dfi.mean() - - -class stat_ops_frame_mean_int_axis_1(object): - goal_time = 0.2 +def _set_use_bottleneck_False(): + try: + pd.options.compute.use_bottleneck = False + except: + from pandas.core import nanops + nanops._USE_BOTTLENECK = False - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) - - def time_stat_ops_frame_mean_int_axis_1(self): - self.dfi.mean(1) - - -class stat_ops_frame_sum_float_axis_0(object): - goal_time = 0.2 - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) - - def time_stat_ops_frame_sum_float_axis_0(self): - self.df.sum() - - -class stat_ops_frame_sum_float_axis_1(object): +class FrameOps(object): goal_time = 0.2 - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) + param_names = ['op', 'use_bottleneck', 'dtype', 'axis'] + params = [['mean', 'sum', 'median'], + [True, False], + ['float', 'int'], + [0, 1]] - def 
time_stat_ops_frame_sum_float_axis_1(self): - self.df.sum(1) + def setup(self, op, use_bottleneck, dtype, axis): + if dtype == 'float': + self.df = DataFrame(np.random.randn(100000, 4)) + elif dtype == 'int': + self.df = DataFrame(np.random.randint(1000, size=(100000, 4))) + + if not use_bottleneck: + _set_use_bottleneck_False() -class stat_ops_frame_sum_int_axis_0(object): - goal_time = 0.2 - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) - def time_stat_ops_frame_sum_int_axis_0(self): - self.dfi.sum() - -class stat_ops_frame_sum_int_axis_1(object): - goal_time = 0.2 - - def setup(self): - self.df = DataFrame(np.random.randn(100000, 4)) - self.dfi = DataFrame(np.random.randint(1000, size=self.df.shape)) + self.func = getattr(self.df, op) - def time_stat_ops_frame_sum_int_axis_1(self): - self.dfi.sum(1) + def time_op(self, op, use_bottleneck, dtype, axis): + self.func(axis=axis) class stat_ops_level_frame_sum(object): From 9f69583dd0bd8bc27e54767005c53be02b21ca1d Mon Sep 17 00:00:00 2001 From: XF Date: Thu, 10 Aug 2017 07:32:46 -0500 Subject: [PATCH 061/141] BUG: Rolling apply on DataFrame with Datetime index returns NaN (#17156) --- doc/source/whatsnew/v0.21.0.txt | 2 ++ pandas/_libs/window.pyx | 9 +++++---- pandas/tests/test_window.py | 20 ++++++++++++++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2f2089b4f8ad7..98998acfb72d6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -351,6 +351,8 @@ Groupby/Resample/Rolling - Bug in ``.rolling(...).quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) - Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) - Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`) +- Bug in ``.rolling(...).apply(...)`` on a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible type, and ``min_periods >= 1`` (:issue:`15305`) + Sparse ^^^^^^ diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 2450eea5500cd..bdd371871b6e1 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1428,15 +1428,16 @@ def roll_generic(ndarray[float64_t, cast=True] input, if n == 0: return input + counts = roll_sum(np.concatenate([np.isfinite(input).astype(float), + np.array([0.] * offset)]), + win, minp, index, closed)[offset:] + start, end, N, win, minp, is_variable = get_window_indexer(input, win, minp, index, closed, floor=0) - output = np.empty(N, dtype=float) - counts = roll_sum(np.concatenate([np.isfinite(input).astype(float), - np.array([0.] 
* offset)]), - win, minp, index, closed)[offset:] + output = np.empty(N, dtype=float) if is_variable: diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 5ab33bd6cc5e1..d94e34c41786b 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -423,6 +423,26 @@ def test_constructor_with_timedelta_window(self): expected = df.rolling('3D').sum() tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + 'window', [timedelta(days=3), pd.Timedelta(days=3), '3D']) + def test_constructor_with_timedelta_window_and_minperiods(self, window): + # GH 15305 + n = 10 + df = pd.DataFrame({'value': np.arange(n)}, + index=pd.date_range('2017-08-08', + periods=n, + freq="D")) + expected = pd.DataFrame({'value': np.append([np.NaN, 1.], + np.arange(3., 27., 3))}, + index=pd.date_range('2017-08-08', + periods=n, + freq="D")) + result_roll_sum = df.rolling(window=window, min_periods=2).sum() + result_roll_generic = df.rolling(window=window, + min_periods=2).apply(sum) + tm.assert_frame_equal(result_roll_sum, expected) + tm.assert_frame_equal(result_roll_generic, expected) + def test_numpy_compat(self): # see gh-12811 r = rwindow.Rolling(Series([2, 4, 6]), window=2) From 1e1ce40b0a318eeeae44292f84a2ba0b74d98af2 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 10 Aug 2017 16:43:37 -0400 Subject: [PATCH 062/141] CLN: Remove import exception handling (#17218) Imports should succeed on all versions of Python that pandas supports. --- pandas/tests/io/parser/compression.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py index 55c0506acb132..797c12139656d 100644 --- a/pandas/tests/io/parser/compression.py +++ b/pandas/tests/io/parser/compression.py @@ -13,10 +13,7 @@ class CompressionTests(object): def test_zip(self): - try: - import zipfile - except ImportError: - pytest.skip('need zipfile to run') + import zipfile with open(self.csv1, 'rb') as data_file: data = data_file.read() @@ -65,10 +62,7 @@ def test_zip(self): f, compression='zip') def test_gzip(self): - try: - import gzip - except ImportError: - pytest.skip('need gzip to run') + import gzip with open(self.csv1, 'rb') as data_file: data = data_file.read() @@ -94,10 +88,7 @@ def test_gzip(self): tm.assert_frame_equal(result, expected) def test_bz2(self): - try: - import bz2 - except ImportError: - pytest.skip('need bz2 to run') + import bz2 with open(self.csv1, 'rb') as data_file: data = data_file.read() From a1509dc30c0cd850d611002a2fd5d89d77092352 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 10 Aug 2017 17:13:02 -0700 Subject: [PATCH 063/141] MAINT: Remove extra the's in deprecation messages (#17222) --- pandas/json.py | 2 +- pandas/parser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/json.py b/pandas/json.py index 0b87aa22394b9..16d6580c87951 100644 --- a/pandas/json.py +++ b/pandas/json.py @@ -3,5 +3,5 @@ import warnings warnings.warn("The pandas.json module is deprecated and will be " "removed in a future version. 
Please import from " - "the pandas.io.json instead", FutureWarning, stacklevel=2) + "pandas.io.json instead", FutureWarning, stacklevel=2) from pandas._libs.json import dumps, loads diff --git a/pandas/parser.py b/pandas/parser.py index c0c3bf3179a2d..f43a408c943d0 100644 --- a/pandas/parser.py +++ b/pandas/parser.py @@ -3,6 +3,6 @@ import warnings warnings.warn("The pandas.parser module is deprecated and will be " "removed in a future version. Please import from " - "the pandas.io.parser instead", FutureWarning, stacklevel=2) + "pandas.io.parser instead", FutureWarning, stacklevel=2) from pandas._libs.parsers import na_values from pandas.io.common import CParserError From 6788533a028ff24b83f64fdfe186c48a0ab393e6 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 10 Aug 2017 17:18:57 -0700 Subject: [PATCH 064/141] DOC: Patch docs in _decorators.py --- pandas/util/_decorators.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index e406698fafe63..f346995c0a1a4 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -9,8 +9,7 @@ def deprecate(name, alternative, alt_name=None, klass=None, stacklevel=2): """ - - Return a new function that emits a deprecation warning on use + Return a new function that emits a deprecation warning on use. Parameters ---------- @@ -22,8 +21,8 @@ def deprecate(name, alternative, alt_name=None, klass=None, Name to use in preference of alternative.__name__ klass : Warning, default FutureWarning stacklevel : int, default 2 - """ + alt_name = alt_name or alternative.__name__ klass = klass or FutureWarning @@ -35,7 +34,8 @@ def wrapper(*args, **kwargs): def deprecate_kwarg(old_arg_name, new_arg_name, mapping=None, stacklevel=2): - """Decorator to deprecate a keyword argument of a function + """ + Decorator to deprecate a keyword argument of a function. Parameters ---------- @@ -72,8 +72,8 @@ def deprecate_kwarg(old_arg_name, new_arg_name, mapping=None, stacklevel=2): FutureWarning: old='yes' is deprecated, use new=True instead warnings.warn(msg, FutureWarning) yes! - """ + if mapping is not None and not hasattr(mapping, 'get') and \ not callable(mapping): raise TypeError("mapping from old to new argument values " @@ -155,7 +155,12 @@ def __call__(self, func): return func def update(self, *args, **kwargs): - "Assume self.params is a dict and update it with supplied args" + """ + Update self.params with supplied args. + + If called, we assume self.params is a dict. + """ + self.params.update(*args, **kwargs) @classmethod @@ -215,16 +220,16 @@ def indent(text, indents=1): def make_signature(func): """ - Returns a string repr of the arg list of a func call, with any defaults + Returns a string repr of the arg list of a func call, with any defaults. Examples -------- - >>> def f(a,b,c=2) : >>> return a*b*c >>> print(_make_signature(f)) a,b,c=2 """ + spec = signature(func) if spec.defaults is None: n_wo_defaults = len(spec.args) @@ -244,8 +249,8 @@ def make_signature(func): class docstring_wrapper(object): """ - decorator to wrap a function, - provide a dynamically evaluated doc-string + Decorator to wrap a function and provide + a dynamically evaluated doc-string. 
Parameters ---------- From 619e03161993c408cf215e5d118722f741272227 Mon Sep 17 00:00:00 2001 From: jschendel Date: Fri, 11 Aug 2017 04:22:46 -0600 Subject: [PATCH 065/141] CLN: replace %s syntax with .format in pandas.util (#17224) --- pandas/util/_decorators.py | 26 +++-- pandas/util/_print_versions.py | 25 ++-- pandas/util/_validators.py | 6 +- pandas/util/testing.py | 206 ++++++++++++++++++--------------- 4 files changed, 141 insertions(+), 122 deletions(-) diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index f346995c0a1a4..bb7ffe45c689b 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -27,8 +27,9 @@ def deprecate(name, alternative, alt_name=None, klass=None, klass = klass or FutureWarning def wrapper(*args, **kwargs): - warnings.warn("%s is deprecated. Use %s instead" % (name, alt_name), - klass, stacklevel=stacklevel) + msg = "{name} is deprecated. Use {alt_name} instead".format( + name=name, alt_name=alt_name) + warnings.warn(msg, klass, stacklevel=stacklevel) return alternative(*args, **kwargs) return wrapper @@ -90,19 +91,24 @@ def wrapper(*args, **kwargs): old_arg_value) else: new_arg_value = mapping(old_arg_value) - msg = "the %s=%r keyword is deprecated, " \ - "use %s=%r instead" % \ - (old_arg_name, old_arg_value, - new_arg_name, new_arg_value) + msg = ("the {old_name}={old_val!r} keyword is deprecated, " + "use {new_name}={new_val!r} instead" + ).format(old_name=old_arg_name, + old_val=old_arg_value, + new_name=new_arg_name, + new_val=new_arg_value) else: new_arg_value = old_arg_value - msg = "the '%s' keyword is deprecated, " \ - "use '%s' instead" % (old_arg_name, new_arg_name) + msg = ("the '{old_name}' keyword is deprecated, " + "use '{new_name}' instead" + ).format(old_name=old_arg_name, + new_name=new_arg_name) warnings.warn(msg, FutureWarning, stacklevel=stacklevel) if kwargs.get(new_arg_name, None) is not None: - msg = ("Can only specify '%s' or '%s', not both" % - (old_arg_name, new_arg_name)) + msg = ("Can only specify '{old_name}' or '{new_name}', " + "not both").format(old_name=old_arg_name, + new_name=new_arg_name) raise TypeError(msg) else: kwargs[new_arg_name] = new_arg_value diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py index 9ecd4b10365c8..83c1433bf5c39 100644 --- a/pandas/util/_print_versions.py +++ b/pandas/util/_print_versions.py @@ -38,18 +38,17 @@ def get_sys_info(): (sysname, nodename, release, version, machine, processor) = platform.uname() blob.extend([ - ("python", "%d.%d.%d.%s.%s" % sys.version_info[:]), + ("python", '.'.join(map(str, sys.version_info))), ("python-bits", struct.calcsize("P") * 8), - ("OS", "%s" % (sysname)), - ("OS-release", "%s" % (release)), - # ("Version", "%s" % (version)), - ("machine", "%s" % (machine)), - ("processor", "%s" % (processor)), - ("byteorder", "%s" % sys.byteorder), - ("LC_ALL", "%s" % os.environ.get('LC_ALL', "None")), - ("LANG", "%s" % os.environ.get('LANG', "None")), - ("LOCALE", "%s.%s" % locale.getlocale()), - + ("OS", "{sysname}".format(sysname=sysname)), + ("OS-release", "{release}".format(release=release)), + # ("Version", "{version}".format(version=version)), + ("machine", "{machine}".format(machine=machine)), + ("processor", "{processor}".format(processor=processor)), + ("byteorder", "{byteorder}".format(byteorder=sys.byteorder)), + ("LC_ALL", "{lc}".format(lc=os.environ.get('LC_ALL', "None"))), + ("LANG", "{lang}".format(lang=os.environ.get('LANG', "None"))), + ("LOCALE", '.'.join(map(str, locale.getlocale()))), ]) except: 
pass @@ -131,11 +130,11 @@ def show_versions(as_json=False): print("------------------") for k, stat in sys_info: - print("%s: %s" % (k, stat)) + print("{k}: {stat}".format(k=k, stat=stat)) print("") for k, stat in deps_blob: - print("%s: %s" % (k, stat)) + print("{k}: {stat}".format(k=k, stat=stat)) def main(): diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 6b19904f4a665..2661e4a98aedf 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -220,7 +220,7 @@ def validate_args_and_kwargs(fname, args, kwargs, def validate_bool_kwarg(value, arg_name): """ Ensures that argument passed in arg_name is of type bool. """ if not (is_bool(value) or value is None): - raise ValueError('For argument "%s" expected type bool, ' - 'received type %s.' % - (arg_name, type(value).__name__)) + raise ValueError('For argument "{arg}" expected type bool, received ' + 'type {typ}.'.format(arg=arg_name, + typ=type(value).__name__)) return value diff --git a/pandas/util/testing.py b/pandas/util/testing.py index d6ba9561340cc..a000e189dfaa9 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -104,7 +104,7 @@ def round_trip_pickle(obj, path=None): """ if path is None: - path = u('__%s__.pickle' % rands(10)) + path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10))) with ensure_clean(path) as path: pd.to_pickle(obj, path) return pd.read_pickle(path) @@ -244,13 +244,15 @@ def _check_isinstance(left, right, cls): AssertionError : Either `left` or `right` is not an instance of `cls`. """ - err_msg = "{0} Expected type {1}, found {2} instead" + err_msg = "{name} Expected type {exp_type}, found {act_type} instead" cls_name = cls.__name__ if not isinstance(left, cls): - raise AssertionError(err_msg.format(cls_name, cls, type(left))) + raise AssertionError(err_msg.format(name=cls_name, exp_type=cls, + act_type=type(left))) if not isinstance(right, cls): - raise AssertionError(err_msg.format(cls_name, cls, type(right))) + raise AssertionError(err_msg.format(name=cls_name, exp_type=cls, + act_type=type(right))) def assert_dict_equal(left, right, compare_keys=True): @@ -371,7 +373,7 @@ def _skip_if_no_xarray(): if v < LooseVersion('0.7.0'): import pytest - pytest.skip("xarray not version is too low: {0}".format(v)) + pytest.skip("xarray version is too low: {version}".format(version=v)) def _skip_if_windows_python_3(): @@ -436,7 +438,7 @@ def _skip_if_has_locale(): lang, _ = locale.getlocale() if lang is not None: import pytest - pytest.skip("Specific locale is set {0}".format(lang)) + pytest.skip("Specific locale is set {lang}".format(lang=lang)) def _skip_if_not_us_locale(): @@ -444,7 +446,7 @@ def _skip_if_not_us_locale(): lang, _ = locale.getlocale() if lang != 'en_US': import pytest - pytest.skip("Specific locale is set {0}".format(lang)) + pytest.skip("Specific locale is set {lang}".format(lang=lang)) def _skip_if_no_mock(): @@ -505,8 +507,8 @@ def _default_locale_getter(): try: raw_locales = check_output(['locale -a'], shell=True) except subprocess.CalledProcessError as e: - raise type(e)("%s, the 'locale -a' command cannot be found on your " - "system" % e) + raise type(e)("{exception}, the 'locale -a' command cannot be found " + "on your system".format(exception=e)) return raw_locales @@ -563,7 +565,8 @@ def get_locales(prefix=None, normalize=True, if prefix is None: return _valid_locales(out_locales, normalize) - found = re.compile('%s.*' % prefix).findall('\n'.join(out_locales)) + found = re.compile('{prefix}.*'.format(prefix=prefix)) \ + 
.findall('\n'.join(out_locales)) return _valid_locales(found, normalize) @@ -818,13 +821,13 @@ def ensure_clean(filename=None, return_filelike=False): try: os.close(fd) except Exception as e: - print("Couldn't close file descriptor: %d (file: %s)" % - (fd, filename)) + print("Couldn't close file descriptor: {fdesc} (file: {fname})" + .format(fdesc=fd, fname=filename)) try: if os.path.exists(filename): os.remove(filename) except Exception as e: - print("Exception on removing file: %s" % e) + print("Exception on removing file: {error}".format(error=e)) def get_data_path(f=''): @@ -900,15 +903,17 @@ def _get_ilevel_values(index, level): # level comparison if left.nlevels != right.nlevels: - raise_assert_detail(obj, '{0} levels are different'.format(obj), - '{0}, {1}'.format(left.nlevels, left), - '{0}, {1}'.format(right.nlevels, right)) + msg1 = '{obj} levels are different'.format(obj=obj) + msg2 = '{nlevels}, {left}'.format(nlevels=left.nlevels, left=left) + msg3 = '{nlevels}, {right}'.format(nlevels=right.nlevels, right=right) + raise_assert_detail(obj, msg1, msg2, msg3) # length comparison if len(left) != len(right): - raise_assert_detail(obj, '{0} length are different'.format(obj), - '{0}, {1}'.format(len(left), left), - '{0}, {1}'.format(len(right), right)) + msg1 = '{obj} length are different'.format(obj=obj) + msg2 = '{length}, {left}'.format(length=len(left), left=left) + msg3 = '{length}, {right}'.format(length=len(right), right=right) + raise_assert_detail(obj, msg1, msg2, msg3) # MultiIndex special comparison for little-friendly error messages if left.nlevels > 1: @@ -917,7 +922,7 @@ def _get_ilevel_values(index, level): llevel = _get_ilevel_values(left, level) rlevel = _get_ilevel_values(right, level) - lobj = 'MultiIndex level [{0}]'.format(level) + lobj = 'MultiIndex level [{level}]'.format(level=level) assert_index_equal(llevel, rlevel, exact=exact, check_names=check_names, check_less_precise=check_less_precise, @@ -929,8 +934,8 @@ def _get_ilevel_values(index, level): if not left.equals(right): diff = np.sum((left.values != right.values) .astype(int)) * 100.0 / len(left) - msg = '{0} values are different ({1} %)'\ - .format(obj, np.round(diff, 5)) + msg = '{obj} values are different ({pct} %)'.format( + obj=obj, pct=np.round(diff, 5)) raise_assert_detail(obj, msg, left, right) else: _testing.assert_almost_equal(left.values, right.values, @@ -950,7 +955,7 @@ def _get_ilevel_values(index, level): if check_categorical: if is_categorical_dtype(left) or is_categorical_dtype(right): assert_categorical_equal(left.values, right.values, - obj='{0} category'.format(obj)) + obj='{obj} category'.format(obj=obj)) def assert_class_equal(left, right, exact=True, obj='Input'): @@ -971,12 +976,12 @@ def repr_class(x): # allow equivalence of Int64Index/RangeIndex types = set([type(left).__name__, type(right).__name__]) if len(types - set(['Int64Index', 'RangeIndex'])): - msg = '{0} classes are not equivalent'.format(obj) + msg = '{obj} classes are not equivalent'.format(obj=obj) raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) elif exact: if type(left) != type(right): - msg = '{0} classes are different'.format(obj) + msg = '{obj} classes are different'.format(obj=obj) raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) @@ -1016,23 +1021,22 @@ def assert_attr_equal(attr, left, right, obj='Attributes'): if result: return True else: - raise_assert_detail(obj, 'Attribute "{0}" are different'.format(attr), - left_attr, right_attr) + msg = 'Attribute "{attr}" are 
different'.format(attr=attr) + raise_assert_detail(obj, msg, left_attr, right_attr) def assert_is_valid_plot_return_object(objs): import matplotlib.pyplot as plt if isinstance(objs, (pd.Series, np.ndarray)): for el in objs.ravel(): - msg = ('one of \'objs\' is not a matplotlib Axes instance, ' - 'type encountered {0!r}') - assert isinstance(el, (plt.Axes, dict)), msg.format( - el.__class__.__name__) + msg = ('one of \'objs\' is not a matplotlib Axes instance, type ' + 'encountered {name!r}').format(name=el.__class__.__name__) + assert isinstance(el, (plt.Axes, dict)), msg else: assert isinstance(objs, (plt.Artist, tuple, dict)), \ ('objs is neither an ndarray of Artist instances nor a ' - 'single Artist instance, tuple, or dict, "objs" is a {0!r} ' - ''.format(objs.__class__.__name__)) + 'single Artist instance, tuple, or dict, "objs" is a {name!r}' + ).format(name=objs.__class__.__name__) def isiterable(obj): @@ -1069,17 +1073,17 @@ def assert_categorical_equal(left, right, check_dtype=True, if check_category_order: assert_index_equal(left.categories, right.categories, - obj='{0}.categories'.format(obj)) + obj='{obj}.categories'.format(obj=obj)) assert_numpy_array_equal(left.codes, right.codes, check_dtype=check_dtype, - obj='{0}.codes'.format(obj)) + obj='{obj}.codes'.format(obj=obj)) else: assert_index_equal(left.categories.sort_values(), right.categories.sort_values(), - obj='{0}.categories'.format(obj)) + obj='{obj}.categories'.format(obj=obj)) assert_index_equal(left.categories.take(left.codes), right.categories.take(right.codes), - obj='{0}.values'.format(obj)) + obj='{obj}.values'.format(obj=obj)) assert_attr_equal('ordered', left, right, obj=obj) @@ -1090,14 +1094,14 @@ def raise_assert_detail(obj, message, left, right, diff=None): if isinstance(right, np.ndarray): right = pprint_thing(right) - msg = """{0} are different + msg = """{obj} are different -{1} -[left]: {2} -[right]: {3}""".format(obj, message, left, right) +{message} +[left]: {left} +[right]: {right}""".format(obj=obj, message=message, left=left, right=right) if diff is not None: - msg = msg + "\n[diff]: {diff}".format(diff=diff) + msg += "\n[diff]: {diff}".format(diff=diff) raise AssertionError(msg) @@ -1138,18 +1142,20 @@ def _get_base(obj): if check_same == 'same': if left_base is not right_base: - msg = "%r is not %r" % (left_base, right_base) + msg = "{left!r} is not {right!r}".format( + left=left_base, right=right_base) raise AssertionError(msg) elif check_same == 'copy': if left_base is right_base: - msg = "%r is %r" % (left_base, right_base) + msg = "{left!r} is {right!r}".format( + left=left_base, right=right_base) raise AssertionError(msg) def _raise(left, right, err_msg): if err_msg is None: if left.shape != right.shape: - raise_assert_detail(obj, '{0} shapes are different' - .format(obj), left.shape, right.shape) + raise_assert_detail(obj, '{obj} shapes are different' + .format(obj=obj), left.shape, right.shape) diff = 0 for l, r in zip(left, right): @@ -1158,8 +1164,8 @@ def _raise(left, right, err_msg): diff += 1 diff = diff * 100.0 / left.size - msg = '{0} values are different ({1} %)'\ - .format(obj, np.round(diff, 5)) + msg = '{obj} values are different ({pct} %)'.format( + obj=obj, pct=np.round(diff, 5)) raise_assert_detail(obj, msg, left, right) raise AssertionError(err_msg) @@ -1226,9 +1232,9 @@ def assert_series_equal(left, right, check_dtype=True, # length comparison if len(left) != len(right): - raise_assert_detail(obj, 'Series length are different', - '{0}, {1}'.format(len(left), left.index), - 
'{0}, {1}'.format(len(right), right.index)) + msg1 = '{len}, {left}'.format(len=len(left), left=left.index) + msg2 = '{len}, {right}'.format(len=len(right), right=right.index) + raise_assert_detail(obj, 'Series length are different', msg1, msg2) # index comparison assert_index_equal(left.index, right.index, exact=check_index_type, @@ -1236,7 +1242,7 @@ def assert_series_equal(left, right, check_dtype=True, check_less_precise=check_less_precise, check_exact=check_exact, check_categorical=check_categorical, - obj='{0}.index'.format(obj)) + obj='{obj}.index'.format(obj=obj)) if check_dtype: assert_attr_equal('dtype', left, right) @@ -1244,7 +1250,7 @@ def assert_series_equal(left, right, check_dtype=True, if check_exact: assert_numpy_array_equal(left.get_values(), right.get_values(), check_dtype=check_dtype, - obj='{0}'.format(obj),) + obj='{obj}'.format(obj=obj),) elif check_datetimelike_compat: # we want to check only if we have compat dtypes # e.g. integer and M|m are NOT compat, but we can simply check @@ -1257,8 +1263,9 @@ def assert_series_equal(left, right, check_dtype=True, # datetimelike may have different objects (e.g. datetime.datetime # vs Timestamp) but will compare equal if not Index(left.values).equals(Index(right.values)): - msg = '[datetimelike_compat=True] {0} is not equal to {1}.' - raise AssertionError(msg.format(left.values, right.values)) + msg = ('[datetimelike_compat=True] {left} is not equal to ' + '{right}.').format(left=left.values, right=right.values) + raise AssertionError(msg) else: assert_numpy_array_equal(left.get_values(), right.get_values(), check_dtype=check_dtype) @@ -1266,13 +1273,13 @@ def assert_series_equal(left, right, check_dtype=True, # TODO: big hack here l = pd.IntervalIndex(left) r = pd.IntervalIndex(right) - assert_index_equal(l, r, obj='{0}.index'.format(obj)) + assert_index_equal(l, r, obj='{obj}.index'.format(obj=obj)) else: _testing.assert_almost_equal(left.get_values(), right.get_values(), check_less_precise=check_less_precise, check_dtype=check_dtype, - obj='{0}'.format(obj)) + obj='{obj}'.format(obj=obj)) # metadata comparison if check_names: @@ -1281,7 +1288,7 @@ def assert_series_equal(left, right, check_dtype=True, if check_categorical: if is_categorical_dtype(left) or is_categorical_dtype(right): assert_categorical_equal(left.values, right.values, - obj='{0} category'.format(obj)) + obj='{obj} category'.format(obj=obj)) # This could be refactored to use the NDFrame.equals method @@ -1348,8 +1355,8 @@ def assert_frame_equal(left, right, check_dtype=True, if left.shape != right.shape: raise_assert_detail(obj, 'DataFrame shape mismatch', - '({0}, {1})'.format(*left.shape), - '({0}, {1})'.format(*right.shape)) + '{shape!r}'.format(shape=left.shape), + '{shape!r}'.format(shape=right.shape)) if check_like: left, right = left.reindex_like(right), right @@ -1360,7 +1367,7 @@ def assert_frame_equal(left, right, check_dtype=True, check_less_precise=check_less_precise, check_exact=check_exact, check_categorical=check_categorical, - obj='{0}.index'.format(obj)) + obj='{obj}.index'.format(obj=obj)) # column comparison assert_index_equal(left.columns, right.columns, exact=check_column_type, @@ -1368,7 +1375,7 @@ def assert_frame_equal(left, right, check_dtype=True, check_less_precise=check_less_precise, check_exact=check_exact, check_categorical=check_categorical, - obj='{0}.columns'.format(obj)) + obj='{obj}.columns'.format(obj=obj)) # compare by blocks if by_blocks: @@ -1393,7 +1400,7 @@ def assert_frame_equal(left, right, check_dtype=True, 
check_exact=check_exact, check_names=check_names, check_datetimelike_compat=check_datetimelike_compat, check_categorical=check_categorical, - obj='DataFrame.iloc[:, {0}]'.format(i)) + obj='DataFrame.iloc[:, {idx}]'.format(idx=i)) def assert_panelnd_equal(left, right, @@ -1448,13 +1455,15 @@ def assert_panelnd_equal(left, right, # can potentially be slow for i, item in enumerate(left._get_axis(0)): - assert item in right, "non-matching item (right) '%s'" % item + msg = "non-matching item (right) '{item}'".format(item=item) + assert item in right, msg litem = left.iloc[i] ritem = right.iloc[i] assert_func(litem, ritem, check_less_precise=check_less_precise) for i, item in enumerate(right._get_axis(0)): - assert item in left, "non-matching item (left) '%s'" % item + msg = "non-matching item (left) '{item}'".format(item=item) + assert item in left, msg # TODO: strangely check_names fails in py3 ? @@ -1526,7 +1535,7 @@ def assert_sp_series_equal(left, right, check_dtype=True, exact_indices=True, assert_class_equal(left, right, obj=obj) assert_index_equal(left.index, right.index, - obj='{0}.index'.format(obj)) + obj='{obj}.index'.format(obj=obj)) assert_sp_array_equal(left.block.values, right.block.values) @@ -1563,9 +1572,9 @@ def assert_sp_frame_equal(left, right, check_dtype=True, exact_indices=True, assert_class_equal(left, right, obj=obj) assert_index_equal(left.index, right.index, - obj='{0}.index'.format(obj)) + obj='{obj}.index'.format(obj=obj)) assert_index_equal(left.columns, right.columns, - obj='{0}.columns'.format(obj)) + obj='{obj}.columns'.format(obj=obj)) for col, series in compat.iteritems(left): assert (col in right) @@ -1599,7 +1608,7 @@ def assert_sp_list_equal(left, right): def assert_contains_all(iterable, dic): for k in iterable: - assert k in dic, "Did not contain item: '%r'" % k + assert k in dic, "Did not contain item: '{key!r}'".format(key=k) def assert_copy(iter1, iter2, **eql_kwargs): @@ -1613,10 +1622,10 @@ def assert_copy(iter1, iter2, **eql_kwargs): """ for elem1, elem2 in zip(iter1, iter2): assert_almost_equal(elem1, elem2, **eql_kwargs) - assert elem1 is not elem2, ("Expected object %r and " - "object %r to be different " - "objects, were same." - % (type(elem1), type(elem2))) + msg = ("Expected object {obj1!r} and object {obj2!r} to be " + "different objects, but they were the same object." + ).format(obj1=type(elem1), obj2=type(elem2)) + assert elem1 is not elem2, msg def getCols(k): @@ -1870,8 +1879,9 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, idx.name = names[0] return idx elif idx_type is not None: - raise ValueError('"%s" is not a legal value for `idx_type`, use ' - '"i"/"f"/"s"/"u"/"dt/"p"/"td".' % idx_type) + raise ValueError('"{idx_type}" is not a legal value for `idx_type`, ' + 'use "i"/"f"/"s"/"u"/"dt/"p"/"td".' 
+ .format(idx_type=idx_type)) if len(ndupe_l) < nlevels: ndupe_l.extend([1] * (nlevels - len(ndupe_l))) @@ -1890,7 +1900,7 @@ def keyfunc(x): div_factor = nentries // ndupe_l[i] + 1 cnt = Counter() for j in range(div_factor): - label = prefix + '_l%d_g' % i + str(j) + label = '{prefix}_l{i}_g{j}'.format(prefix=prefix, i=i, j=j) cnt[label] = ndupe_l[i] # cute Counter trick result = list(sorted(cnt.elements(), key=keyfunc))[:nentries] @@ -1983,7 +1993,7 @@ def makeCustomDataframe(nrows, ncols, c_idx_names=True, r_idx_names=True, # by default, generate data based on location if data_gen_f is None: - data_gen_f = lambda r, c: "R%dC%d" % (r, c) + data_gen_f = lambda r, c: "R{rows}C{cols}".format(rows=r, cols=c) data = [[data_gen_f(r, c) for c in range(ncols)] for r in range(nrows)] @@ -2112,13 +2122,13 @@ def skip_if_no_package(pkg_name, min_version=None, max_version=None, import pytest if app: - msg = '%s requires %s' % (app, pkg_name) + msg = '{app} requires {pkg_name}'.format(app=app, pkg_name=pkg_name) else: - msg = 'module requires %s' % pkg_name + msg = 'module requires {pkg_name}'.format(pkg_name=pkg_name) if min_version: - msg += ' with version >= %s' % (min_version,) + msg += ' with version >= {min_version}'.format(min_version=min_version) if max_version: - msg += ' with version < %s' % (max_version,) + msg += ' with version < {max_version}'.format(max_version=max_version) try: mod = __import__(pkg_name) except ImportError: @@ -2126,7 +2136,8 @@ def skip_if_no_package(pkg_name, min_version=None, max_version=None, try: have_version = mod.__version__ except AttributeError: - pytest.skip('Cannot find version for %s' % pkg_name) + pytest.skip('Cannot find version for {pkg_name}' + .format(pkg_name=pkg_name)) if min_version and checker(have_version) < checker(min_version): pytest.skip(msg) if max_version and checker(have_version) >= checker(max_version): @@ -2332,7 +2343,7 @@ def wrapper(*args, **kwargs): if errno in skip_errnos: skip("Skipping test due to known errno" - " and error %s" % e) + " and error {error}".format(error=e)) try: e_str = traceback.format_exc(e) @@ -2341,7 +2352,7 @@ def wrapper(*args, **kwargs): if any([m.lower() in e_str.lower() for m in _skip_on_messages]): skip("Skipping test because exception " - "message is known and error %s" % e) + "message is known and error {error}".format(error=e)) if not isinstance(e, error_classes): raise @@ -2350,7 +2361,7 @@ def wrapper(*args, **kwargs): raise else: skip("Skipping test due to lack of connectivity" - " and error %s" % e) + " and error {error}".format(error=e)) return wrapper @@ -2488,7 +2499,7 @@ def __exit__(self, exc_type, exc_value, trace_back): if not exc_type: exp_name = getattr(expected, "__name__", str(expected)) - raise AssertionError("{0} not raised.".format(exp_name)) + raise AssertionError("{name} not raised.".format(name=exp_name)) return self.exception_matches(exc_type, exc_value, trace_back) @@ -2523,8 +2534,9 @@ def exception_matches(self, exc_type, exc_value, trace_back): val = str(exc_value) if not self.regexp.search(val): - e = AssertionError('"%s" does not match "%s"' % - (self.regexp.pattern, str(val))) + msg = '"{pat}" does not match "{val}"'.format( + pat=self.regexp.pattern, val=val) + e = AssertionError(msg) raise_with_traceback(e, trace_back) return True @@ -2591,18 +2603,20 @@ def assert_produces_warning(expected_warning=Warning, filter_level="always", from inspect import getframeinfo, stack caller = getframeinfo(stack()[2][0]) msg = ("Warning not set with correct stacklevel. 
" - "File where warning is raised: {0} != {1}. " - "Warning message: {2}".format( - actual_warning.filename, caller.filename, - actual_warning.message)) + "File where warning is raised: {actual} != " + "{caller}. Warning message: {message}" + ).format(actual=actual_warning.filename, + caller=caller.filename, + message=actual_warning.message) assert actual_warning.filename == caller.filename, msg else: extra_warnings.append(actual_warning.category.__name__) if expected_warning: - assert saw_warning, ("Did not see expected warning of class %r." - % expected_warning.__name__) - assert not extra_warnings, ("Caused unexpected warning(s): %r." - % extra_warnings) + msg = "Did not see expected warning of class {name!r}.".format( + name=expected_warning.__name__) + assert saw_warning, msg + assert not extra_warnings, ("Caused unexpected warning(s): {extra!r}." + ).format(extra=extra_warnings) class RNGContext(object): From 9e2699793747e0205690b3e0149fa339ac42daa7 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Fri, 11 Aug 2017 11:23:33 +0100 Subject: [PATCH 066/141] Add 'See also' sections (#17223) --- pandas/core/generic.py | 3 +++ pandas/core/tools/datetimes.py | 4 ++++ pandas/core/tools/numeric.py | 7 +++++++ pandas/core/tools/timedeltas.py | 5 +++++ 4 files changed, 19 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bd3297f66a469..5a7f37bba91aa 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3681,6 +3681,9 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): See also -------- + pandas.to_datetime : Convert argument to datetime. + pandas.to_timedelta : Convert argument to timedelta. + pandas.to_numeric : Convert argument to a numeric type. numpy.ndarray.astype : Cast a numpy array to a specified type. """ if is_dict_like(dtype): diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index eebf78d7619eb..6ff4302937d07 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -335,6 +335,10 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, 1 1960-01-03 2 1960-01-04 + See also + -------- + pandas.DataFrame.astype : Cast argument to a specified dtype. + pandas.to_timedelta : Convert argument to timedelta. """ from pandas.core.indexes.datetimes import DatetimeIndex diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index eda88a2f7e474..c584e29f682dd 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -84,6 +84,13 @@ def to_numeric(arg, errors='raise', downcast=None): 2 2.0 3 -3.0 dtype: float64 + + See also + -------- + pandas.DataFrame.astype : Cast argument to a specified dtype. + pandas.to_datetime : Convert argument to datetime. + pandas.to_timedelta : Convert argument to timedelta. + numpy.ndarray.astype : Cast a numpy array to a specified type. """ if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'): raise ValueError('invalid downcasting method provided') diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index fe03f89fdb2c5..f2d99d26a87b8 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -61,6 +61,11 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): >>> pd.to_timedelta(np.arange(5), unit='d') TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None) + + See also + -------- + pandas.DataFrame.astype : Cast argument to a specified dtype. 
+ pandas.to_datetime : Convert argument to datetime. """ unit = _validate_timedelta_unit(unit) From a7311d25b600aa256b4992ab3b85f0a9e1190a04 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 11 Aug 2017 03:36:13 -0700 Subject: [PATCH 067/141] move pivot_table doc-string to DataFrame (#17174) --- pandas/core/frame.py | 86 ++++++++++++++++++++++++++++ pandas/core/reshape/pivot.py | 107 +++++++---------------------------- 2 files changed, 107 insertions(+), 86 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 94cce1b4d05b5..2c82fe4c348d5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4154,6 +4154,92 @@ def pivot(self, index=None, columns=None, values=None): from pandas.core.reshape.reshape import pivot return pivot(self, index=index, columns=columns, values=values) + _shared_docs['pivot_table'] = """ + Create a spreadsheet-style pivot table as a DataFrame. The levels in + the pivot table will be stored in MultiIndex objects (hierarchical + indexes) on the index and columns of the result DataFrame + + Parameters + ----------%s + values : column to aggregate, optional + index : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table index. If an array is passed, + it is being used as the same manner as column values. + columns : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table column. If an array is passed, + it is being used as the same manner as column values. + aggfunc : function or list of functions, default numpy.mean + If list of functions passed, the resulting pivot table will have + hierarchical columns whose top level are the function names + (inferred from the function objects themselves) + fill_value : scalar, default None + Value to replace missing values with + margins : boolean, default False + Add all row / columns (e.g. for subtotal / grand totals) + dropna : boolean, default True + Do not include columns whose entries are all NaN + margins_name : string, default 'All' + Name of the row / column that will contain the totals + when margins is True. + + Examples + -------- + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", + ... "one", "one", "two", "two"], + ... "C": ["small", "large", "large", "small", + ... "small", "large", "small", "small", + ... "large"], + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) + >>> df + A B C D + 0 foo one small 1 + 1 foo one large 2 + 2 foo one large 2 + 3 foo two small 3 + 4 foo two small 3 + 5 bar one large 4 + 6 bar one small 5 + 7 bar two small 6 + 8 bar two large 7 + + >>> table = pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc=np.sum) + >>> table + ... 
# doctest: +NORMALIZE_WHITESPACE + C large small + A B + bar one 4.0 5.0 + two 7.0 6.0 + foo one 4.0 1.0 + two NaN 6.0 + + Returns + ------- + table : DataFrame + + See also + -------- + DataFrame.pivot : pivot without aggregation that can handle + non-numeric data + """ + + @Substitution('') + @Appender(_shared_docs['pivot_table']) + def pivot_table(self, values=None, index=None, columns=None, + aggfunc='mean', fill_value=None, margins=False, + dropna=True, margins_name='All'): + from pandas.core.reshape.pivot import pivot_table + return pivot_table(self, values=values, index=index, columns=columns, + aggfunc=aggfunc, fill_value=fill_value, + margins=margins, dropna=dropna, + margins_name=margins_name) + def stack(self, level=-1, dropna=True): """ Pivot a level of the (possibly hierarchical) column labels, returning a diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index d4ea49c130add..e61adf3aac30a 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -2,95 +2,30 @@ from pandas.core.dtypes.common import is_list_like, is_scalar +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries + from pandas.core.reshape.concat import concat -from pandas import Series, DataFrame, MultiIndex, Index +from pandas.core.series import Series from pandas.core.groupby import Grouper from pandas.core.reshape.util import cartesian_product -from pandas.core.index import _get_combined_index +from pandas.core.index import Index, _get_combined_index from pandas.compat import range, lrange, zip from pandas import compat import pandas.core.common as com +from pandas.util._decorators import Appender, Substitution + +from pandas.core.frame import _shared_docs +# Note: We need to make sure `frame` is imported before `pivot`, otherwise +# _shared_docs['pivot_table'] will not yet exist. TODO: Fix this dependency + import numpy as np +@Substitution('\ndata : DataFrame') +@Appender(_shared_docs['pivot_table'], indents=1) def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All'): - """ - Create a spreadsheet-style pivot table as a DataFrame. The levels in the - pivot table will be stored in MultiIndex objects (hierarchical indexes) on - the index and columns of the result DataFrame - - Parameters - ---------- - data : DataFrame - values : column to aggregate, optional - index : column, Grouper, array, or list of the previous - If an array is passed, it must be the same length as the data. The list - can contain any of the other types (except list). - Keys to group by on the pivot table index. If an array is passed, it - is being used as the same manner as column values. - columns : column, Grouper, array, or list of the previous - If an array is passed, it must be the same length as the data. The list - can contain any of the other types (except list). - Keys to group by on the pivot table column. If an array is passed, it - is being used as the same manner as column values. - aggfunc : function or list of functions, default numpy.mean - If list of functions passed, the resulting pivot table will have - hierarchical columns whose top level are the function names (inferred - from the function objects themselves) - fill_value : scalar, default None - Value to replace missing values with - margins : boolean, default False - Add all row / columns (e.g. 
for subtotal / grand totals) - dropna : boolean, default True - Do not include columns whose entries are all NaN - margins_name : string, default 'All' - Name of the row / column that will contain the totals - when margins is True. - - Examples - -------- - >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", - ... "bar", "bar", "bar", "bar"], - ... "B": ["one", "one", "one", "two", "two", - ... "one", "one", "two", "two"], - ... "C": ["small", "large", "large", "small", - ... "small", "large", "small", "small", - ... "large"], - ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) - >>> df - A B C D - 0 foo one small 1 - 1 foo one large 2 - 2 foo one large 2 - 3 foo two small 3 - 4 foo two small 3 - 5 bar one large 4 - 6 bar one small 5 - 7 bar two small 6 - 8 bar two large 7 - - >>> table = pivot_table(df, values='D', index=['A', 'B'], - ... columns=['C'], aggfunc=np.sum) - >>> table - ... # doctest: +NORMALIZE_WHITESPACE - C large small - A B - bar one 4.0 5.0 - two 7.0 6.0 - foo one 4.0 1.0 - two NaN 6.0 - - Returns - ------- - table : DataFrame - - See also - -------- - DataFrame.pivot : pivot without aggregation that can handle - non-numeric data - """ index = _convert_by(index) columns = _convert_by(columns) @@ -162,6 +97,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', table = agged.unstack(to_unstack) if not dropna: + from pandas import MultiIndex try: m = MultiIndex.from_arrays(cartesian_product(table.index.levels), names=table.index.names) @@ -176,7 +112,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', except AttributeError: pass # it's a single level or a series - if isinstance(table, DataFrame): + if isinstance(table, ABCDataFrame): table = table.sort_index(axis=1) if fill_value is not None: @@ -197,16 +133,13 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', if len(index) == 0 and len(columns) > 0: table = table.T - # GH 15193 Makse sure empty columns are removed if dropna=True - if isinstance(table, DataFrame) and dropna: + # GH 15193 Make sure empty columns are removed if dropna=True + if isinstance(table, ABCDataFrame) and dropna: table = table.dropna(how='all', axis=1) return table -DataFrame.pivot_table = pivot_table - - def _add_margins(table, data, values, rows, cols, aggfunc, margins_name='All', fill_value=None): if not isinstance(margins_name, compat.string_types): @@ -230,7 +163,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, else: key = margins_name - if not values and isinstance(table, Series): + if not values and isinstance(table, ABCSeries): # If there are no values and the table is a series, then there is only # one column in the data. Compute grand margin and return it. 
return table.append(Series({key: grand_margin[margins_name]})) @@ -257,6 +190,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, else: row_margin[k] = grand_margin[k[0]] + from pandas import DataFrame margin_dummy = DataFrame(row_margin, columns=[key]).T row_names = result.index.names @@ -402,7 +336,7 @@ def _convert_by(by): if by is None: by = [] elif (is_scalar(by) or - isinstance(by, (np.ndarray, Index, Series, Grouper)) or + isinstance(by, (np.ndarray, Index, ABCSeries, Grouper)) or hasattr(by, '__call__')): by = [by] else: @@ -523,6 +457,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, if values is not None and aggfunc is None: raise ValueError("values cannot be used without an aggfunc.") + from pandas import DataFrame df = DataFrame(data, index=common_idx) if values is None: df['__dummy__'] = 0 @@ -620,7 +555,7 @@ def _get_names(arrs, names, prefix='row'): if names is None: names = [] for i, arr in enumerate(arrs): - if isinstance(arr, Series) and arr.name is not None: + if isinstance(arr, ABCSeries) and arr.name is not None: names.append(arr.name) else: names.append('%s_%d' % (prefix, i)) From 1ac9ede2fc319bae2f5c9c5bf801251537cb296c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 12 Aug 2017 10:30:26 -0700 Subject: [PATCH 068/141] Remove import of pandas as pd in core.window (#17233) --- pandas/core/window.py | 55 ++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 5866f1e8a76bd..4bd959f52673c 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -17,7 +17,8 @@ ABCDataFrame, ABCDatetimeIndex, ABCTimedeltaIndex, - ABCPeriodIndex) + ABCPeriodIndex, + ABCDateOffset) from pandas.core.dtypes.common import ( is_integer, is_bool, @@ -28,13 +29,12 @@ is_list_like, _ensure_float64, is_scalar) -import pandas as pd from pandas.core.base import (PandasObject, SelectionMixin, GroupByMixin) import pandas.core.common as com import pandas._libs.window as _window -from pandas.tseries.offsets import DateOffset + from pandas import compat from pandas.compat.numpy import function as nv from pandas.util._decorators import (Substitution, Appender, @@ -254,7 +254,8 @@ def _wrap_result(self, result, block=None, obj=None): # coerce if necessary if block is not None: if is_timedelta64_dtype(block.values.dtype): - result = pd.to_timedelta( + from pandas import to_timedelta + result = to_timedelta( result.ravel(), unit='ns').values.reshape(result.shape) if result.ndim == 1: @@ -275,7 +276,7 @@ def _wrap_results(self, results, blocks, obj): obj : conformed data (may be resampled) """ - from pandas import Series + from pandas import Series, concat from pandas.core.index import _ensure_index final = [] @@ -290,8 +291,7 @@ def _wrap_results(self, results, blocks, obj): # we want to put it back into the results # in the same location columns = self._selected_obj.columns - if self.on is not None \ - and not self._on.equals(obj.index): + if self.on is not None and not self._on.equals(obj.index): name = self._on.name final.append(Series(self._on, index=obj.index, name=name)) @@ -309,8 +309,7 @@ def _wrap_results(self, results, blocks, obj): if not len(final): return obj.astype('float64') - return pd.concat(final, axis=1).reindex(columns=columns, - copy=False) + return concat(final, axis=1).reindex(columns=columns, copy=False) def _center_window(self, result, window): """ center the result in the window """ @@ -318,10 +317,9 @@ def _center_window(self, 
result, window): raise ValueError("Requested axis is larger then no. of argument " "dimensions") - from pandas import Series, DataFrame offset = _offset(window, True) if offset > 0: - if isinstance(result, (Series, DataFrame)): + if isinstance(result, (ABCSeries, ABCDataFrame)): result = result.slice_shift(-offset, axis=self.axis) else: lead_indexer = [slice(None)] * result.ndim @@ -1085,7 +1083,8 @@ def _on(self): return self.obj.index elif (isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns): - return pd.Index(self.obj[self.on]) + from pandas import Index + return Index(self.obj[self.on]) else: raise ValueError("invalid on specified as {0}, " "must be a column (if DataFrame) " @@ -1096,7 +1095,7 @@ def validate(self): # we allow rolling on a datetimelike index if ((self.obj.empty or self.is_datetimelike) and - isinstance(self.window, (compat.string_types, DateOffset, + isinstance(self.window, (compat.string_types, ABCDateOffset, timedelta))): self._validate_monotonic() @@ -1871,19 +1870,19 @@ def _cov(x, y): def _flex_binary_moment(arg1, arg2, f, pairwise=False): - from pandas import Series, DataFrame - if not (isinstance(arg1, (np.ndarray, Series, DataFrame)) and - isinstance(arg2, (np.ndarray, Series, DataFrame))): + if not (isinstance(arg1, (np.ndarray, ABCSeries, ABCDataFrame)) and + isinstance(arg2, (np.ndarray, ABCSeries, ABCDataFrame))): raise TypeError("arguments to moment function must be of type " "np.ndarray/Series/DataFrame") - if (isinstance(arg1, (np.ndarray, Series)) and - isinstance(arg2, (np.ndarray, Series))): + if (isinstance(arg1, (np.ndarray, ABCSeries)) and + isinstance(arg2, (np.ndarray, ABCSeries))): X, Y = _prep_binary(arg1, arg2) return f(X, Y) - elif isinstance(arg1, DataFrame): + elif isinstance(arg1, ABCDataFrame): + from pandas import DataFrame def dataframe_from_int_dict(data, frame_template): result = DataFrame(data, index=frame_template.index) @@ -1892,7 +1891,7 @@ def dataframe_from_int_dict(data, frame_template): return result results = {} - if isinstance(arg2, DataFrame): + if isinstance(arg2, ABCDataFrame): if pairwise is False: if arg1 is arg2: # special case in order to handle duplicate column names @@ -1929,7 +1928,7 @@ def dataframe_from_int_dict(data, frame_template): # TODO: not the most efficient (perf-wise) # though not bad code-wise - from pandas import Panel, MultiIndex + from pandas import Panel, MultiIndex, concat with warnings.catch_warnings(record=True): p = Panel.from_dict(results).swapaxes('items', 'major') @@ -1939,7 +1938,7 @@ def dataframe_from_int_dict(data, frame_template): p.minor_axis = arg2.columns[p.minor_axis] if len(p.items): - result = pd.concat( + result = concat( [p.iloc[i].T for i in range(len(p.items))], keys=p.items) else: @@ -2034,8 +2033,7 @@ def _zsqrt(x): result = np.sqrt(x) mask = x < 0 - from pandas import DataFrame - if isinstance(x, DataFrame): + if isinstance(x, ABCDataFrame): if mask.values.any(): result[mask] = 0 else: @@ -2060,8 +2058,7 @@ def _prep_binary(arg1, arg2): def rolling(obj, win_type=None, **kwds): - from pandas import Series, DataFrame - if not isinstance(obj, (Series, DataFrame)): + if not isinstance(obj, (ABCSeries, ABCDataFrame)): raise TypeError('invalid type: %s' % type(obj)) if win_type is not None: @@ -2074,8 +2071,7 @@ def rolling(obj, win_type=None, **kwds): def expanding(obj, **kwds): - from pandas import Series, DataFrame - if not isinstance(obj, (Series, DataFrame)): + if not isinstance(obj, (ABCSeries, ABCDataFrame)): raise TypeError('invalid type: %s' % type(obj)) 
return Expanding(obj, **kwds) @@ -2085,8 +2081,7 @@ def expanding(obj, **kwds): def ewm(obj, **kwds): - from pandas import Series, DataFrame - if not isinstance(obj, (Series, DataFrame)): + if not isinstance(obj, (ABCSeries, ABCDataFrame)): raise TypeError('invalid type: %s' % type(obj)) return EWM(obj, **kwds) From a2d8d238d2e1f03c04cfaf91797e069c517c5cbd Mon Sep 17 00:00:00 2001 From: kernc Date: Sat, 12 Aug 2017 19:33:03 +0200 Subject: [PATCH 069/141] TST: Move more frame tests to SharedWithSparse (#17227) --- pandas/tests/frame/test_api.py | 132 +++++++++++++++++------------- pandas/tests/sparse/test_frame.py | 6 ++ 2 files changed, 79 insertions(+), 59 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 8c4c13b66ffa9..53a1b9525a0dd 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -28,6 +28,20 @@ class SharedWithSparse(object): + """ + A collection of tests DataFrame and SparseDataFrame can share. + + In generic tests on this class, use ``self._assert_frame_equal()`` and + ``self._assert_series_equal()`` which are implemented in sub-classes + and dispatch correctly. + """ + def _assert_frame_equal(self, left, right): + """Dispatch to frame class dependent assertion""" + raise NotImplementedError + + def _assert_series_equal(self, left, right): + """Dispatch to series class dependent assertion""" + raise NotImplementedError def test_copy_index_name_checking(self): # don't want to be able to modify the index stored elsewhere after @@ -76,11 +90,6 @@ def test_add_prefix_suffix(self): expected = pd.Index(['{}%'.format(c) for c in self.frame.columns]) tm.assert_index_equal(with_pct_suffix.columns, expected) - -class TestDataFrameMisc(SharedWithSparse, TestData): - - klass = DataFrame - def test_get_axis(self): f = self.frame assert f._get_axis_number(0) == 0 @@ -118,13 +127,13 @@ def test_column_contains_typeerror(self): pass def test_not_hashable(self): - df = pd.DataFrame([1]) + df = self.klass([1]) pytest.raises(TypeError, hash, df) pytest.raises(TypeError, hash, self.empty) def test_new_empty_index(self): - df1 = DataFrame(randn(0, 3)) - df2 = DataFrame(randn(0, 3)) + df1 = self.klass(randn(0, 3)) + df2 = self.klass(randn(0, 3)) df1.index.name = 'foo' assert df2.index.name is None @@ -135,7 +144,7 @@ def test_array_interface(self): assert result.index is self.frame.index assert result.columns is self.frame.columns - assert_frame_equal(result, self.frame.apply(np.sqrt)) + self._assert_frame_equal(result, self.frame.apply(np.sqrt)) def test_get_agg_axis(self): cols = self.frame._get_agg_axis(0) @@ -160,36 +169,36 @@ def test_nonzero(self): assert not df.empty def test_iteritems(self): - df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) + df = self.klass([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) for k, v in compat.iteritems(df): - assert type(v) == Series + assert type(v) == self.klass._constructor_sliced def test_iter(self): assert tm.equalContents(list(self.frame), self.frame.columns) def test_iterrows(self): - for i, (k, v) in enumerate(self.frame.iterrows()): - exp = self.frame.xs(self.frame.index[i]) - assert_series_equal(v, exp) + for k, v in self.frame.iterrows(): + exp = self.frame.loc[k] + self._assert_series_equal(v, exp) - for i, (k, v) in enumerate(self.mixed_frame.iterrows()): - exp = self.mixed_frame.xs(self.mixed_frame.index[i]) - assert_series_equal(v, exp) + for k, v in self.mixed_frame.iterrows(): + exp = self.mixed_frame.loc[k] + self._assert_series_equal(v, exp) def 
test_itertuples(self): for i, tup in enumerate(self.frame.itertuples()): - s = Series(tup[1:]) + s = self.klass._constructor_sliced(tup[1:]) s.name = tup[0] expected = self.frame.iloc[i, :].reset_index(drop=True) - assert_series_equal(s, expected) + self._assert_series_equal(s, expected) - df = DataFrame({'floats': np.random.randn(5), - 'ints': lrange(5)}, columns=['floats', 'ints']) + df = self.klass({'floats': np.random.randn(5), + 'ints': lrange(5)}, columns=['floats', 'ints']) for tup in df.itertuples(index=False): assert isinstance(tup[1], np.integer) - df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]}) + df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]}) dfaa = df[['a', 'a']] assert (list(dfaa.itertuples()) == @@ -237,7 +246,7 @@ def test_as_matrix(self): mat = self.mixed_frame.as_matrix(['foo', 'A']) assert mat[0, 0] == 'bar' - df = DataFrame({'real': [1, 2, 3], 'complex': [1j, 2j, 3j]}) + df = self.klass({'real': [1, 2, 3], 'complex': [1j, 2j, 3j]}) mat = df.as_matrix() assert mat[0, 0] == 1j @@ -246,20 +255,6 @@ def test_as_matrix(self): expected = self.frame.reindex(columns=['A', 'B']).values assert_almost_equal(mat, expected) - def test_values(self): - self.frame.values[:, 0] = 5. - assert (self.frame.values[:, 0] == 5).all() - - def test_deepcopy(self): - cp = deepcopy(self.frame) - series = cp['A'] - series[:] = 10 - for idx, value in compat.iteritems(series): - assert self.frame['A'][idx] != value - - # --------------------------------------------------------------------- - # Transposing - def test_transpose(self): frame = self.frame dft = frame.T @@ -272,23 +267,17 @@ def test_transpose(self): # mixed type index, data = tm.getMixedTypeDict() - mixed = DataFrame(data, index=index) + mixed = self.klass(data, index=index) mixed_T = mixed.T for col, s in compat.iteritems(mixed_T): assert s.dtype == np.object_ - def test_transpose_get_view(self): - dft = self.frame.T - dft.values[:, 5:10] = 5 - - assert (self.frame.values[5:10] == 5).all() - def test_swapaxes(self): - df = DataFrame(np.random.randn(10, 5)) - assert_frame_equal(df.T, df.swapaxes(0, 1)) - assert_frame_equal(df.T, df.swapaxes(1, 0)) - assert_frame_equal(df, df.swapaxes(0, 0)) + df = self.klass(np.random.randn(10, 5)) + self._assert_frame_equal(df.T, df.swapaxes(0, 1)) + self._assert_frame_equal(df.T, df.swapaxes(1, 0)) + self._assert_frame_equal(df, df.swapaxes(0, 0)) pytest.raises(ValueError, df.swapaxes, 2, 5) def test_axis_aliases(self): @@ -308,8 +297,8 @@ def test_more_asMatrix(self): assert values.shape[1] == len(self.mixed_frame.columns) def test_repr_with_mi_nat(self): - df = DataFrame({'X': [1, 2]}, - index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']]) + df = self.klass({'X': [1, 2]}, + index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']]) res = repr(df) exp = ' X\nNaT a 1\n2013-01-01 b 2' assert res == exp @@ -324,31 +313,56 @@ def test_series_put_names(self): assert v.name == k def test_empty_nonzero(self): - df = DataFrame([1, 2, 3]) + df = self.klass([1, 2, 3]) assert not df.empty - df = pd.DataFrame(index=[1], columns=[1]) + df = self.klass(index=[1], columns=[1]) assert not df.empty - df = DataFrame(index=['a', 'b'], columns=['c', 'd']).dropna() + df = self.klass(index=['a', 'b'], columns=['c', 'd']).dropna() assert df.empty assert df.T.empty - empty_frames = [pd.DataFrame(), - pd.DataFrame(index=[1]), - pd.DataFrame(columns=[1]), - pd.DataFrame({1: []})] + empty_frames = [self.klass(), + self.klass(index=[1]), + self.klass(columns=[1]), + self.klass({1: []})] for df in empty_frames: 
assert df.empty assert df.T.empty def test_with_datetimelikes(self): - df = DataFrame({'A': date_range('20130101', periods=10), - 'B': timedelta_range('1 day', periods=10)}) + df = self.klass({'A': date_range('20130101', periods=10), + 'B': timedelta_range('1 day', periods=10)}) t = df.T result = t.get_dtype_counts() expected = Series({'object': 10}) tm.assert_series_equal(result, expected) + +class TestDataFrameMisc(SharedWithSparse, TestData): + + klass = DataFrame + # SharedWithSparse tests use generic, klass-agnostic assertion + _assert_frame_equal = staticmethod(assert_frame_equal) + _assert_series_equal = staticmethod(assert_series_equal) + + def test_values(self): + self.frame.values[:, 0] = 5. + assert (self.frame.values[:, 0] == 5).all() + + def test_deepcopy(self): + cp = deepcopy(self.frame) + series = cp['A'] + series[:] = 10 + for idx, value in compat.iteritems(series): + assert self.frame['A'][idx] != value + + def test_transpose_get_view(self): + dft = self.frame.T + dft.values[:, 5:10] = 5 + + assert (self.frame.values[5:10] == 5).all() + def test_inplace_return_self(self): # re #1893 diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index f0f8954e5785b..004af5066fe83 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -29,6 +29,10 @@ class TestSparseDataFrame(SharedWithSparse): klass = SparseDataFrame + # SharedWithSparse tests use generic, klass-agnostic assertion + _assert_frame_equal = staticmethod(tm.assert_sp_frame_equal) + _assert_series_equal = staticmethod(tm.assert_sp_series_equal) + def setup_method(self, method): self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], @@ -43,6 +47,8 @@ def setup_method(self, method): self.frame = SparseDataFrame(self.data, index=self.dates) self.iframe = SparseDataFrame(self.data, index=self.dates, default_kind='integer') + self.mixed_frame = self.frame.copy(False) + self.mixed_frame['foo'] = pd.SparseArray(['bar'] * len(self.dates)) values = self.frame.values.copy() values[np.isnan(values)] = 0 From 013b983b9e32b187453e8d4d354687c0396c0ef9 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Sat, 12 Aug 2017 19:33:52 +0200 Subject: [PATCH 070/141] REF: _get_objs_combined_axis (#17217) --- pandas/core/frame.py | 6 ++---- pandas/core/indexes/api.py | 11 +++++++++++ pandas/core/panel.py | 7 +++---- pandas/core/reshape/concat.py | 19 ++++++++----------- pandas/core/reshape/pivot.py | 9 ++------- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2c82fe4c348d5..467ef52de234e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6126,12 +6126,10 @@ def _list_to_arrays(data, columns, coerce_float=False, dtype=None): def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): - from pandas.core.index import _get_combined_index + from pandas.core.index import _get_objs_combined_axis if columns is None: - columns = _get_combined_index([ - s.index for s in data if getattr(s, 'index', None) is not None - ]) + columns = _get_objs_combined_axis(data) indexer_cache = {} diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index d90c681abc03f..db73a6878258a 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -23,11 +23,22 @@ 'PeriodIndex', 'DatetimeIndex', '_new_Index', 'NaT', '_ensure_index', '_get_na_value', '_get_combined_index', + '_get_objs_combined_axis', '_get_distinct_indexes', 
'_union_indexes', '_get_consensus_names', '_all_indexes_same'] +def _get_objs_combined_axis(objs, intersect=False, axis=0): + # Extract combined index: return intersection or union (depending on the + # value of "intersect") of indexes on given axis, or None if all objects + # lack indexes (e.g. they are numpy arrays) + obs_idxes = [obj._get_axis(axis) for obj in objs + if hasattr(obj, '_get_axis')] + if obs_idxes: + return _get_combined_index(obs_idxes, intersect=intersect) + + def _get_combined_index(indexes, intersect=False): # TODO: handle index names! indexes = _get_distinct_indexes(indexes) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index e4515efe109c5..a3e35492ad9af 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -26,7 +26,7 @@ from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, _ensure_index, - _get_combined_index) + _get_objs_combined_axis) from pandas.io.formats.printing import pprint_thing from pandas.core.indexing import maybe_droplevels from pandas.core.internals import (BlockManager, @@ -1448,7 +1448,6 @@ def _extract_axis(self, data, axis=0, intersect=False): index = Index([]) elif len(data) > 0: raw_lengths = [] - indexes = [] have_raw_arrays = False have_frames = False @@ -1456,13 +1455,13 @@ def _extract_axis(self, data, axis=0, intersect=False): for v in data.values(): if isinstance(v, self._constructor_sliced): have_frames = True - indexes.append(v._get_axis(axis)) elif v is not None: have_raw_arrays = True raw_lengths.append(v.shape[axis]) if have_frames: - index = _get_combined_index(indexes, intersect=intersect) + index = _get_objs_combined_axis(data.values(), axis=axis, + intersect=intersect) if have_raw_arrays: lengths = list(set(raw_lengths)) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index e199ec2710367..20d561738dc78 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -4,7 +4,7 @@ import numpy as np from pandas import compat, DataFrame, Series, Index, MultiIndex -from pandas.core.index import (_get_combined_index, +from pandas.core.index import (_get_objs_combined_axis, _ensure_index, _get_consensus_names, _all_indexes_same) from pandas.core.categorical import (_factorize_from_iterable, @@ -445,16 +445,13 @@ def _get_new_axes(self): return new_axes def _get_comb_axis(self, i): - if self._is_series: - all_indexes = [x.index for x in self.objs] - else: - try: - all_indexes = [x._data.axes[i] for x in self.objs] - except IndexError: - types = [type(x).__name__ for x in self.objs] - raise TypeError("Cannot concatenate list of %s" % types) - - return _get_combined_index(all_indexes, intersect=self.intersect) + data_axis = self.objs[0]._get_block_manager_axis(i) + try: + return _get_objs_combined_axis(self.objs, axis=data_axis, + intersect=self.intersect) + except IndexError: + types = [type(x).__name__ for x in self.objs] + raise TypeError("Cannot concatenate list of %s" % types) def _get_concat_axis(self): """ diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index e61adf3aac30a..e08c307bba818 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -8,7 +8,7 @@ from pandas.core.series import Series from pandas.core.groupby import Grouper from pandas.core.reshape.util import cartesian_product -from pandas.core.index import Index, _get_combined_index +from pandas.core.index import Index, _get_objs_combined_axis from pandas.compat import 
range, lrange, zip from pandas import compat import pandas.core.common as com @@ -440,12 +440,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, rownames = _get_names(index, rownames, prefix='row') colnames = _get_names(columns, colnames, prefix='col') - obs_idxes = [obj.index for objs in (index, columns) for obj in objs - if hasattr(obj, 'index')] - if obs_idxes: - common_idx = _get_combined_index(obs_idxes, intersect=True) - else: - common_idx = None + common_idx = _get_objs_combined_axis(index + columns, intersect=True) data = {} data.update(zip(rownames, index)) From fddb66d94a7bd697d92d15219fcd8345e64ceb99 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 14 Aug 2017 06:31:41 -0400 Subject: [PATCH 071/141] ENH/PERF: Remove frequency inference from .dt accessor (#17210) * ENH/PERF: Remove frequency inference from .dt accessor * BENCH: Add DatetimeAccessor benchmark * DOC: Whatsnew --- asv_bench/benchmarks/timeseries.py | 14 ++++++++++++++ doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/accessors.py | 19 ++++++++++++------- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index efe713639fec9..b7151ad2eaa99 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -510,3 +510,17 @@ def time_begin_incr_rng(self): def time_begin_decr_rng(self): self.rng - self.semi_month_begin + + +class DatetimeAccessor(object): + def setup(self): + self.N = 100000 + self.series = pd.Series( + pd.date_range(start='1/1/2000', periods=self.N, freq='T') + ) + + def time_dt_accessor(self): + self.series.dt + + def time_dt_accessor_normalize(self): + self.series.dt.normalize() diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 98998acfb72d6..6620afab850a7 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -297,6 +297,7 @@ Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Improved performance of instantiating :class:`SparseDataFrame` (:issue:`16773`) +- :attr:`Series.dt` no longer performs frequency inference, yielding a large speedup when accessing the attribute (:issue:`17210`) .. 
_whatsnew_0210.bug_fixes: diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index ce3143b342cec..88297ac70984d 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -61,23 +61,20 @@ def maybe_to_datetimelike(data, copy=False): data = orig.values.categories if is_datetime64_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), + return DatetimeProperties(DatetimeIndex(data, copy=copy), index, name=name, orig=orig) elif is_datetime64tz_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer', - ambiguous='infer'), + return DatetimeProperties(DatetimeIndex(data, copy=copy), index, data.name, orig=orig) elif is_timedelta64_dtype(data.dtype): - return TimedeltaProperties(TimedeltaIndex(data, copy=copy, - freq='infer'), index, + return TimedeltaProperties(TimedeltaIndex(data, copy=copy), index, name=name, orig=orig) else: if is_period_arraylike(data): return PeriodProperties(PeriodIndex(data, copy=copy), index, name=name, orig=orig) if is_datetime_arraylike(data): - return DatetimeProperties(DatetimeIndex(data, copy=copy, - freq='infer'), index, + return DatetimeProperties(DatetimeIndex(data, copy=copy), index, name=name, orig=orig) raise TypeError("cannot convert an object of type {0} to a " @@ -162,6 +159,10 @@ class DatetimeProperties(Properties): def to_pydatetime(self): return self.values.to_pydatetime() + @property + def freq(self): + return self.values.inferred_freq + DatetimeProperties._add_delegate_accessors( delegate=DatetimeIndex, @@ -202,6 +203,10 @@ def components(self): """ return self.values.components.set_index(self.index) + @property + def freq(self): + return self.values.inferred_freq + TimedeltaProperties._add_delegate_accessors( delegate=TimedeltaIndex, From 2e551561512672c095a4b0deb6548f2be4ab27f5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 14 Aug 2017 11:19:38 -0700 Subject: [PATCH 072/141] Fix apparent typo in tests (#17247) --- pandas/tests/indexes/datetimes/test_tools.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 7ff9c2b23cbfb..9764b65d330af 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1112,9 +1112,9 @@ def test_parsers(self): result3 = Timestamp('NaT') result4 = DatetimeIndex(['NaT'])[0] assert result1 is tslib.NaT - assert result1 is tslib.NaT - assert result1 is tslib.NaT - assert result1 is tslib.NaT + assert result2 is tslib.NaT + assert result3 is tslib.NaT + assert result4 is tslib.NaT def test_parsers_quarter_invalid(self): From b49446ebe859bfc646a0df04b150d14890915bf4 Mon Sep 17 00:00:00 2001 From: mattip Date: Tue, 15 Aug 2017 06:29:29 -0400 Subject: [PATCH 073/141] COMPAT: avoid calling getsizeof() on PyPy closes #17228 Author: mattip Closes #17229 from mattip/getsizeof-unavailable and squashes the following commits: d2623e423 [mattip] COMPAT: avoid calling getsizeof() on PyPy --- doc/source/whatsnew/v0.21.0.txt | 3 +- pandas/compat/__init__.py | 2 + pandas/core/base.py | 6 +-- pandas/core/indexes/multi.py | 6 ++- pandas/core/indexes/range.py | 8 +++- pandas/tests/frame/test_repr_info.py | 68 ++++++++++++++++++++-------- pandas/tests/test_base.py | 4 +- pandas/tests/test_categorical.py | 11 +++-- pandas/util/testing.py | 1 - 9 files changed, 76 insertions(+), 33 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt 
b/doc/source/whatsnew/v0.21.0.txt index 6620afab850a7..5140a96d2a09e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -309,8 +309,9 @@ Bug Fixes Conversion ^^^^^^^^^^ -- Bug in assignment against datetime-like data with ``int`` may incorrectly converte to datetime-like (:issue:`14145`) +- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`) - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) +- Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, so an approximation is used instead (:issue:`17228`) Indexing diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 33b41d61aa978..b367fda002b74 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -31,6 +31,7 @@ from distutils.version import LooseVersion from itertools import product import sys +import platform import types from unicodedata import east_asian_width import struct @@ -41,6 +42,7 @@ PY3 = (sys.version_info[0] >= 3) PY35 = (sys.version_info >= (3, 5)) PY36 = (sys.version_info >= (3, 6)) +PYPY = (platform.python_implementation() == 'PyPy') try: import __builtin__ as builtins diff --git a/pandas/core/base.py b/pandas/core/base.py index 8f21e3125a27e..4ae4736035793 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -15,6 +15,7 @@ import pandas.core.nanops as nanops import pandas._libs.lib as lib from pandas.compat.numpy import function as nv +from pandas.compat import PYPY from pandas.util._decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError @@ -1061,7 +1062,7 @@ def memory_usage(self, deep=False): Notes ----- Memory usage does not include memory consumed by elements that - are not components of the array if deep=False + are not components of the array if deep=False or if used on PyPy See Also -------- @@ -1071,9 +1072,8 @@ def memory_usage(self, deep=False): return self.values.memory_usage(deep=deep) v = self.values.nbytes - if deep and is_object_dtype(self): + if deep and is_object_dtype(self) and not PYPY: v += lib.memory_usage_of_objects(self.values) - return v def factorize(self, sort=False, na_sentinel=-1): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 420788f9008cd..ea45b4700172f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -465,9 +465,13 @@ def _nbytes(self, deep=False): *this is in internal routine* """ + + # for implementations with no useful getsizeof (PyPy) + objsize = 24 + level_nbytes = sum((i.memory_usage(deep=deep) for i in self.levels)) label_nbytes = sum((i.nbytes for i in self.labels)) - names_nbytes = sum((getsizeof(i) for i in self.names)) + names_nbytes = sum((getsizeof(i, objsize) for i in self.names)) result = level_nbytes + label_nbytes + names_nbytes # include our engine hashtable diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 5071b50bbebdf..ac4cc6986cace 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -194,8 +194,12 @@ def _format_data(self): @cache_readonly def nbytes(self): - """ return the number of bytes in the underlying data """ - return sum([getsizeof(getattr(self, v)) for v in + """ + Return the number of bytes in the underlying data + On implementations where this is undetermined (PyPy) + assume 24 bytes for each value + """ + return 
sum([getsizeof(getattr(self, v), 24) for v in ['_start', '_stop', '_step']]) def memory_usage(self, deep=False): diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index c317ad542659a..37f8c0cc85b23 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -11,7 +11,7 @@ import pytest from pandas import (DataFrame, compat, option_context) -from pandas.compat import StringIO, lrange, u +from pandas.compat import StringIO, lrange, u, PYPY import pandas.io.formats.format as fmt import pandas as pd @@ -323,23 +323,6 @@ def test_info_memory_usage(self): # excluded column with object dtype, so estimate is accurate assert not re.match(r"memory usage: [^+]+\+", res[-1]) - df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo']) - df_with_object_index.info(buf=buf, memory_usage=True) - res = buf.getvalue().splitlines() - assert re.match(r"memory usage: [^+]+\+", res[-1]) - - df_with_object_index.info(buf=buf, memory_usage='deep') - res = buf.getvalue().splitlines() - assert re.match(r"memory usage: [^+]+$", res[-1]) - - assert (df_with_object_index.memory_usage( - index=True, deep=True).sum() > df_with_object_index.memory_usage( - index=True).sum()) - - df_object = pd.DataFrame({'a': ['a']}) - assert (df_object.memory_usage(deep=True).sum() > - df_object.memory_usage().sum()) - # Test a DataFrame with duplicate columns dtypes = ['int64', 'int64', 'int64', 'float64'] data = {} @@ -349,6 +332,15 @@ def test_info_memory_usage(self): df = DataFrame(data) df.columns = dtypes + df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo']) + df_with_object_index.info(buf=buf, memory_usage=True) + res = buf.getvalue().splitlines() + assert re.match(r"memory usage: [^+]+\+", res[-1]) + + df_with_object_index.info(buf=buf, memory_usage='deep') + res = buf.getvalue().splitlines() + assert re.match(r"memory usage: [^+]+$", res[-1]) + # Ensure df size is as expected # (cols * rows * bytes) + index size df_size = df.memory_usage().sum() @@ -377,9 +369,47 @@ def test_info_memory_usage(self): df.memory_usage(index=True) df.index.values.nbytes + mem = df.memory_usage(deep=True).sum() + assert mem > 0 + + @pytest.mark.skipif(PYPY, + reason="on PyPy deep=True doesn't change result") + def test_info_memory_usage_deep_not_pypy(self): + df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo']) + assert (df_with_object_index.memory_usage( + index=True, deep=True).sum() > + df_with_object_index.memory_usage( + index=True).sum()) + + df_object = pd.DataFrame({'a': ['a']}) + assert (df_object.memory_usage(deep=True).sum() > + df_object.memory_usage().sum()) + + @pytest.mark.skipif(not PYPY, + reason="on PyPy deep=True does not change result") + def test_info_memory_usage_deep_pypy(self): + df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo']) + assert (df_with_object_index.memory_usage( + index=True, deep=True).sum() == + df_with_object_index.memory_usage( + index=True).sum()) + + df_object = pd.DataFrame({'a': ['a']}) + assert (df_object.memory_usage(deep=True).sum() == + df_object.memory_usage().sum()) + + @pytest.mark.skipif(PYPY, reason="PyPy getsizeof() fails by design") + def test_usage_via_getsizeof(self): + df = DataFrame( + data=1, + index=pd.MultiIndex.from_product( + [['a'], range(1000)]), + columns=['A'] + ) + mem = df.memory_usage(deep=True).sum() # sys.getsizeof will call the .memory_usage with # deep=True, and add on some GC overhead - diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df) + diff = mem - 
sys.getsizeof(df) assert abs(diff) < 100 def test_info_memory_usage_qualified(self): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 9af4a9edeb8b1..9e92c7cf1a9b8 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -15,7 +15,7 @@ import pandas.util.testing as tm from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, Timedelta, IntervalIndex, Interval) -from pandas.compat import StringIO +from pandas.compat import StringIO, PYPY from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.base import PandasDelegate, NoNewAttributesMixin from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin @@ -144,6 +144,7 @@ def f(): pytest.raises(TypeError, f) + @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") def test_memory_usage(self): # Delegate does not implement memory_usage. # Check that we fall back to in-built `__sizeof__` @@ -941,6 +942,7 @@ def test_fillna(self): # check shallow_copied assert o is not result + @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") def test_memory_usage(self): for o in self.objs: res = o.memory_usage() diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index eecdd672095b0..a0b585a16ad9a 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -24,7 +24,7 @@ period_range, PeriodIndex, timedelta_range, TimedeltaIndex, NaT, Interval, IntervalIndex) -from pandas.compat import range, lrange, u, PY3 +from pandas.compat import range, lrange, u, PY3, PYPY from pandas.core.config import option_context @@ -1448,10 +1448,11 @@ def test_memory_usage(self): cat = pd.Categorical(['foo', 'foo', 'bar']) assert cat.memory_usage(deep=True) > cat.nbytes - # sys.getsizeof will call the .memory_usage with - # deep=True, and add on some GC overhead - diff = cat.memory_usage(deep=True) - sys.getsizeof(cat) - assert abs(diff) < 100 + if not PYPY: + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = cat.memory_usage(deep=True) - sys.getsizeof(cat) + assert abs(diff) < 100 def test_searchsorted(self): # https://github.com/pandas-dev/pandas/issues/8420 diff --git a/pandas/util/testing.py b/pandas/util/testing.py index a000e189dfaa9..5a17cb6d7dc47 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -56,7 +56,6 @@ K = 4 _RAISE_NETWORK_ERROR_DEFAULT = False - # set testing_mode _testing_mode_warnings = (DeprecationWarning, compat.ResourceWarning) From 536b761c2b9447774146966d7c697fa0f7fc9a60 Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 15 Aug 2017 12:26:06 -0600 Subject: [PATCH 074/141] CLN: replace %s syntax with .format in pandas.core.reshape (#17252) Replaced %s syntax with .format in pandas.core.reshape. Additionally, made some of the existing positional .format code more explicit. 
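
For illustration, a minimal standalone sketch of the conversion pattern this
cleanup applies (the variable values below are made up for the example and do
not appear in the patch):

    app, pkg_name, min_version = 'statsmodels', 'numpy', '1.9'

    # before: positional %-style interpolation
    old = '%s requires %s with version >= %s' % (app, pkg_name, min_version)

    # after: named str.format fields are self-documenting, can be
    # reordered, and the same argument can be referenced more than once
    new = ('{app} requires {pkg_name} with version >= {min_version}'
           .format(app=app, pkg_name=pkg_name, min_version=min_version))

    assert old == new

Named fields also make the "more explicit" positional conversions
(e.g. '{0}' -> '{idx}') safer to rearrange later without touching the
argument list.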
--- pandas/core/reshape/concat.py | 29 ++++++++++--------- pandas/core/reshape/merge.py | 53 ++++++++++++++++++---------------- pandas/core/reshape/pivot.py | 8 ++--- pandas/core/reshape/reshape.py | 31 +++++++++++--------- pandas/core/reshape/tile.py | 4 +-- 5 files changed, 67 insertions(+), 58 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 20d561738dc78..9e180c624963c 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -220,7 +220,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, if isinstance(objs, (NDFrame, compat.string_types)): raise TypeError('first argument must be an iterable of pandas ' 'objects, you passed an object of type ' - '"{0}"'.format(type(objs).__name__)) + '"{name}"'.format(name=type(objs).__name__)) if join == 'outer': self.intersect = False @@ -309,8 +309,8 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, self._is_series = isinstance(sample, Series) if not 0 <= axis <= sample.ndim: - raise AssertionError("axis must be between 0 and {0}, " - "input was {1}".format(sample.ndim, axis)) + raise AssertionError("axis must be between 0 and {ndim}, input was" + " {axis}".format(ndim=sample.ndim, axis=axis)) # if we have mixed ndims, then convert to highest ndim # creating column numbers as needed @@ -431,8 +431,8 @@ def _get_new_axes(self): new_axes[i] = self._get_comb_axis(i) else: if len(self.join_axes) != ndim - 1: - raise AssertionError("length of join_axes must not be " - "equal to {0}".format(ndim - 1)) + raise AssertionError("length of join_axes must not be equal " + "to {length}".format(length=ndim - 1)) # ufff... indices = compat.lrange(ndim) @@ -451,7 +451,8 @@ def _get_comb_axis(self, i): intersect=self.intersect) except IndexError: types = [type(x).__name__ for x in self.objs] - raise TypeError("Cannot concatenate list of %s" % types) + raise TypeError("Cannot concatenate list of {types}" + .format(types=types)) def _get_concat_axis(self): """ @@ -470,8 +471,8 @@ def _get_concat_axis(self): for i, x in enumerate(self.objs): if not isinstance(x, Series): raise TypeError("Cannot concatenate type 'Series' " - "with object of type " - "%r" % type(x).__name__) + "with object of type {type!r}" + .format(type=type(x).__name__)) if x.name is not None: names[i] = x.name has_names = True @@ -505,8 +506,8 @@ def _maybe_check_integrity(self, concat_index): if self.verify_integrity: if not concat_index.is_unique: overlap = concat_index.get_duplicates() - raise ValueError('Indexes have overlapping values: %s' - % str(overlap)) + raise ValueError('Indexes have overlapping values: ' + '{overlap!s}'.format(overlap=overlap)) def _concat_indexes(indexes): @@ -547,8 +548,8 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): try: i = level.get_loc(key) except KeyError: - raise ValueError('Key %s not in level %s' - % (str(key), str(level))) + raise ValueError('Key {key!s} not in level {level!s}' + .format(key=key, level=level)) to_concat.append(np.repeat(i, len(index))) label_list.append(np.concatenate(to_concat)) @@ -597,8 +598,8 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): mask = mapped == -1 if mask.any(): - raise ValueError('Values not found in passed level: %s' - % str(hlevel[mask])) + raise ValueError('Values not found in passed level: {hlevel!s}' + .format(hlevel=hlevel[mask])) new_labels.append(np.repeat(mapped, n)) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 
00828b8241f4c..947300a28e510 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -534,28 +534,27 @@ def __init__(self, left, right, how='inner', on=None, 'indicator option can only accept boolean or string arguments') if not isinstance(left, DataFrame): - raise ValueError( - 'can not merge DataFrame with instance of ' - 'type {0}'.format(type(left))) + raise ValueError('can not merge DataFrame with instance of ' + 'type {left}'.format(left=type(left))) if not isinstance(right, DataFrame): - raise ValueError( - 'can not merge DataFrame with instance of ' - 'type {0}'.format(type(right))) + raise ValueError('can not merge DataFrame with instance of ' + 'type {right}'.format(right=type(right))) if not is_bool(left_index): raise ValueError( 'left_index parameter must be of type bool, not ' - '{0}'.format(type(left_index))) + '{left_index}'.format(left_index=type(left_index))) if not is_bool(right_index): raise ValueError( 'right_index parameter must be of type bool, not ' - '{0}'.format(type(right_index))) + '{right_index}'.format(right_index=type(right_index))) # warn user when merging between different levels if left.columns.nlevels != right.columns.nlevels: msg = ('merging between different levels can give an unintended ' - 'result ({0} levels on the left, {1} on the right)') - msg = msg.format(left.columns.nlevels, right.columns.nlevels) + 'result ({left} levels on the left, {right} on the right)' + ).format(left=left.columns.nlevels, + right=right.columns.nlevels) warnings.warn(msg, UserWarning) self._validate_specification() @@ -613,7 +612,8 @@ def _indicator_pre_merge(self, left, right): for i in ['_left_indicator', '_right_indicator']: if i in columns: raise ValueError("Cannot use `indicator=True` option when " - "data contains a column named {}".format(i)) + "data contains a column named {name}" + .format(name=i)) if self.indicator_name in columns: raise ValueError( "Cannot use name of an existing column for indicator column") @@ -717,7 +717,7 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer): if name in result: result[name] = key_col else: - result.insert(i, name or 'key_%d' % i, key_col) + result.insert(i, name or 'key_{i}'.format(i=i), key_col) def _get_join_indexers(self): """ return the join indexers """ @@ -952,8 +952,8 @@ def _validate_specification(self): if len(common_cols) == 0: raise MergeError('No common columns to perform merge on') if not common_cols.is_unique: - raise MergeError("Data columns not unique: %s" - % repr(common_cols)) + raise MergeError("Data columns not unique: {common!r}" + .format(common=common_cols)) self.left_on = self.right_on = common_cols elif self.on is not None: if self.left_on is not None or self.right_on is not None: @@ -1119,12 +1119,14 @@ def get_result(self): def _asof_function(direction, on_type): - return getattr(libjoin, 'asof_join_%s_%s' % (direction, on_type), None) + name = 'asof_join_{dir}_{on}'.format(dir=direction, on=on_type) + return getattr(libjoin, name, None) def _asof_by_function(direction, on_type, by_type): - return getattr(libjoin, 'asof_join_%s_%s_by_%s' % - (direction, on_type, by_type), None) + name = 'asof_join_{dir}_{on}_by_{by}'.format( + dir=direction, on=on_type, by=by_type) + return getattr(libjoin, name, None) _type_casters = { @@ -1153,7 +1155,7 @@ def _get_cython_type(dtype): type_name = _get_dtype(dtype).name ctype = _cython_types.get(type_name, 'object') if ctype == 'error': - raise MergeError('unsupported type: ' + type_name) + raise MergeError('unsupported 
type: {type}'.format(type=type_name)) return ctype @@ -1235,7 +1237,8 @@ def _validate_specification(self): # check 'direction' is valid if self.direction not in ['backward', 'forward', 'nearest']: - raise MergeError('direction invalid: ' + self.direction) + raise MergeError('direction invalid: {direction}' + .format(direction=self.direction)) @property def _asof_key(self): @@ -1264,7 +1267,7 @@ def _get_merge_keys(self): lt = left_join_keys[-1] msg = "incompatible tolerance, must be compat " \ - "with type {0}".format(type(lt)) + "with type {lt}".format(lt=type(lt)) if is_datetime64_dtype(lt) or is_datetime64tz_dtype(lt): if not isinstance(self.tolerance, Timedelta): @@ -1283,8 +1286,8 @@ def _get_merge_keys(self): # validate allow_exact_matches if not is_bool(self.allow_exact_matches): - raise MergeError("allow_exact_matches must be boolean, " - "passed {0}".format(self.allow_exact_matches)) + msg = "allow_exact_matches must be boolean, passed {passed}" + raise MergeError(msg.format(passed=self.allow_exact_matches)) return left_join_keys, right_join_keys, join_names @@ -1306,11 +1309,11 @@ def flip(xs): tolerance = self.tolerance # we required sortedness in the join keys - msg = " keys must be sorted" + msg = "{side} keys must be sorted" if not Index(left_values).is_monotonic: - raise ValueError('left' + msg) + raise ValueError(msg.format(side='left')) if not Index(right_values).is_monotonic: - raise ValueError('right' + msg) + raise ValueError(msg.format(side='right')) # initial type conversion as needed if needs_i8_conversion(left_values): diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index e08c307bba818..f07123ca18489 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -145,10 +145,10 @@ def _add_margins(table, data, values, rows, cols, aggfunc, if not isinstance(margins_name, compat.string_types): raise ValueError('margins_name argument must be a string') - exception_msg = 'Conflicting name "{0}" in margins'.format(margins_name) + msg = 'Conflicting name "{name}" in margins'.format(name=margins_name) for level in table.index.names: if margins_name in table.index.get_level_values(level): - raise ValueError(exception_msg) + raise ValueError(msg) grand_margin = _compute_grand_margin(data, values, aggfunc, margins_name) @@ -156,7 +156,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, if hasattr(table, 'columns'): for level in table.columns.names[1:]: if margins_name in table.columns.get_level_values(level): - raise ValueError(exception_msg) + raise ValueError(msg) if len(rows) > 1: key = (margins_name,) + ('',) * (len(rows) - 1) @@ -553,7 +553,7 @@ def _get_names(arrs, names, prefix='row'): if isinstance(arr, ABCSeries) and arr.name is not None: names.append(arr.name) else: - names.append('%s_%d' % (prefix, i)) + names.append('{prefix}_{i}'.format(prefix=prefix, i=i)) else: if len(names) != len(arrs): raise AssertionError('arrays and names must have the same length') diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index b7638471f2ad0..455da9246783c 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -1,6 +1,6 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0703,W0622,W0613,W0201 -from pandas.compat import range, zip +from pandas.compat import range, text_type, zip from pandas import compat import itertools import re @@ -91,8 +91,8 @@ def __init__(self, values, index, level=-1, value_columns=None, if isinstance(self.index, MultiIndex): if 
index._reference_duplicate_name(level): - msg = ("Ambiguous reference to {0}. The index " - "names are not unique.".format(level)) + msg = ("Ambiguous reference to {level}. The index " + "names are not unique.".format(level=level)) raise ValueError(msg) self.level = self.index._get_level_number(level) @@ -229,7 +229,7 @@ def get_new_values(self): sorted_values = sorted_values.astype(name, copy=False) # fill in our values & mask - f = getattr(_reshape, "unstack_{}".format(name)) + f = getattr(_reshape, "unstack_{name}".format(name=name)) f(sorted_values, mask.view('u1'), stride, @@ -516,8 +516,8 @@ def factorize(index): N, K = frame.shape if isinstance(frame.columns, MultiIndex): if frame.columns._reference_duplicate_name(level): - msg = ("Ambiguous reference to {0}. The column " - "names are not unique.".format(level)) + msg = ("Ambiguous reference to {level}. The column " + "names are not unique.".format(level=level)) raise ValueError(msg) # Will also convert negative level numbers and check if out of bounds. @@ -747,7 +747,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, if len(frame.columns.names) == len(set(frame.columns.names)): var_name = frame.columns.names else: - var_name = ['variable_%s' % i + var_name = ['variable_{i}'.format(i=i) for i in range(len(frame.columns.names))] else: var_name = [frame.columns.name if frame.columns.name is not None @@ -1027,7 +1027,8 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): in a typical case. """ def get_var_names(df, stub, sep, suffix): - regex = "^{0}{1}{2}".format(re.escape(stub), re.escape(sep), suffix) + regex = "^{stub}{sep}{suffix}".format( + stub=re.escape(stub), sep=re.escape(sep), suffix=suffix) return df.filter(regex=regex).columns.tolist() def melt_stub(df, stub, i, j, value_vars, sep): @@ -1180,13 +1181,14 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, # validate prefixes and separator to avoid silently dropping cols def check_len(item, name): - length_msg = ("Length of '{0}' ({1}) did not match the length of " - "the columns being encoded ({2}).") + len_msg = ("Length of '{name}' ({len_item}) did not match the " + "length of the columns being encoded ({len_enc}).") if is_list_like(item): if not len(item) == len(columns_to_encode): - raise ValueError(length_msg.format(name, len(item), - len(columns_to_encode))) + len_msg = len_msg.format(name=name, len_item=len(item), + len_enc=len(columns_to_encode)) + raise ValueError(len_msg) check_len(prefix, 'prefix') check_len(prefix_sep, 'prefix_sep') @@ -1253,7 +1255,10 @@ def get_empty_Frame(data, sparse): number_of_cols = len(levels) if prefix is not None: - dummy_cols = ['%s%s%s' % (prefix, prefix_sep, v) for v in levels] + dummy_strs = [u'{prefix}{sep}{level}' if isinstance(v, text_type) + else '{prefix}{sep}{level}' for v in levels] + dummy_cols = [dummy_str.format(prefix=prefix, sep=prefix_sep, level=v) + for dummy_str, v in zip(dummy_strs, levels)] else: dummy_cols = levels diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 1cb39faa2e869..2f5538556fa6d 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -229,9 +229,9 @@ def _bins_to_cuts(x, bins, right=True, labels=None, unique_bins = algos.unique(bins) if len(unique_bins) < len(bins) and len(bins) != 2: if duplicates == 'raise': - raise ValueError("Bin edges must be unique: {}.\nYou " + raise ValueError("Bin edges must be unique: {bins!r}.\nYou " "can drop duplicate edges by setting " "the 'duplicates' 
kwarg".format(repr(bins))) + "the 'duplicates' kwarg".format(bins=bins)) else: bins = unique_bins From a1ff67184b6ffc09aa2aef80eabbd1393c46379b Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Tue, 15 Aug 2017 16:23:44 -0400 Subject: [PATCH 075/141] ENH: Infer compression from non-string paths (#17206) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/common.py | 14 ++++++------ pandas/io/parsers.py | 10 ++++----- pandas/io/pickle.py | 4 ++-- pandas/tests/io/test_common.py | 38 ++++++++++++++++++++++++--------- 5 files changed, 44 insertions(+), 23 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5140a96d2a09e..93a274425780b 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -127,6 +127,7 @@ Other Enhancements - :func:`read_html` handles colspan and rowspan arguments and attempts to infer a header if the header is not explicitly specified (:issue:`17054`) - Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) +- `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). .. _whatsnew_0210.api_breaking: diff --git a/pandas/io/common.py b/pandas/io/common.py index cbfc33dbebb81..69a7e69ea724b 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -272,13 +272,15 @@ def _infer_compression(filepath_or_buffer, compression): if compression is None: return None - # Cannot infer compression of a buffer. Hence assume no compression. - is_path = isinstance(filepath_or_buffer, compat.string_types) - if compression == 'infer' and not is_path: - return None - - # Infer compression from the filename/URL extension + # Infer compression if compression == 'infer': + # Convert all path types (e.g. pathlib.Path) to strings + filepath_or_buffer = _stringify_path(filepath_or_buffer) + if not isinstance(filepath_or_buffer, compat.string_types): + # Cannot infer compression of a buffer, assume no compression + return None + + # Infer compression from the filename/URL extension for compression, extension in _compression_to_extension.items(): if filepath_or_buffer.endswith(extension): return compression diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9c76d3126890c..05a04f268f72b 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -208,11 +208,11 @@ `_ for more information on ``iterator`` and ``chunksize``. compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer' - For on-the-fly decompression of on-disk data. If 'infer', then use gzip, - bz2, zip or xz if filepath_or_buffer is a string ending in '.gz', '.bz2', - '.zip', or 'xz', respectively, and no decompression otherwise. If using - 'zip', the ZIP file must contain only one data file to be read in. - Set to None for no decompression. + For on-the-fly decompression of on-disk data. If 'infer' and + `filepath_or_buffer` is path-like, then detect compression from the + following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no + decompression). If using 'zip', the ZIP file must contain only one data + file to be read in. Set to None for no decompression. .. versionadded:: 0.18.1 support for 'zip' and 'xz' compression. 
diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 6f345092c514d..143b76575e36b 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -62,8 +62,8 @@ def read_pickle(path, compression='infer'): File path compression : {'infer', 'gzip', 'bz2', 'xz', 'zip', None}, default 'infer' For on-the-fly decompression of on-disk data. If 'infer', then use - gzip, bz2, xz or zip if path is a string ending in '.gz', '.bz2', 'xz', - or 'zip' respectively, and no decompression otherwise. + gzip, bz2, xz or zip if path ends in '.gz', '.bz2', '.xz', + or '.zip' respectively, and no decompression otherwise. Set to None for no decompression. .. versionadded:: 0.20.0 diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index b527e3c5dc254..30904593fedc4 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -14,16 +14,6 @@ from pandas import read_csv, concat -try: - from pathlib import Path -except ImportError: - pass - -try: - from py.path import local as LocalPath -except ImportError: - pass - class CustomFSPath(object): """For testing fspath on unknown objects""" @@ -34,6 +24,21 @@ def __fspath__(self): return self.path +# Functions that consume a string path and return a string or path-like object +path_types = [str, CustomFSPath] + +try: + from pathlib import Path + path_types.append(Path) +except ImportError: + pass + +try: + from py.path import local as LocalPath + path_types.append(LocalPath) +except ImportError: + pass + HERE = os.path.dirname(__file__) @@ -83,6 +88,19 @@ def test_stringify_path_fspath(self): result = common._stringify_path(p) assert result == 'foo/bar.csv' + @pytest.mark.parametrize('extension,expected', [ + ('', None), + ('.gz', 'gzip'), + ('.bz2', 'bz2'), + ('.zip', 'zip'), + ('.xz', 'xz'), + ]) + @pytest.mark.parametrize('path_type', path_types) + def test_infer_compression_from_path(self, extension, expected, path_type): + path = path_type('foo/bar.csv' + extension) + compression = common._infer_compression(path, compression='infer') + assert compression == expected + def test_get_filepath_or_buffer_with_path(self): filename = '~/sometest' filepath_or_buffer, _, _ = common.get_filepath_or_buffer(filename) From df1b0dc73f332a448c60397538cd5e66763d5686 Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 15 Aug 2017 14:42:39 -0600 Subject: [PATCH 076/141] Fix bugs in IntervalIndex.is_non_overlapping_monotonic (#17238) --- doc/source/whatsnew/v0.21.0.txt | 2 ++ pandas/core/indexes/interval.py | 13 +++++++-- pandas/tests/indexes/test_interval.py | 41 +++++++++++++++++++++++++-- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 93a274425780b..01b9c792dfd30 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -313,6 +313,8 @@ Conversion - Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`) - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) - Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, so an approximation is used instead (:issue:`17228`) +- Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. 
(:issue:`17237`) +- Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) Indexing diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index aa2ad21ae37fd..e90378184e3f3 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -556,8 +556,17 @@ def is_non_overlapping_monotonic(self): # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... ) # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...) # we already require left <= right - return ((self.right[:-1] <= self.left[1:]).all() or - (self.left[:-1] >= self.right[1:]).all()) + + # strict inequality for closed == 'both'; equality implies overlapping + # at a point when both sides of intervals are included + if self.closed == 'both': + return bool((self.right[:-1] < self.left[1:]).all() or + (self.left[:-1] > self.right[1:]).all()) + + # non-strict inequality when closed != 'both'; at least one side is + # not included in the intervals, so equality does not imply overlapping + return bool((self.right[:-1] <= self.left[1:]).all() or + (self.left[:-1] >= self.right[1:]).all()) @Appender(_index_shared_docs['_convert_scalar_indexer']) def _convert_scalar_indexer(self, key, kind=None): diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index fe86a2121761a..18eefc3fbdca6 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -371,8 +371,9 @@ def slice_locs_cases(self, breaks): assert index.slice_locs(1, 1) == (1, 1) assert index.slice_locs(1, 2) == (1, 2) - index = IntervalIndex.from_breaks([0, 1, 2], closed='both') - assert index.slice_locs(1, 1) == (0, 2) + index = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], + closed='both') + assert index.slice_locs(1, 1) == (0, 1) assert index.slice_locs(1, 2) == (0, 2) def test_slice_locs_int64(self): @@ -681,6 +682,42 @@ def f(): pytest.raises(ValueError, f) + def test_is_non_overlapping_monotonic(self): + # Should be True in all cases + tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] + for closed in ('left', 'right', 'neither', 'both'): + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is True + + idx = IntervalIndex.from_tuples(reversed(tpls), closed=closed) + assert idx.is_non_overlapping_monotonic is True + + # Should be False in all cases (overlapping) + tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] + for closed in ('left', 'right', 'neither', 'both'): + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is False + + idx = IntervalIndex.from_tuples(reversed(tpls), closed=closed) + assert idx.is_non_overlapping_monotonic is False + + # Should be False in all cases (non-monotonic) + tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] + for closed in ('left', 'right', 'neither', 'both'): + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is False + + idx = IntervalIndex.from_tuples(reversed(tpls), closed=closed) + assert idx.is_non_overlapping_monotonic is False + + # Should be False for closed='both', otherwise True (GH16560) + idx = IntervalIndex.from_breaks(range(4), closed='both') + assert idx.is_non_overlapping_monotonic is False + + for closed in ('left', 'right', 'neither'): + idx = IntervalIndex.from_breaks(range(4), closed=closed) + assert idx.is_non_overlapping_monotonic is True + class TestIntervalRange(object): From 8fe1cc3c10cfb9afc4deb1315896d69bf4c7d531 Mon
Sep 17 00:00:00 2001 From: Daniel Grady Date: Tue, 15 Aug 2017 15:44:54 -0700 Subject: [PATCH 077/141] BUG: Fix behavior of argmax and argmin with inf (#16449) Closes #13595 --- doc/source/whatsnew/v0.21.0.txt | 3 +- pandas/core/nanops.py | 8 ++--- pandas/tests/groupby/test_groupby.py | 2 +- pandas/tests/series/test_operators.py | 47 +++++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 01b9c792dfd30..9e740a8c75792 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -267,7 +267,7 @@ Other API Changes - Removed the ``@slow`` decorator from ``pandas.util.testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`) - Moved definition of ``MergeError`` to the ``pandas.errors`` module. - The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is deprecated and will show a ``FutureWarning`` (:issue:`14636`) - +- :func:`Series.argmin` and :func:`Series.argmax` will now raise a ``TypeError`` when used with ``object`` dtypes, instead of a ``ValueError`` (:issue:`13595`) .. _whatsnew_0210.deprecations: @@ -375,6 +375,7 @@ Reshaping - Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) - Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`) - Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`) +- :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`). 
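A minimal sketch of the two entries above (illustrative values only; assumes this patch is applied):

    import numpy as np
    import pandas as pd

    s = pd.Series([0, -np.inf, np.inf, np.nan])
    s.argmin()  # 1: -inf is a genuine minimum, no longer treated as missing
    s.argmax()  # 2: +inf likewise

    # object dtype now raises TypeError (previously ValueError)
    pd.Series(['foo', 'bar']).argmax()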
Numeric ^^^^^^^ diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 2f4e437c0ae61..b2bbf1c75b7ea 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -486,23 +486,23 @@ def reduction(values, axis=None, skipna=True): nanmax = _nanminmax('max', fill_value_typ='-inf') +@disallow('O') def nanargmax(values, axis=None, skipna=True): """ Returns -1 in the NA case """ - values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf', - isfinite=True) + values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf') result = values.argmax(axis) result = _maybe_arg_null_out(result, axis, mask, skipna) return result +@disallow('O') def nanargmin(values, axis=None, skipna=True): """ Returns -1 in the NA case """ - values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf', - isfinite=True) + values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf') result = values.argmin(axis) result = _maybe_arg_null_out(result, axis, mask, skipna) return result diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 0dea1e8447b2b..f9e1a0d2e744a 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2339,7 +2339,7 @@ def test_non_cython_api(self): assert_frame_equal(result, expected) # idxmax - expected = DataFrame([[0], [nan]], columns=['B'], index=[1, 3]) + expected = DataFrame([[0.0], [nan]], columns=['B'], index=[1, 3]) expected.index.name = 'A' result = g.idxmax() assert_frame_equal(result, expected) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 991c5ff625554..4888f8fe996b6 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -1857,3 +1857,50 @@ def test_op_duplicate_index(self): result = s1 + s2 expected = pd.Series([11, 12, np.nan], index=[1, 1, 2]) assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "test_input,error_type", + [ + (pd.Series([]), ValueError), + + # For strings, or any Series with dtype 'O' + (pd.Series(['foo', 'bar', 'baz']), TypeError), + (pd.Series([(1,), (2,)]), TypeError), + + # For mixed data types + ( + pd.Series(['foo', 'foo', 'bar', 'bar', None, np.nan, 'baz']), + TypeError + ), + ] + ) + def test_assert_argminmax_raises(self, test_input, error_type): + """ + Cases where ``Series.argmax`` and related should raise an exception + """ + with pytest.raises(error_type): + test_input.argmin() + with pytest.raises(error_type): + test_input.argmin(skipna=False) + with pytest.raises(error_type): + test_input.argmax() + with pytest.raises(error_type): + test_input.argmax(skipna=False) + + def test_argminmax_with_inf(self): + # For numeric data with NA and Inf (GH #13595) + s = pd.Series([0, -np.inf, np.inf, np.nan]) + + assert s.argmin() == 1 + assert np.isnan(s.argmin(skipna=False)) + + assert s.argmax() == 2 + assert np.isnan(s.argmax(skipna=False)) + + # Using old-style behavior that treats floating point nan, -inf, and + # +inf as missing + with pd.option_context('mode.use_inf_as_na', True): + assert s.argmin() == 0 + assert np.isnan(s.argmin(skipna=False)) + assert s.argmax() == 0 + assert np.isnan(s.argmax(skipna=False)) From 357e7ae6ef5e035ca482b9ffcd96e8bb335b0511 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 16 Aug 2017 13:52:29 -0700 Subject: [PATCH 078/141] CLN: Remove have_pytz (#17266) Closes gh-17251 --- pandas/_libs/index.pyx | 10 +++------- pandas/_libs/period.pyx | 11 +++-------- pandas/_libs/tslib.pyx |
17 ++--------------- 3 files changed, 8 insertions(+), 30 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 273dc06886088..b71963fdef1c1 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -32,13 +32,9 @@ cdef extern from "datetime.h": cdef int64_t iNaT = util.get_nat() -try: - from dateutil.tz import tzutc as _du_utc - import pytz - UTC = pytz.utc - have_pytz = True -except ImportError: - have_pytz = False +from dateutil.tz import tzutc as _du_utc +import pytz +UTC = pytz.utc PyDateTime_IMPORT diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 1db31387de5a7..506fec28f5041 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -3,8 +3,7 @@ import operator from cpython cimport ( PyObject_RichCompareBool, - Py_EQ, Py_NE, -) + Py_EQ, Py_NE) from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA) @@ -24,14 +23,13 @@ cimport util, lib from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, - NaT, have_pytz, _get_utcoffset) + NaT, _get_utcoffset) from tslib cimport ( maybe_get_tz, _is_utc, _is_tzlocal, _get_dst_info, - _nat_scalar_rules, -) + _nat_scalar_rules) from pandas.tseries import offsets from pandas.core.tools.datetimes import parse_time_string @@ -610,9 +608,6 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, ndarray[int64_t] trans, deltas, pos pandas_datetimestruct dts - if not have_pytz: - raise Exception('Could not find pytz module') - if _is_utc(tz): for i in range(n): if stamps[i] == NPY_NAT: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 44be9ba56b84a..e1ba4169ed629 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -4080,12 +4080,8 @@ def i8_to_pydt(int64_t i8, object tzinfo = None): #---------------------------------------------------------------------- # time zone conversion helpers -try: - import pytz - UTC = pytz.utc - have_pytz = True -except: - have_pytz = False +import pytz +UTC = pytz.utc @cython.boundscheck(False) @@ -4112,9 +4108,6 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): int64_t v, offset, delta pandas_datetimestruct dts - if not have_pytz: - import pytz - if len(vals) == 0: return np.array([], dtype=np.int64) @@ -4229,9 +4222,6 @@ def tz_convert_single(int64_t val, object tz1, object tz2): int64_t v, offset, utc_date pandas_datetimestruct dts - if not have_pytz: - import pytz - if val == NPY_NAT: return val @@ -4444,9 +4434,6 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, assert is_coerce or is_raise - if not have_pytz: - raise Exception("Could not find pytz module") - if tz == UTC or tz is None: return vals From aa97aa63161b857e8c90f73936b6ddc6e262f772 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 17 Aug 2017 04:10:52 -0600 Subject: [PATCH 079/141] CLN: replace %s syntax with .format in core.dtypes and core.sparse (#17270) --- pandas/core/dtypes/cast.py | 20 ++++++++++-------- pandas/core/dtypes/common.py | 8 ++++---- pandas/core/sparse/array.py | 39 +++++++++++++++++++----------------- pandas/core/sparse/frame.py | 16 +++++++-------- pandas/core/sparse/list.py | 5 +++-- pandas/core/sparse/series.py | 6 ++++-- 6 files changed, 52 insertions(+), 42 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 723e4f70da4e9..c2cf6afc1a7b5 100644 --- a/pandas/core/dtypes/cast.py +++ 
b/pandas/core/dtypes/cast.py @@ -516,7 +516,8 @@ def maybe_cast_item(obj, item, dtype): if dtype in (np.object_, np.bool_): obj[item] = chunk.astype(np.object_) elif not issubclass(dtype, (np.integer, np.bool_)): # pragma: no cover - raise ValueError("Unexpected dtype encountered: %s" % dtype) + raise ValueError("Unexpected dtype encountered: {dtype}" + .format(dtype=dtype)) def invalidate_string_dtypes(dtype_set): @@ -620,8 +621,9 @@ def astype_nansafe(arr, dtype, copy=True): elif dtype == np.int64: return arr.view(dtype) elif dtype != _NS_DTYPE: - raise TypeError("cannot astype a datetimelike from [%s] to [%s]" % - (arr.dtype, dtype)) + raise TypeError("cannot astype a datetimelike from [{from_dtype}] " + "to [{to_dtype}]".format(from_dtype=arr.dtype, + to_dtype=dtype)) return arr.astype(_NS_DTYPE) elif is_timedelta64_dtype(arr): if dtype == np.int64: @@ -640,8 +642,9 @@ def astype_nansafe(arr, dtype, copy=True): result[mask] = np.nan return result - raise TypeError("cannot astype a timedelta from [%s] to [%s]" % - (arr.dtype, dtype)) + raise TypeError("cannot astype a timedelta from [{from_dtype}] " + "to [{to_dtype}]".format(from_dtype=arr.dtype, + to_dtype=dtype)) return arr.astype(_TD_DTYPE) elif (np.issubdtype(arr.dtype, np.floating) and @@ -926,7 +929,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): dtype = _NS_DTYPE else: raise TypeError("cannot convert datetimelike to " - "dtype [%s]" % dtype) + "dtype [{dtype}]".format(dtype=dtype)) elif is_datetime64tz: # our NaT doesn't support tz's @@ -943,7 +946,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): dtype = _TD_DTYPE else: raise TypeError("cannot convert timedeltalike to " - "dtype [%s]" % dtype) + "dtype [{dtype}]".format(dtype=dtype)) if is_scalar(value): if value == iNaT or isna(value): @@ -982,7 +985,8 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): return tslib.ints_to_pydatetime(ints) # we have a non-castable dtype that was passed - raise TypeError('Cannot cast datetime64 to %s' % dtype) + raise TypeError('Cannot cast datetime64 to {dtype}' + .format(dtype=dtype)) else: diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 37f99bd344e6c..c47e61dc446be 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1854,10 +1854,10 @@ def _validate_date_like_dtype(dtype): try: typ = np.datetime_data(dtype)[0] except ValueError as e: - raise TypeError('%s' % e) + raise TypeError('{error}'.format(error=e)) if typ != 'generic' and typ != 'ns': - raise ValueError('%r is too specific of a frequency, try passing %r' % - (dtype.name, dtype.type.__name__)) + msg = '{name!r} is too specific of a frequency, try passing {type!r}' + raise ValueError(msg.format(name=dtype.name, type=dtype.type.__name__)) _string_dtypes = frozenset(map(_get_dtype_from_object, (binary_type, @@ -1924,6 +1924,6 @@ def pandas_dtype(dtype): if dtype in [object, np.object_, 'object', 'O']: return npdtype elif npdtype.kind == 'O': - raise TypeError('dtype {0} not understood'.format(dtype)) + raise TypeError('dtype {dtype} not understood'.format(dtype=dtype)) return npdtype diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 4a12dd1af28c9..2f830a98db649 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -52,8 +52,8 @@ def _arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None, def wrapper(self, other): if isinstance(other, np.ndarray): if len(self) != len(other): - raise AssertionError("length mismatch: %d vs. 
%d" % - (len(self), len(other))) + raise AssertionError("length mismatch: {self} vs. {other}" + .format(self=len(self), other=len(other))) if not isinstance(other, ABCSparseArray): dtype = getattr(other, 'dtype', None) other = SparseArray(other, fill_value=self.fill_value, @@ -66,7 +66,8 @@ def wrapper(self, other): return _wrap_result(name, result, self.sp_index, fill) else: # pragma: no cover - raise TypeError('operation with %s not supported' % type(other)) + raise TypeError('operation with {other} not supported' + .format(other=type(other))) if name.startswith("__"): name = name[2:-2] @@ -218,9 +219,9 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer', else: values = _sanitize_values(data) if len(values) != sparse_index.npoints: - raise AssertionError("Non array-like type {0} must have" - " the same length as the" - " index".format(type(values))) + raise AssertionError("Non array-like type {type} must " + "have the same length as the index" + .format(type=type(values))) # Create array, do *not* copy data by default if copy: subarr = np.array(values, dtype=dtype, copy=True) @@ -330,9 +331,10 @@ def __len__(self): return 0 def __unicode__(self): - return '%s\nFill: %s\n%s' % (printing.pprint_thing(self), - printing.pprint_thing(self.fill_value), - printing.pprint_thing(self.sp_index)) + return '{self}\nFill: {fill}\n{index}'.format( + self=printing.pprint_thing(self), + fill=printing.pprint_thing(self.fill_value), + index=printing.pprint_thing(self.sp_index)) def disable(self, other): raise NotImplementedError('inplace binary ops not supported') @@ -377,8 +379,8 @@ def fill_value(self, value): if is_dtype_equal(self.dtype, new_dtype): self._fill_value = fill_value else: - msg = 'unable to set fill_value {0} to {1} dtype' - raise ValueError(msg.format(value, self.dtype)) + msg = 'unable to set fill_value {fill} to {dtype} dtype' + raise ValueError(msg.format(fill=value, dtype=self.dtype)) def get_values(self, fill=None): """ return a dense representation """ @@ -466,7 +468,8 @@ def take(self, indices, axis=0, allow_fill=True, nv.validate_take(tuple(), kwargs) if axis: - raise ValueError("axis must be 0, input was {0}".format(axis)) + raise ValueError("axis must be 0, input was {axis}" + .format(axis=axis)) if is_integer(indices): # return scalar @@ -482,12 +485,12 @@ def take(self, indices, axis=0, allow_fill=True, 'all indices must be >= -1') raise ValueError(msg) elif (n <= indices).any(): - msg = 'index is out of bounds for size {0}' - raise IndexError(msg.format(n)) + msg = 'index is out of bounds for size {size}'.format(size=n) + raise IndexError(msg) else: if ((indices < -n) | (n <= indices)).any(): - msg = 'index is out of bounds for size {0}' - raise IndexError(msg.format(n)) + msg = 'index is out of bounds for size {size}'.format(size=n) + raise IndexError(msg) indices = indices.astype(np.int32) if not (allow_fill and fill_value is not None): @@ -543,8 +546,8 @@ def astype(self, dtype=None, copy=True): else: fill_value = dtype.type(self.fill_value) except ValueError: - msg = 'unable to coerce current fill_value {0} to {1} dtype' - raise ValueError(msg.format(self.fill_value, dtype)) + msg = 'unable to coerce current fill_value {fill} to {dtype} dtype' + raise ValueError(msg.format(fill=self.fill_value, dtype=dtype)) return self._simple_new(sp_values, self.sp_index, fill_value=fill_value) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index f30bd5c36a61b..1e98e919baa33 100644 --- a/pandas/core/sparse/frame.py +++ 
b/pandas/core/sparse/frame.py @@ -214,11 +214,11 @@ def _prep_index(self, data, index, columns): columns = _default_index(K) if len(columns) != K: - raise ValueError('Column length mismatch: %d vs. %d' % - (len(columns), K)) + raise ValueError('Column length mismatch: {columns} vs. {K}' + .format(columns=len(columns), K=K)) if len(index) != N: - raise ValueError('Index length mismatch: %d vs. %d' % - (len(index), N)) + raise ValueError('Index length mismatch: {index} vs. {N}' + .format(index=len(index), N=N)) return index, columns def to_coo(self): @@ -725,17 +725,17 @@ def _maybe_rename_join(self, other, lsuffix, rsuffix): to_rename = self.columns.intersection(other.columns) if len(to_rename) > 0: if not lsuffix and not rsuffix: - raise ValueError('columns overlap but no suffix specified: %s' - % to_rename) + raise ValueError('columns overlap but no suffix specified: ' + '{to_rename}'.format(to_rename=to_rename)) def lrenamer(x): if x in to_rename: - return '%s%s' % (x, lsuffix) + return '{x}{lsuffix}'.format(x=x, lsuffix=lsuffix) return x def rrenamer(x): if x in to_rename: - return '%s%s' % (x, rsuffix) + return '{x}{rsuffix}'.format(x=x, rsuffix=rsuffix) return x this = self.rename(columns=lrenamer) diff --git a/pandas/core/sparse/list.py b/pandas/core/sparse/list.py index e2a8c6a29cc23..f3e64b7efc764 100644 --- a/pandas/core/sparse/list.py +++ b/pandas/core/sparse/list.py @@ -35,7 +35,8 @@ def __init__(self, data=None, fill_value=np.nan): def __unicode__(self): contents = '\n'.join(repr(c) for c in self._chunks) - return '%s\n%s' % (object.__repr__(self), pprint_thing(contents)) + return '{self}\n{contents}'.format(self=object.__repr__(self), + contents=pprint_thing(contents)) def __len__(self): return sum(len(c) for c in self._chunks) @@ -43,7 +44,7 @@ def __len__(self): def __getitem__(self, i): if i < 0: if i + len(self) < 0: # pragma: no cover - raise ValueError('%d out of range' % i) + raise ValueError('{index} out of range'.format(index=i)) i += len(self) passed = 0 diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 62d20e73dbfcb..99aec2dd11569 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -65,7 +65,8 @@ def wrapper(self, other): index=self.index, name=self.name) else: # pragma: no cover - raise TypeError('operation with %s not supported' % type(other)) + raise TypeError('operation with {other} not supported' + .format(other=type(other))) wrapper.__name__ = name if name.startswith("__"): @@ -295,7 +296,8 @@ def shape(self): def __unicode__(self): # currently, unicode is same as repr...fixes infinite loop series_rep = Series.__unicode__(self) - rep = '%s\n%s' % (series_rep, repr(self.sp_index)) + rep = '{series}\n{index!r}'.format(series=series_rep, + index=self.sp_index) return rep def __array_wrap__(self, result, context=None): From a618becb6ee1b1b18e216be0f683ec648baabe9a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 17 Aug 2017 03:13:36 -0700 Subject: [PATCH 080/141] Replace imports of * with explicit imports (#17269) xref #17234 --- pandas/_libs/hashtable.pyx | 22 +++++++++++- pandas/_libs/index.pyx | 8 ++--- pandas/_libs/join_func_helper.pxi.in | 3 +- pandas/_libs/lib.pyx | 24 +++++++------ pandas/_libs/parsers.pyx | 11 ++++-- pandas/_libs/period.pyx | 12 ++++++- pandas/_libs/src/properties.pyx | 3 +- pandas/_libs/src/skiplist.pyx | 6 +--- pandas/_libs/tslib.pyx | 52 +++++++++++++++++++--------- pandas/_libs/window.pyx | 8 ++++- 10 files changed, 106 insertions(+), 43 deletions(-) diff --git 
a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index 101e2c031f26e..2462b7af7b0fe 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -2,7 +2,27 @@ from cpython cimport PyObject, Py_INCREF, PyList_Check, PyTuple_Check -from khash cimport * +from khash cimport ( + khiter_t, + + kh_str_t, kh_init_str, kh_put_str, kh_exist_str, + kh_get_str, kh_destroy_str, kh_resize_str, + + kh_put_strbox, kh_get_strbox, kh_init_strbox, + + kh_int64_t, kh_init_int64, kh_resize_int64, kh_destroy_int64, + kh_get_int64, kh_exist_int64, kh_put_int64, + + kh_float64_t, kh_exist_float64, kh_put_float64, kh_init_float64, + kh_get_float64, kh_destroy_float64, kh_resize_float64, + + kh_resize_uint64, kh_exist_uint64, kh_destroy_uint64, kh_put_uint64, + kh_get_uint64, kh_init_uint64, + + kh_destroy_pymap, kh_exist_pymap, kh_init_pymap, kh_get_pymap, + kh_put_pymap, kh_resize_pymap) + + from numpy cimport * from libc.stdlib cimport malloc, free diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index b71963fdef1c1..42ba0c1cadaec 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -1,8 +1,6 @@ # cython: profile=False -from numpy cimport ndarray - -from numpy cimport (float64_t, int32_t, int64_t, uint8_t, +from numpy cimport (ndarray, float64_t, int32_t, int64_t, uint8_t, uint64_t, NPY_DATETIME, NPY_TIMEDELTA) cimport cython @@ -16,7 +14,9 @@ cimport util import numpy as np cimport tslib -from hashtable cimport * + +from hashtable cimport HashTable + from pandas._libs import tslib, algos, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta from datetime import datetime, timedelta diff --git a/pandas/_libs/join_func_helper.pxi.in b/pandas/_libs/join_func_helper.pxi.in index 9cca9bba2a197..73d231b8588dc 100644 --- a/pandas/_libs/join_func_helper.pxi.in +++ b/pandas/_libs/join_func_helper.pxi.in @@ -9,6 +9,8 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in # asof_join_by #---------------------------------------------------------------------- +from hashtable cimport PyObjectHashTable, UInt64HashTable, Int64HashTable + {{py: # table_type, by_dtype @@ -23,7 +25,6 @@ on_dtypes = ['uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', }} -from hashtable cimport * {{for table_type, by_dtype in by_dtypes}} {{for on_dtype in on_dtypes}} diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 0458d4ae9f3de..53ca41e4b2489 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -10,21 +10,14 @@ from numpy cimport * np.import_array() -cdef extern from "numpy/arrayobject.h": - cdef enum NPY_TYPES: - NPY_intp "NPY_INTP" - from libc.stdlib cimport malloc, free -from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem, - PyDict_Contains, PyDict_Keys, - Py_INCREF, PyTuple_SET_ITEM, +from cpython cimport (Py_INCREF, PyTuple_SET_ITEM, PyList_Check, PyFloat_Check, PyString_Check, PyBytes_Check, - PyTuple_SetItem, + PyUnicode_Check, PyTuple_New, - PyObject_SetAttrString, PyObject_RichCompareBool, PyBytes_GET_SIZE, PyUnicode_GET_SIZE, @@ -55,7 +48,18 @@ cdef double NAN = nan from datetime import datetime as pydatetime # this is our tseries.pxd -from datetime cimport * +from datetime cimport ( + get_timedelta64_value, get_datetime64_value, + npy_timedelta, npy_datetime, + PyDateTime_Check, PyDate_Check, PyTime_Check, PyDelta_Check, + PyDateTime_GET_YEAR, + PyDateTime_GET_MONTH, + PyDateTime_GET_DAY, + PyDateTime_DATE_GET_HOUR, + PyDateTime_DATE_GET_MINUTE, + PyDateTime_DATE_GET_SECOND, + PyDateTime_IMPORT) + from tslib cimport 
(convert_to_tsobject, convert_to_timedelta64, _check_all_nulls) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index ae420da2102b2..3e8b5c4bd3feb 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -32,7 +32,7 @@ cdef extern from "stdlib.h": cimport cython cimport numpy as cnp -from numpy cimport ndarray, uint8_t, uint64_t +from numpy cimport ndarray, uint8_t, uint64_t, int64_t import numpy as np cimport util @@ -57,7 +57,14 @@ import os cnp.import_array() -from khash cimport * +from khash cimport ( + khiter_t, + kh_str_t, kh_init_str, kh_put_str, kh_exist_str, + kh_get_str, kh_destroy_str, + kh_float64_t, kh_get_float64, kh_destroy_float64, + kh_put_float64, kh_init_float64, + kh_strbox_t, kh_put_strbox, kh_get_strbox, kh_init_strbox, + kh_destroy_strbox) import sys diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 506fec28f5041..e017d863e1907 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -2,6 +2,7 @@ from datetime import datetime, date, timedelta import operator from cpython cimport ( + PyUnicode_Check, PyObject_RichCompareBool, Py_EQ, Py_NE) @@ -18,7 +19,16 @@ from pandas import compat from pandas.compat import PY2 cimport cython -from datetime cimport * + +from datetime cimport ( + is_leapyear, + PyDateTime_IMPORT, + pandas_datetimestruct, + pandas_datetimestruct_to_datetime, + pandas_datetime_to_datetimestruct, + PANDAS_FR_ns, + INT32_MIN) + cimport util, lib from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib diff --git a/pandas/_libs/src/properties.pyx b/pandas/_libs/src/properties.pyx index e619a3b6edd9a..4a3fd4b771a17 100644 --- a/pandas/_libs/src/properties.pyx +++ b/pandas/_libs/src/properties.pyx @@ -1,4 +1,5 @@ -from cpython cimport PyDict_Contains, PyDict_GetItem, PyDict_GetItem +from cpython cimport ( + PyDict_Contains, PyDict_GetItem, PyDict_GetItem, PyDict_SetItem) cdef class cache_readonly(object): diff --git a/pandas/_libs/src/skiplist.pyx b/pandas/_libs/src/skiplist.pyx index 3017931e25115..559b529822a69 100644 --- a/pandas/_libs/src/skiplist.pyx +++ b/pandas/_libs/src/skiplist.pyx @@ -6,10 +6,6 @@ # Cython version: Wes McKinney -cdef extern from "numpy/arrayobject.h": - - void import_array() - cdef extern from "math.h": double log(double x) @@ -25,7 +21,7 @@ import numpy as np from random import random # initialize numpy -import_array() +np.import_array() # TODO: optimize this, make less messy diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index e1ba4169ed629..32b8c92a50269 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -4,8 +4,8 @@ import warnings cimport numpy as np from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, + float64_t, NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA) -from datetime cimport get_datetime64_value, get_timedelta64_value import numpy as np import sys @@ -30,20 +30,47 @@ cdef extern from "datetime_helper.h": double total_seconds(object) # this is our datetime.pxd -from datetime cimport cmp_pandas_datetimestruct from libc.stdlib cimport free from util cimport (is_integer_object, is_float_object, is_datetime64_object, is_timedelta64_object, INT64_MAX) cimport util -from datetime cimport * -from khash cimport * -cimport cython - +# this is our datetime.pxd +from datetime cimport ( + pandas_datetimestruct, + pandas_datetime_to_datetimestruct, + pandas_datetimestruct_to_datetime, + cmp_pandas_datetimestruct, + days_per_month_table, + get_datetime64_value, + get_timedelta64_value, + 
get_datetime64_unit, + PANDAS_DATETIMEUNIT, + _string_to_dts, + _pydatetime_to_dts, + _date_to_datetime64, + npy_datetime, + is_leapyear, + dayofweek, + PANDAS_FR_ns, + PyDateTime_Check, PyDate_Check, + PyDateTime_IMPORT, + timedelta, datetime + ) + +# stdlib datetime imports from datetime import timedelta, datetime from datetime import time as datetime_time +from khash cimport ( + khiter_t, + kh_destroy_int64, kh_put_int64, + kh_init_int64, kh_int64_t, + kh_resize_int64, kh_get_int64) + +cimport cython + import re # dateutil compat @@ -81,15 +108,6 @@ PyDateTime_IMPORT cdef int64_t NPY_NAT = util.get_nat() iNaT = NPY_NAT -# < numpy 1.7 compat for NaT -compat_NaT = np.array([NPY_NAT]).astype('m8[ns]').item() - - -try: - basestring -except NameError: # py3 - basestring = str - cdef inline object create_timestamp_from_ts( int64_t value, pandas_datetimestruct dts, @@ -314,7 +332,7 @@ class Timestamp(_Timestamp): tz : string / timezone object, default None Timezone to localize to """ - if isinstance(tz, basestring): + if isinstance(tz, string_types): tz = maybe_get_tz(tz) return cls(datetime.now(tz)) @@ -615,7 +633,7 @@ class Timestamp(_Timestamp): if self.tzinfo is None: # tz naive, localize tz = maybe_get_tz(tz) - if not isinstance(ambiguous, basestring): + if not isinstance(ambiguous, string_types): ambiguous = [ambiguous] value = tz_localize_to_utc(np.array([self.value], dtype='i8'), tz, ambiguous=ambiguous, errors=errors)[0] diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index bdd371871b6e1..9fb3d0662eb4f 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -56,7 +56,13 @@ cdef inline int int_min(int a, int b): return a if a <= b else b from util cimport numeric -from skiplist cimport * +from skiplist cimport ( + skiplist_t, + skiplist_init, + skiplist_destroy, + skiplist_get, + skiplist_insert, + skiplist_remove) cdef extern from "../src/headers/math.h": double sqrt(double x) nogil From db3ea2ffcd9a771c197ac9dbd78d0ba167cdff3f Mon Sep 17 00:00:00 2001 From: Sangwoong Yoon Date: Fri, 18 Aug 2017 07:39:37 +0900 Subject: [PATCH 081/141] TST: pytest deprecation warnings GH17197 (#17253) Test parameters with marks are updated according to the updated API of Pytest. 
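The change itself is mechanical; a minimal before/after sketch (a standalone example, not code from this patch; requires pytest >= 3.1):

    import pytest

    # old style, deprecated by pytest 3.2: call the mark to bind it to a value
    #   pytest.mark.skipif(condition, reason='...')('some-param')

    # new style: wrap the value in pytest.param and attach marks= explicitly
    @pytest.mark.parametrize('value', [
        1,
        pytest.param(2, marks=pytest.mark.skipif(True, reason='example')),
    ])
    def test_value(value):
        assert value == 1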
https://docs.pytest.org/en/latest/changelog.html#pytest-3-2-0-2017-07-30 https://docs.pytest.org/en/latest/parametrize.html --- appveyor.yml | 2 +- ci/install_circle.sh | 2 +- ci/install_travis.sh | 2 +- ci/requirements_all.txt | 2 +- ci/requirements_dev.txt | 2 +- doc/source/contributing.rst | 24 ++++++++++++++++++++---- pandas/tests/computation/test_eval.py | 15 ++++++++------- pandas/tests/io/parser/test_network.py | 6 ++++-- pandas/tests/io/test_excel.py | 6 ++++-- pandas/tests/io/test_parquet.py | 12 ++++++++---- pandas/tests/test_window.py | 5 +++-- 11 files changed, 52 insertions(+), 26 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 684b859c206b2..65e62f887554e 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -72,7 +72,7 @@ install: - cmd: conda info -a # create our env - - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest pytest-xdist + - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest>=3.1.0 pytest-xdist - cmd: activate pandas - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run - cmd: echo "installing requirements from %REQ%" diff --git a/ci/install_circle.sh b/ci/install_circle.sh index 00e14b10ebbd6..29ca69970104b 100755 --- a/ci/install_circle.sh +++ b/ci/install_circle.sh @@ -64,7 +64,7 @@ fi # create envbuild deps echo "[create env: ${REQ_BUILD}]" time conda create -n pandas -q --file=${REQ_BUILD} || exit 1 -time conda install -n pandas pytest || exit 1 +time conda install -n pandas pytest>=3.1.0 || exit 1 source activate pandas diff --git a/ci/install_travis.sh b/ci/install_travis.sh index df6969c7cc659..ad8f0bdd8a597 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -103,7 +103,7 @@ if [ -e ${REQ} ]; then time bash $REQ || exit 1 fi -time conda install -n pandas pytest +time conda install -n pandas pytest>=3.1.0 time pip install pytest-xdist if [ "$LINT" ]; then diff --git a/ci/requirements_all.txt b/ci/requirements_all.txt index de37ec4d20be4..b153b6989df86 100644 --- a/ci/requirements_all.txt +++ b/ci/requirements_all.txt @@ -1,4 +1,4 @@ -pytest +pytest>=3.1.0 pytest-cov pytest-xdist flake8 diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt index 1e051802ec9f8..c7190c506ba18 100644 --- a/ci/requirements_dev.txt +++ b/ci/requirements_dev.txt @@ -2,6 +2,6 @@ python-dateutil pytz numpy cython -pytest +pytest>=3.1.0 pytest-cov flake8 diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index b44d0f36b86a1..e172d0d2a71a2 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -598,6 +598,10 @@ Like many packages, *pandas* uses `pytest extensions in `numpy.testing `_. +.. note:: + + The earliest supported pytest version is 3.1.0. + Writing tests ~~~~~~~~~~~~~ @@ -654,7 +658,9 @@ Using ``pytest`` Here is an example of a self-contained set of tests that illustrate multiple features that we like to use. - functional style: tests are like ``test_*`` and *only* take arguments that are either fixtures or parameters +- ``pytest.mark`` can be used to set metadata on test functions, e.g. ``skip`` or ``xfail``. - using ``parametrize``: allow testing of multiple cases +- to set a mark on a parameter, ``pytest.param(..., marks=...)`` syntax should be used - ``fixture``, code for object construction, on a per-test basis - using bare ``assert`` for scalars and truth-testing - ``tm.assert_series_equal`` (and its counterpart ``tm.assert_frame_equal``), for pandas object comparisons.
@@ -673,6 +679,13 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place def test_dtypes(dtype): assert str(np.dtype(dtype)) == dtype + @pytest.mark.parametrize('dtype', ['float32', + pytest.param('int16', marks=pytest.mark.skip), + pytest.param('int32', + marks=pytest.mark.xfail(reason='to show how it works'))]) + def test_mark(dtype): + assert str(np.dtype(dtype)) == 'float32' + @pytest.fixture def series(): return pd.Series([1, 2, 3]) @@ -695,13 +708,16 @@ A test run of this yields ((pandas) bash-3.2$ pytest test_cool_feature.py -v =========================== test session starts =========================== - platform darwin -- Python 3.5.2, pytest-3.0.5, py-1.4.31, pluggy-0.4.0 - collected 8 items + platform darwin -- Python 3.6.2, pytest-3.2.1, py-1.4.31, pluggy-0.4.0 + collected 11 items tester.py::test_dtypes[int8] PASSED tester.py::test_dtypes[int16] PASSED tester.py::test_dtypes[int32] PASSED tester.py::test_dtypes[int64] PASSED + tester.py::test_mark[float32] PASSED + tester.py::test_mark[int16] SKIPPED + tester.py::test_mark[int32] xfail tester.py::test_series[int8] PASSED tester.py::test_series[int16] PASSED tester.py::test_series[int32] PASSED @@ -714,8 +730,8 @@ Tests that we have ``parametrized`` are now accessible via the test name, for ex ((pandas) bash-3.2$ pytest test_cool_feature.py -v -k int8 =========================== test session starts =========================== - platform darwin -- Python 3.5.2, pytest-3.0.5, py-1.4.31, pluggy-0.4.0 - collected 8 items + platform darwin -- Python 3.6.2, pytest-3.2.1, py-1.4.31, pluggy-0.4.0 + collected 11 items test_cool_feature.py::test_dtypes[int8] PASSED test_cool_feature.py::test_series[int8] PASSED diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 7fc091ebb1892..d2874b1606e72 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -38,13 +38,14 @@ @pytest.fixture(params=( - pytest.mark.skipif(engine == 'numexpr' and not _USE_NUMEXPR, - reason='numexpr enabled->{enabled}, ' - 'installed->{installed}'.format( - enabled=_USE_NUMEXPR, - installed=_NUMEXPR_INSTALLED))(engine) - for engine in _engines # noqa -)) + pytest.param(engine, + marks=pytest.mark.skipif( + engine == 'numexpr' and not _USE_NUMEXPR, + reason='numexpr enabled->{enabled}, ' + 'installed->{installed}'.format( + enabled=_USE_NUMEXPR, + installed=_NUMEXPR_INSTALLED))) + for engine in _engines)) # noqa def engine(request): return request.param diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index cfa60248605ad..3344243f8137a 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -23,8 +23,10 @@ def salaries_table(): @pytest.mark.parametrize( "compression,extension", [('gzip', '.gz'), ('bz2', '.bz2'), ('zip', '.zip'), - pytest.mark.skipif(not tm._check_if_lzma(), - reason='need backports.lzma to run')(('xz', '.xz'))]) + pytest.param('xz', '.xz', + marks=pytest.mark.skipif(not tm._check_if_lzma(), + reason='need backports.lzma ' + 'to run'))]) @pytest.mark.parametrize('mode', ['explicit', 'infer']) @pytest.mark.parametrize('engine', ['python', 'c']) def test_compressed_urls(salaries_table, compression, extension, mode, engine): diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 856e8d6466526..92147b46097b8 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -2400,8 +2400,10 @@ def check_called(func): 
@pytest.mark.parametrize('engine', [ - pytest.mark.xfail('xlwt', reason='xlwt does not support ' - 'openpyxl-compatible style dicts'), + pytest.param('xlwt', + marks=pytest.mark.xfail(reason='xlwt does not support ' + 'openpyxl-compatible ' + 'style dicts')), 'xlsxwriter', 'openpyxl', ]) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index ff0935c7dcc6f..78c72e2a05566 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -26,10 +26,14 @@ # setup engines & skips @pytest.fixture(params=[ - pytest.mark.skipif(not _HAVE_FASTPARQUET, - reason='fastparquet is not installed')('fastparquet'), - pytest.mark.skipif(not _HAVE_PYARROW, - reason='pyarrow is not installed')('pyarrow')]) + pytest.param('fastparquet', + marks=pytest.mark.skipif(not _HAVE_FASTPARQUET, + reason='fastparquet is ' + 'not installed')), + pytest.param('pyarrow', + marks=pytest.mark.skipif(not _HAVE_PYARROW, + reason='pyarrow is ' + 'not installed'))]) def engine(request): return request.param diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index d94e34c41786b..21a9b05d48126 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -530,8 +530,9 @@ def test_numpy_compat(self): @pytest.mark.parametrize( 'expander', - [1, pytest.mark.xfail( - reason='GH 16425 expanding with offset not supported')('1s')]) + [1, pytest.param('1s', marks=pytest.mark.xfail( + reason='GH 16425 expanding with ' + 'offset not supported'))]) def test_empty_df_expanding(self, expander): # GH 15819 Verifies that datetime and integer expanding windows can be # applied to empty DataFrames From de60666f3222dc89412223b2ad1e2bf288044cf6 Mon Sep 17 00:00:00 2001 From: Makarov Andrey Date: Fri, 18 Aug 2017 03:52:34 +0300 Subject: [PATCH 082/141] Handle more date/datetime/time formats (#15871) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/sas/sas7bdat.py | 16 +- pandas/io/sas/sas_constants.py | 24 + pandas/tests/io/sas/data/datetime.csv | 5 + pandas/tests/io/sas/data/datetime.sas7bdat | Bin 0 -> 131072 bytes pandas/tests/io/sas/data/productsales.csv | 2880 ++++++++++---------- pandas/tests/io/sas/test_sas7bdat.py | 15 +- 7 files changed, 1494 insertions(+), 1447 deletions(-) create mode 100644 pandas/tests/io/sas/data/datetime.csv create mode 100644 pandas/tests/io/sas/data/datetime.sas7bdat diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9e740a8c75792..bef492529b6a1 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -128,6 +128,7 @@ Other Enhancements - Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) - `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). +- :func:`pd.read_sas()` now recognizes many more of the most frequently used date and datetime formats in SAS7BDAT files (:issue:`15871`). .. _whatsnew_0210.api_breaking: diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 20b0cf85e95b7..2b3a91e2062b1 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -44,8 +44,8 @@ class SAS7BDATReader(BaseIterator): index : column identifier, defaults to None Column to use as index. 
convert_dates : boolean, defaults to True - Attempt to convert dates to Pandas datetime values. Note all - SAS date formats are supported. + Attempt to convert dates to Pandas datetime values. Note that + some rarely used SAS date formats may be unsupported. blank_missing : boolean, defaults to True Convert empty strings to missing values (SAS uses blanks to indicate missing character variables). @@ -655,9 +655,15 @@ def _chunk_to_dataframe(self): rslt[name] = self._byte_chunk[jb, :].view( dtype=self.byte_order + 'd') rslt[name] = np.asarray(rslt[name], dtype=np.float64) - if self.convert_dates and (self.column_formats[j] == "MMDDYY"): - epoch = pd.datetime(1960, 1, 1) - rslt[name] = epoch + pd.to_timedelta(rslt[name], unit='d') + if self.convert_dates: + unit = None + if self.column_formats[j] in const.sas_date_formats: + unit = 'd' + elif self.column_formats[j] in const.sas_datetime_formats: + unit = 's' + if unit: + rslt[name] = pd.to_datetime(rslt[name], unit=unit, + origin="1960-01-01") jb += 1 elif self.column_types[j] == b's': rslt[name] = self._string_chunk[js, :] diff --git a/pandas/io/sas/sas_constants.py b/pandas/io/sas/sas_constants.py index 65ae1e9102cb2..c4b3588164305 100644 --- a/pandas/io/sas/sas_constants.py +++ b/pandas/io/sas/sas_constants.py @@ -145,3 +145,27 @@ class index: b"\xFF\xFF\xFF\xFE": index.columnListIndex, b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": index.columnListIndex, b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": index.columnListIndex} + + +# List of frequently used SAS date and datetime formats +# http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm +# https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java +sas_date_formats = ("DATE", "DAY", "DDMMYY", "DOWNAME", "JULDAY", "JULIAN", + "MMDDYY", "MMYY", "MMYYC", "MMYYD", "MMYYP", "MMYYS", + "MMYYN", "MONNAME", "MONTH", "MONYY", "QTR", "QTRR", + "NENGO", "WEEKDATE", "WEEKDATX", "WEEKDAY", "WEEKV", + "WORDDATE", "WORDDATX", "YEAR", "YYMM", "YYMMC", "YYMMD", + "YYMMP", "YYMMS", "YYMMN", "YYMON", "YYMMDD", "YYQ", + "YYQC", "YYQD", "YYQP", "YYQS", "YYQN", "YYQR", "YYQRC", + "YYQRD", "YYQRP", "YYQRS", "YYQRN", + "YYMMDDP", "YYMMDDC", "E8601DA", "YYMMDDN", "MMDDYYC", + "MMDDYYS", "MMDDYYD", "YYMMDDS", "B8601DA", "DDMMYYN", + "YYMMDDD", "DDMMYYB", "DDMMYYP", "MMDDYYP", "YYMMDDB", + "MMDDYYN", "DDMMYYC", "DDMMYYD", "DDMMYYS", + "MINGUO") + +sas_datetime_formats = ("DATETIME", "DTWKDATX", + "B8601DN", "B8601DT", "B8601DX", "B8601DZ", "B8601LX", + "E8601DN", "E8601DT", "E8601DX", "E8601DZ", "E8601LX", + "DATEAMPM", "DTDATE", "DTMONYY", + "DTYEAR", "TOD", "MDYAMPM") diff --git a/pandas/tests/io/sas/data/datetime.csv b/pandas/tests/io/sas/data/datetime.csv new file mode 100644 index 0000000000000..6126f6d04eaf0 --- /dev/null +++ b/pandas/tests/io/sas/data/datetime.csv @@ -0,0 +1,5 @@ +Date1,Date2,DateTime,DateTimeHi,Taiw +1677-09-22,1677-09-22,1677-09-21 00:12:44,1677-09-21 00:12:43.145226,1912-01-01 +1960-01-01,1960-01-01,1960-01-01 00:00:00,1960-01-01 00:00:00.000000,1960-01-01 +2016-02-29,2016-02-29,2016-02-29 23:59:59,2016-02-29 23:59:59.123456,2016-02-29 +2262-04-11,2262-04-11,2262-04-11 23:47:16,2262-04-11 23:47:16.854774,2262-04-11 diff --git a/pandas/tests/io/sas/data/datetime.sas7bdat b/pandas/tests/io/sas/data/datetime.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..6469dbf29f8eeeafb4703f01657eae4d0872ef10 GIT binary
zcmeIy!E0Pa7y$6uG_|QjNibjzl`cUf(556Ypaq4jB(VY0M!VHk1REh>Al?e~Ac){W zXbvg`59Z)O`~zAqf?_Du9z_sAk>Ei*cn)gQHtWpnd)dueN>l`e--MU%n|a^Ny!pMC zgwSeNetF~U<$?aMK6|$(?7ud)GI+4Be`qL-4^_IxySK~sRCbJyS7OQIA@uF3lqDx? zC#y5FPfbtN>eKbvx#{uh-)nJxEzNx3`He%LeED7N<#$ef^l6NFpTRFCTQO~2i9fA& z^vLMM=-AxYa}STd_{jV-Ct7#@T9)>Hdf@%z<<&X+^6JQX8Y`c%^S(X-0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1pY4qmsUsC%V%BwJ!W0~;hq?(TOs^dY2F!#d2dnl)6Eb@i}CrG zUrgh%KM?cBiu~%05ax^g?U+v#`3q$p`@?Pd#%56y$Jg5Om5#hvCWYKSICOn2CCykp zyz*YSa`D@Ta?Aw>zs~n_zKdUzJ>N{GvSwWSd}8VQ+Q&a%%ds^c?{922>vF7qc;Ra0 ziMVf$xvsdMa*Y510t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oU&Z0$Vp4jdF#%W71Q0a(pvXVtg&!wPUn> z&VjH$+#j!2JD2AycdXPt=e}5+u2wsptLN`9XJ0Jd`D{~ppUKeOS|(=g`~3Ml6M4?z z&hs=Hjp6Xa>uI5Cy!M8Dv2Jhb#&)Zbc1XQS*xI(b8Rtx;+eORI)?LZZSmu54UYQS; zc~2$pQI41M<8qDBvJ)OGc7L>p$BMI`4e3lFyt?Dmx8mHlw&%t(bhKxe7SGogURs`7 zIJY&~2+o12OcKNIsbHlAXwK3(?fORp@ZetmXsdOOCtG=6NUS)1;q`uf7s`MKHS zPo6r_TrUnaTLenal3e~GQSne?gV*8ZM~q4IU*?jLC&^^5=j literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/productsales.csv b/pandas/tests/io/sas/data/productsales.csv index fea9b68912297..1f6a4424e1a97 100644 --- a/pandas/tests/io/sas/data/productsales.csv +++ b/pandas/tests/io/sas/data/productsales.csv @@ -1,1441 +1,1441 @@ ACTUAL,PREDICT,COUNTRY,REGION,DIVISION,PRODTYPE,PRODUCT,QUARTER,YEAR,MONTH -925,850,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12054 -999,297,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12085 -608,846,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12113 -642,533,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12144 -656,646,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12174 -948,486,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12205 -612,717,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12235 -114,564,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12266 -685,230,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12297 -657,494,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12327 -608,903,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12358 -353,266,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12388 -107,190,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12419 -354,139,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12450 -101,217,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12478 -553,560,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12509 -877,148,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12539 -431,762,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12570 -511,457,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12600 -157,532,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12631 -520,629,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12662 -114,491,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12692 -277,0,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12723 -561,979,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12753 -220,585,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,12054 -444,267,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,12085 -178,487,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,12113 
-756,764,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,12144 -329,312,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,12174 -910,531,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,12205 -530,536,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,12235 -101,773,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,12266 -515,143,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,12297 -730,126,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,12327 -993,862,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,12358 -954,754,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,12388 -267,410,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,12419 -347,701,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,12450 -991,204,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,12478 -923,509,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,12509 -437,378,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,12539 -737,507,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,12570 -104,49,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,12600 -840,876,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,12631 -704,66,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,12662 -889,819,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,12692 -107,351,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,12723 -571,201,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,12753 -688,209,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,12054 -544,51,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,12085 -954,135,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,12113 -445,47,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,12144 -829,379,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,12174 -464,758,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,12205 -968,475,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,12235 -842,343,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,12266 -721,507,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,12297 -966,269,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,12327 -332,699,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,12358 -328,824,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,12388 -355,497,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,12419 -506,44,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,12450 -585,522,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,12478 -634,378,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,12509 -662,689,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,12539 -783,90,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,12570 -786,720,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,12600 -710,343,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,12631 -950,457,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,12662 -274,947,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,12692 -406,834,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,12723 -515,71,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,12753 -35,282,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12054 -995,538,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12085 -670,679,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12113 -406,601,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12144 -825,577,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12174 -467,908,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12205 -709,819,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12235 -522,687,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12266 -688,157,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12297 -956,111,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12327 -129,31,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12358 -687,790,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12388 -877,795,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12419 -845,379,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12450 -425,114,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12478 -899,475,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12509 -987,747,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12539 
-641,372,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12570 -448,415,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12600 -341,955,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12631 -137,356,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12662 -235,316,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12692 -482,351,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12723 -678,164,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12753 -240,386,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,12054 -605,113,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,12085 -274,68,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,12113 -422,885,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,12144 -763,575,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,12174 -561,743,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,12205 -339,816,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,12235 -877,203,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,12266 -192,581,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,12297 -604,815,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,12327 -55,333,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,12358 -87,40,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,12388 -942,672,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,12419 -912,23,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,12450 -768,948,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,12478 -951,291,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,12509 -768,839,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,12539 -978,864,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,12570 -20,337,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,12600 -298,95,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,12631 -193,535,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,12662 -336,191,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,12692 -617,412,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,12723 -709,711,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,12753 -5,425,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12054 -164,215,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12085 -422,948,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12113 -424,544,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12144 -854,764,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12174 -168,446,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12205 -8,957,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12235 -748,967,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12266 -682,11,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12297 -300,110,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12327 -672,263,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12358 -894,215,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12388 -944,965,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12419 -403,423,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12450 -596,753,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12478 -481,770,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12509 -503,263,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12539 -126,79,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12570 -721,441,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12600 -271,858,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12631 -721,667,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12662 -157,193,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12692 -991,394,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12723 -499,680,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12753 -284,414,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,12054 -705,770,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,12085 -737,679,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,12113 -745,7,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,12144 -633,713,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,12174 -983,851,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,12205 -591,944,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,12235 
-42,130,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,12266 -771,485,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,12297 -465,23,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,12327 -296,193,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,12358 -890,7,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,12388 -312,919,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,12419 -777,768,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,12450 -364,854,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,12478 -601,411,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,12509 -823,736,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,12539 -847,10,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,12570 -490,311,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,12600 -387,348,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,12631 -688,458,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,12662 -650,195,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,12692 -447,658,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,12723 -91,704,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,12753 -197,807,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,12054 -51,861,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,12085 -570,873,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,12113 -423,933,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,12144 -524,355,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,12174 -416,794,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,12205 -789,645,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,12235 -551,700,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,12266 -400,831,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,12297 -361,800,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,12327 -189,830,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,12358 -554,828,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,12388 -585,12,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,12419 -281,501,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,12450 -629,914,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,12478 -43,685,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,12509 -533,755,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,12539 -882,708,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,12570 -790,595,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,12600 -600,32,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,12631 -148,49,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,12662 -237,727,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,12692 -488,239,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,12723 -457,273,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,12753 -401,986,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12054 -181,544,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12085 -995,182,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12113 -120,197,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12144 -119,435,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12174 -319,974,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12205 -333,524,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12235 -923,688,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12266 -634,750,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12297 -493,155,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12327 -461,860,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12358 -304,102,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12388 -641,425,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12419 -992,224,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12450 -202,408,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12478 -770,524,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12509 -202,816,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12539 -14,515,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12570 -134,793,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12600 -977,460,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12631 -174,732,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12662 -429,435,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12692 
-514,38,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12723 -784,616,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12753 -973,225,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,12054 -511,402,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,12085 -30,697,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,12113 -895,567,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,12144 -557,231,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,12174 -282,372,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,12205 -909,15,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,12235 -276,866,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,12266 -234,452,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,12297 -479,663,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,12327 -782,982,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,12358 -755,813,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,12388 -689,523,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,12419 -496,871,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,12450 -24,511,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,12478 -379,819,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,12509 -441,525,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,12539 -49,13,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,12570 -243,694,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,12600 -295,782,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,12631 -395,839,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,12662 -929,461,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,12692 -997,303,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,12723 -889,421,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,12753 -72,421,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12054 -926,433,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12085 -850,394,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12113 -826,338,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12144 -651,764,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12174 -854,216,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12205 -899,96,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12235 -309,550,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12266 -943,636,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12297 -138,427,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12327 -99,652,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12358 -270,478,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12388 -862,18,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12419 -574,40,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12450 -359,453,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12478 -958,987,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12509 -791,26,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12539 -284,101,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12570 -190,969,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12600 -527,492,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12631 -112,263,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12662 -271,593,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12692 -643,923,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12723 -554,146,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12753 -211,305,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,12054 -368,318,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,12085 -778,417,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,12113 -808,623,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,12144 -46,761,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,12174 -466,272,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,12205 -18,988,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,12235 -87,821,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,12266 -765,962,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,12297 -62,615,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,12327 -13,523,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,12358 -775,806,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,12388 
-636,586,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,12419 -458,520,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,12450 -206,908,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,12478 -310,30,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,12509 -813,247,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,12539 -22,647,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,12570 -742,55,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,12600 -394,154,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,12631 -957,344,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,12662 -205,95,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,12692 -198,665,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,12723 -638,145,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,12753 -155,925,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,12054 -688,395,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,12085 -730,749,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,12113 -208,279,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,12144 -525,288,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,12174 -483,509,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,12205 -748,255,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,12235 -6,214,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,12266 -168,473,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,12297 -301,702,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,12327 -9,814,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,12358 -778,231,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,12388 -799,422,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,12419 -309,572,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,12450 -433,363,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,12478 -969,919,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,12509 -181,355,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,12539 -787,992,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,12570 -971,147,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,12600 -440,183,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,12631 -209,375,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,12662 -537,77,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,12692 -364,308,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,12723 -377,660,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,12753 -251,555,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12054 -607,455,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12085 -127,888,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12113 -513,652,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12144 -146,799,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12174 -917,249,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12205 -776,539,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12235 -330,198,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12266 -981,340,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12297 -862,152,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12327 -612,347,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12358 -607,565,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12388 -786,855,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12419 -160,87,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12450 -199,69,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12478 -972,807,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12509 -870,565,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12539 -494,798,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12570 -975,714,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12600 -760,17,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12631 -180,797,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12662 -256,422,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12692 -422,621,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12723 -859,661,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12753 -586,363,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,12054 -441,910,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,12085 
-597,998,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,12113 -717,95,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,12144 -713,731,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,12174 -591,718,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,12205 -492,467,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,12235 -170,126,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,12266 -684,127,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,12297 -981,746,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,12327 -966,878,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,12358 -439,27,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,12388 -151,569,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,12419 -602,812,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,12450 -187,603,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,12478 -415,506,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,12509 -61,185,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,12539 -839,692,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,12570 -596,565,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,12600 -751,512,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,12631 -460,86,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,12662 -922,399,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,12692 -153,672,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,12723 -928,801,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,12753 -951,730,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12054 -394,408,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12085 -615,982,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12113 -653,499,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12144 -180,307,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12174 -649,741,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12205 -921,640,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12235 -11,300,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12266 -696,929,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12297 -795,309,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12327 -550,340,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12358 -320,228,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12388 -845,1000,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12419 -245,21,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12450 -142,583,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12478 -717,506,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12509 -3,405,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12539 -790,556,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12570 -646,72,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12600 -230,103,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12631 -938,262,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12662 -629,102,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12692 -317,841,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12723 -812,159,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12753 -141,570,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,12054 -64,375,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,12085 -207,298,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,12113 -435,32,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,12144 -96,760,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,12174 -252,338,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,12205 -956,149,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,12235 -633,343,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,12266 -190,151,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,12297 -227,44,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,12327 -24,583,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,12358 -420,230,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,12388 -910,907,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,12419 -709,783,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,12450 -810,117,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,12478 -723,416,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,12509 
-911,318,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,12539 -230,888,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,12570 -448,60,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,12600 -945,596,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,12631 -508,576,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,12662 -262,576,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,12692 -441,280,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,12723 -15,219,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,12753 -795,133,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,12054 -301,273,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,12085 -304,86,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,12113 -49,400,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,12144 -576,364,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,12174 -669,63,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,12205 -325,929,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,12235 -272,344,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,12266 -80,768,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,12297 -46,668,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,12327 -223,407,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,12358 -774,536,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,12388 -784,657,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,12419 -92,215,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,12450 -67,966,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,12478 -747,674,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,12509 -686,574,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,12539 -93,266,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,12570 -192,680,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,12600 -51,362,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,12631 -498,412,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,12662 -546,431,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,12692 -485,94,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,12723 -925,345,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,12753 -292,445,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12054 -540,632,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12085 -21,855,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,12113 -100,36,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12144 -49,250,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12174 -353,427,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,12205 -911,367,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12235 -823,245,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12266 -278,893,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,12297 -576,490,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12327 -655,88,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12358 -763,964,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,12388 -88,62,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12419 -746,506,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12450 -927,680,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,12478 -297,153,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12509 -291,403,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12539 -838,98,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,12570 -112,376,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12600 -509,477,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12631 -472,50,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,12662 -495,592,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12692 -1000,813,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12723 -241,740,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,12753 -693,873,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,12054 -903,459,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,12085 -791,224,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,12113 -108,562,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,12144 -845,199,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,12174 -452,275,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,12205 -479,355,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,12235 
-410,947,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,12266 -379,454,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,12297 -740,450,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,12327 -471,575,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,12358 -325,6,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,12388 -455,847,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,12419 -563,338,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,12450 -879,517,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,12478 -312,630,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,12509 -587,381,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,12539 -628,864,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,12570 -486,416,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,12600 -811,852,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,12631 -990,815,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,12662 -35,23,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,12692 -764,527,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,12723 -619,693,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,12753 -996,977,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12054 -554,549,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12085 -540,951,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,12113 -140,390,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12144 -554,204,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12174 -724,78,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,12205 -693,613,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12235 -866,745,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12266 -833,56,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,12297 -164,887,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12327 -753,651,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12358 -60,691,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,12388 -688,767,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12419 -883,709,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12450 -109,417,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,12478 -950,326,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12509 -438,599,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12539 -286,818,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,12570 -342,13,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12600 -383,185,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12631 -80,140,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,12662 -322,717,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12692 -749,852,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12723 -606,125,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,12753 -641,325,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,12054 -494,648,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,12085 -428,365,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,12113 -936,120,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,12144 -597,347,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,12174 -728,638,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,12205 -933,732,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,12235 -663,465,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,12266 -394,262,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,12297 -334,947,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,12327 -114,694,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,12358 -89,482,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,12388 -874,600,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,12419 -674,94,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,12450 -347,323,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,12478 -105,49,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,12509 -286,70,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,12539 -669,844,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,12570 -786,773,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,12600 -104,68,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,12631 
-770,110,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,12662 -263,42,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,12692 -900,171,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,12723 -630,644,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,12753 -597,408,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,12054 -185,45,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,12085 -175,522,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,12113 -576,166,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,12144 -957,885,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,12174 -993,713,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,12205 -500,838,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,12235 -410,267,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,12266 -592,967,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,12297 -64,529,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,12327 -208,656,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,12358 -273,665,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,12388 -906,419,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,12419 -429,776,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,12450 -961,971,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,12478 -338,248,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,12509 -472,486,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,12539 -903,674,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,12570 -299,603,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,12600 -948,492,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,12631 -931,512,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,12662 -570,391,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,12692 -97,313,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,12723 -674,758,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,12753 -468,304,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12054 -430,846,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12085 -893,912,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,12113 -519,810,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12144 -267,122,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12174 -908,102,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,12205 -176,161,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12235 -673,450,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12266 -798,215,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,12297 -291,765,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12327 -583,557,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12358 -442,739,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,12388 -951,811,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12419 -430,780,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12450 -559,645,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,12478 -726,365,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12509 -944,597,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12539 -497,126,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,12570 -388,655,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12600 -81,604,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12631 -111,280,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,12662 -288,115,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12692 -845,205,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12723 -745,672,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,12753 -352,339,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,12054 -234,70,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,12085 -167,528,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,12113 -606,220,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,12144 -670,691,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,12174 -764,197,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,12205 -659,239,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,12235 -996,50,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,12266 -424,135,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,12297 
-899,972,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,12327 -392,475,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,12358 -555,868,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,12388 -860,451,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,12419 -114,565,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,12450 -943,116,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,12478 -365,385,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,12509 -249,375,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,12539 -192,357,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,12570 -328,230,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,12600 -311,829,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,12631 -576,971,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,12662 -915,280,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,12692 -522,853,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,12723 -625,953,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,12753 -873,874,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12054 -498,578,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12085 -808,768,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,12113 -742,178,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12144 -744,916,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12174 -30,917,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,12205 -747,633,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12235 -672,107,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12266 -564,523,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,12297 -785,924,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12327 -825,481,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12358 -243,240,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,12388 -959,819,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12419 -123,602,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12450 -714,538,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,12478 -252,632,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12509 -715,952,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12539 -670,480,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,12570 -81,700,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12600 -653,726,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12631 -795,526,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,12662 -182,410,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12692 -725,307,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12723 -101,73,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,12753 -143,232,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,12054 -15,993,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,12085 -742,652,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,12113 -339,761,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,12144 -39,428,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,12174 -465,4,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,12205 -889,101,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,12235 -856,869,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,12266 -358,271,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,12297 -452,633,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,12327 -387,481,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,12358 -824,302,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,12388 -185,245,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,12419 -151,941,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,12450 -419,721,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,12478 -643,893,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,12509 -63,898,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,12539 -202,94,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,12570 -332,962,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,12600 -723,71,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,12631 -148,108,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,12662 -840,71,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,12692 
-601,767,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,12723 -962,323,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,12753 -166,982,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,12054 -531,614,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,12085 -963,839,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,12113 -994,388,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,12144 -978,296,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,12174 -72,429,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,12205 -33,901,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,12235 -428,350,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,12266 -413,581,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,12297 -737,583,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,12327 -85,92,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,12358 -916,647,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,12388 -785,771,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,12419 -302,26,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,12450 -1000,598,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,12478 -458,715,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,12509 -896,74,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,12539 -615,580,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,12570 -174,848,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,12600 -651,118,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,12631 -784,54,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,12662 -121,929,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,12692 -341,393,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,12723 -615,820,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,12753 -697,336,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12054 -215,299,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12085 -197,747,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,12113 -205,154,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12144 -256,486,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12174 -377,251,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,12205 -577,225,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12235 -686,77,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12266 -332,74,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,12297 -534,596,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12327 -485,493,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12358 -594,782,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,12388 -413,487,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12419 -13,127,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12450 -483,538,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,12478 -820,94,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12509 -745,252,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12539 -79,722,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,12570 -36,536,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12600 -950,958,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12631 -74,466,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,12662 -458,309,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12692 -609,680,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12723 -429,539,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,12753 -956,511,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,12054 -205,505,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,12085 -629,720,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,12113 -277,823,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,12144 -266,21,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,12174 -872,142,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,12205 -435,95,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,12235 -988,398,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,12266 -953,328,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,12297 -556,151,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,12327 -211,978,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,12358 -389,918,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,12388 
-351,542,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,12419 -14,96,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,12450 -181,496,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,12478 -452,77,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,12509 -511,236,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,12539 -193,913,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,12570 -797,49,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,12600 -988,967,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,12631 -487,502,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,12662 -941,790,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,12692 -577,121,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,12723 -456,55,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,12753 -982,739,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12054 -593,683,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12085 -702,610,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,12113 -528,248,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12144 -873,530,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12174 -301,889,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,12205 -769,245,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12235 -724,473,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12266 -466,938,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,12297 -774,150,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12327 -111,772,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12358 -954,201,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,12388 -780,945,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12419 -210,177,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12450 -93,378,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,12478 -332,83,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12509 -186,803,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12539 -782,398,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,12570 -41,215,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12600 -222,194,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12631 -992,287,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,12662 -477,410,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12692 -948,50,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12723 -817,204,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,12753 -597,239,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,12054 -649,637,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,12085 -3,938,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,12113 -731,788,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,12144 -181,399,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,12174 -468,576,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,12205 -891,187,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,12235 -226,703,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,12266 -28,455,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,12297 -609,244,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,12327 -224,868,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,12358 -230,353,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,12388 -216,101,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,12419 -282,924,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,12450 -501,144,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,12478 -320,0,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,12509 -720,910,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,12539 -464,259,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,12570 -363,107,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,12600 -49,63,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,12631 -223,270,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,12662 -452,554,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,12692 -210,154,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,12723 -444,205,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,12753 
-222,441,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,12054 -678,183,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,12085 -25,459,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,12113 -57,810,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,12144 -981,268,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,12174 -740,916,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,12205 -408,742,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,12235 -966,522,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,12266 -107,299,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,12297 -488,677,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,12327 -759,709,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,12358 -504,310,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,12388 -99,160,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,12419 -503,698,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,12450 -724,540,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,12478 -309,901,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,12509 -625,34,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,12539 -294,536,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,12570 -890,780,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,12600 -501,716,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,12631 -34,532,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,12662 -203,871,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,12692 -140,199,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,12723 -845,845,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,12753 -774,591,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12054 -645,378,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12085 -986,942,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,12113 -296,686,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12144 -936,720,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12174 -341,546,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,12205 -32,845,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12235 -277,667,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12266 -548,627,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,12297 -727,142,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12327 -812,655,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12358 -168,556,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,12388 -150,459,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12419 -136,89,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12450 -695,726,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,12478 -363,38,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12509 -853,60,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12539 -621,369,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,12570 -764,381,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12600 -669,465,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12631 -772,981,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,12662 -228,758,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12692 -261,31,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12723 -821,237,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,12753 -100,285,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,12054 -465,94,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,12085 -350,561,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,12113 -991,143,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,12144 -910,95,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,12174 -206,341,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,12205 -263,388,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,12235 -374,272,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,12266 -875,890,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,12297 -810,734,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,12327 -398,364,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,12358 -565,619,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,12388 -417,517,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,12419 
-291,781,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,12450 -251,327,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,12478 -449,48,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,12509 -774,809,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,12539 -386,73,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,12570 -22,936,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,12600 -940,400,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,12631 -132,736,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,12662 -103,211,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,12692 -152,271,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,12723 -952,855,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,12753 -872,923,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12054 -748,854,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12085 -749,769,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,12113 -876,271,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12144 -860,383,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12174 -900,29,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,12205 -705,185,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12235 -913,351,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12266 -315,560,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,12297 -466,840,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12327 -233,517,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12358 -906,949,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,12388 -148,633,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12419 -661,636,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12450 -847,138,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,12478 -768,481,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12509 -866,408,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12539 -475,130,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,12570 -112,813,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12600 -136,661,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12631 -763,311,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,12662 -388,872,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12692 -996,643,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12723 -486,174,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,12753 -494,528,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,12054 -771,124,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,12085 -49,126,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,12113 -322,440,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,12144 -878,881,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,12174 -827,292,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,12205 -852,873,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,12235 -716,357,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,12266 -81,247,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,12297 -916,18,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,12327 -673,395,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,12358 -242,620,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,12388 -914,946,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,12419 -902,72,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,12450 -707,691,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,12478 -223,95,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,12509 -619,878,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,12539 -254,757,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,12570 -688,898,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,12600 -477,172,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,12631 -280,419,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,12662 -546,849,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,12692 -630,807,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,12723 -455,599,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,12753 -505,59,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,12054 -823,790,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,12085 
[Fixture-data hunk, condensed: in the source patch each CSV record is its own diff line, one `-` or `+` per row; extraction ran the records together here. The removed rows (GERMANY/WEST and U.S.A. EAST/WEST combinations of division, product type, and product, quarterly 1993-1994 data) carry the month in the last column as a numeric value, e.g.

-891,574,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,12113
-573,581,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,12753

The added rows (CANADA EAST/WEST and GERMANY EAST combinations, same row shape) carry the month as an ISO-8601 date, e.g.

+925,850,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01
+745,252,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01

Roughly 1,300 data rows in this span follow these two patterns and are elided here.]
+79,722,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +36,536,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +950,958,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +74,466,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +458,309,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +609,680,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +429,539,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +956,511,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +205,505,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +629,720,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +277,823,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +266,21,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +872,142,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +435,95,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +988,398,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +953,328,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +556,151,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +211,978,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +389,918,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +351,542,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +14,96,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +181,496,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +452,77,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +511,236,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +193,913,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +797,49,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +988,967,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +487,502,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +941,790,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +577,121,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +456,55,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +982,739,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +593,683,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +702,610,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +528,248,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +873,530,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +301,889,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +769,245,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +724,473,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +466,938,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +774,150,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +111,772,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +954,201,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +780,945,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +210,177,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +93,378,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +332,83,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +186,803,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +782,398,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +41,215,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +222,194,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +992,287,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +477,410,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +948,50,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +817,204,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +597,239,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 
+649,637,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +3,938,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +731,788,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +181,399,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +468,576,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +891,187,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +226,703,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +28,455,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +609,244,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +224,868,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +230,353,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +216,101,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +282,924,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +501,144,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +320,0,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +720,910,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +464,259,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +363,107,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +49,63,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +223,270,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +452,554,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +210,154,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +444,205,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +222,441,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +678,183,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +25,459,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +57,810,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +981,268,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +740,916,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +408,742,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +966,522,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +107,299,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +488,677,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +759,709,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +504,310,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +99,160,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +503,698,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +724,540,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +309,901,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +625,34,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +294,536,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +890,780,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +501,716,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +34,532,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +203,871,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +140,199,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +845,845,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +774,591,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +645,378,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +986,942,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +296,686,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +936,720,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +341,546,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +32,845,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +277,667,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +548,627,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 
+727,142,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +812,655,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +168,556,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +150,459,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +136,89,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +695,726,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +363,38,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +853,60,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +621,369,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +764,381,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +669,465,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +772,981,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +228,758,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +261,31,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +821,237,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +100,285,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +465,94,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +350,561,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +991,143,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +910,95,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +206,341,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +263,388,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +374,272,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +875,890,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +810,734,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +398,364,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +565,619,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +417,517,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +291,781,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +251,327,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +449,48,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +774,809,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +386,73,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +22,936,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +940,400,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +132,736,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +103,211,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +152,271,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +952,855,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +872,923,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +748,854,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +749,769,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +876,271,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +860,383,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +900,29,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +705,185,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +913,351,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +315,560,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +466,840,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +233,517,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +906,949,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +148,633,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +661,636,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +847,138,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +768,481,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +866,408,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 
+475,130,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +112,813,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +136,661,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +763,311,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +388,872,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +996,643,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +486,174,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +494,528,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +771,124,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +49,126,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +322,440,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +878,881,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +827,292,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +852,873,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +716,357,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +81,247,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +916,18,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +673,395,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +242,620,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +914,946,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +902,72,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +707,691,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +223,95,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +619,878,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +254,757,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +688,898,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +477,172,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +280,419,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +546,849,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +630,807,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +455,599,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +505,59,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +823,790,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +891,574,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +840,96,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +436,376,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +168,352,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +177,741,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +727,12,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +278,157,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +443,10,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +905,544,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +881,817,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +507,754,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +363,425,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +603,492,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +473,485,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +128,369,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +105,560,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +325,651,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +711,326,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +983,180,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +241,935,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +71,403,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +395,345,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +168,278,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 
+512,376,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +291,104,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +776,543,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +271,798,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +946,333,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +195,833,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +165,132,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +238,629,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +409,337,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +720,300,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +309,470,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +812,875,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +441,237,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +500,272,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +517,860,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +924,415,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +572,140,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +768,367,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +692,195,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +28,245,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +202,285,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +76,98,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +421,932,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +636,898,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +52,330,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +184,603,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +739,280,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +841,507,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +65,202,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +623,513,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +517,132,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +636,21,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +845,657,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +232,195,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +26,323,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +680,299,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +364,811,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +572,739,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +145,889,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +644,189,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +87,698,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +620,646,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +535,562,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +661,753,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +884,425,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +689,693,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +646,941,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +4,975,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +813,455,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +773,260,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +205,69,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +657,147,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +154,533,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +747,881,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +787,457,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +867,441,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +307,859,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 
+571,177,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +92,633,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +269,382,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +764,707,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +662,566,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +818,349,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +617,128,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +649,231,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +895,258,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +750,812,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +738,362,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +107,133,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +278,60,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +32,88,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +129,378,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +187,569,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +670,186,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +678,875,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +423,636,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +389,360,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +257,677,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +780,708,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +159,158,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +97,384,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +479,927,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +9,134,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +614,273,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +261,27,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +115,209,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +358,470,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +133,219,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +891,907,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +702,778,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +58,998,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +606,194,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +668,933,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +813,708,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +450,949,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +956,579,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +276,131,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +889,689,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +708,908,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +14,524,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +904,336,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +272,916,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +257,236,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +343,965,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +80,350,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +530,599,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +340,901,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +595,935,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +47,667,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +279,104,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +293,803,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +162,64,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +935,825,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 
+689,839,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +484,184,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +230,348,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +164,904,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +401,219,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +607,381,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +229,524,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +786,902,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +92,212,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +455,762,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +409,182,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +166,442,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +277,919,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +92,67,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +631,741,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +390,617,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +403,214,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +964,202,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +223,788,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +684,639,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +645,336,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +470,937,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +424,399,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +862,21,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +736,125,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +554,635,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +790,229,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +115,770,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +853,622,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +643,109,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +794,975,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +892,820,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +728,123,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +744,135,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +678,535,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +768,971,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +234,166,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +333,814,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +968,557,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +119,820,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +469,486,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +261,429,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +984,65,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +845,977,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +374,410,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +687,150,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +157,630,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +49,488,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +817,112,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +223,598,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +433,705,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +41,226,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +396,979,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +131,19,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +521,204,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +751,805,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +45,549,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 
+144,912,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +119,427,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +728,1,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +120,540,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +657,940,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +409,644,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +881,821,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +113,560,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +831,309,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +129,1000,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +76,945,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +260,931,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +882,504,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +157,950,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +443,278,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +111,225,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +497,6,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +321,124,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +194,206,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +684,320,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +634,270,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +622,278,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +689,447,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +120,170,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +374,87,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +926,384,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +687,574,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +600,585,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +779,947,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +223,984,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +628,189,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +326,364,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +836,49,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +361,851,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +444,643,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +501,143,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +743,763,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +861,987,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +203,264,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +762,439,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +705,750,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +153,37,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +436,95,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +428,79,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +804,832,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +805,649,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +860,838,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +104,439,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +434,207,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +912,804,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +571,875,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +267,473,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +415,845,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +261,91,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +746,630,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +30,185,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +662,317,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 
+916,88,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +415,607,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +514,35,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +756,680,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +461,78,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +460,117,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +305,440,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +198,652,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +234,249,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +638,658,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +88,563,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +751,737,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +816,789,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +437,988,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +715,220,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +780,946,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +245,986,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +201,129,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +815,433,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +865,492,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +634,306,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +901,154,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +789,206,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +882,81,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +953,882,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +862,848,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +628,664,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +765,389,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +741,182,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +61,505,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +470,861,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +869,263,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +650,400,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +750,556,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +602,497,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +54,181,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +384,619,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +161,332,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +977,669,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +615,487,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +783,994,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +977,331,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +375,739,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +298,665,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +104,921,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +713,862,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +556,662,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +323,517,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +391,352,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +593,166,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +906,859,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +130,571,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +613,976,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +58,466,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +314,79,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +67,864,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +654,623,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +312,170,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 
+349,662,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +415,763,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +404,896,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +22,973,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +744,161,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +804,934,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +101,697,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +293,116,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +266,84,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +372,604,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +38,371,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +385,783,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +262,335,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +961,321,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +831,177,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +579,371,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +301,583,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +693,364,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +895,343,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +320,854,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +284,691,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +362,387,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +132,298,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +42,635,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +118,81,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +42,375,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +18,846,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +512,933,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +337,237,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +167,964,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +749,382,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +890,610,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +910,148,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +403,837,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +403,85,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +661,425,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +485,633,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +789,515,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +415,512,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +418,156,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +163,464,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +298,813,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +584,455,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +797,366,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +767,734,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +984,451,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +388,134,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +924,547,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +566,802,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +390,61,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +608,556,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +840,202,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +112,964,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +288,112,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +408,445,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +876,884,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 
+224,348,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +133,564,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +662,568,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +68,882,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +626,542,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +678,119,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +361,248,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +464,868,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +681,841,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +377,484,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +222,986,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +972,39,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +56,930,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +695,252,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +908,794,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +328,658,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +891,139,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +265,331,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +251,261,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +783,122,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +425,296,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +859,391,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +314,75,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +153,731,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +955,883,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +654,707,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +693,97,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +757,390,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +221,237,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +942,496,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +31,814,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +540,765,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +352,308,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +904,327,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +436,266,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +281,699,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +801,599,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +273,950,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +716,117,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +902,632,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +341,35,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +155,562,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +796,144,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +257,142,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +611,273,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +6,915,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +125,920,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +745,294,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +437,681,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +906,86,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +844,764,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +413,269,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +869,138,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +403,834,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +137,112,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +922,921,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +202,859,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 
+955,442,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +781,593,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +12,346,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +931,312,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +95,690,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +795,344,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +542,784,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +935,639,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +269,726,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +197,596,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +828,263,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +461,194,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +35,895,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +88,502,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +832,342,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +900,421,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +368,901,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +201,474,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +758,571,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +504,511,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +864,379,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +574,68,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +61,210,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +565,478,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +475,296,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +44,664,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +145,880,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +813,607,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +703,97,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +757,908,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +96,152,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +860,622,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +750,309,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +585,912,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +127,429,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +669,580,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +708,179,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +830,119,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +550,369,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +762,882,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +468,727,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +151,823,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +103,783,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +876,884,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +881,891,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +116,909,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +677,765,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +477,180,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +154,712,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +331,175,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +784,869,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +563,820,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +229,554,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +451,126,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +974,760,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +484,446,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +69,254,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 
+755,516,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +331,779,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +482,987,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +632,318,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +750,427,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +618,86,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +935,553,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +716,315,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +205,328,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +215,521,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +871,156,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +552,841,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +619,623,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +701,849,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +104,438,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +114,719,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +854,906,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +563,267,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +73,542,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +427,552,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +348,428,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +148,158,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +895,379,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +394,142,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +792,588,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +175,506,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +208,382,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +354,132,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +163,652,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +336,723,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +804,682,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +863,382,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +326,125,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +568,321,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +691,922,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +152,884,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +565,38,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +38,194,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +185,996,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +318,532,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +960,391,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +122,104,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +400,22,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +301,650,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +909,143,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +433,999,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +508,415,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +648,350,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +793,342,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +129,215,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +481,52,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +406,292,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +512,862,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +668,309,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +551,886,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +124,172,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +655,912,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +523,666,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 
+739,656,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01
+87,145,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01
+890,664,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01
+665,639,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01
+329,707,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01
+417,891,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01
+828,466,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01
+298,451,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01
+356,451,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01
+909,874,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01
+251,805,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01
+526,426,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01
+652,932,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01
+573,581,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01
diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py
index 730bf94cb2987..c3fb85811ca2a 100644
--- a/pandas/tests/io/sas/test_sas7bdat.py
+++ b/pandas/tests/io/sas/test_sas7bdat.py
@@ -139,8 +139,8 @@ def test_productsales():
     fname = os.path.join(dirpath, "productsales.sas7bdat")
     df = pd.read_sas(fname, encoding='utf-8')
     fname = os.path.join(dirpath, "productsales.csv")
-    df0 = pd.read_csv(fname)
-    vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR", "MONTH"]
+    df0 = pd.read_csv(fname, parse_dates=['MONTH'])
+    vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"]
     df0[vn] = df0[vn].astype(np.float64)
     tm.assert_frame_equal(df, df0)

@@ -163,3 +163,14 @@ def test_airline():
     df0 = pd.read_csv(fname)
     df0 = df0.astype(np.float64)
     tm.assert_frame_equal(df, df0, check_exact=False)
+
+
+def test_date_time():
+    # Support of different SAS date/datetime formats (PR #15871)
+    dirpath = tm.get_data_path()
+    fname = os.path.join(dirpath, "datetime.sas7bdat")
+    df = pd.read_sas(fname)
+    fname = os.path.join(dirpath, "datetime.csv")
+    df0 = pd.read_csv(fname, parse_dates=['Date1', 'Date2', 'DateTime',
+                                          'DateTimeHi', 'Taiw'])
+    tm.assert_frame_equal(df, df0)

From 0bbda546e59e1de0bf193d6354c23815e6d2a64a Mon Sep 17 00:00:00 2001
From: zzgao
Date: Fri, 18 Aug 2017 08:27:20 -0700
Subject: [PATCH 083/141] DOC: add example on json_normalize (#16438)

---
 doc/source/io.rst           |  7 +++++++
 pandas/io/json/normalize.py | 11 ++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index e6b51b7e2f45c..e338407361705 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -2020,6 +2020,13 @@ into a flat table.
 .. ipython:: python

    from pandas.io.json import json_normalize
+   data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
+           {'name': {'given': 'Mose', 'family': 'Regner'}},
+           {'id': 2, 'name': 'Faye Raker'}]
+   json_normalize(data)
+
+.. ipython:: python
+
    data = [{'state': 'Florida',
             'shortname': 'FL',
             'info': {
diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py
index 401d8d9ead2b8..72776ed01de15 100644
--- a/pandas/io/json/normalize.py
+++ b/pandas/io/json/normalize.py
@@ -135,6 +135,16 @@ def json_normalize(data, record_path=None, meta=None,

     Examples
     --------
+    >>> from pandas.io.json import json_normalize
+    >>> data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
+    ...         {'name': {'given': 'Mose', 'family': 'Regner'}},
+    ...         {'id': 2, 'name': 'Faye Raker'}]
+    >>> json_normalize(data)
+        id        name name.family name.first name.given name.last
+    0  1.0         NaN         NaN     Coleen        NaN      Volk
+    1  NaN         NaN      Regner        NaN       Mose       NaN
+    2  2.0  Faye Raker         NaN        NaN        NaN       NaN
+
     >>> data = [{'state': 'Florida',
     ...          'shortname': 'FL',
     ...          'info': {
@@ -150,7 +160,6 @@ def json_normalize(data, record_path=None, meta=None,
     ...               },
     ...      'counties': [{'name': 'Summit', 'population': 1234},
     ...                   {'name': 'Cuyahoga', 'population': 1337}]}]
-    >>> from pandas.io.json import json_normalize
     >>> result = json_normalize(data, 'counties', ['state', 'shortname',
     ...                                            ['info', 'governor']])
     >>> result

From c148dd2b9bc5abd3c69f0d18eac201212032b9b7 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Sat, 19 Aug 2017 06:27:05 -0500
Subject: [PATCH 084/141] BUG: Have object dtype for empty Categorical.categories (#17249)

* BUG: Have object dtype for empty Categorical ctor

Previously we had a `Float64Index`, which is inconsistent with, e.g.,
the regular Index constructor.

* TST: Update tests in multi for new return

Previously these worked around the return type by wrapping list-likes
in `np.array` and relying on that to cast to float. These workarounds
are no longer necessary.

* TST: Update union_categorical tests

This relied on `NaN` being a float and empty being a float. Not a
necessary test anymore.

* TST: set object dtype
---
 doc/source/whatsnew/v0.21.0.txt                 |  3 +++
 pandas/core/categorical.py                      |  5 ++++-
 pandas/tests/indexes/test_multi.py              |  9 ++++-----
 pandas/tests/reshape/test_concat.py             |  2 +-
 pandas/tests/reshape/test_union_categoricals.py | 12 +++---------
 pandas/tests/test_categorical.py                | 10 ++++++++++
 6 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index bef492529b6a1..d2bf8ffdc8e10 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -386,6 +386,9 @@ Numeric
 Categorical
 ^^^^^^^^^^^
 - Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`)
+- Bug in the categorical constructor with empty values and categories causing
+  the ``.categories`` to be an empty ``Float64Index`` rather than an empty
+  ``Index`` with object dtype (:issue:`17248`)


 Other
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 230361931125e..1c2a29333001c 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -290,7 +290,10 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False):
             # On list with NaNs, int values will be converted to float. Use
             # "object" dtype to prevent this. In the end objects will be
             # casted to int/... in the category assignment step.
- dtype = 'object' if isna(values).any() else None + if len(values) == 0 or isna(values).any(): + dtype = 'object' + else: + dtype = None values = _sanitize_array(values, None, dtype=dtype) if categories is None: diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index da1b309f5a621..c66775f4690cc 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -776,7 +776,7 @@ def test_from_arrays_empty(self): arrays = [[]] * N names = list('ABC')[:N] result = MultiIndex.from_arrays(arrays=arrays, names=names) - expected = MultiIndex(levels=[np.array([])] * N, labels=[[]] * N, + expected = MultiIndex(levels=[[]] * N, labels=[[]] * N, names=names) tm.assert_index_equal(result, expected) @@ -829,7 +829,7 @@ def test_from_product_empty(self): # 1 level result = MultiIndex.from_product([[]], names=['A']) - expected = pd.Float64Index([], name='A') + expected = pd.Index([], name='A') tm.assert_index_equal(result, expected) # 2 levels @@ -838,7 +838,7 @@ def test_from_product_empty(self): names = ['A', 'B'] for first, second in zip(l1, l2): result = MultiIndex.from_product([first, second], names=names) - expected = MultiIndex(levels=[np.array(first), np.array(second)], + expected = MultiIndex(levels=[first, second], labels=[[], []], names=names) tm.assert_index_equal(result, expected) @@ -847,8 +847,7 @@ def test_from_product_empty(self): for N in range(4): lvl2 = lrange(N) result = MultiIndex.from_product([[], lvl2, []], names=names) - expected = MultiIndex(levels=[np.array(A) - for A in [[], lvl2, []]], + expected = MultiIndex(levels=[[], lvl2, []], labels=[[], [], []], names=names) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 46fea86c45925..52cd18126859a 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -680,7 +680,7 @@ def test_concat_categorical_empty(self): tm.assert_series_equal(s1.append(s2, ignore_index=True), s2) s1 = pd.Series([], dtype='category') - s2 = pd.Series([]) + s2 = pd.Series([], dtype='object') # different dtype => not-category tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py index fe8d54005ba9b..eb80fb54b4016 100644 --- a/pandas/tests/reshape/test_union_categoricals.py +++ b/pandas/tests/reshape/test_union_categoricals.py @@ -107,17 +107,11 @@ def test_union_categoricals_empty(self): exp = Categorical([]) tm.assert_categorical_equal(res, exp) - res = union_categoricals([pd.Categorical([]), - pd.Categorical([1.0])]) - exp = Categorical([1.0]) + res = union_categoricals([Categorical([]), + Categorical(['1'])]) + exp = Categorical(['1']) tm.assert_categorical_equal(res, exp) - # to make dtype equal - nanc = pd.Categorical(np.array([np.nan], dtype=np.float64)) - res = union_categoricals([nanc, - pd.Categorical([])]) - tm.assert_categorical_equal(res, nanc) - def test_union_categorical_same_category(self): # check fastpath c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index a0b585a16ad9a..7bbe220378993 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -112,6 +112,16 @@ def test_setitem_listlike(self): result = c.codes[np.array([100000]).astype(np.int64)] tm.assert_numpy_array_equal(result, np.array([5], dtype='int8')) + def 
test_constructor_empty(self): + # GH 17248 + c = Categorical([]) + expected = Index([]) + tm.assert_index_equal(c.categories, expected) + + c = Categorical([], categories=[1, 2, 3]) + expected = pd.Int64Index([1, 2, 3]) + tm.assert_index_equal(c.categories, expected) + def test_constructor_unsortable(self): # it works! From 155c11a406f2d537a77756bbf847f60125ef312a Mon Sep 17 00:00:00 2001 From: jschendel Date: Sat, 19 Aug 2017 10:51:05 -0600 Subject: [PATCH 085/141] CLN: replace %s syntax with .format in pandas.tseries (#17290) --- pandas/tseries/frequencies.py | 38 +++++----- pandas/tseries/holiday.py | 14 ++-- pandas/tseries/offsets.py | 137 +++++++++++++++++++--------------- 3 files changed, 105 insertions(+), 84 deletions(-) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index aa33a3849acb3..7f34bcaf52926 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -409,16 +409,17 @@ def _get_freq_str(base, mult=1): need_suffix = ['QS', 'BQ', 'BQS', 'YS', 'AS', 'BY', 'BA', 'BYS', 'BAS'] for __prefix in need_suffix: for _m in tslib._MONTHS: - _offset_to_period_map['%s-%s' % (__prefix, _m)] = \ - _offset_to_period_map[__prefix] + _alias = '{prefix}-{month}'.format(prefix=__prefix, month=_m) + _offset_to_period_map[_alias] = _offset_to_period_map[__prefix] for __prefix in ['A', 'Q']: for _m in tslib._MONTHS: - _alias = '%s-%s' % (__prefix, _m) + _alias = '{prefix}-{month}'.format(prefix=__prefix, month=_m) _offset_to_period_map[_alias] = _alias _days = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] for _d in _days: - _offset_to_period_map['W-%s' % _d] = 'W-%s' % _d + _alias = 'W-{day}'.format(day=_d) + _offset_to_period_map[_alias] = _alias def get_period_alias(offset_str): @@ -587,7 +588,7 @@ def _base_and_stride(freqstr): groups = opattern.match(freqstr) if not groups: - raise ValueError("Could not evaluate %s" % freqstr) + raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) stride = groups.group(1) @@ -775,8 +776,8 @@ def infer_freq(index, warn=True): if not (is_datetime64_dtype(values) or is_timedelta64_dtype(values) or values.dtype == object): - raise TypeError("cannot infer freq from a non-convertible " - "dtype on a Series of {0}".format(index.dtype)) + raise TypeError("cannot infer freq from a non-convertible dtype " + "on a Series of {dtype}".format(dtype=index.dtype)) index = values if is_period_arraylike(index): @@ -789,7 +790,7 @@ def infer_freq(index, warn=True): if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex): if isinstance(index, (pd.Int64Index, pd.Float64Index)): raise TypeError("cannot infer freq from a non-convertible index " - "type {0}".format(type(index))) + "type {type}".format(type=type(index))) index = index.values if not isinstance(index, pd.DatetimeIndex): @@ -956,15 +957,17 @@ def _infer_daily_rule(self): if annual_rule: nyears = self.ydiffs[0] month = _month_aliases[self.rep_stamp.month] - return _maybe_add_count('%s-%s' % (annual_rule, month), nyears) + alias = '{prefix}-{month}'.format(prefix=annual_rule, month=month) + return _maybe_add_count(alias, nyears) quarterly_rule = self._get_quarterly_rule() if quarterly_rule: nquarters = self.mdiffs[0] / 3 mod_dict = {0: 12, 2: 11, 1: 10} month = _month_aliases[mod_dict[self.rep_stamp.month % 3]] - return _maybe_add_count('%s-%s' % (quarterly_rule, month), - nquarters) + alias = '{prefix}-{month}'.format(prefix=quarterly_rule, + month=month) + return _maybe_add_count(alias, nquarters) monthly_rule = 
self._get_monthly_rule() if monthly_rule: @@ -974,8 +977,8 @@ def _infer_daily_rule(self): days = self.deltas[0] / _ONE_DAY if days % 7 == 0: # Weekly - alias = _weekday_rule_aliases[self.rep_stamp.weekday()] - return _maybe_add_count('W-%s' % alias, days / 7) + day = _weekday_rule_aliases[self.rep_stamp.weekday()] + return _maybe_add_count('W-{day}'.format(day=day), days / 7) else: return _maybe_add_count('D', days) @@ -1048,7 +1051,7 @@ def _get_wom_rule(self): week = week_of_months[0] + 1 wd = _weekday_rule_aliases[weekdays[0]] - return 'WOM-%d%s' % (week, wd) + return 'WOM-{week}{weekday}'.format(week=week, weekday=wd) class _TimedeltaFrequencyInferer(_FrequencyInferer): @@ -1058,15 +1061,16 @@ def _infer_daily_rule(self): days = self.deltas[0] / _ONE_DAY if days % 7 == 0: # Weekly - alias = _weekday_rule_aliases[self.rep_stamp.weekday()] - return _maybe_add_count('W-%s' % alias, days / 7) + wd = _weekday_rule_aliases[self.rep_stamp.weekday()] + alias = 'W-{weekday}'.format(weekday=wd) + return _maybe_add_count(alias, days / 7) else: return _maybe_add_count('D', days) def _maybe_add_count(base, count): if count != 1: - return '%d%s' % (count, base) + return '{count}{base}'.format(count=int(count), base=base) else: return base diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 9acb52ebe0e9f..d8bfa3013f8f7 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -174,16 +174,16 @@ class from pandas.tseries.offsets def __repr__(self): info = '' if self.year is not None: - info += 'year=%s, ' % self.year - info += 'month=%s, day=%s, ' % (self.month, self.day) + info += 'year={year}, '.format(year=self.year) + info += 'month={mon}, day={day}, '.format(mon=self.month, day=self.day) if self.offset is not None: - info += 'offset=%s' % self.offset + info += 'offset={offset}'.format(offset=self.offset) if self.observance is not None: - info += 'observance=%s' % self.observance + info += 'observance={obs}'.format(obs=self.observance) - repr = 'Holiday: %s (%s)' % (self.name, info) + repr = 'Holiday: {name} ({info})'.format(name=self.name, info=info) return repr def dates(self, start_date, end_date, return_name=False): @@ -374,8 +374,8 @@ def holidays(self, start=None, end=None, return_name=False): DatetimeIndex of holidays """ if self.rules is None: - raise Exception('Holiday Calendar %s does not have any ' - 'rules specified' % self.name) + raise Exception('Holiday Calendar {name} does not have any ' + 'rules specified'.format(name=self.name)) if start is None: start = AbstractHolidayCalendar.start_date diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 56ef703e67ca0..29cdda5548896 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -261,10 +261,10 @@ def apply_index(self, i): """ if not type(self) is DateOffset: - raise NotImplementedError("DateOffset subclass %s " + raise NotImplementedError("DateOffset subclass {name} " "does not have a vectorized " - "implementation" - % (self.__class__.__name__,)) + "implementation".format( + name=self.__class__.__name__)) relativedelta_fast = set(['years', 'months', 'weeks', 'days', 'hours', 'minutes', 'seconds', 'microseconds']) @@ -295,10 +295,10 @@ def apply_index(self, i): return i + (self._offset * self.n) else: # relativedelta with other keywords + kwd = set(self.kwds) - relativedelta_fast raise NotImplementedError("DateOffset with relativedelta " - "keyword(s) %s not able to be " - "applied vectorized" % - (set(self.kwds) - relativedelta_fast),) + "keyword(s) {kwd} 
not able to be " + "applied vectorized".format(kwd=kwd)) def isAnchored(self): return (self.n == 1) @@ -339,19 +339,20 @@ def __repr__(self): if attr not in exclude: attrs.append('='.join((attr, repr(getattr(self, attr))))) + plural = '' if abs(self.n) != 1: plural = 's' - else: - plural = '' - n_str = "" + n_str = '' if self.n != 1: - n_str = "%s * " % self.n + n_str = '{n} * '.format(n=self.n) - out = '<%s' % n_str + className + plural + attrs_str = '' if attrs: - out += ': ' + ', '.join(attrs) - out += '>' + attrs_str = ': ' + ', '.join(attrs) + + repr_content = ''.join([n_str, className, plural, attrs_str]) + out = '<{content}>'.format(content=repr_content) return out @property @@ -501,7 +502,7 @@ def freqstr(self): return repr(self) if self.n != 1: - fstr = '%d%s' % (self.n, code) + fstr = '{n}{code}'.format(n=self.n, code=code) else: fstr = code @@ -509,7 +510,7 @@ def freqstr(self): @property def nanos(self): - raise ValueError("{0} is a non-fixed frequency".format(self)) + raise ValueError("{name} is a non-fixed frequency".format(name=self)) class SingleConstructorOffset(DateOffset): @@ -518,7 +519,7 @@ class SingleConstructorOffset(DateOffset): def _from_name(cls, suffix=None): # default _from_name calls cls with no args if suffix: - raise ValueError("Bad freq suffix %s" % suffix) + raise ValueError("Bad freq suffix {suffix}".format(suffix=suffix)) return cls() @@ -531,21 +532,21 @@ class BusinessMixin(object): def __repr__(self): className = getattr(self, '_outputName', self.__class__.__name__) + plural = '' if abs(self.n) != 1: plural = 's' - else: - plural = '' - n_str = "" + n_str = '' if self.n != 1: - n_str = "%s * " % self.n + n_str = '{n} * '.format(n=self.n) - out = '<%s' % n_str + className + plural + self._repr_attrs() + '>' + repr_content = ''.join([n_str, className, plural, self._repr_attrs()]) + out = '<{content}>'.format(content=repr_content) return out def _repr_attrs(self): if self.offset: - attrs = ['offset=%s' % repr(self.offset)] + attrs = ['offset={offset!r}'.format(offset=self.offset)] else: attrs = None out = '' @@ -601,7 +602,7 @@ def freqstr(self): return repr(self) if self.n != 1: - fstr = '%d%s' % (self.n, code) + fstr = '{n}{code}'.format(n=self.n, code=code) else: fstr = code @@ -1109,7 +1110,8 @@ def name(self): if self.isAnchored: return self.rule_code else: - return "%s-%s" % (self.rule_code, _int_to_month[self.n]) + return "{code}-{month}".format(code=self.rule_code, + month=_int_to_month[self.n]) class MonthEnd(MonthOffset): @@ -1176,9 +1178,9 @@ def __init__(self, n=1, day_of_month=None, normalize=False, **kwds): else: self.day_of_month = int(day_of_month) if not self._min_day_of_month <= self.day_of_month <= 27: - raise ValueError('day_of_month must be ' - '{}<=day_of_month<=27, got {}'.format( - self._min_day_of_month, self.day_of_month)) + msg = 'day_of_month must be {min}<=day_of_month<=27, got {day}' + raise ValueError(msg.format(min=self._min_day_of_month, + day=self.day_of_month)) self.n = int(n) self.normalize = normalize self.kwds = kwds @@ -1190,7 +1192,7 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - suffix = '-{}'.format(self.day_of_month) + suffix = '-{day_of_month}'.format(day_of_month=self.day_of_month) return self._prefix + suffix @apply_wraps @@ -1576,8 +1578,8 @@ def __init__(self, n=1, normalize=False, **kwds): if self.weekday is not None: if self.weekday < 0 or self.weekday > 6: - raise ValueError('Day must be 0<=day<=6, got %d' % - self.weekday) + raise ValueError('Day must be 0<=day<=6, got {day}' + 
.format(day=self.weekday)) self._inc = timedelta(weeks=1) self.kwds = kwds @@ -1630,7 +1632,7 @@ def onOffset(self, dt): def rule_code(self): suffix = '' if self.weekday is not None: - suffix = '-%s' % (_int_to_weekday[self.weekday]) + suffix = '-{weekday}'.format(weekday=_int_to_weekday[self.weekday]) return self._prefix + suffix @classmethod @@ -1696,11 +1698,11 @@ def __init__(self, n=1, normalize=False, **kwds): raise ValueError('N cannot be 0') if self.weekday < 0 or self.weekday > 6: - raise ValueError('Day must be 0<=day<=6, got %d' % - self.weekday) + raise ValueError('Day must be 0<=day<=6, got {day}' + .format(day=self.weekday)) if self.week < 0 or self.week > 3: - raise ValueError('Week must be 0<=day<=3, got %d' % - self.week) + raise ValueError('Week must be 0<=week<=3, got {week}' + .format(week=self.week)) self.kwds = kwds @@ -1746,15 +1748,18 @@ def onOffset(self, dt): @property def rule_code(self): - return '%s-%d%s' % (self._prefix, self.week + 1, - _int_to_weekday.get(self.weekday, '')) + weekday = _int_to_weekday.get(self.weekday, '') + return '{prefix}-{week}{weekday}'.format(prefix=self._prefix, + week=self.week + 1, + weekday=weekday) _prefix = 'WOM' @classmethod def _from_name(cls, suffix=None): if not suffix: - raise ValueError("Prefix %r requires a suffix." % (cls._prefix)) + raise ValueError("Prefix {prefix!r} requires a suffix." + .format(prefix=cls._prefix)) # TODO: handle n here... # only one digit weeks (1 --> week 0, 2 --> week 1, etc.) week = int(suffix[0]) - 1 @@ -1789,8 +1794,8 @@ def __init__(self, n=1, normalize=False, **kwds): raise ValueError('N cannot be 0') if self.weekday < 0 or self.weekday > 6: - raise ValueError('Day must be 0<=day<=6, got %d' % - self.weekday) + raise ValueError('Day must be 0<=day<=6, got {day}' + .format(day=self.weekday)) self.kwds = kwds @@ -1829,14 +1834,17 @@ def onOffset(self, dt): @property def rule_code(self): - return '%s-%s' % (self._prefix, _int_to_weekday.get(self.weekday, '')) + weekday = _int_to_weekday.get(self.weekday, '') + return '{prefix}-{weekday}'.format(prefix=self._prefix, + weekday=weekday) _prefix = 'LWOM' @classmethod def _from_name(cls, suffix=None): if not suffix: - raise ValueError("Prefix %r requires a suffix." % (cls._prefix)) + raise ValueError("Prefix {prefix!r} requires a suffix." + .format(prefix=cls._prefix)) # TODO: handle n here... 
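        # (annotation) only the weekday code is parsed from the suffix here;
        # e.g. the alias 'LWOM-SAT' gives suffix 'SAT', which maps to weekday=5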
weekday = _weekday_to_int[suffix] return cls(weekday=weekday) @@ -1876,7 +1884,8 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - return '%s-%s' % (self._prefix, _int_to_month[self.startingMonth]) + month = _int_to_month[self.startingMonth] + return '{prefix}-{month}'.format(prefix=self._prefix, month=month) class BQuarterEnd(QuarterOffset): @@ -2045,8 +2054,7 @@ def apply(self, other): @apply_index_wraps def apply_index(self, i): freq_month = 12 if self.startingMonth == 1 else self.startingMonth - 1 - # freq_month = self.startingMonth - freqstr = 'Q-%s' % (_int_to_month[freq_month],) + freqstr = 'Q-{month}'.format(month=_int_to_month[freq_month]) return self._beg_apply_index(i, freqstr) @@ -2071,7 +2079,8 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - return '%s-%s' % (self._prefix, _int_to_month[self.month]) + month = _int_to_month[self.month] + return '{prefix}-{month}'.format(prefix=self._prefix, month=month) class BYearEnd(YearOffset): @@ -2246,7 +2255,7 @@ def _rollf(date): @apply_index_wraps def apply_index(self, i): freq_month = 12 if self.month == 1 else self.month - 1 - freqstr = 'A-%s' % (_int_to_month[freq_month],) + freqstr = 'A-{month}'.format(month=_int_to_month[freq_month]) return self._beg_apply_index(i, freqstr) def onOffset(self, dt): @@ -2312,7 +2321,8 @@ def __init__(self, n=1, normalize=False, **kwds): raise ValueError('N cannot be 0') if self.variation not in ["nearest", "last"]: - raise ValueError('%s is not a valid variation' % self.variation) + raise ValueError('{variation} is not a valid variation' + .format(variation=self.variation)) if self.variation == "nearest": weekday_offset = weekday(self.weekday) @@ -2438,8 +2448,9 @@ def _get_year_end_last(self, dt): @property def rule_code(self): + prefix = self._get_prefix() suffix = self.get_rule_code_suffix() - return "%s-%s" % (self._get_prefix(), suffix) + return "{prefix}-{suffix}".format(prefix=prefix, suffix=suffix) def _get_prefix(self): return self._prefix @@ -2451,9 +2462,11 @@ def _get_suffix_prefix(self): return self._suffix_prefix_last def get_rule_code_suffix(self): - return '%s-%s-%s' % (self._get_suffix_prefix(), - _int_to_month[self.startingMonth], - _int_to_weekday[self.weekday]) + prefix = self._get_suffix_prefix() + month = _int_to_month[self.startingMonth] + weekday = _int_to_weekday[self.weekday] + return '{prefix}-{month}-{weekday}'.format(prefix=prefix, month=month, + weekday=weekday) @classmethod def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): @@ -2463,7 +2476,7 @@ def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): variation = "last" else: raise ValueError( - "Unable to parse varion_code: %s" % (varion_code,)) + "Unable to parse varion_code: {code}".format(code=varion_code)) startingMonth = _month_to_int[startingMonth_code] weekday = _weekday_to_int[weekday_code] @@ -2628,8 +2641,9 @@ def onOffset(self, dt): @property def rule_code(self): suffix = self._offset.get_rule_code_suffix() - return "%s-%s" % (self._prefix, - "%s-%d" % (suffix, self.qtr_with_extra_week)) + qtr = self.qtr_with_extra_week + return "{prefix}-{suffix}-{qtr}".format(prefix=self._prefix, + suffix=suffix, qtr=qtr) @classmethod def _from_name(cls, *args): @@ -2712,8 +2726,8 @@ def __add__(self, other): except ApplyTypeError: return NotImplemented except OverflowError: - raise OverflowError("the add operation between {} and {} " - "will overflow".format(self, other)) + raise OverflowError("the add operation between {self} and {other} " + 
"will overflow".format(self=self, other=other)) def __eq__(self, other): if isinstance(other, compat.string_types): @@ -2771,7 +2785,8 @@ def apply(self, other): elif isinstance(other, type(self)): return type(self)(self.n + other.n) - raise ApplyTypeError('Unhandled type: %s' % type(other).__name__) + raise ApplyTypeError('Unhandled type: {type_str}' + .format(type_str=type(other).__name__)) _prefix = 'undefined' @@ -2921,7 +2936,8 @@ def generate_range(start=None, end=None, periods=None, # faster than cur + offset next_date = offset.apply(cur) if next_date <= cur: - raise ValueError('Offset %s did not increment date' % offset) + raise ValueError('Offset {offset} did not increment date' + .format(offset=offset)) cur = next_date else: while cur >= end: @@ -2930,7 +2946,8 @@ def generate_range(start=None, end=None, periods=None, # faster than cur + offset next_date = offset.apply(cur) if next_date >= cur: - raise ValueError('Offset %s did not decrement date' % offset) + raise ValueError('Offset {offset} did not decrement date' + .format(offset=offset)) cur = next_date From e4aeed27126be756f46f4b8bfc53861679384318 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 19 Aug 2017 17:55:34 -0400 Subject: [PATCH 086/141] TST: parameterize consistency tests for rolling/expanding windows (#17292) --- pandas/tests/test_window.py | 403 ++++++++++++++++++------------------ 1 file changed, 203 insertions(+), 200 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 21a9b05d48126..1cc0ad8bb4041 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -2009,6 +2009,15 @@ def no_nans(x): _consistency_data = _create_consistency_data() +def _rolling_consistency_cases(): + for window in [1, 2, 3, 10, 20]: + for min_periods in set([0, 1, 2, 3, 4, window]): + if min_periods and (min_periods > window): + continue + for center in [False, True]: + yield window, min_periods, center + + class TestMomentsConsistency(Base): base_functions = [ (lambda v: Series(v).count(), None, 'count'), @@ -2177,7 +2186,11 @@ def _non_null_values(x): (mean_x * mean_y)) @pytest.mark.slow - def test_ewm_consistency(self): + @pytest.mark.parametrize( + 'min_periods, adjust, ignore_na', product([0, 1, 2, 3, 4], + [True, False], + [False, True])) + def test_ewm_consistency(self, min_periods, adjust, ignore_na): def _weights(s, com, adjust, ignore_na): if isinstance(s, DataFrame): if not len(s.columns): @@ -2231,52 +2244,51 @@ def _ewma(s, com, min_periods, adjust, ignore_na): return result com = 3. 
- for min_periods, adjust, ignore_na in product([0, 1, 2, 3, 4], - [True, False], - [False, True]): - # test consistency between different ewm* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: x.expanding().count(), - mean=lambda x: x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).mean(), - mock_mean=lambda x: _ewma(x, com=com, - min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na), - corr=lambda x, y: x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).corr(y), - var_unbiased=lambda x: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).var(bias=False)), - std_unbiased=lambda x: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .std(bias=False)), - cov_unbiased=lambda x, y: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .cov(y, bias=False)), - var_biased=lambda x: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .var(bias=True)), - std_biased=lambda x: x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).std(bias=True), - cov_biased=lambda x, y: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .cov(y, bias=True)), - var_debiasing_factors=lambda x: ( - _variance_debiasing_factors(x, com=com, adjust=adjust, - ignore_na=ignore_na))) + # test consistency between different ewm* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: x.expanding().count(), + mean=lambda x: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).mean(), + mock_mean=lambda x: _ewma(x, com=com, + min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na), + corr=lambda x, y: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).corr(y), + var_unbiased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).var(bias=False)), + std_unbiased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .std(bias=False)), + cov_unbiased=lambda x, y: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .cov(y, bias=False)), + var_biased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .var(bias=True)), + std_biased=lambda x: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).std(bias=True), + cov_biased=lambda x, y: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .cov(y, bias=True)), + var_debiasing_factors=lambda x: ( + _variance_debiasing_factors(x, com=com, adjust=adjust, + ignore_na=ignore_na))) @pytest.mark.slow - def test_expanding_consistency(self): + @pytest.mark.parametrize( + 'min_periods', [0, 1, 2, 3, 4]) + def test_expanding_consistency(self, min_periods): # suppress warnings about empty slices, as we are deliberately testing # with empty/0-length Series/DataFrames @@ -2285,72 +2297,72 @@ def test_expanding_consistency(self): message=".*(empty slice|0 for slice).*", category=RuntimeWarning) - for min_periods in [0, 1, 2, 3, 4]: - - # test consistency between different expanding_* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: x.expanding().count(), - mean=lambda x: x.expanding( - min_periods=min_periods).mean(), - mock_mean=lambda x: x.expanding( - 
min_periods=min_periods).sum() / x.expanding().count(), - corr=lambda x, y: x.expanding( - min_periods=min_periods).corr(y), - var_unbiased=lambda x: x.expanding( - min_periods=min_periods).var(), - std_unbiased=lambda x: x.expanding( - min_periods=min_periods).std(), - cov_unbiased=lambda x, y: x.expanding( - min_periods=min_periods).cov(y), - var_biased=lambda x: x.expanding( - min_periods=min_periods).var(ddof=0), - std_biased=lambda x: x.expanding( - min_periods=min_periods).std(ddof=0), - cov_biased=lambda x, y: x.expanding( - min_periods=min_periods).cov(y, ddof=0), - var_debiasing_factors=lambda x: ( - x.expanding().count() / - (x.expanding().count() - 1.) - .replace(0., np.nan))) - - # test consistency between expanding_xyz() and either (a) - # expanding_apply of Series.xyz(), or (b) expanding_apply of - # np.nanxyz() - for (x, is_constant, no_nans) in self.data: - functions = self.base_functions - - # GH 8269 - if no_nans: - functions = self.base_functions + self.no_nan_functions - for (f, require_min_periods, name) in functions: - expanding_f = getattr( - x.expanding(min_periods=min_periods), name) - - if (require_min_periods and - (min_periods is not None) and - (min_periods < require_min_periods)): - continue - - if name == 'count': - expanding_f_result = expanding_f() - expanding_apply_f_result = x.expanding( - min_periods=0).apply(func=f) + # test consistency between different expanding_* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: x.expanding().count(), + mean=lambda x: x.expanding( + min_periods=min_periods).mean(), + mock_mean=lambda x: x.expanding( + min_periods=min_periods).sum() / x.expanding().count(), + corr=lambda x, y: x.expanding( + min_periods=min_periods).corr(y), + var_unbiased=lambda x: x.expanding( + min_periods=min_periods).var(), + std_unbiased=lambda x: x.expanding( + min_periods=min_periods).std(), + cov_unbiased=lambda x, y: x.expanding( + min_periods=min_periods).cov(y), + var_biased=lambda x: x.expanding( + min_periods=min_periods).var(ddof=0), + std_biased=lambda x: x.expanding( + min_periods=min_periods).std(ddof=0), + cov_biased=lambda x, y: x.expanding( + min_periods=min_periods).cov(y, ddof=0), + var_debiasing_factors=lambda x: ( + x.expanding().count() / + (x.expanding().count() - 1.) 
+ .replace(0., np.nan))) + + # test consistency between expanding_xyz() and either (a) + # expanding_apply of Series.xyz(), or (b) expanding_apply of + # np.nanxyz() + for (x, is_constant, no_nans) in self.data: + functions = self.base_functions + + # GH 8269 + if no_nans: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + expanding_f = getattr( + x.expanding(min_periods=min_periods), name) + + if (require_min_periods and + (min_periods is not None) and + (min_periods < require_min_periods)): + continue + + if name == 'count': + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding( + min_periods=0).apply(func=f) + else: + if name in ['cov', 'corr']: + expanding_f_result = expanding_f( + pairwise=False) else: - if name in ['cov', 'corr']: - expanding_f_result = expanding_f( - pairwise=False) - else: - expanding_f_result = expanding_f() - expanding_apply_f_result = x.expanding( - min_periods=min_periods).apply(func=f) - - if not tm._incompat_bottleneck_version(name): - assert_equal(expanding_f_result, - expanding_apply_f_result) + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding( + min_periods=min_periods).apply(func=f) + + if not tm._incompat_bottleneck_version(name): + assert_equal(expanding_f_result, + expanding_apply_f_result) @pytest.mark.slow - def test_rolling_consistency(self): + @pytest.mark.parametrize( + 'window,min_periods,center', list(_rolling_consistency_cases())) + def test_rolling_consistency(self, window, min_periods, center): # suppress warnings about empty slices, as we are deliberately testing # with empty/0-length Series/DataFrames @@ -2359,100 +2371,91 @@ def test_rolling_consistency(self): message=".*(empty slice|0 for slice).*", category=RuntimeWarning) - def cases(): - for window in [1, 2, 3, 10, 20]: - for min_periods in set([0, 1, 2, 3, 4, window]): - if min_periods and (min_periods > window): - continue - for center in [False, True]: - yield window, min_periods, center - - for window, min_periods, center in cases(): - # test consistency between different rolling_* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: ( - x.rolling(window=window, center=center) - .count()), - mean=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).mean()), - mock_mean=lambda x: ( - x.rolling(window=window, - min_periods=min_periods, - center=center).sum() - .divide(x.rolling(window=window, - min_periods=min_periods, - center=center).count())), - corr=lambda x, y: ( - x.rolling(window=window, min_periods=min_periods, - center=center).corr(y)), - - var_unbiased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).var()), - - std_unbiased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).std()), - - cov_unbiased=lambda x, y: ( - x.rolling(window=window, min_periods=min_periods, - center=center).cov(y)), - - var_biased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).var(ddof=0)), - - std_biased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).std(ddof=0)), - - cov_biased=lambda x, y: ( - x.rolling(window=window, min_periods=min_periods, - center=center).cov(y, ddof=0)), - var_debiasing_factors=lambda x: ( - x.rolling(window=window, center=center).count() - .divide((x.rolling(window=window, center=center) - .count() - 1.) 
- .replace(0., np.nan)))) - - # test consistency between rolling_xyz() and either (a) - # rolling_apply of Series.xyz(), or (b) rolling_apply of - # np.nanxyz() - for (x, is_constant, no_nans) in self.data: - functions = self.base_functions - - # GH 8269 - if no_nans: - functions = self.base_functions + self.no_nan_functions - for (f, require_min_periods, name) in functions: - rolling_f = getattr( - x.rolling(window=window, center=center, - min_periods=min_periods), name) - - if require_min_periods and ( - min_periods is not None) and ( - min_periods < require_min_periods): - continue + # test consistency between different rolling_* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: ( + x.rolling(window=window, center=center) + .count()), + mean=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).mean()), + mock_mean=lambda x: ( + x.rolling(window=window, + min_periods=min_periods, + center=center).sum() + .divide(x.rolling(window=window, + min_periods=min_periods, + center=center).count())), + corr=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).corr(y)), - if name == 'count': - rolling_f_result = rolling_f() - rolling_apply_f_result = x.rolling( - window=window, min_periods=0, - center=center).apply(func=f) + var_unbiased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).var()), + + std_unbiased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).std()), + + cov_unbiased=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).cov(y)), + + var_biased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).var(ddof=0)), + + std_biased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).std(ddof=0)), + + cov_biased=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).cov(y, ddof=0)), + var_debiasing_factors=lambda x: ( + x.rolling(window=window, center=center).count() + .divide((x.rolling(window=window, center=center) + .count() - 1.) 
+ .replace(0., np.nan)))) + + # test consistency between rolling_xyz() and either (a) + # rolling_apply of Series.xyz(), or (b) rolling_apply of + # np.nanxyz() + for (x, is_constant, no_nans) in self.data: + functions = self.base_functions + + # GH 8269 + if no_nans: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + rolling_f = getattr( + x.rolling(window=window, center=center, + min_periods=min_periods), name) + + if require_min_periods and ( + min_periods is not None) and ( + min_periods < require_min_periods): + continue + + if name == 'count': + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling( + window=window, min_periods=0, + center=center).apply(func=f) + else: + if name in ['cov', 'corr']: + rolling_f_result = rolling_f( + pairwise=False) else: - if name in ['cov', 'corr']: - rolling_f_result = rolling_f( - pairwise=False) - else: - rolling_f_result = rolling_f() - rolling_apply_f_result = x.rolling( - window=window, min_periods=min_periods, - center=center).apply(func=f) - if not tm._incompat_bottleneck_version(name): - assert_equal(rolling_f_result, - rolling_apply_f_result) + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling( + window=window, min_periods=min_periods, + center=center).apply(func=f) + if not tm._incompat_bottleneck_version(name): + assert_equal(rolling_f_result, + rolling_apply_f_result) # binary moments def test_rolling_cov(self): From db11418086ad032ab3002647f29577835a483581 Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Sat, 19 Aug 2017 17:59:19 -0400 Subject: [PATCH 087/141] FIX: define `DataFrame.items` for all versions of python (#17214) --- doc/source/whatsnew/v0.21.0.txt | 4 ++++ pandas/core/frame.py | 3 +-- pandas/core/series.py | 3 +-- pandas/tests/frame/test_api.py | 11 ++++++++++- pandas/tests/series/test_api.py | 10 ++++++++++ 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d2bf8ffdc8e10..97e4b7deddf04 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -129,6 +129,10 @@ Other Enhancements - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) - `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). - :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`). +- :func:`DataFrame.items` and :func:`Series.items` is now present in both Python 2 and 3 and is lazy in all cases (:issue:`13918`, :issue:`17213`) + + + .. 
_whatsnew_0210.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 467ef52de234e..b5b3df64d24c0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -802,8 +802,7 @@ def itertuples(self, index=True, name="Pandas"): # fallback to regular tuples return zip(*arrays) - if compat.PY3: # pragma: no cover - items = iteritems + items = iteritems def __len__(self): """Returns length of info axis, but here we use the index """ diff --git a/pandas/core/series.py b/pandas/core/series.py index c8282450b77a9..75dc3d6403650 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1110,8 +1110,7 @@ def iteritems(self): """ return zip(iter(self.index), iter(self)) - if compat.PY3: # pragma: no cover - items = iteritems + items = iteritems # ---------------------------------------------------------------------- # Misc public methods diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 53a1b9525a0dd..a62fcb506a34b 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -171,7 +171,16 @@ def test_nonzero(self): def test_iteritems(self): df = self.klass([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) for k, v in compat.iteritems(df): - assert type(v) == self.klass._constructor_sliced + assert isinstance(v, self.klass._constructor_sliced) + + def test_items(self): + # issue #17213, #13918 + cols = ['a', 'b', 'c'] + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols) + for c, (k, v) in zip(cols, df.items()): + assert c == k + assert isinstance(v, Series) + assert (df[k] == v).all() def test_iter(self): assert tm.equalContents(list(self.frame), self.frame.columns) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 8e22dd38030ee..b7fbe803f8d3b 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -301,6 +301,16 @@ def test_iteritems(self): # assert is lazy (generators don't define reverse, lists do) assert not hasattr(self.series.iteritems(), 'reverse') + def test_items(self): + for idx, val in self.series.items(): + assert val == self.series[idx] + + for idx, val in self.ts.items(): + assert val == self.ts[idx] + + # assert is lazy (generators don't define reverse, lists do) + assert not hasattr(self.series.items(), 'reverse') + def test_raise_on_info(self): s = Series(np.random.randn(10)) with pytest.raises(AttributeError): From a256e26f72c1f44fa41cbc8bc2ff1ea68582c7d1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 20 Aug 2017 16:25:43 -0500 Subject: [PATCH 088/141] PERF: Update ASV publish config (#17293) Stricter cutoffs for considering regressions [ci skip] --- asv_bench/asv.conf.json | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 59c05400d06b0..ced4f2b12445f 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -117,8 +117,10 @@ // with results. If the commit is `null`, regression detection is // skipped for the matching benchmark.
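    // (annotation: asv matches the keys of the two maps below against
    // benchmark names as regular expressions, per the asv docs)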
// - // "regressions_first_commits": { - // "some_benchmark": "352cdf", // Consider regressions only after this commit - // "another_benchmark": null, // Skip regression detection altogether - // } + "regressions_first_commits": { + "*": "v0.20.0" + }, + "regression_thresholds": { + "*": 0.05 + } } From 75d46a6b90d154837d2c7234faa81722dc72ab37 Mon Sep 17 00:00:00 2001 From: Yosuke Nakabayashi Date: Mon, 21 Aug 2017 09:50:44 +0200 Subject: [PATCH 089/141] DOC: Expand docstrings for head / tail methods (#16941) --- pandas/core/generic.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5a7f37bba91aa..d9d75c870b20c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2978,14 +2978,36 @@ def filter(self, items=None, like=None, regex=None, axis=None): def head(self, n=5): """ - Returns first n rows + Return the first n rows. + + Parameters + ---------- + n : int, default 5 + Number of rows to select. + + Returns + ------- + obj_head : type of caller + The first n rows of the caller object. """ + return self.iloc[:n] def tail(self, n=5): """ - Returns last n rows + Return the last n rows. + + Parameters + ---------- + n : int, default 5 + Number of rows to select. + + Returns + ------- + obj_tail : type of caller + The last n rows of the caller object. """ + if n == 0: return self.iloc[0:0] return self.iloc[-n:] From 172abfbf9ce7ea39a9892d45d6dba4b769999dc1 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 21 Aug 2017 00:56:39 -0700 Subject: [PATCH 090/141] MAINT: Use set literal for unsupported + depr args Initializes unsupported and deprecated argument sets with set literals instead of the set constructor in pandas/io/parsers.py, as the former is slightly faster than the latter. --- pandas/io/parsers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 05a04f268f72b..a9821be3fa5e2 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -487,18 +487,18 @@ def _read(filepath_or_buffer, kwds): 'widths': None, } -_c_unsupported = set(['skipfooter']) -_python_unsupported = set([ +_c_unsupported = {'skipfooter'} +_python_unsupported = { 'low_memory', 'buffer_lines', 'float_precision', -]) -_deprecated_args = set([ +} +_deprecated_args = { 'as_recarray', 'buffer_lines', 'compact_ints', 'use_unsigned', -]) +} def _make_parser_function(name, sep=','): From 1982acad9fb9f85e68c8f5d4ee56fc2978a7f516 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 21 Aug 2017 01:14:50 -0700 Subject: [PATCH 091/141] DOC: Add proper docstring to maybe_convert_indices Patches several spelling errors and expands current doc to a proper doc-string. --- pandas/core/indexing.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 109183827de4e..929c2346ba5b0 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1985,9 +1985,31 @@ def get_indexer(_i, _idx): def maybe_convert_indices(indices, n): - """ if we have negative indicies, translate to postive here - if have indicies that are out-of-bounds, raise an IndexError """ + Attempt to convert indices into valid, positive indices. + + If we have negative indices, translate to positive here. + If we have indices that are out-of-bounds, raise an IndexError. + + Parameters + ---------- + indices : array-like + The array of indices that we are to convert. 
+ n : int + The number of elements in the array that we are indexing. + + Returns + ------- + valid_indices : array-like + An array-like of positive indices that correspond to the ones + that were passed in initially to this function. + + Raises + ------ + IndexError : one of the converted indices either exceeded the number + of elements (specified by `n`) OR was still negative. + """ + if isinstance(indices, list): indices = np.array(indices) if len(indices) == 0: From 393bb19fd22d1cea3860fd0835f41a1c57f99f62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?agust=C3=ADn=20m=C3=A9ndez?= Date: Mon, 21 Aug 2017 10:27:24 +0200 Subject: [PATCH 092/141] DOC: Improving docstring of take method (#16948) --- pandas/core/generic.py | 67 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d9d75c870b20c..c83b1073afc8e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2063,18 +2063,77 @@ def __delitem__(self, key): def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs): """ - Analogous to ndarray.take + Return the elements in the given *positional* indices along an axis. + + This means that we are not indexing according to actual values in + the index attribute of the object. We are indexing according to the + actual position of the element in the object. Parameters ---------- - indices : list / array of ints + indices : array-like + An array of ints indicating which positions to take. axis : int, default 0 - convert : translate neg to pos indices (default) - is_copy : mark the returned frame as a copy + The axis on which to select elements. "0" means that we are + selecting rows, "1" means that we are selecting columns, etc. + convert : bool, default True + Whether to convert negative indices to positive ones, just as with + indexing into Python lists. For example, if `-1` was passed in, + this index would be converted to ``n - 1``. + is_copy : bool, default True + Whether to return a copy of the original object or not. + + Examples + -------- + >>> df = pd.DataFrame([('falcon', 'bird', 389.0), + ('parrot', 'bird', 24.0), + ('lion', 'mammal', 80.5), + ('monkey', 'mammal', np.nan)], + columns=('name', 'class', 'max_speed'), + index=[0, 2, 3, 1]) + >>> df + name class max_speed + 0 falcon bird 389.0 + 2 parrot bird 24.0 + 3 lion mammal 80.5 + 1 monkey mammal NaN + + Take elements at positions 0 and 3 along the axis 0 (default). + + Note how the actual indices selected (0 and 1) do not correspond to + our selected indices 0 and 3. That's because we are selecting the 0th + and 3rd rows, not rows whose indices equal 0 and 3. + + >>> df.take([0, 3]) + name class max_speed + 0 falcon bird 389.0 + 1 monkey mammal NaN + + Take elements at indices 1 and 2 along the axis 1 (column selection). + + >>> df.take([1, 2], axis=1) + class max_speed + 0 bird 389.0 + 2 bird 24.0 + 3 mammal 80.5 + 1 mammal NaN + + We may take elements using negative integers for positive indices, + starting from the end of the object, just like with Python lists. + + >>> df.take([-1, -2]) + name class max_speed + 1 monkey mammal NaN + 3 lion mammal 80.5 Returns ------- taken : type of caller + An array-like containing the elements taken from the object.
+ + See Also -------- numpy.ndarray.take numpy.take """ nv.validate_take(tuple(), kwargs) self._consolidate_inplace() From 595e0a488d4282f773c69648351bff9bd817a126 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 21 Aug 2017 14:39:50 -0500 Subject: [PATCH 093/141] BUG: Fixed regex in asv.conf.json (#17300) In https://github.com/pandas-dev/pandas/pull/17293 I messed up the syntax. I used a glob instead of a regex. According to the docs at http://asv.readthedocs.io/en/latest/asv.conf.json.html#regressions-thresholds we want to use a regex. I've actually manually tested this change and verified that it works. [ci skip] --- asv_bench/asv.conf.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index ced4f2b12445f..9c333f62810f4 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -118,9 +118,9 @@ // skipped for the matching benchmark. // "regressions_first_commits": { - "*": "v0.20.0" + ".*": "v0.20.0" }, "regression_thresholds": { - "*": 0.05 + ".*": 0.05 } } From 6a45d36c4e742593ca0a06a3c9a2df58c591fb91 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 21 Aug 2017 16:49:17 -0700 Subject: [PATCH 094/141] Remove unnecessary usage of _TSObject (#17297) --- pandas/_libs/period.pyx | 20 -------------------- pandas/_libs/src/datetime.pxd | 32 -------------------------------- pandas/_libs/tslib.pyx | 35 ++++++----------------------------- 3 files changed, 6 insertions(+), 81 deletions(-) diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index e017d863e1907..6ba7ec0270f30 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -120,26 +120,6 @@ initialize_daytime_conversion_factor_matrix() # Period logic #---------------------------------------------------------------------- -cdef inline int64_t apply_mult(int64_t period_ord, int64_t mult): - """ - Get freq+multiple ordinal value from corresponding freq-only ordinal value. For example, 5min ordinal will be 1/5th the 1min ordinal (rounding down to - integer). - """ - if mult == 1: - return period_ord - - return (period_ord - 1) // mult - -cdef inline int64_t remove_mult(int64_t period_ord_w_mult, int64_t mult): - """ - Get freq-only ordinal value from corresponding freq+multiple ordinal.
- """ - if mult == 1: - return period_ord_w_mult - - return period_ord_w_mult * mult + 1; - @cython.wraparound(False) @cython.boundscheck(False) diff --git a/pandas/_libs/src/datetime.pxd b/pandas/_libs/src/datetime.pxd index 2267c8282ec14..23620e790c132 100644 --- a/pandas/_libs/src/datetime.pxd +++ b/pandas/_libs/src/datetime.pxd @@ -88,11 +88,6 @@ cdef extern from "datetime/np_datetime.h": int cmp_pandas_datetimestruct(pandas_datetimestruct *a, pandas_datetimestruct *b) - int convert_pydatetime_to_datetimestruct(PyObject *obj, - pandas_datetimestruct *out, - PANDAS_DATETIMEUNIT *out_bestunit, - int apply_tzinfo) - npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *d) nogil void pandas_datetime_to_datetimestruct(npy_datetime val, @@ -112,12 +107,6 @@ cdef extern from "datetime/np_datetime_strings.h": PANDAS_DATETIMEUNIT *out_bestunit, npy_bool *out_special) - int make_iso_8601_datetime(pandas_datetimestruct *dts, char *outstr, int outlen, - int local, PANDAS_DATETIMEUNIT base, int tzoffset, - NPY_CASTING casting) - - int get_datetime_iso_8601_strlen(int local, PANDAS_DATETIMEUNIT base) - # int parse_python_string(object obj, pandas_datetimestruct *out) except -1 @@ -152,16 +141,6 @@ cdef inline int _cstring_to_dts(char *val, int length, return result -cdef inline object _datetime64_to_datetime(int64_t val): - cdef pandas_datetimestruct dts - pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) - return _dts_to_pydatetime(&dts) - -cdef inline object _dts_to_pydatetime(pandas_datetimestruct *dts): - return PyDateTime_FromDateAndTime(dts.year, dts.month, - dts.day, dts.hour, - dts.min, dts.sec, dts.us) - cdef inline int64_t _pydatetime_to_dts(object val, pandas_datetimestruct *dts): dts.year = PyDateTime_GET_YEAR(val) dts.month = PyDateTime_GET_MONTH(val) @@ -173,17 +152,6 @@ cdef inline int64_t _pydatetime_to_dts(object val, pandas_datetimestruct *dts): dts.ps = dts.as = 0 return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) -cdef inline int64_t _dtlike_to_datetime64(object val, - pandas_datetimestruct *dts): - dts.year = val.year - dts.month = val.month - dts.day = val.day - dts.hour = val.hour - dts.min = val.minute - dts.sec = val.second - dts.us = val.microsecond - dts.ps = dts.as = 0 - return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) cdef inline int64_t _date_to_datetime64(object val, pandas_datetimestruct *dts): diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 32b8c92a50269..c4a38ec660a4c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -705,7 +705,6 @@ class Timestamp(_Timestamp): pandas_datetimestruct dts int64_t value object _tzinfo, result, k, v - _TSObject ts # set to naive if needed _tzinfo = self.tzinfo @@ -1009,10 +1008,6 @@ def unique_deltas(ndarray[int64_t] arr): return result -cdef inline bint _is_multiple(int64_t us, int64_t mult): - return us % mult == 0 - - cdef inline bint _cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: if op == Py_EQ: return lhs == rhs @@ -4694,7 +4689,6 @@ def get_date_field(ndarray[int64_t] dtindex, object field): field and return an array of these values. """ cdef: - _TSObject ts Py_ssize_t i, count = 0 ndarray[int32_t] out ndarray[int32_t, ndim=2] _month_offset @@ -4876,7 +4870,6 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, (defined by frequency). 
""" cdef: - _TSObject ts Py_ssize_t i int count = 0 bint is_business = 0 @@ -4925,9 +4918,8 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) dom = dts.day - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if (dom == 1 and dow < 5) or (dom <= 3 and dow == 0): out[i] = 1 @@ -4951,13 +4943,12 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) mo_off = _month_offset[isleap, dts.month - 1] dom = dts.day doy = mo_off + dom ldom = _month_offset[isleap, dts.month] - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if (ldom == doy and dow < 5) or ( dow == 4 and (ldom - doy <= 2)): @@ -4986,9 +4977,8 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) dom = dts.day - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if ((dts.month - start_month) % 3 == 0) and ( (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): @@ -5013,13 +5003,12 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) mo_off = _month_offset[isleap, dts.month - 1] dom = dts.day doy = mo_off + dom ldom = _month_offset[isleap, dts.month] - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if ((dts.month - end_month) % 3 == 0) and ( (ldom == doy and dow < 5) or ( @@ -5049,9 +5038,8 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) dom = dts.day - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if (dts.month == start_month) and ( (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): @@ -5076,12 +5064,11 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) dom = dts.day mo_off = _month_offset[isleap, dts.month - 1] doy = mo_off + dom - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) ldom = _month_offset[isleap, dts.month] if (dts.month == end_month) and ( @@ -5095,7 +5082,6 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) mo_off = _month_offset[isleap, dts.month - 1] dom = dts.day @@ -5117,7 +5103,6 @@ def get_date_name_field(ndarray[int64_t] dtindex, object field): name based on requested field (e.g. 
weekday_name) """ cdef: - _TSObject ts Py_ssize_t i, count = 0 ndarray[object] out pandas_datetimestruct dts @@ -5143,10 +5128,6 @@ def get_date_name_field(ndarray[int64_t] dtindex, object field): raise ValueError("Field %s not supported" % field) -cdef inline int m8_weekday(int64_t val): - ts = convert_to_tsobject(val, None, None, 0, 0) - return ts_dayofweek(ts) - cdef int64_t DAY_NS = 86400000000000LL @@ -5156,11 +5137,9 @@ def date_normalize(ndarray[int64_t] stamps, tz=None): cdef: Py_ssize_t i, n = len(stamps) pandas_datetimestruct dts - _TSObject tso ndarray[int64_t] result = np.empty(n, dtype=np.int64) if tz is not None: - tso = _TSObject() tz = maybe_get_tz(tz) result = _normalize_local(stamps, tz) else: @@ -5305,8 +5284,6 @@ def monthrange(int64_t year, int64_t month): return (dayofweek(year, month, 1), days) -cdef inline int64_t ts_dayofweek(_TSObject ts): - return dayofweek(ts.dts.year, ts.dts.month, ts.dts.day) cdef inline int days_in_month(pandas_datetimestruct dts) nogil: return days_per_month_table[is_leapyear(dts.year)][dts.month -1] From 5f077f3d7c88999a4fb7715043bd367bc6951f57 Mon Sep 17 00:00:00 2001 From: Michael Gasvoda Date: Mon, 21 Aug 2017 19:51:18 -0400 Subject: [PATCH 095/141] BUG: clip should handle null values closes #17276 Author: Michael Gasvoda Author: mgasvoda Closes #17288 from mgasvoda/master and squashes the following commits: a1dbdf293 [mgasvoda] Merge branch 'master' into master 9333952c2 [Michael Gasvoda] Checking output of tests 4e0464eaf [Michael Gasvoda] fixing whatsnew text c44204080 [Michael Gasvoda] formatting fixes 7e2367879 [Michael Gasvoda] formatting updates 781ea724a [Michael Gasvoda] whatsnew entry d9627fe4c [Michael Gasvoda] adding clip tests 9aa0159e9 [Michael Gasvoda] Treating na values as none for clips --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/generic.py | 12 ++++++++---- pandas/tests/frame/test_analytics.py | 26 ++++++++++---------------- pandas/tests/series/test_analytics.py | 11 +++++++++++ 4 files changed, 30 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 97e4b7deddf04..f82303ccedfa3 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -133,7 +133,6 @@ Other Enhancements - .. _whatsnew_0210.api_breaking: Backwards incompatible API changes @@ -385,6 +384,7 @@ Reshaping Numeric ^^^^^^^ - Bug in ``.clip()`` with ``axis=1`` and a list-like for ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`) +- :func:`Series.clip()` and :func:`DataFrame.clip()` now treat NA values for upper and lower arguments as ``None`` instead of raising ``ValueError`` (:issue:`17276`). 
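(Annotation, not part of the patch: a minimal sketch of the behaviour the
entry above describes, written as a hypothetical interactive session. The
removed code path raised ``ValueError("Cannot use an NA value as a clip
threshold")``; with this change an NA bound is simply ignored.)

>>> import numpy as np
>>> import pandas as pd
>>> pd.Series([1, 2, 3]).clip(lower=np.nan)
0    1
1    2
2    3
dtype: int64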
Categorical diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c83b1073afc8e..5c9e1f22ddd20 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4741,9 +4741,6 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): if axis is not None: axis = self._get_axis_number(axis) - if np.any(isna(threshold)): - raise ValueError("Cannot use an NA value as a clip threshold") - # method is self.le for upper bound and self.ge for lower bound if is_scalar(threshold) and is_number(threshold): if method.__name__ == 'le': @@ -4823,6 +4820,14 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, axis = nv.validate_clip_with_axis(axis, args, kwargs) + # GH 17276 + # numpy doesn't like NaN as a clip value + # so ignore + if np.any(pd.isnull(lower)): + lower = None + if np.any(pd.isnull(upper)): + upper = None + # GH 2747 (arguments were reversed) if lower is not None and upper is not None: if is_scalar(lower) and is_scalar(upper): @@ -4839,7 +4844,6 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, if upper is not None: if inplace: result = self - result = result.clip_upper(upper, axis, inplace=inplace) return result diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 484a09f11b58a..93514a8a42215 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1931,22 +1931,16 @@ def test_clip_against_frame(self, axis): tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask]) tm.assert_frame_equal(clipped_df[mask], df[mask]) - def test_clip_na(self): - msg = "Cannot use an NA" - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(lower=np.nan) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(lower=[np.nan]) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(upper=np.nan) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(upper=[np.nan]) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(lower=np.nan, upper=np.nan) + def test_clip_with_na_args(self): + """Should process np.nan argument as None """ + # GH # 17276 + tm.assert_frame_equal(self.frame.clip(np.nan), self.frame) + tm.assert_frame_equal(self.frame.clip(upper=[1, 2, np.nan]), + self.frame) + tm.assert_frame_equal(self.frame.clip(lower=[1, np.nan, 3]), + self.frame) + tm.assert_frame_equal(self.frame.clip(upper=np.nan, lower=np.nan), + self.frame) # Matrix-like diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 44da0968d7024..f1d044f7a1132 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1000,6 +1000,17 @@ def test_clip_types_and_nulls(self): assert list(isna(s)) == list(isna(l)) assert list(isna(s)) == list(isna(u)) + def test_clip_with_na_args(self): + """Should process np.nan argument as None """ + # GH # 17276 + s = Series([1, 2, 3]) + + assert_series_equal(s.clip(np.nan), Series([1, 2, 3])) + assert_series_equal(s.clip(upper=[1, 1, np.nan]), Series([1, 2, 3])) + assert_series_equal(s.clip(lower=[1, np.nan, 1]), Series([1, 2, 3])) + assert_series_equal(s.clip(upper=np.nan, lower=np.nan), + Series([1, 2, 3])) + def test_clip_against_series(self): # GH #6966 From a10fa92974e66fdf4f8a2f726dade3eb79f77881 Mon Sep 17 00:00:00 2001 From: ante328 Date: Tue, 22 Aug 2017 01:55:10 +0200 Subject: [PATCH 096/141] BUG: fillna returns frame when inplace=True if value is a dict (#16156) (#17279) --- doc/source/whatsnew/v0.21.0.txt | 2 +- 
pandas/core/generic.py | 3 ++- pandas/tests/frame/test_missing.py | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f82303ccedfa3..e395264c723f0 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -319,7 +319,7 @@ Conversion - Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, so an approximation is used instead (:issue:`17228`) - Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. (:issue:`17237`) - Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) - +- Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) Indexing ^^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5c9e1f22ddd20..e84e4eac3f34d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4135,7 +4135,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, continue obj = result[k] obj.fillna(v, limit=limit, inplace=True, downcast=downcast) - return result + return result if not inplace else None + elif not is_list_like(value): new_data = self._data.fillna(value=value, limit=limit, inplace=inplace, diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 77f0357685cab..ebd15b3180a33 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -407,6 +407,9 @@ def test_fillna_inplace(self): df.fillna(value=0, inplace=True) tm.assert_frame_equal(df, expected) + expected = df.fillna(value={0: 0}, inplace=True) + assert expected is None + df[1][:4] = np.nan df[3][-4:] = np.nan expected = df.fillna(method='ffill') From 8dfb95b31d10f676b069cef3a64e3f4e509a7aa8 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 22 Aug 2017 10:11:10 +0200 Subject: [PATCH 097/141] CLN: Index.append() refactoring (#16236) --- pandas/core/dtypes/concat.py | 48 ++++++++++++++++++++++- pandas/core/indexes/base.py | 11 +++--- pandas/core/indexes/category.py | 6 ++- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/interval.py | 4 +- pandas/core/indexes/range.py | 59 ++--------------------------- 6 files changed, 63 insertions(+), 67 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 292d5f608d4cb..0ce45eea119ed 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -19,7 +19,7 @@ _TD_DTYPE) from pandas.core.dtypes.generic import ( ABCDatetimeIndex, ABCTimedeltaIndex, - ABCPeriodIndex) + ABCPeriodIndex, ABCRangeIndex) def get_dtype_kinds(l): @@ -41,6 +41,8 @@ def get_dtype_kinds(l): typ = 'category' elif is_sparse(arr): typ = 'sparse' + elif isinstance(arr, ABCRangeIndex): + typ = 'range' elif is_datetimetz(arr): # if to_concat contains different tz, # the result must be object dtype @@ -559,3 +561,47 @@ def convert_sparse(x, axis): # coerce to object if needed result = result.astype('object') return result + + +def _concat_rangeindex_same_dtype(indexes): + """ + Concatenates multiple RangeIndex instances. All members of "indexes" must + be of type RangeIndex; result will be RangeIndex if possible, Int64Index + otherwise. 
E.g.: + indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) + indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) + """ + + start = step = next = None + + for obj in indexes: + if not len(obj): + continue + + if start is None: + # This is set by the first non-empty index + start = obj._start + if step is None and len(obj) > 1: + step = obj._step + elif step is None: + # First non-empty index had only one element + if obj._start == start: + return _concat_index_asobject(indexes) + step = obj._start - start + + non_consecutive = ((step != obj._step and len(obj) > 1) or + (next is not None and obj._start != next)) + if non_consecutive: + # Int64Index._append_same_dtype([ix.astype(int) for ix in indexes]) + # would be preferred... but it currently resorts to + # _concat_index_asobject anyway. + return _concat_index_asobject(indexes) + + if step is not None: + next = obj[-1] + step + + if start is None: + start = obj._start + step = obj._step + stop = obj._stop if next is None else next + return indexes[0].__class__(start, stop, step) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index de6221987a59a..a21e6df3ffc93 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1745,18 +1745,17 @@ def append(self, other): names = set([obj.name for obj in to_concat]) name = None if len(names) > 1 else self.name - if self.is_categorical(): - # if calling index is category, don't check dtype of others - from pandas.core.indexes.category import CategoricalIndex - return CategoricalIndex._append_same_dtype(self, to_concat, name) + return self._concat(to_concat, name) + + def _concat(self, to_concat, name): typs = _concat.get_dtype_kinds(to_concat) if len(typs) == 1: - return self._append_same_dtype(to_concat, name=name) + return self._concat_same_dtype(to_concat, name=name) return _concat._concat_index_asobject(to_concat, name=name) - def _append_same_dtype(self, to_concat, name): + def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class """ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index ac4698b570d17..f22407308e094 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -633,7 +633,11 @@ def insert(self, loc, item): codes = np.concatenate((codes[:loc], code, codes[loc:])) return self._create_from_codes(codes) - def _append_same_dtype(self, to_concat, name): + def _concat(self, to_concat, name): + # if calling index is category, don't check dtype of others + return CategoricalIndex._concat_same_dtype(self, to_concat, name) + + def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class ValueError if other is not in the categories diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 845c71b6c41d8..c3232627fce74 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -837,7 +837,7 @@ def summary(self, name=None): result = result.replace("'", "") return result - def _append_same_dtype(self, to_concat, name): + def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class """ diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e90378184e3f3..e0ed6c7ea35c0 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -876,7 +876,7 @@ def _as_like_interval_index(self, other, error_msg): raise ValueError(error_msg) return other 
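For reference, the concatenation rules implemented by ``_concat_rangeindex_same_dtype`` earlier in this patch look like this from the user side (a minimal sketch based on the docstring examples above):

    import pandas as pd

    # consecutive ranges collapse back into a single RangeIndex
    pd.RangeIndex(3).append(pd.RangeIndex(3, 6))   # RangeIndex(start=0, stop=6, step=1)

    # a gap (or a step mismatch) falls back to Int64Index
    pd.RangeIndex(3).append(pd.RangeIndex(4, 6))   # Int64Index([0, 1, 2, 4, 5])
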
- def _append_same_dtype(self, to_concat, name): + def _concat_same_dtype(self, to_concat, name): """ assert that we all have the same .closed we allow a 0-len index here as well @@ -885,7 +885,7 @@ def _append_same_dtype(self, to_concat, name): msg = ('can only append two IntervalIndex objects ' 'that are closed on the same side') raise ValueError(msg) - return super(IntervalIndex, self)._append_same_dtype(to_concat, name) + return super(IntervalIndex, self)._concat_same_dtype(to_concat, name) @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index ac4cc6986cace..82412d3a7ef57 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -14,6 +14,7 @@ from pandas.compat.numpy import function as nv from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util._decorators import Appender, cache_readonly +import pandas.core.dtypes.concat as _concat import pandas.core.indexes.base as ibase from pandas.core.indexes.numeric import Int64Index @@ -447,62 +448,8 @@ def join(self, other, how='left', level=None, return_indexers=False, return super(RangeIndex, self).join(other, how, level, return_indexers, sort) - def append(self, other): - """ - Append a collection of Index options together - - Parameters - ---------- - other : Index or list/tuple of indices - - Returns - ------- - appended : RangeIndex if all indexes are consecutive RangeIndexes, - otherwise Int64Index or Index - """ - - to_concat = [self] - - if isinstance(other, (list, tuple)): - to_concat = to_concat + list(other) - else: - to_concat.append(other) - - if not all([isinstance(i, RangeIndex) for i in to_concat]): - return super(RangeIndex, self).append(other) - - start = step = next = None - - for obj in to_concat: - if not len(obj): - continue - - if start is None: - # This is set by the first non-empty index - start = obj._start - if step is None and len(obj) > 1: - step = obj._step - elif step is None: - # First non-empty index had only one element - if obj._start == start: - return super(RangeIndex, self).append(other) - step = obj._start - start - - non_consecutive = ((step != obj._step and len(obj) > 1) or - (next is not None and obj._start != next)) - if non_consecutive: - return super(RangeIndex, self).append(other) - - if step is not None: - next = obj[-1] + step - - if start is None: - start = obj._start - step = obj._step - stop = obj._stop if next is None else next - names = set([obj.name for obj in to_concat]) - name = None if len(names) > 1 else self.name - return RangeIndex(start, stop, step, name=name) + def _concat_same_dtype(self, indexes, name): + return _concat._concat_rangeindex_same_dtype(indexes).rename(name) def __len__(self): """ From 8326c83ba28a8d2ca4da92c585686d1c2b792454 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 22 Aug 2017 05:50:57 -0400 Subject: [PATCH 098/141] DEPS: set min versions (#17002) closes #15206, numpy >= 1.9 closes #15543, matplotlib >= 1.4.3 scipy >= 0.14.0 --- .travis.yml | 6 +- ci/install_travis.sh | 2 +- ci/requirements-2.7_COMPAT.build | 2 +- ci/requirements-2.7_COMPAT.run | 9 +- ci/requirements-2.7_LOCALE.build | 2 +- ci/requirements-2.7_LOCALE.run | 5 +- ci/requirements-2.7_SLOW.build | 2 +- ci/requirements-2.7_SLOW.run | 4 +- ci/script_multi.sh | 6 + ci/script_single.sh | 8 + doc/source/install.rst | 6 +- doc/source/whatsnew/v0.21.0.txt | 22 ++- pandas/_libs/sparse.pyx | 2 - pandas/compat/numpy/__init__.py 
| 14 +- pandas/core/algorithms.py | 7 +- pandas/core/generic.py | 5 +- pandas/core/groupby.py | 8 +- pandas/core/internals.py | 16 +- pandas/tests/frame/test_quantile.py | 42 ----- pandas/tests/frame/test_rank.py | 12 +- .../tests/indexes/datetimes/test_datetime.py | 8 +- pandas/tests/indexes/period/test_indexing.py | 34 ++-- .../indexes/timedeltas/test_timedelta.py | 8 +- pandas/tests/plotting/common.py | 3 +- pandas/tests/plotting/test_datetimelike.py | 2 + pandas/tests/plotting/test_frame.py | 163 ++++++++++-------- pandas/tests/plotting/test_misc.py | 45 +---- pandas/tests/plotting/test_series.py | 12 ++ pandas/tests/series/test_operators.py | 16 +- pandas/tests/series/test_quantile.py | 27 +-- pandas/tests/series/test_rank.py | 9 +- pandas/tests/sparse/test_array.py | 7 +- pandas/tests/test_nanops.py | 18 +- pandas/tests/test_resample.py | 2 +- pandas/tests/tools/test_numeric.py | 5 +- setup.py | 2 +- 36 files changed, 221 insertions(+), 320 deletions(-) diff --git a/.travis.yml b/.travis.yml index 897d31cf23a3b..034e2a32bb75c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,7 +37,7 @@ matrix: - JOB="3.5_OSX" TEST_ARGS="--skip-slow --skip-network" - dist: trusty env: - - JOB="2.7_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8" + - JOB="2.7_LOCALE" LOCALE_OVERRIDE="zh_CN.UTF-8" SLOW=true addons: apt: packages: @@ -62,7 +62,7 @@ matrix: # In allow_failures - dist: trusty env: - - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" + - JOB="2.7_SLOW" SLOW=true # In allow_failures - dist: trusty env: @@ -82,7 +82,7 @@ matrix: allow_failures: - dist: trusty env: - - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" + - JOB="2.7_SLOW" SLOW=true - dist: trusty env: - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true diff --git a/ci/install_travis.sh b/ci/install_travis.sh index ad8f0bdd8a597..d26689f2e6b4b 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -47,7 +47,7 @@ which conda echo echo "[update conda]" conda config --set ssl_verify false || exit 1 -conda config --set always_yes true --set changeps1 false || exit 1 +conda config --set quiet true --set always_yes true --set changeps1 false || exit 1 conda update -q conda echo diff --git a/ci/requirements-2.7_COMPAT.build b/ci/requirements-2.7_COMPAT.build index 0e1ccf9eac9bf..d9c932daa110b 100644 --- a/ci/requirements-2.7_COMPAT.build +++ b/ci/requirements-2.7_COMPAT.build @@ -1,5 +1,5 @@ python=2.7* -numpy=1.7.1 +numpy=1.9.2 cython=0.23 dateutil=1.5 pytz=2013b diff --git a/ci/requirements-2.7_COMPAT.run b/ci/requirements-2.7_COMPAT.run index b94f4ab7b27d1..39bf720140733 100644 --- a/ci/requirements-2.7_COMPAT.run +++ b/ci/requirements-2.7_COMPAT.run @@ -1,11 +1,12 @@ -numpy=1.7.1 +numpy=1.9.2 dateutil=1.5 pytz=2013b -scipy=0.11.0 +scipy=0.14.0 xlwt=0.7.5 xlrd=0.9.2 -numexpr=2.2.2 -pytables=3.0.0 +bottleneck=1.0.0 +numexpr=2.4.4 # we test that we correctly don't use an unsupported numexpr +pytables=3.2.2 psycopg2 pymysql=0.6.0 sqlalchemy=0.7.8 diff --git a/ci/requirements-2.7_LOCALE.build b/ci/requirements-2.7_LOCALE.build index 4a37ce8fbe161..96cb184ec2665 100644 --- a/ci/requirements-2.7_LOCALE.build +++ b/ci/requirements-2.7_LOCALE.build @@ -1,5 +1,5 @@ python=2.7* python-dateutil pytz=2013b -numpy=1.8.2 +numpy=1.9.2 cython=0.23 diff --git a/ci/requirements-2.7_LOCALE.run b/ci/requirements-2.7_LOCALE.run index 8e360cf74b081..00006106f7009 100644 --- a/ci/requirements-2.7_LOCALE.run +++ b/ci/requirements-2.7_LOCALE.run @@ -1,11 +1,12 @@ python-dateutil pytz=2013b -numpy=1.8.2 
+numpy=1.9.2 xlwt=0.7.5 openpyxl=1.6.2 xlsxwriter=0.5.2 xlrd=0.9.2 -matplotlib=1.3.1 +bottleneck=1.0.0 +matplotlib=1.4.3 sqlalchemy=0.8.1 lxml=3.2.1 scipy diff --git a/ci/requirements-2.7_SLOW.build b/ci/requirements-2.7_SLOW.build index 0f4a2c6792e6b..a665ab9edd585 100644 --- a/ci/requirements-2.7_SLOW.build +++ b/ci/requirements-2.7_SLOW.build @@ -1,5 +1,5 @@ python=2.7* python-dateutil pytz -numpy=1.8.2 +numpy=1.10* cython diff --git a/ci/requirements-2.7_SLOW.run b/ci/requirements-2.7_SLOW.run index 0a549554f5219..f7708283ad04a 100644 --- a/ci/requirements-2.7_SLOW.run +++ b/ci/requirements-2.7_SLOW.run @@ -1,7 +1,7 @@ python-dateutil pytz -numpy=1.8.2 -matplotlib=1.3.1 +numpy=1.10* +matplotlib=1.4.3 scipy patsy xlwt diff --git a/ci/script_multi.sh b/ci/script_multi.sh index d79fc43fbe175..ee9fbcaad5ef5 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -36,9 +36,15 @@ elif [ "$COVERAGE" ]; then echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas +elif [ "$SLOW" ]; then + TEST_ARGS="--only-slow --skip-network" + echo pytest -r xX -m "not single and slow" -v --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + pytest -r xX -m "not single and slow" -v --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + else echo pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest + fi RET="$?" diff --git a/ci/script_single.sh b/ci/script_single.sh index 245b4e6152c4d..375e9879e950f 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -12,16 +12,24 @@ if [ -n "$LOCALE_OVERRIDE" ]; then python -c "$pycmd" fi +if [ "$SLOW" ]; then + TEST_ARGS="--only-slow --skip-network" +fi + if [ "$BUILD_TEST" ]; then echo "We are not running pytest as this is a build test." + elif [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" + elif [ "$COVERAGE" ]; then echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + else echo pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest + fi RET="$?" diff --git a/doc/source/install.rst b/doc/source/install.rst index 99d299b75b59b..f92c43839ee31 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -203,7 +203,7 @@ Dependencies ------------ * `setuptools `__ -* `NumPy `__: 1.7.1 or higher +* `NumPy `__: 1.9.0 or higher * `python-dateutil `__: 1.5 or higher * `pytz `__: Needed for time zone support @@ -233,7 +233,7 @@ Optional Dependencies * `Cython `__: Only necessary to build development version. Version 0.23 or higher. -* `SciPy `__: miscellaneous statistical functions +* `SciPy `__: miscellaneous statistical functions, Version 0.14.0 or higher * `xarray `__: pandas like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended. * `PyTables `__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended. * `Feather Format `__: necessary for feather-based storage, version 0.3.1 or higher. 
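For context, the import-time guard these version bumps rely on (updated in ``pandas/compat/numpy`` later in this patch) reduces to roughly the following sketch:

    from distutils.version import LooseVersion
    import numpy as np

    # pandas now refuses to import against a numpy older than the new floor
    if LooseVersion(np.__version__) < '1.9':
        raise ImportError('this version of pandas is incompatible with '
                          'numpy < 1.9.0')
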
@@ -244,7 +244,7 @@ Optional Dependencies
 * `pymysql `__: for MySQL.
 * `SQLite `__: for SQLite, this is included in Python's standard library by default.
-* `matplotlib `__: for plotting
+* `matplotlib `__: for plotting, Version 1.4.3 or higher.
 
 * For Excel I/O:
 
   * `xlrd/xlwt `__: Excel reading (xlrd) and writing (xlwt)
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index e395264c723f0..7532730f6be27 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -138,6 +138,27 @@ Other Enhancements
 
 Backwards incompatible API changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. _whatsnew_0210.api_breaking.deps:
+
+Dependencies have increased minimum versions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We have updated our minimum supported versions of dependencies (:issue:`15206`, :issue:`15543`, :issue:`15214`).
+If installed, we now require:
+
+   +--------------+-----------------+----------+
+   | Package      | Minimum Version | Required |
+   +==============+=================+==========+
+   | Numpy        | 1.9.0           | X        |
+   +--------------+-----------------+----------+
+   | Matplotlib   | 1.4.3           |          |
+   +--------------+-----------------+----------+
+   | Scipy        | 0.14.0          |          |
+   +--------------+-----------------+----------+
+   | Bottleneck   | 1.0.0           |          |
+   +--------------+-----------------+----------+
+
 .. _whatsnew_0210.api_breaking.pandas_eval:
 
 Improved error handling during item assignment in pd.eval
@@ -259,7 +280,6 @@ Other API Changes
 ^^^^^^^^^^^^^^^^^
 
 - Support has been dropped for Python 3.4 (:issue:`15251`)
-- Support has been dropped for bottleneck < 1.0.0 (:issue:`15214`)
 - The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`)
 - Accessing a non-existent attribute on a closed :class:`~pandas.HDFStore` will now raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`)
diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx
index 0c2e056ead7fa..1cc7f5ace95ea 100644
--- a/pandas/_libs/sparse.pyx
+++ b/pandas/_libs/sparse.pyx
@@ -12,8 +12,6 @@ from distutils.version import LooseVersion
 
 # numpy versioning
 _np_version = np.version.short_version
-_np_version_under1p8 = LooseVersion(_np_version) < '1.8'
-_np_version_under1p9 = LooseVersion(_np_version) < '1.9'
 _np_version_under1p10 = LooseVersion(_np_version) < '1.10'
 _np_version_under1p11 = LooseVersion(_np_version) < '1.11'
diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py
index 2c5a18973afa8..5112957b49875 100644
--- a/pandas/compat/numpy/__init__.py
+++ b/pandas/compat/numpy/__init__.py
@@ -9,19 +9,18 @@
 # numpy versioning
 _np_version = np.__version__
 _nlv = LooseVersion(_np_version)
-_np_version_under1p8 = _nlv < '1.8'
-_np_version_under1p9 = _nlv < '1.9'
 _np_version_under1p10 = _nlv < '1.10'
 _np_version_under1p11 = _nlv < '1.11'
 _np_version_under1p12 = _nlv < '1.12'
 _np_version_under1p13 = _nlv < '1.13'
 _np_version_under1p14 = _nlv < '1.14'
+_np_version_under1p15 = _nlv < '1.15'
 
-if _nlv < '1.7.0':
+if _nlv < '1.9':
     raise ImportError('this version of pandas is incompatible with '
-                      'numpy < 1.7.0\n'
+                      'numpy < 1.9.0\n'
                       'your numpy version is {0}.\n'
-                      'Please upgrade numpy to >= 1.7.0 to use '
+                      'Please upgrade numpy to >= 1.9.0 to use '
                       'this pandas version'.format(_np_version))
 
 
@@ -70,11 +69,10 @@ def np_array_datetime64_compat(arr, *args, **kwargs):
 
 __all__ = ['np',
-           '_np_version_under1p8',
-           '_np_version_under1p9',
           '_np_version_under1p10',
           '_np_version_under1p11',
           '_np_version_under1p12',
'_np_version_under1p13', - '_np_version_under1p14' + '_np_version_under1p14', + '_np_version_under1p15' ] diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f2359f3ff1a9d..ffd03096e2a27 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -6,7 +6,6 @@ from warnings import warn, catch_warnings import numpy as np -from pandas import compat, _np_version_under1p8 from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.generic import ( ABCSeries, ABCIndex, @@ -407,14 +406,12 @@ def isin(comps, values): comps, dtype, _ = _ensure_data(comps) values, _, _ = _ensure_data(values, dtype=dtype) - # GH11232 - # work-around for numpy < 1.8 and comparisions on py3 # faster for larger cases to use np.in1d f = lambda x, y: htable.ismember_object(x, values) + # GH16012 # Ensure np.in1d doesn't get object types or it *may* throw an exception - if ((_np_version_under1p8 and compat.PY3) or len(comps) > 1000000 and - not is_object_dtype(comps)): + if len(comps) > 1000000 and not is_object_dtype(comps): f = lambda x, y: np.in1d(x, y) elif is_integer_dtype(comps): try: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e84e4eac3f34d..f8366c804e3e7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1827,11 +1827,8 @@ def _box_item_values(self, key, values): def _maybe_cache_changed(self, item, value): """The object has called back to us saying maybe it has changed. - - numpy < 1.8 has an issue with object arrays and aliasing - GH6026 """ - self._data.set(item, value, check=pd._np_version_under1p8) + self._data.set(item, value, check=False) @property def _is_cached(self): diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index a388892e925b6..aa7c4517c0a01 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -13,7 +13,7 @@ ) from pandas import compat -from pandas.compat.numpy import function as nv, _np_version_under1p8 +from pandas.compat.numpy import function as nv from pandas.compat import set_function_name from pandas.core.dtypes.common import ( @@ -3257,11 +3257,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, d = np.diff(np.r_[idx, len(ids)]) if dropna: m = ids[lab == -1] - if _np_version_under1p8: - mi, ml = algorithms.factorize(m) - d[ml] = d[ml] - np.bincount(mi) - else: - np.add.at(d, m, -1) + np.add.at(d, m, -1) acc = rep(d)[mask] else: acc = rep(d) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index b616270e47aa6..83b382ec0ed72 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -69,8 +69,7 @@ import pandas.core.computation.expressions as expressions from pandas.util._decorators import cache_readonly from pandas.util._validators import validate_bool_kwarg - -from pandas import compat, _np_version_under1p9 +from pandas import compat from pandas.compat import range, map, zip, u @@ -857,9 +856,6 @@ def _is_empty_indexer(indexer): # set else: - if _np_version_under1p9: - # Work around GH 6168 to support old numpy - indexer = getattr(indexer, 'values', indexer) values[indexer] = value # coerce and try to infer the dtypes of the result @@ -1482,15 +1478,7 @@ def quantile(self, qs, interpolation='linear', axis=0, mgr=None): tuple of (axis, block) """ - if _np_version_under1p9: - if interpolation != 'linear': - raise ValueError("Interpolation methods other than linear " - "are not supported in numpy < 1.9.") - - kw = {} - if not _np_version_under1p9: - kw.update({'interpolation': interpolation}) - + kw = {'interpolation': 
interpolation} values = self.get_values() values, _, _, _ = self._try_coerce_args(values, values) diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 2482e493dbefd..2f264874378bc 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -12,7 +12,6 @@ from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm -from pandas import _np_version_under1p9 from pandas.tests.frame.common import TestData @@ -103,9 +102,6 @@ def test_quantile_axis_parameter(self): def test_quantile_interpolation(self): # see gh-10174 - if _np_version_under1p9: - pytest.skip("Numpy version under 1.9") - from numpy import percentile # interpolation = linear (default case) @@ -166,44 +162,6 @@ def test_quantile_interpolation(self): index=[.25, .5], columns=['a', 'b', 'c']) assert_frame_equal(result, expected) - def test_quantile_interpolation_np_lt_1p9(self): - # see gh-10174 - if not _np_version_under1p9: - pytest.skip("Numpy version is greater than 1.9") - - from numpy import percentile - - # interpolation = linear (default case) - q = self.tsframe.quantile(0.1, axis=0, interpolation='linear') - assert q['A'] == percentile(self.tsframe['A'], 10) - q = self.intframe.quantile(0.1) - assert q['A'] == percentile(self.intframe['A'], 10) - - # test with and without interpolation keyword - q1 = self.intframe.quantile(0.1) - assert q1['A'] == np.percentile(self.intframe['A'], 10) - assert_series_equal(q, q1) - - # interpolation method other than default linear - msg = "Interpolation methods other than linear" - df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) - with tm.assert_raises_regex(ValueError, msg): - df.quantile(.5, axis=1, interpolation='nearest') - - with tm.assert_raises_regex(ValueError, msg): - df.quantile([.5, .75], axis=1, interpolation='lower') - - # test degenerate case - df = DataFrame({'x': [], 'y': []}) - with tm.assert_raises_regex(ValueError, msg): - q = df.quantile(0.1, axis=0, interpolation='higher') - - # multi - df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], - columns=['a', 'b', 'c']) - with tm.assert_raises_regex(ValueError, msg): - df.quantile([.25, .5], interpolation='midpoint') - def test_quantile_multi(self): df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=['a', 'b', 'c']) diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py index acf887d047c9e..58f4d9b770173 100644 --- a/pandas/tests/frame/test_rank.py +++ b/pandas/tests/frame/test_rank.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import pytest from datetime import timedelta, datetime from distutils.version import LooseVersion from numpy import nan @@ -26,8 +27,7 @@ class TestRank(TestData): } def test_rank(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata + rankdata = pytest.importorskip('scipy.stats.rankdata') self.frame['A'][::2] = np.nan self.frame['B'][::3] = np.nan @@ -120,8 +120,7 @@ def test_rank2(self): tm.assert_frame_equal(df.rank(), exp) def test_rank_na_option(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata + rankdata = pytest.importorskip('scipy.stats.rankdata') self.frame['A'][::2] = np.nan self.frame['B'][::3] = np.nan @@ -193,10 +192,9 @@ def test_rank_axis(self): tm.assert_frame_equal(df.rank(axis=1), df.rank(axis='columns')) def test_rank_methods_frame(self): - tm.skip_if_no_package('scipy', min_version='0.13', - app='scipy.stats.rankdata') + pytest.importorskip('scipy.stats.special') + rankdata = 
pytest.importorskip('scipy.stats.rankdata') import scipy - from scipy.stats import rankdata xs = np.random.randint(0, 21, (100, 26)) xs = (xs - 10.0) / 10.0 diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index f99dcee9e5c8a..47f53f53cfd02 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -9,7 +9,7 @@ from pandas.compat import lrange from pandas.compat.numpy import np_datetime64_compat from pandas import (DatetimeIndex, Index, date_range, Series, DataFrame, - Timestamp, datetime, offsets, _np_version_under1p8) + Timestamp, datetime, offsets) from pandas.util.testing import assert_series_equal, assert_almost_equal @@ -276,11 +276,7 @@ def test_comparisons_nat(self): np_datetime64_compat('2014-06-01 00:00Z'), np_datetime64_compat('2014-07-01 00:00Z')]) - if _np_version_under1p8: - # cannot test array because np.datetime('nat') returns today's date - cases = [(fidx1, fidx2), (didx1, didx2)] - else: - cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] + cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] # Check pd.NaT is handles as the same as np.nan with tm.assert_produces_warning(None): diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index d4dac1cf88fff..efc13a56cd77e 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -8,7 +8,7 @@ from pandas.compat import lrange from pandas._libs import tslib from pandas import (PeriodIndex, Series, DatetimeIndex, - period_range, Period, _np_version_under1p9) + period_range, Period) class TestGetItem(object): @@ -149,16 +149,12 @@ def test_getitem_seconds(self): values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', '2013/02/01 09:00'] for v in values: - if _np_version_under1p9: - with pytest.raises(ValueError): - idx[v] - else: - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - # with pytest.raises(IndexError): - # idx[v] - continue + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + # with pytest.raises(IndexError): + # idx[v] + continue s = Series(np.random.rand(len(idx)), index=idx) tm.assert_series_equal(s['2013/01/01 10:00'], s[3600:3660]) @@ -178,16 +174,12 @@ def test_getitem_day(self): '2013/02/01 09:00'] for v in values: - if _np_version_under1p9: - with pytest.raises(ValueError): - idx[v] - else: - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - # with pytest.raises(IndexError): - # idx[v] - continue + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + # with pytest.raises(IndexError): + # idx[v] + continue s = Series(np.random.rand(len(idx)), index=idx) tm.assert_series_equal(s['2013/01'], s[0:31]) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 59e4b1432b8bc..0b3bd0b03bccf 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -7,7 +7,7 @@ import pandas.util.testing as tm from pandas import (timedelta_range, date_range, Series, Timedelta, DatetimeIndex, TimedeltaIndex, Index, DataFrame, - Int64Index, _np_version_under1p8) + Int64Index) from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_index_equal) @@ -379,11 +379,7 @@ def 
test_comparisons_nat(self): np.timedelta64(1, 'D') + np.timedelta64(2, 's'), np.timedelta64(5, 'D') + np.timedelta64(3, 's')]) - if _np_version_under1p8: - # cannot test array because np.datetime('nat') returns today's date - cases = [(tdidx1, tdidx2)] - else: - cases = [(tdidx1, tdidx2), (tdidx1, tdarr)] + cases = [(tdidx1, tdidx2), (tdidx1, tdarr)] # Check pd.NaT is handles as the same as np.nan for idx1, idx2 in cases: diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 3ab443b223f20..dfab539e9474c 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -39,7 +39,8 @@ def _ok_for_gaussian_kde(kind): from scipy.stats import gaussian_kde # noqa except ImportError: return False - return True + + return plotting._compat._mpl_ge_1_5_0() class TestPlotBase(object): diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index e9c7d806fd65d..cff0c1c0b424e 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -610,6 +610,8 @@ def test_secondary_y_ts(self): @pytest.mark.slow def test_secondary_kde(self): + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 6d813ac76cc4e..67098529a0111 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -473,7 +473,6 @@ def test_subplots_multiple_axes(self): # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes fig, axes = self.plt.subplots(2, 2) with warnings.catch_warnings(): - warnings.simplefilter('ignore') df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) @@ -1290,6 +1289,9 @@ def test_boxplot_subplots_return_type(self): def test_kde_df(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + df = DataFrame(randn(100, 4)) ax = _check_plot_works(df.plot, kind='kde') expected = [pprint_thing(c) for c in df.columns] @@ -1311,6 +1313,9 @@ def test_kde_df(self): def test_kde_missing_vals(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + df = DataFrame(np.random.uniform(size=(100, 4))) df.loc[0, 0] = np.nan _check_plot_works(df.plot, kind='kde') @@ -1835,6 +1840,8 @@ def test_hist_colors(self): def test_kde_colors(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") from matplotlib import cm @@ -1858,6 +1865,8 @@ def test_kde_colors(self): def test_kde_colors_and_styles_subplots(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") from matplotlib import cm default_colors = self._maybe_unpack_cycler(self.plt.rcParams) @@ -2160,71 +2169,74 @@ def test_pie_df_nan(self): @pytest.mark.slow def test_errorbar_plot(self): - d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} - df = DataFrame(d) - d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} - df_err = DataFrame(d_err) - - # check line plots - ax = _check_plot_works(df.plot, yerr=df_err, logy=True) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err, loglog=True) 
- self._check_has_errorbars(ax, xerr=0, yerr=2) + with warnings.catch_warnings(): + d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} + df = DataFrame(d) + d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} + df_err = DataFrame(d_err) - kinds = ['line', 'bar', 'barh'] - for kind in kinds: - ax = _check_plot_works(df.plot, yerr=df_err['x'], kind=kind) + # check line plots + ax = _check_plot_works(df.plot, yerr=df_err, logy=True) self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=d_err, kind=kind) + ax = _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True) self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, - kind=kind) - self._check_has_errorbars(ax, xerr=2, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err['x'], xerr=df_err['x'], - kind=kind) - self._check_has_errorbars(ax, xerr=2, yerr=2) - ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) - self._check_has_errorbars(ax, xerr=2, yerr=2) - # _check_plot_works adds an ax so catch warning. see GH #13188 - with tm.assert_produces_warning(UserWarning): + ax = _check_plot_works(df.plot, yerr=df_err, loglog=True) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + kinds = ['line', 'bar', 'barh'] + for kind in kinds: + ax = _check_plot_works(df.plot, yerr=df_err['x'], kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(df.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, + kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + ax = _check_plot_works(df.plot, yerr=df_err['x'], + xerr=df_err['x'], + kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + + # _check_plot_works adds an ax so catch warning. 
see GH #13188 axes = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind) - self._check_has_errorbars(axes, xerr=1, yerr=1) - - ax = _check_plot_works((df + 1).plot, yerr=df_err, - xerr=df_err, kind='bar', log=True) - self._check_has_errorbars(ax, xerr=2, yerr=2) + self._check_has_errorbars(axes, xerr=1, yerr=1) - # yerr is raw error values - ax = _check_plot_works(df['y'].plot, yerr=np.ones(12) * 0.4) - self._check_has_errorbars(ax, xerr=0, yerr=1) - ax = _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4) - self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works((df + 1).plot, yerr=df_err, + xerr=df_err, kind='bar', log=True) + self._check_has_errorbars(ax, xerr=2, yerr=2) - # yerr is iterator - import itertools - ax = _check_plot_works(df.plot, yerr=itertools.repeat(0.1, len(df))) - self._check_has_errorbars(ax, xerr=0, yerr=2) + # yerr is raw error values + ax = _check_plot_works(df['y'].plot, yerr=np.ones(12) * 0.4) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4) + self._check_has_errorbars(ax, xerr=0, yerr=2) - # yerr is column name - for yerr in ['yerr', u('誤差')]: - s_df = df.copy() - s_df[yerr] = np.ones(12) * 0.2 - ax = _check_plot_works(s_df.plot, yerr=yerr) + # yerr is iterator + import itertools + ax = _check_plot_works(df.plot, + yerr=itertools.repeat(0.1, len(df))) self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(s_df.plot, y='y', x='x', yerr=yerr) - self._check_has_errorbars(ax, xerr=0, yerr=1) - with pytest.raises(ValueError): - df.plot(yerr=np.random.randn(11)) + # yerr is column name + for yerr in ['yerr', u('誤差')]: + s_df = df.copy() + s_df[yerr] = np.ones(12) * 0.2 + ax = _check_plot_works(s_df.plot, yerr=yerr) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(s_df.plot, y='y', x='x', yerr=yerr) + self._check_has_errorbars(ax, xerr=0, yerr=1) - df_err = DataFrame({'x': ['zzz'] * 12, 'y': ['zzz'] * 12}) - with pytest.raises((ValueError, TypeError)): - df.plot(yerr=df_err) + with pytest.raises(ValueError): + df.plot(yerr=np.random.randn(11)) + + df_err = DataFrame({'x': ['zzz'] * 12, 'y': ['zzz'] * 12}) + with pytest.raises((ValueError, TypeError)): + df.plot(yerr=df_err) @pytest.mark.slow def test_errorbar_with_integer_column_names(self): @@ -2262,33 +2274,34 @@ def test_errorbar_with_partial_columns(self): @pytest.mark.slow def test_errorbar_timeseries(self): - d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} - d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} + with warnings.catch_warnings(): + d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} + d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} - # check time-series plots - ix = date_range('1/1/2000', '1/1/2001', freq='M') - tdf = DataFrame(d, index=ix) - tdf_err = DataFrame(d_err, index=ix) + # check time-series plots + ix = date_range('1/1/2000', '1/1/2001', freq='M') + tdf = DataFrame(d, index=ix) + tdf_err = DataFrame(d_err, index=ix) - kinds = ['line', 'bar', 'barh'] - for kind in kinds: - ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(tdf.plot, yerr=d_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(tdf.plot, y='y', yerr=tdf_err['x'], - kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=1) - ax = _check_plot_works(tdf.plot, y='y', yerr='x', kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=1) - ax = 
_check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - # _check_plot_works adds an ax so catch warning. see GH #13188 - with tm.assert_produces_warning(UserWarning): + kinds = ['line', 'bar', 'barh'] + for kind in kinds: + ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(tdf.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(tdf.plot, y='y', yerr=tdf_err['x'], + kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(tdf.plot, y='y', yerr='x', kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + # _check_plot_works adds an ax so catch warning. see GH #13188 axes = _check_plot_works(tdf.plot, kind=kind, yerr=tdf_err, subplots=True) - self._check_has_errorbars(axes, xerr=0, yerr=1) + self._check_has_errorbars(axes, xerr=0, yerr=1) def test_errorbar_asymmetrical(self): diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 684a943fb5a69..c4795ea1e1eca 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -4,7 +4,7 @@ import pytest -from pandas import Series, DataFrame +from pandas import DataFrame from pandas.compat import lmap import pandas.util.testing as tm @@ -13,8 +13,7 @@ from numpy.random import randn import pandas.plotting as plotting -from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, - _ok_for_gaussian_kde) +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works tm._skip_if_no_mpl() @@ -52,46 +51,6 @@ def test_bootstrap_plot(self): class TestDataFramePlots(TestPlotBase): - @pytest.mark.slow - def test_scatter_plot_legacy(self): - tm._skip_if_no_scipy() - - df = DataFrame(randn(100, 2)) - - def scat(**kwds): - return plotting.scatter_matrix(df, **kwds) - - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat) - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, marker='+') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, vmin=0) - if _ok_for_gaussian_kde('kde'): - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, diagonal='kde') - if _ok_for_gaussian_kde('density'): - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, diagonal='density') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, diagonal='hist') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, range_padding=.1) - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, color='rgb') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, c='rgb') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, facecolor='rgb') - - def scat2(x, y, by=None, ax=None, figsize=None): - return plotting._core.scatter_plot(df, x, y, by, ax, figsize=None) - - _check_plot_works(scat2, x=0, y=1) - grouper = Series(np.repeat([1, 2, 3, 4, 5], 20), df.index) - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat2, x=0, y=1, by=grouper) - def test_scatter_matrix_axis(self): tm._skip_if_no_scipy() scatter_matrix = plotting.scatter_matrix diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 9c9011ba1ca7b..8164ad74a190a 100644 --- 
a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -571,6 +571,9 @@ def test_plot_fails_with_dupe_color_and_style(self): @pytest.mark.slow def test_hist_kde(self): + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + _, ax = self.plt.subplots() ax = self.ts.plot.hist(logy=True, ax=ax) self._check_ax_scales(ax, yaxis='log') @@ -596,6 +599,9 @@ def test_hist_kde(self): def test_kde_kwargs(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + from numpy import linspace _check_plot_works(self.ts.plot.kde, bw_method=.5, ind=linspace(-100, 100, 20)) @@ -611,6 +617,9 @@ def test_kde_kwargs(self): def test_kde_missing_vals(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + s = Series(np.random.uniform(size=50)) s[0] = np.nan axes = _check_plot_works(s.plot.kde) @@ -638,6 +647,9 @@ def test_hist_kwargs(self): @pytest.mark.slow def test_hist_kde_color(self): + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + _, ax = self.plt.subplots() ax = self.ts.plot.hist(logy=True, bins=10, color='b', ax=ax) self._check_ax_scales(ax, yaxis='log') diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 4888f8fe996b6..114a055de8195 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -14,8 +14,7 @@ import pandas as pd from pandas import (Index, Series, DataFrame, isna, bdate_range, - NaT, date_range, timedelta_range, - _np_version_under1p8) + NaT, date_range, timedelta_range) from pandas.core.indexes.datetimes import Timestamp from pandas.core.indexes.timedeltas import Timedelta import pandas.core.nanops as nanops @@ -687,14 +686,13 @@ def run_ops(ops, get_ser, test_ser): assert_series_equal(result, exp) # odd numpy behavior with scalar timedeltas - if not _np_version_under1p8: - result = td1[0] + dt1 - exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) - assert_series_equal(result, exp) + result = td1[0] + dt1 + exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) - result = td2[0] + dt2 - exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) - assert_series_equal(result, exp) + result = td2[0] + dt2 + exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) result = dt1 - td1[0] exp = (dt1.dt.tz_localize(None) - td1[0]).dt.tz_localize(tz) diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index 21379641a78d8..cf5e3fe4f29b0 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -1,11 +1,10 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 -import pytest import numpy as np import pandas as pd -from pandas import (Index, Series, _np_version_under1p9) +from pandas import Index, Series from pandas.core.indexes.datetimes import Timestamp from pandas.core.dtypes.common import is_integer import pandas.util.testing as tm @@ -68,8 +67,6 @@ def test_quantile_multi(self): [], dtype=float)) tm.assert_series_equal(result, expected) - @pytest.mark.skipif(_np_version_under1p9, - reason="Numpy version is under 1.9") def test_quantile_interpolation(self): # see gh-10174 @@ -82,8 +79,6 @@ def test_quantile_interpolation(self): # test with and without interpolation keyword assert q == q1 - @pytest.mark.skipif(_np_version_under1p9, - reason="Numpy 
version is under 1.9") def test_quantile_interpolation_dtype(self): # GH #10174 @@ -96,26 +91,6 @@ def test_quantile_interpolation_dtype(self): assert q == np.percentile(np.array([1, 3, 4]), 50) assert is_integer(q) - @pytest.mark.skipif(not _np_version_under1p9, - reason="Numpy version is greater 1.9") - def test_quantile_interpolation_np_lt_1p9(self): - # GH #10174 - - # interpolation = linear (default case) - q = self.ts.quantile(0.1, interpolation='linear') - assert q == np.percentile(self.ts.valid(), 10) - q1 = self.ts.quantile(0.1) - assert q1 == np.percentile(self.ts.valid(), 10) - - # interpolation other than linear - msg = "Interpolation methods other than " - with tm.assert_raises_regex(ValueError, msg): - self.ts.quantile(0.9, interpolation='nearest') - - # object dtype - with tm.assert_raises_regex(ValueError, msg): - Series(self.ts, dtype=object).quantile(0.7, interpolation='higher') - def test_quantile_nan(self): # GH 13098 diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py index ff489eb7f15b1..128a4cdd845e6 100644 --- a/pandas/tests/series/test_rank.py +++ b/pandas/tests/series/test_rank.py @@ -28,8 +28,8 @@ class TestSeriesRank(TestData): } def test_rank(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata + pytest.importorskip('scipy.stats.special') + rankdata = pytest.importorskip('scipy.stats.rankdata') self.ts[::2] = np.nan self.ts[:10][::3] = 4. @@ -246,10 +246,9 @@ def _check(s, expected, method='average'): _check(series, results[method], method=method) def test_rank_methods_series(self): - tm.skip_if_no_package('scipy', min_version='0.13', - app='scipy.stats.rankdata') + pytest.importorskip('scipy.stats.special') + rankdata = pytest.importorskip('scipy.stats.rankdata') import scipy - from scipy.stats import rankdata xs = np.random.randn(9) xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py index 4ce03f72dbba6..b0a9182a265fe 100644 --- a/pandas/tests/sparse/test_array.py +++ b/pandas/tests/sparse/test_array.py @@ -8,7 +8,6 @@ from numpy import nan import numpy as np -from pandas import _np_version_under1p8 from pandas.core.sparse.api import SparseArray, SparseSeries from pandas._libs.sparse import IntIndex from pandas.util.testing import assert_almost_equal @@ -150,10 +149,8 @@ def test_take(self): assert np.isnan(self.arr.take(0)) assert np.isscalar(self.arr.take(2)) - # np.take in < 1.8 doesn't support scalar indexing - if not _np_version_under1p8: - assert self.arr.take(2) == np.take(self.arr_data, 2) - assert self.arr.take(6) == np.take(self.arr_data, 6) + assert self.arr.take(2) == np.take(self.arr_data, 2) + assert self.arr.take(6) == np.take(self.arr_data, 6) exp = SparseArray(np.take(self.arr_data, [2, 3])) tm.assert_sp_array_equal(self.arr.take([2, 3]), exp) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 2a22fc9d32919..9305504f8d5e3 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from pandas import Series, isna, _np_version_under1p9 +from pandas import Series, isna from pandas.core.dtypes.common import is_integer_dtype import pandas.core.nanops as nanops import pandas.util.testing as tm @@ -340,15 +340,13 @@ def test_nanmean_overflow(self): # In the previous implementation mean can overflow for int dtypes, it # is now consistent with numpy - # numpy < 1.9.0 is not computing this correctly - if not 
_np_version_under1p9: - for a in [2 ** 55, -2 ** 55, 20150515061816532]: - s = Series(a, index=range(500), dtype=np.int64) - result = s.mean() - np_result = s.values.mean() - assert result == a - assert result == np_result - assert result.dtype == np.float64 + for a in [2 ** 55, -2 ** 55, 20150515061816532]: + s = Series(a, index=range(500), dtype=np.int64) + result = s.mean() + np_result = s.values.mean() + assert result == a + assert result == np_result + assert result.dtype == np.float64 def test_returned_dtype(self): diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index d938d5bf9f3ab..d42e37048d87f 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -1688,7 +1688,7 @@ def test_resample_dtype_preservation(self): def test_resample_dtype_coerceion(self): - pytest.importorskip('scipy') + pytest.importorskip('scipy.interpolate') # GH 16361 df = {"a": [1, 3, 1, 4]} diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py index 664a97640387e..1d13ba93ba759 100644 --- a/pandas/tests/tools/test_numeric.py +++ b/pandas/tests/tools/test_numeric.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd -from pandas import to_numeric, _np_version_under1p9 +from pandas import to_numeric from pandas.util import testing as tm from numpy import iinfo @@ -355,9 +355,6 @@ def test_downcast(self): def test_downcast_limits(self): # Test the limits of each downcast. Bug: #14401. - # Check to make sure numpy is new enough to run this test. - if _np_version_under1p9: - pytest.skip("Numpy version is under 1.9") i = 'integer' u = 'unsigned' diff --git a/setup.py b/setup.py index a912b25328954..04a5684c20fcd 100755 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ def is_platform_mac(): _have_setuptools = False setuptools_kwargs = {} -min_numpy_ver = '1.7.0' +min_numpy_ver = '1.9.0' if sys.version_info[0] >= 3: setuptools_kwargs = { From 8fbd8f8806fbf7bc019a47eaa8c4da3f12f1237c Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 22 Aug 2017 07:57:53 -0600 Subject: [PATCH 099/141] CLN: replace %s syntax with .format in core.tools, algorithms.py, base.py (#17305) --- pandas/core/algorithms.py | 10 +++++----- pandas/core/base.py | 19 ++++++++++--------- pandas/core/tools/datetimes.py | 32 ++++++++++++++++++-------------- pandas/core/tools/timedeltas.py | 7 ++++--- 4 files changed, 37 insertions(+), 31 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index ffd03096e2a27..cccb094eaae7b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -393,12 +393,12 @@ def isin(comps, values): if not is_list_like(comps): raise TypeError("only list-like objects are allowed to be passed" - " to isin(), you passed a " - "[{0}]".format(type(comps).__name__)) + " to isin(), you passed a [{comps_type}]" + .format(comps_type=type(comps).__name__)) if not is_list_like(values): raise TypeError("only list-like objects are allowed to be passed" - " to isin(), you passed a " - "[{0}]".format(type(values).__name__)) + " to isin(), you passed a [{values_type}]" + .format(values_type=type(values).__name__)) if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): values = lib.list_to_object_array(list(values)) @@ -671,7 +671,7 @@ def mode(values): try: result = np.sort(result) except TypeError as e: - warn("Unable to sort modes: %s" % e) + warn("Unable to sort modes: {error}".format(error=e)) result = _reconstruct_data(result, original.dtype, original) return Series(result) diff --git a/pandas/core/base.py 
b/pandas/core/base.py index 4ae4736035793..a7c991dc8d257 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -342,24 +342,25 @@ def _obj_with_exclusions(self): def __getitem__(self, key): if self._selection is not None: - raise Exception('Column(s) %s already selected' % self._selection) + raise Exception('Column(s) {selection} already selected' + .format(selection=self._selection)) if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass, np.ndarray)): if len(self.obj.columns.intersection(key)) != len(key): bad_keys = list(set(key).difference(self.obj.columns)) - raise KeyError("Columns not found: %s" - % str(bad_keys)[1:-1]) + raise KeyError("Columns not found: {missing}" + .format(missing=str(bad_keys)[1:-1])) return self._gotitem(list(key), ndim=2) elif not getattr(self, 'as_index', False): if key not in self.obj.columns: - raise KeyError("Column not found: %s" % key) + raise KeyError("Column not found: {key}".format(key=key)) return self._gotitem(key, ndim=2) else: if key not in self.obj: - raise KeyError("Column not found: %s" % key) + raise KeyError("Column not found: {key}".format(key=key)) return self._gotitem(key, ndim=1) def _gotitem(self, key, ndim, subset=None): @@ -409,7 +410,7 @@ def _try_aggregate_string_function(self, arg, *args, **kwargs): if f is not None: return f(self, *args, **kwargs) - raise ValueError("{} is an unknown string function".format(arg)) + raise ValueError("{arg} is an unknown string function".format(arg=arg)) def _aggregate(self, arg, *args, **kwargs): """ @@ -484,9 +485,9 @@ def nested_renaming_depr(level=4): is_nested_renamer = True if k not in obj.columns: - raise SpecificationError('cannot perform renaming ' - 'for {0} with a nested ' - 'dictionary'.format(k)) + msg = ('cannot perform renaming for {key} with a ' + 'nested dictionary').format(key=k) + raise SpecificationError(msg) nested_renaming_depr(4 + (_level or 0)) elif isinstance(obj, ABCSeries): diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 6ff4302937d07..53f58660cabdb 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -46,7 +46,8 @@ def _infer(a, b): if b and b.tzinfo: if not (tslib.get_timezone(tz) == tslib.get_timezone(b.tzinfo)): raise AssertionError('Inputs must both have the same timezone,' - ' {0} != {1}'.format(tz, b.tzinfo)) + ' {timezone1} != {timezone2}' + .format(timezone1=tz, timezone2=b.tzinfo)) return tz tz = None @@ -491,10 +492,10 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): offset = tslib.Timestamp(origin) - tslib.Timestamp(0) except tslib.OutOfBoundsDatetime: raise tslib.OutOfBoundsDatetime( - "origin {} is Out of Bounds".format(origin)) + "origin {origin} is Out of Bounds".format(origin=origin)) except ValueError: - raise ValueError("origin {} cannot be converted " - "to a Timestamp".format(origin)) + raise ValueError("origin {origin} cannot be converted " + "to a Timestamp".format(origin=origin)) # convert the offset to the unit of the arg # this should be lossless in terms of precision @@ -590,16 +591,16 @@ def f(value): required = ['year', 'month', 'day'] req = sorted(list(set(required) - set(unit_rev.keys()))) if len(req): - raise ValueError("to assemble mappings requires at " - "least that [year, month, day] be specified: " - "[{0}] is missing".format(','.join(req))) + raise ValueError("to assemble mappings requires at least that " + "[year, month, day] be specified: [{required}] " + "is missing".format(required=','.join(req))) # keys we don't recognize excess = 
sorted(list(set(unit_rev.keys()) - set(_unit_map.values()))) if len(excess): raise ValueError("extra keys have been passed " "to the datetime assemblage: " - "[{0}]".format(','.join(excess))) + "[{excess}]".format(excess=','.join(excess))) def coerce(values): # we allow coercion to if errors allows @@ -617,7 +618,7 @@ def coerce(values): values = to_datetime(values, format='%Y%m%d', errors=errors) except (TypeError, ValueError) as e: raise ValueError("cannot assemble the " - "datetimes: {0}".format(e)) + "datetimes: {error}".format(error=e)) for u in ['h', 'm', 's', 'ms', 'us', 'ns']: value = unit_rev.get(u) @@ -627,8 +628,8 @@ def coerce(values): unit=u, errors=errors) except (TypeError, ValueError) as e: - raise ValueError("cannot assemble the datetimes " - "[{0}]: {1}".format(value, e)) + raise ValueError("cannot assemble the datetimes [{value}]: " + "{error}".format(value=value, error=e)) return values @@ -810,8 +811,10 @@ def _convert_listlike(arg, format): times.append(datetime.strptime(element, format).time()) except (ValueError, TypeError): if errors == 'raise': - raise ValueError("Cannot convert %s to a time with " "given format %s" % (element, format)) + msg = ("Cannot convert {element} to a time with given " "format {format}").format(element=element, format=format) + raise ValueError(msg) elif errors == 'ignore': return arg else: @@ -876,6 +879,7 @@ def ole2datetime(oledt): # Excel has a bug where it thinks the date 2/29/1900 exists # we just reject any date before 3/1/1900. if val < 61: - raise ValueError("Value is outside of acceptable range: %s " % val) + msg = "Value is outside of acceptable range: {value}".format(value=val) + raise ValueError(msg) return OLE_TIME_ZERO + timedelta(days=val) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index f2d99d26a87b8..d5132826bb93f 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -129,7 +129,8 @@ def _validate_timedelta_unit(arg): except: if arg is None: return 'ns' - raise ValueError("invalid timedelta unit {0} provided".format(arg)) + raise ValueError("invalid timedelta unit {arg} provided" .format(arg=arg)) def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'): @@ -161,8 +162,8 @@ def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None): if is_timedelta64_dtype(arg): value = arg.astype('timedelta64[ns]') elif is_integer_dtype(arg): - value = arg.astype('timedelta64[{0}]'.format( unit)).astype('timedelta64[ns]', copy=False) + value = arg.astype('timedelta64[{unit}]'.format(unit=unit)).astype( 'timedelta64[ns]', copy=False) else: try: value = tslib.array_to_timedelta64(_ensure_object(arg), From 362519092d2b137e2ed7630ad477a448bb1c9e31 Mon Sep 17 00:00:00 2001 From: ante328 Date: Tue, 22 Aug 2017 16:31:14 +0200 Subject: [PATCH 100/141] BUG: Fix strange behaviour of Series.iloc on MultiIndex Series (#17148) (#17291) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexing.py | 3 ++- pandas/tests/indexing/test_iloc.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 7532730f6be27..496285ddfdee9 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -354,6 +354,7 @@ Indexing - Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`) - Avoids ``IndexError`` when passing an Index or Series to ``.iloc`` with older numpy
(:issue:`17193`) - Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`) +- Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) I/O ^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 929c2346ba5b0..6b9ad5cd2d93b 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -146,7 +146,8 @@ def _get_setitem_indexer(self, key): return self._convert_tuple(key, is_setter=True) axis = self.obj._get_axis(0) - if isinstance(axis, MultiIndex): + + if isinstance(axis, MultiIndex) and self.name != 'iloc': try: return axis.get_loc(key) except Exception: diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 31fee303a41e2..39569f0b0cb38 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -269,6 +269,35 @@ def test_iloc_setitem(self): expected = Series([0, 1, 0], index=[4, 5, 6]) tm.assert_series_equal(s, expected) + @pytest.mark.parametrize( + 'data, indexes, values, expected_k', [ + # test without indexer value in first level of MultiIndex + ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]), + # test like code sample 1 in the issue + ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], + [755, 1066]), + # test like code sample 2 in the issue + ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]), + # test like code sample 3 in the issue + ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], + [8, 15, 13]) + ]) + def test_iloc_setitem_int_multiindex_series( + self, data, indexes, values, expected_k): + # GH17148 + df = pd.DataFrame( + data=data, + columns=['i', 'j', 'k']) + df = df.set_index(['i', 'j']) + + series = df.k.copy() + for i, v in zip(indexes, values): + series.iloc[i] += v + + df['k'] = expected_k + expected = df.k + tm.assert_series_equal(series, expected) + def test_iloc_setitem_list(self): # setitem with an iloc list From 736471161384108eda8448130a263e784ea5bbf1 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 23 Aug 2017 13:30:34 -0700 Subject: [PATCH 101/141] DOC: Add module doc-string to tseries/api.py --- pandas/tseries/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/api.py b/pandas/tseries/api.py index 71386c02547ba..2094791ecdc60 100644 --- a/pandas/tseries/api.py +++ b/pandas/tseries/api.py @@ -1,5 +1,5 @@ """ - +Timeseries API """ # flake8: noqa From e5797fad48b806492c1dee9cabd374a364df6b8c Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 23 Aug 2017 14:35:49 -0700 Subject: [PATCH 102/141] MAINT: Clean up docs in pandas/errors/__init__.py --- pandas/errors/__init__.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 6304f3a527f2c..42b3bdd4991a9 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -1,25 +1,28 @@ # flake8: noqa -""" expose public exceptions & warnings """ +""" +Expose public exceptions & warnings +""" from pandas._libs.tslib import OutOfBoundsDatetime class PerformanceWarning(Warning): """ - Warnings shown when there is a possible performance - impact. + Warning raised when there is a possible + performance impact. """ class UnsupportedFunctionCall(ValueError): """ - If attempting to call a numpy function on a pandas - object. For example using ``np.cumsum(groupby_object)``. 
+ Exception raised when attempting to call a numpy function + on a pandas object, but that function is not supported by + the object e.g. ``np.cumsum(groupby_object)``. """ class UnsortedIndexError(KeyError): """ - Error raised when attempting to get a slice of a MultiIndex + Error raised when attempting to get a slice of a MultiIndex, and the index has not been lexsorted. Subclass of `KeyError`. .. versionadded:: 0.20.0 @@ -29,22 +32,22 @@ class ParserError(ValueError): """ - Exception that is thrown by an error is encountered in `pd.read_csv` + Exception that is raised by an error encountered in `pd.read_csv`. """ class DtypeWarning(Warning): """ - Warning that is raised for a dtype incompatiblity. This is + Warning that is raised for a dtype incompatibility. This can happen whenever `pd.read_csv` encounters non- uniform dtypes in a column(s) of a given CSV file + uniform dtypes in a column(s) of a given CSV file. """ class EmptyDataError(ValueError): """ Exception that is thrown in `pd.read_csv` (by both the C and - Python engines) when empty data or header is encountered + Python engines) when empty data or header is encountered. """ @@ -53,7 +56,7 @@ class ParserWarning(Warning): Warning that is raised in `pd.read_csv` whenever it is necessary to change parsers (generally from 'c' to 'python') contrary to the one specified by the user due to lack of support or functionality for - parsing particular attributes of a CSV file with the requsted engine + parsing particular attributes of a CSV file with the requested engine. """ @@ -61,5 +64,4 @@ class MergeError(ValueError): """ Error raised when problems arise during merging due to problems with input data. Subclass of `ValueError`. - """ From 9be531a770b87d2cef11107e928b07a0de424871 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 24 Aug 2017 03:50:18 -0600 Subject: [PATCH 103/141] CLN: replace %s syntax with .format in missing.py, nanops.py, ops.py (#17322) Replaced %s syntax with .format in missing.py, nanops.py, ops.py. Additionally, made some of the existing positional .format code more explicit. --- pandas/core/missing.py | 41 +++++++++++++++-------- pandas/core/nanops.py | 8 ++-- pandas/core/ops.py | 84 +++++++++++++++++++++++------------------- 3 files changed, 76 insertions(+), 57 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 93281e20a2a96..8a6a870834c83 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -88,8 +88,8 @@ def clean_fill_method(method, allow_nearest=False): valid_methods.append('nearest') expecting = 'pad (ffill), backfill (bfill) or nearest' if method not in valid_methods: - msg = ('Invalid fill method. Expecting %s. Got %s' % (expecting, method)) + msg = ('Invalid fill method. Expecting {expecting}. Got {method}' .format(expecting=expecting, method=method)) raise ValueError(msg) return method @@ -104,8 +104,8 @@ def clean_interp_method(method, **kwargs): raise ValueError("You must specify the order of the spline or " "polynomial.") if method not in valid: - raise ValueError("method must be one of {0}." "Got '{1}' instead.".format(valid, method)) + raise ValueError("method must be one of {valid}. 
Got '{method}' " + "instead.".format(valid=valid, method=method)) return method @@ -146,8 +146,10 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None, valid_limit_directions = ['forward', 'backward', 'both'] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: - raise ValueError('Invalid limit_direction: expecting one of %r, got ' - '%r.' % (valid_limit_directions, limit_direction)) + msg = ('Invalid limit_direction: expecting one of {valid!r}, ' + 'got {invalid!r}.') + raise ValueError(msg.format(valid=valid_limit_directions, + invalid=limit_direction)) from pandas import Series ys = Series(yvalues) @@ -248,7 +250,8 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, # TODO: Why is DatetimeIndex being imported here? from pandas import DatetimeIndex # noqa except ImportError: - raise ImportError('{0} interpolation requires Scipy'.format(method)) + raise ImportError('{method} interpolation requires SciPy' + .format(method=method)) new_x = np.asarray(new_x) @@ -466,7 +469,8 @@ def pad_1d(values, limit=None, mask=None, dtype=None): dtype = values.dtype _method = None if is_float_dtype(values): - _method = getattr(algos, 'pad_inplace_%s' % dtype.name, None) + name = 'pad_inplace_{name}'.format(name=dtype.name) + _method = getattr(algos, name, None) elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _pad_1d_datetime elif is_integer_dtype(values): @@ -476,7 +480,8 @@ def pad_1d(values, limit=None, mask=None, dtype=None): _method = algos.pad_inplace_object if _method is None: - raise ValueError('Invalid dtype for pad_1d [%s]' % dtype.name) + raise ValueError('Invalid dtype for pad_1d [{name}]' + .format(name=dtype.name)) if mask is None: mask = isna(values) @@ -490,7 +495,8 @@ def backfill_1d(values, limit=None, mask=None, dtype=None): dtype = values.dtype _method = None if is_float_dtype(values): - _method = getattr(algos, 'backfill_inplace_%s' % dtype.name, None) + name = 'backfill_inplace_{name}'.format(name=dtype.name) + _method = getattr(algos, name, None) elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _backfill_1d_datetime elif is_integer_dtype(values): @@ -500,7 +506,8 @@ def backfill_1d(values, limit=None, mask=None, dtype=None): _method = algos.backfill_inplace_object if _method is None: - raise ValueError('Invalid dtype for backfill_1d [%s]' % dtype.name) + raise ValueError('Invalid dtype for backfill_1d [{name}]' + .format(name=dtype.name)) if mask is None: mask = isna(values) @@ -515,7 +522,8 @@ def pad_2d(values, limit=None, mask=None, dtype=None): dtype = values.dtype _method = None if is_float_dtype(values): - _method = getattr(algos, 'pad_2d_inplace_%s' % dtype.name, None) + name = 'pad_2d_inplace_{name}'.format(name=dtype.name) + _method = getattr(algos, name, None) elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _pad_2d_datetime elif is_integer_dtype(values): @@ -525,7 +533,8 @@ def pad_2d(values, limit=None, mask=None, dtype=None): _method = algos.pad_2d_inplace_object if _method is None: - raise ValueError('Invalid dtype for pad_2d [%s]' % dtype.name) + raise ValueError('Invalid dtype for pad_2d [{name}]' + .format(name=dtype.name)) if mask is None: mask = isna(values) @@ -544,7 +553,8 @@ def backfill_2d(values, limit=None, mask=None, dtype=None): dtype = values.dtype _method = None if is_float_dtype(values): - _method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None) + name = 
'backfill_2d_inplace_{name}'.format(name=dtype.name) + _method = getattr(algos, name, None) elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _backfill_2d_datetime elif is_integer_dtype(values): @@ -554,7 +564,8 @@ def backfill_2d(values, limit=None, mask=None, dtype=None): _method = algos.backfill_2d_inplace_object if _method is None: - raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name) + raise ValueError('Invalid dtype for backfill_2d [{name}]' + .format(name=dtype.name)) if mask is None: mask = isna(values) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index b2bbf1c75b7ea..858aed7fd3e23 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -70,9 +70,8 @@ def __call__(self, f): def _f(*args, **kwargs): obj_iter = itertools.chain(args, compat.itervalues(kwargs)) if any(self.check(obj) for obj in obj_iter): - raise TypeError('reduction operation {0!r} not allowed for ' - 'this dtype'.format( - f.__name__.replace('nan', ''))) + msg = 'reduction operation {name!r} not allowed for this dtype' + raise TypeError(msg.format(name=f.__name__.replace('nan', ''))) try: with np.errstate(invalid='ignore'): return f(*args, **kwargs) @@ -786,7 +785,8 @@ def _ensure_numeric(x): try: x = complex(x) except Exception: - raise TypeError('Could not convert %s to numeric' % str(x)) + raise TypeError('Could not convert {value!s} to numeric' + .format(value=x)) return x # NA-friendly array comparisons diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 82101414e4aa6..221f6ff8b92c6 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -63,9 +63,9 @@ def _create_methods(arith_method, comp_method, bool_method, def names(x): if x[-1] == "_": - return "__%s_" % x + return "__{name}_".format(name=x) else: - return "__%s__" % x + return "__{name}__".format(name=x) else: names = lambda x: x @@ -388,8 +388,8 @@ def _validate(self, lvalues, rvalues, name): if name not in ('__div__', '__truediv__', '__mul__', '__rmul__'): raise TypeError("can only operate on a timedelta and an " "integer or a float for division and " - "multiplication, but the operator [%s] was" - "passed" % name) + "multiplication, but the operator [{name}] " + "was passed".format(name=name)) # 2 timedeltas elif ((self.is_timedelta_lhs and @@ -400,9 +400,9 @@ def _validate(self, lvalues, rvalues, name): if name not in ('__div__', '__rdiv__', '__truediv__', '__rtruediv__', '__add__', '__radd__', '__sub__', '__rsub__'): - raise TypeError("can only operate on a timedeltas for " - "addition, subtraction, and division, but the" - " operator [%s] was passed" % name) + raise TypeError("can only operate on a timedeltas for addition" + ", subtraction, and division, but the operator" + " [{name}] was passed".format(name=name)) # datetime and timedelta/DateOffset elif (self.is_datetime_lhs and @@ -411,23 +411,24 @@ def _validate(self, lvalues, rvalues, name): if name not in ('__add__', '__radd__', '__sub__'): raise TypeError("can only operate on a datetime with a rhs of " "a timedelta/DateOffset for addition and " - "subtraction, but the operator [%s] was " - "passed" % name) + "subtraction, but the operator [{name}] was " + "passed".format(name=name)) elif (self.is_datetime_rhs and (self.is_timedelta_lhs or self.is_offset_lhs)): if name not in ('__add__', '__radd__', '__rsub__'): raise TypeError("can only operate on a timedelta/DateOffset " "with a rhs of a datetime for addition, " - "but the operator [%s] was passed" % name) + "but the operator [{name}] was passed" + 
.format(name=name)) # 2 datetimes elif self.is_datetime_lhs and self.is_datetime_rhs: if name not in ('__sub__', '__rsub__'): raise TypeError("can only operate on a datetimes for" - " subtraction, but the operator [%s] was" - " passed" % name) + " subtraction, but the operator [{name}] was" + " passed".format(name=name)) # if tz's must be equal (same or None) if getattr(lvalues, 'tz', None) != getattr(rvalues, 'tz', None): @@ -439,8 +440,8 @@ def _validate(self, lvalues, rvalues, name): if name not in ('__add__', '__radd__'): raise TypeError("can only operate on a timedelta/DateOffset " - "and a datetime for addition, but the " - "operator [%s] was passed" % name) + "and a datetime for addition, but the operator" + " [{name}] was passed".format(name=name)) else: raise TypeError('cannot operate on a series without a rhs ' 'of a series/ndarray of type datetime64[ns] ' @@ -498,7 +499,7 @@ def _convert_to_array(self, values, name=None, other=None): values = values.to_timestamp().to_series() elif name not in ('__truediv__', '__div__', '__mul__', '__rmul__'): raise TypeError("incompatible type for a datetime/timedelta " - "operation [{0}]".format(name)) + "operation [{name}]".format(name=name)) elif inferred_type == 'floating': if (isna(values).all() and name in ('__add__', '__radd__', '__sub__', '__rsub__')): @@ -508,8 +509,9 @@ def _convert_to_array(self, values, name=None, other=None): elif self._is_offset(values): return values else: - raise TypeError("incompatible type [{0}] for a datetime/timedelta" - " operation".format(np.array(values).dtype)) + raise TypeError("incompatible type [{dtype}] for a " + "datetime/timedelta operation" + .format(dtype=np.array(values).dtype)) return values @@ -866,8 +868,8 @@ def wrapper(self, other, axis=None): with np.errstate(all='ignore'): res = na_op(values, other) if is_scalar(res): - raise TypeError('Could not compare %s type with Series' % - type(other)) + raise TypeError('Could not compare {typ} type with Series' + .format(typ=type(other))) # always return a full value series here res = _values_from_object(res) @@ -906,9 +908,10 @@ def na_op(x, y): y = bool(y) result = lib.scalar_binop(x, y, op) except: - raise TypeError("cannot compare a dtyped [{0}] array with " - "a scalar of type [{1}]".format( - x.dtype, type(y).__name__)) + msg = ("cannot compare a dtyped [{dtype}] array " + "with a scalar of type [{type}]" + ).format(dtype=x.dtype, type=type(y).__name__) + raise TypeError(msg) return result @@ -1140,14 +1143,17 @@ def _align_method_FRAME(left, right, axis): """ convert rhs to meet lhs dims if input is list, tuple or np.ndarray """ def to_series(right): - msg = 'Unable to coerce to Series, length must be {0}: given {1}' + msg = ('Unable to coerce to Series, length must be {req_len}: ' + 'given {given_len}') if axis is not None and left._get_axis_name(axis) == 'index': if len(left.index) != len(right): - raise ValueError(msg.format(len(left.index), len(right))) + raise ValueError(msg.format(req_len=len(left.index), + given_len=len(right))) right = left._constructor_sliced(right, index=left.index) else: if len(left.columns) != len(right): - raise ValueError(msg.format(len(left.columns), len(right))) + raise ValueError(msg.format(req_len=len(left.columns), + given_len=len(right))) right = left._constructor_sliced(right, index=left.columns) return right @@ -1161,15 +1167,16 @@ def to_series(right): elif right.ndim == 2: if left.shape != right.shape: - msg = ("Unable to coerce to DataFrame, " - "shape must be {0}: given {1}") - raise 
ValueError(msg.format(left.shape, right.shape)) + msg = ("Unable to coerce to DataFrame, shape " + "must be {req_shape}: given {given_shape}" + ).format(req_shape=left.shape, given_shape=right.shape) + raise ValueError(msg) right = left._constructor(right, index=left.index, columns=left.columns) else: - msg = 'Unable to coerce to Series/DataFrame, dim must be <= 2: {0}' - raise ValueError(msg.format(right.shape, )) + raise ValueError('Unable to coerce to Series/DataFrame, dim ' + 'must be <= 2: {dim}'.format(dim=right.shape)) return right @@ -1278,7 +1285,8 @@ def na_op(x, y): return result - @Appender('Wrapper for flexible comparison methods %s' % name) + @Appender('Wrapper for flexible comparison methods {name}' + .format(name=name)) def f(self, other, axis=default_axis, level=None): other = _align_method_FRAME(self, other, axis) @@ -1299,7 +1307,7 @@ def f(self, other, axis=default_axis, level=None): def _comp_method_FRAME(func, name, str_rep, masker=False): - @Appender('Wrapper for comparison method %s' % name) + @Appender('Wrapper for comparison method {name}'.format(name=name)) def f(self, other): if isinstance(other, pd.DataFrame): # Another DataFrame return self._compare_frame(other, func, str_rep) @@ -1349,9 +1357,9 @@ def na_op(x, y): # work only for scalars def f(self, other): if not is_scalar(other): - raise ValueError('Simple arithmetic with %s can only be ' - 'done with scalar values' % - self._constructor.__name__) + raise ValueError('Simple arithmetic with {name} can only be ' + 'done with scalar values' + .format(name=self._constructor.__name__)) return self._combine(other, op) @@ -1384,7 +1392,7 @@ def na_op(x, y): return result - @Appender('Wrapper for comparison method %s' % name) + @Appender('Wrapper for comparison method {name}'.format(name=name)) def f(self, other, axis=None): # Validate the axis parameter if axis is not None: @@ -1394,8 +1402,8 @@ def f(self, other, axis=None): return self._compare_constructor(other, na_op, try_cast=False) elif isinstance(other, (self._constructor_sliced, pd.DataFrame, ABCSeries)): - raise Exception("input needs alignment for this object [%s]" % - self._constructor) + raise Exception("input needs alignment for this object [{object}]" + .format(object=self._constructor)) else: return self._combine_const(other, na_op, try_cast=False) From a9574b077696c3419b88cf355423583e2ed602df Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 24 Aug 2017 03:03:18 -0700 Subject: [PATCH 104/141] Make pd.Period immutable (#17239) --- doc/source/whatsnew/v0.21.0.txt | 2 ++ pandas/_libs/period.pyx | 17 +++++++++++------ pandas/tests/scalar/test_period.py | 11 +++++++++++ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 496285ddfdee9..202fd2c13718f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -292,6 +292,8 @@ Other API Changes - Moved definition of ``MergeError`` to the ``pandas.errors`` module. - The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. 
The old signature is deprecated and will show a ``FutureWarning`` (:issue:`14636`) - :func:`Series.argmin` and :func:`Series.argmax` will now raise a ``TypeError`` when used with ``object`` dtypes, instead of a ``ValueError`` (:issue:`13595`) +- :class:`Period` is now immutable, and will now raise an ``AttributeError`` when a user tries to assign a new value to the ``ordinal`` or ``freq`` attributes (:issue:`17116`). + .. _whatsnew_0210.deprecations: diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 6ba7ec0270f30..a1d04fea89151 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -29,7 +29,9 @@ from datetime cimport ( PANDAS_FR_ns, INT32_MIN) + cimport util, lib + from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, @@ -668,13 +670,17 @@ class IncompatibleFrequency(ValueError): cdef class _Period(object): - cdef public: + cdef readonly: int64_t ordinal object freq _comparables = ['name', 'freqstr'] _typ = 'period' + def __cinit__(self, ordinal, freq): + self.ordinal = ordinal + self.freq = freq + @classmethod def _maybe_convert_freq(cls, object freq): @@ -698,9 +704,8 @@ cdef class _Period(object): if ordinal == iNaT: return NaT else: - self = _Period.__new__(cls) - self.ordinal = ordinal - self.freq = cls._maybe_convert_freq(freq) + freq = cls._maybe_convert_freq(freq) + self = _Period.__new__(cls, ordinal, freq) return self def __richcmp__(self, other, op): @@ -752,7 +757,7 @@ cdef class _Period(object): def __add__(self, other): if isinstance(self, Period): if isinstance(other, (timedelta, np.timedelta64, - offsets.Tick, offsets.DateOffset, + offsets.DateOffset, Timedelta)): return self._add_delta(other) elif other is NaT: @@ -770,7 +775,7 @@ cdef class _Period(object): def __sub__(self, other): if isinstance(self, Period): if isinstance(other, (timedelta, np.timedelta64, - offsets.Tick, offsets.DateOffset, + offsets.DateOffset, Timedelta)): neg_other = -other return self + neg_other diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index 931d6b2b8f1f0..a167c9c738b0b 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -1406,3 +1406,14 @@ def test_period_ops_offset(self): with tm.assert_raises_regex(period.IncompatibleFrequency, msg): p - offsets.Hour(2) + + +def test_period_immutable(): + # see gh-17116 + per = pd.Period('2014Q1') + with pytest.raises(AttributeError): + per.ordinal = 14 + + freq = per.freq + with pytest.raises(AttributeError): + per.freq = 2 * freq From 3e3138341bf7bb011c2484f4d2246f88bac3bfb3 Mon Sep 17 00:00:00 2001 From: P-Tillmann Date: Thu, 24 Aug 2017 12:38:27 +0200 Subject: [PATCH 105/141] Bug: groupby multiindex levels equals rows (#16859) closes #16843 --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/groupby.py | 9 +++++---- pandas/tests/groupby/test_groupby.py | 13 +++++++++++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 202fd2c13718f..2850c98c64d78 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -384,7 +384,7 @@ Groupby/Resample/Rolling - Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) - Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`) - Bug in ``.rolling(...).apply(...)`` with 
a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1` (:issue:`15305`) - +- Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`) Sparse ^^^^^^ diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index aa7c4517c0a01..c23b00dc740a4 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2629,13 +2629,14 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, try: if isinstance(obj, DataFrame): - all_in_columns = all(g in obj.columns for g in keys) + all_in_columns_index = all(g in obj.columns or g in obj.index.names + for g in keys) else: - all_in_columns = False + all_in_columns_index = False except Exception: - all_in_columns = False + all_in_columns_index = False - if not any_callable and not all_in_columns and \ + if not any_callable and not all_in_columns_index and \ not any_arraylike and not any_groupers and \ match_axis_length and level is None: keys = [com._asarray_tuplesafe(keys)] diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index f9e1a0d2e744a..8957beacab376 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3891,6 +3891,19 @@ def predictions(tool): result = df2.groupby('Key').apply(predictions).p1 tm.assert_series_equal(expected, result) + def test_gb_key_len_equal_axis_len(self): + # GH16843 + # test ensures that index and column keys are recognized correctly + # when number of keys equals axis length of groupby + df = pd.DataFrame([['foo', 'bar', 'B', 1], + ['foo', 'bar', 'B', 2], + ['foo', 'baz', 'C', 3]], + columns=['first', 'second', 'third', 'one']) + df = df.set_index(['first', 'second']) + df = df.groupby(['first', 'second', 'third']).size() + assert df.loc[('foo', 'bar', 'B')] == 2 + assert df.loc[('foo', 'baz', 'C')] == 1 + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = lmap(tuple, df[keys].values) From e5030b39bf06e7bca543fc16dc83256593e6d198 Mon Sep 17 00:00:00 2001 From: step4me Date: Thu, 24 Aug 2017 08:53:50 -0400 Subject: [PATCH 106/141] BUG: Cannot use tz-aware origin in to_datetime (#16842) closes #16842 Author: step4me Closes #17244 from step4me/step4me-feature and squashes the following commits: 09d051d48 [step4me] BUG: Cannot use tz-aware origin in to_datetime (#16842) --- doc/source/whatsnew/v0.21.0.txt | 5 +++-- pandas/core/tools/datetimes.py | 7 ++++++- pandas/tests/indexes/datetimes/test_tools.py | 6 ++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2850c98c64d78..7748f437347a6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -293,6 +293,7 @@ Other API Changes - The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is deprecated and will show a ``FutureWarning`` (:issue:`14636`) - :func:`Series.argmin` and :func:`Series.argmax` will now raise a ``TypeError`` when used with ``object`` dtypes, instead of a ``ValueError`` (:issue:`13595`) - :class:`Period` is now immutable, and will now raise an ``AttributeError`` when a user tries to assign a new value to the ``ordinal`` or ``freq`` attributes (:issue:`17116`). 
+- :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`) .. _whatsnew_0210.deprecations: @@ -357,6 +358,7 @@ Indexing - Avoids ``IndexError`` when passing an Index or Series to ``.iloc`` with older numpy (:issue:`17193`) - Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`) - Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) +- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) I/O ^^^ @@ -403,6 +405,7 @@ Reshaping - Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`) - Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`) - :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`). +- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) Numeric ^^^^^^^ @@ -421,5 +424,3 @@ Categorical Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) -- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) -- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 53f58660cabdb..c0f234a36803d 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -489,7 +489,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): # we are going to offset back to unix / epoch time try: - offset = tslib.Timestamp(origin) - tslib.Timestamp(0) + offset = tslib.Timestamp(origin) except tslib.OutOfBoundsDatetime: raise tslib.OutOfBoundsDatetime( "origin {origin} is Out of Bounds".format(origin=origin)) @@ -497,6 +497,11 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): raise ValueError("origin {origin} cannot be converted " "to a Timestamp".format(origin=origin)) + if offset.tz is not None: + raise ValueError( + "origin offset {} must be tz-naive".format(offset)) + offset -= tslib.Timestamp(0) + # convert the offset to the unit of the arg # this should be lossless in terms of precision offset = offset // tslib.Timedelta(1, unit=unit) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 9764b65d330af..50669ee357bbd 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1589,6 +1589,12 @@ def test_invalid_origins(self, origin, exc, units, units_from_epochs): pd.to_datetime(units_from_epochs, unit=units, origin=origin) + def test_invalid_origins_tzinfo(self): + # GH16842 + with pytest.raises(ValueError): + pd.to_datetime(1, unit='D', + origin=datetime(2000, 1, 1, tzinfo=pytz.utc)) + def test_processing_order(self): # make sure we handle out-of-bounds *before* # constructing the dates From 7be53ed4c21b5d85dc055833c2e27e3a13a73e4e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 25 Aug 2017 13:29:57 -0700 Subject: [PATCH 107/141] Replace usage of total_seconds compat func with timedelta method (#17289) --- 
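A quick sanity sketch of the replacement (values are arbitrary, for illustration only): `datetime.timedelta.total_seconds`, available in the stdlib since Python 2.7, computes exactly what the deleted C helper did.

    from datetime import timedelta

    td = timedelta(days=1, seconds=30, microseconds=500)

    # same arithmetic the removed helper performed in C:
    # (microseconds + (seconds + days * 24 * 3600) * 1000000.0) / 1000000.0
    by_hand = (td.microseconds
               + (td.seconds + td.days * 24 * 3600) * 1000000.0) / 1000000.0

    assert td.total_seconds() == by_hand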
pandas/_libs/period.pyx | 7 ++--- pandas/_libs/src/datetime_helper.h | 36 ----------------------- pandas/_libs/src/ujson/python/objToJSON.c | 22 +++++++++++++- pandas/_libs/tslib.pyx | 28 +++++++----------- pandas/io/pytables.py | 2 +- pandas/tseries/offsets.py | 6 ++-- setup.py | 2 -- 7 files changed, 38 insertions(+), 65 deletions(-) delete mode 100644 pandas/_libs/src/datetime_helper.h diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index a1d04fea89151..816b7ebfff86d 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -10,9 +10,6 @@ from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA) import numpy as np -cdef extern from "datetime_helper.h": - double total_seconds(object) - from libc.stdlib cimport free from pandas import compat @@ -552,7 +549,7 @@ cdef _reso_local(ndarray[int64_t] stamps, object tz): &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 pandas_datetime_to_datetimestruct(stamps[i] + delta, PANDAS_FR_ns, &dts) curr_reso = _reso_stamp(&dts) @@ -619,7 +616,7 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 pandas_datetime_to_datetimestruct(stamps[i] + delta, PANDAS_FR_ns, &dts) result[i] = get_period_ordinal(dts.year, dts.month, dts.day, diff --git a/pandas/_libs/src/datetime_helper.h b/pandas/_libs/src/datetime_helper.h deleted file mode 100644 index 8023285f85b9b..0000000000000 --- a/pandas/_libs/src/datetime_helper.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. -*/ - -#ifndef PANDAS__LIBS_SRC_DATETIME_HELPER_H_ -#define PANDAS__LIBS_SRC_DATETIME_HELPER_H_ - -#include -#include "datetime.h" -#include "numpy/arrayobject.h" -#include "numpy/arrayscalars.h" - -npy_int64 get_long_attr(PyObject *o, const char *attr) { - npy_int64 long_val; - PyObject *value = PyObject_GetAttrString(o, attr); - long_val = (PyLong_Check(value) ? 
- PyLong_AsLongLong(value) : PyInt_AS_LONG(value)); - Py_DECREF(value); - return long_val; -} - -npy_float64 total_seconds(PyObject *td) { - // Python 2.6 compat - npy_int64 microseconds = get_long_attr(td, "microseconds"); - npy_int64 seconds = get_long_attr(td, "seconds"); - npy_int64 days = get_long_attr(td, "days"); - npy_int64 days_in_seconds = days * 24LL * 3600LL; - return (microseconds + (seconds + days_in_seconds) * 1000000.0) / 1000000.0; -} - -#endif // PANDAS__LIBS_SRC_DATETIME_HELPER_H_ diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index f2c0b18d35131..4beaa3fd449df 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -47,9 +47,9 @@ Numeric decoder derived from from TCL library #include // NOLINT(build/include_order) #include // NOLINT(build/include_order) #include // NOLINT(build/include_order) -#include // NOLINT(build/include_order) #include // NOLINT(build/include_order) #include // NOLINT(build/include_order) +#include "datetime.h" static PyObject *type_decimal; @@ -329,6 +329,26 @@ static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { return ret; } +npy_int64 get_long_attr(PyObject *o, const char *attr) { + npy_int64 long_val; + PyObject *value = PyObject_GetAttrString(o, attr); + long_val = (PyLong_Check(value) ? + PyLong_AsLongLong(value) : PyInt_AS_LONG(value)); + Py_DECREF(value); + return long_val; +} + +npy_float64 total_seconds(PyObject *td) { + // Python 2.6 compat + // TODO(anyone): remove this legacy workaround with a more + // direct td.total_seconds() + npy_int64 microseconds = get_long_attr(td, "microseconds"); + npy_int64 seconds = get_long_attr(td, "seconds"); + npy_int64 days = get_long_attr(td, "days"); + npy_int64 days_in_seconds = days * 24LL * 3600LL; + return (microseconds + (seconds + days_in_seconds) * 1000000.0) / 1000000.0; +} + static PyObject *get_item(PyObject *obj, Py_ssize_t i) { PyObject *tmp = PyInt_FromSsize_t(i); PyObject *ret; diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c4a38ec660a4c..b5aca2e3ec309 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -26,9 +26,6 @@ from cpython cimport ( cdef extern from "Python.h": cdef PyTypeObject *Py_TYPE(object) -cdef extern from "datetime_helper.h": - double total_seconds(object) - # this is our datetime.pxd from libc.stdlib cimport free @@ -1639,7 +1636,7 @@ cdef inline void _localize_tso(_TSObject obj, object tz): pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts) dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, obj.dts.min, obj.dts.sec, obj.dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 if obj.value != NPY_NAT: pandas_datetime_to_datetimestruct(obj.value + delta, PANDAS_FR_ns, &obj.dts) @@ -4136,7 +4133,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz1) - delta = (int(total_seconds(_get_utcoffset(tz1, dt))) + delta = (int(_get_utcoffset(tz1, dt).total_seconds()) * 1000000000) utc_dates[i] = v - delta else: @@ -4176,8 +4173,8 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz2) - delta = 
int(total_seconds( - _get_utcoffset(tz2, dt))) * 1000000000 + delta = (int(_get_utcoffset(tz2, dt).total_seconds()) + * 1000000000) result[i] = v + delta return result @@ -4243,7 +4240,7 @@ def tz_convert_single(int64_t val, object tz1, object tz2): pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz1) - delta = int(total_seconds(_get_utcoffset(tz1, dt))) * 1000000000 + delta = int(_get_utcoffset(tz1, dt).total_seconds()) * 1000000000 utc_date = val - delta elif _get_zone(tz1) != 'UTC': trans, deltas, typ = _get_dst_info(tz1) @@ -4261,7 +4258,7 @@ def tz_convert_single(int64_t val, object tz1, object tz2): pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz2) - delta = int(total_seconds(_get_utcoffset(tz2, dt))) * 1000000000 + delta = int(_get_utcoffset(tz2, dt).total_seconds()) * 1000000000 return utc_date + delta # Convert UTC to other timezone @@ -4333,7 +4330,7 @@ cdef object _get_dst_info(object tz): """ cache_key = _tz_cache_key(tz) if cache_key is None: - num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000 + num = int(_get_utcoffset(tz, None).total_seconds()) * 1000000000 return (np.array([NPY_NAT + 1], dtype=np.int64), np.array([num], dtype=np.int64), None) @@ -4380,7 +4377,7 @@ cdef object _get_dst_info(object tz): else: # static tzinfo trans = np.array([NPY_NAT + 1], dtype=np.int64) - num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000 + num = int(_get_utcoffset(tz, None).total_seconds()) * 1000000000 deltas = np.array([num], dtype=np.int64) typ = 'static' @@ -4403,9 +4400,6 @@ cdef object _get_utc_trans_times_from_dateutil_tz(object tz): return new_trans -def tot_seconds(td): - return total_seconds(td) - cpdef ndarray _unbox_utcoffsets(object transinfo): cdef: Py_ssize_t i, sz @@ -4415,7 +4409,7 @@ cpdef ndarray _unbox_utcoffsets(object transinfo): arr = np.empty(sz, dtype='i8') for i in range(sz): - arr[i] = int(total_seconds(transinfo[i][0])) * 1000000000 + arr[i] = int(transinfo[i][0].total_seconds()) * 1000000000 return arr @@ -4458,7 +4452,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 result[i] = v - delta return result @@ -5181,7 +5175,7 @@ cdef _normalize_local(ndarray[int64_t] stamps, object tz): pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 pandas_datetime_to_datetimestruct(stamps[i] + delta, PANDAS_FR_ns, &dts) result[i] = _normalized_stamp(&dts) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 82c80a13372d7..712e9e9903f0a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4381,7 +4381,7 @@ def _get_tz(tz): """ for a tz-aware type, return an encoded zone """ zone = tslib.get_timezone(tz) if zone is None: - zone = tslib.tot_seconds(tz.utcoffset()) + zone = tz.utcoffset().total_seconds() return zone diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 29cdda5548896..7ccecaa84e6d6 
100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -778,12 +778,12 @@ def _get_business_hours_by_sec(self): # create dummy datetime to calcurate businesshours in a day dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute) until = datetime(2014, 4, 1, self.end.hour, self.end.minute) - return tslib.tot_seconds(until - dtstart) + return (until - dtstart).total_seconds() else: self.daytime = False dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute) until = datetime(2014, 4, 2, self.end.hour, self.end.minute) - return tslib.tot_seconds(until - dtstart) + return (until - dtstart).total_seconds() @apply_wraps def rollback(self, dt): @@ -907,7 +907,7 @@ def _onOffset(self, dt, businesshours): op = self._prev_opening_time(dt) else: op = self._next_opening_time(dt) - span = tslib.tot_seconds(dt - op) + span = (dt - op).total_seconds() if span <= businesshours: return True else: diff --git a/setup.py b/setup.py index 04a5684c20fcd..444db5bc4d275 100755 --- a/setup.py +++ b/setup.py @@ -467,7 +467,6 @@ def pxd(name): tseries_depends = ['pandas/_libs/src/datetime/np_datetime.h', 'pandas/_libs/src/datetime/np_datetime_strings.h', - 'pandas/_libs/src/datetime_helper.h', 'pandas/_libs/src/period_helper.h', 'pandas/_libs/src/datetime.pxd'] @@ -597,7 +596,6 @@ def pxd(name): ujson_ext = Extension('pandas._libs.json', depends=['pandas/_libs/src/ujson/lib/ultrajson.h', - 'pandas/_libs/src/datetime_helper.h', 'pandas/_libs/src/numpy_helper.h'], sources=['pandas/_libs/src/ujson/python/ujson.c', 'pandas/_libs/src/ujson/python/objToJSON.c', From f4adbb9c53c7917c34529e0bbeea307923d12f2b Mon Sep 17 00:00:00 2001 From: cbertinato Date: Mon, 28 Aug 2017 09:58:05 -0400 Subject: [PATCH 108/141] CLN: replace %s syntax with .format in core/indexing.py (#17357) Progress toward issue #16130. Converted old string formatting to new string formatting in core/indexing.py. 
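For reference, a minimal sketch of the conversion pattern applied here, using one of the touched messages (`valid_types` is a stand-in value for `self._valid_types`, chosen only for illustration):

    valid_types = 'integer, integer slice'  # stand-in value

    # before: positional %-interpolation
    old = "Location based indexing can only have [%s] types" % valid_types

    # after: named str.format fields
    new = ("Location based indexing can only have "
           "[{types}] types".format(types=valid_types))

    assert old == new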
--- pandas/core/indexing.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6b9ad5cd2d93b..b7a51afcedabf 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -188,8 +188,9 @@ def _has_valid_tuple(self, key): if i >= self.obj.ndim: raise IndexingError('Too many indexers') if not self._has_valid_type(k, i): - raise ValueError("Location based indexing can only have [%s] " - "types" % self._valid_types) + raise ValueError("Location based indexing can only have " + "[{types}] types" + .format(types=self._valid_types)) def _should_validate_iterable(self, axis=0): """ return a boolean whether this axes needs validation for a passed @@ -263,11 +264,11 @@ def _has_valid_positional_setitem_indexer(self, indexer): pass elif is_integer(i): if i >= len(ax): - raise IndexError("{0} cannot enlarge its target object" - .format(self.name)) + raise IndexError("{name} cannot enlarge its target " + "object".format(name=self.name)) elif isinstance(i, dict): - raise IndexError("{0} cannot enlarge its target object" - .format(self.name)) + raise IndexError("{name} cannot enlarge its target object" + .format(name=self.name)) return True @@ -1235,7 +1236,8 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): mask = check == -1 if mask.any(): - raise KeyError('%s not in index' % objarr[mask]) + raise KeyError('{mask} not in index' + .format(mask=objarr[mask])) return _values_from_object(indexer) @@ -1421,8 +1423,9 @@ def _has_valid_type(self, key, axis): if (not is_iterator(key) and len(key) and np.all(ax.get_indexer_for(key) < 0)): - raise KeyError("None of [%s] are in the [%s]" % - (key, self.obj._get_axis_name(axis))) + raise KeyError(u"None of [{key}] are in the [{axis}]" + .format(key=key, + axis=self.obj._get_axis_name(axis))) return True @@ -1432,8 +1435,9 @@ def error(): if isna(key): raise TypeError("cannot use label indexing with a null " "key") - raise KeyError("the label [%s] is not in the [%s]" % - (key, self.obj._get_axis_name(axis))) + raise KeyError(u"the label [{key}] is not in the [{axis}]" + .format(key=key, + axis=self.obj._get_axis_name(axis))) try: key = self._convert_scalar_indexer(key, axis) From b1b33254361965b60222589f4b801a885ea6762e Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 28 Aug 2017 07:05:29 -0700 Subject: [PATCH 109/141] DOC: Point to dev-docs in issue template (#17353) [ci skip] --- .github/ISSUE_TEMPLATE.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 237e61487d13a..e33835c462511 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -12,6 +12,12 @@ **Note**: Many problems can be resolved by simply upgrading `pandas` to the latest version. Before submitting, please check if that solution works for you. If possible, you may want to check if `master` addresses this issue, but that is not necessary. +For documentation-related issues, you can check the latest versions of the docs on `master` here: + +https://pandas-docs.github.io/pandas-docs-travis/ + +If the issue has not been resolved there, go ahead and file it in the issue tracker. 
+ #### Expected Output #### Output of ``pd.show_versions()`` From 76cc924813c0cf27ce591f27cf30ba2cac6f0d16 Mon Sep 17 00:00:00 2001 From: chris-b1 Date: Tue, 29 Aug 2017 05:06:29 -0500 Subject: [PATCH 110/141] CLN: remove total_seconds compat from json (#17341) --- pandas/_libs/src/ujson/python/objToJSON.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 4beaa3fd449df..1ee862b54cf0b 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -329,7 +329,7 @@ static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { return ret; } -npy_int64 get_long_attr(PyObject *o, const char *attr) { +static npy_int64 get_long_attr(PyObject *o, const char *attr) { npy_int64 long_val; PyObject *value = PyObject_GetAttrString(o, attr); long_val = (PyLong_Check(value) ? @@ -338,15 +338,12 @@ npy_int64 get_long_attr(PyObject *o, const char *attr) { return long_val; } -npy_float64 total_seconds(PyObject *td) { - // Python 2.6 compat - // TODO(anyone): remove this legacy workaround with a more - // direct td.total_seconds() - npy_int64 microseconds = get_long_attr(td, "microseconds"); - npy_int64 seconds = get_long_attr(td, "seconds"); - npy_int64 days = get_long_attr(td, "days"); - npy_int64 days_in_seconds = days * 24LL * 3600LL; - return (microseconds + (seconds + days_in_seconds) * 1000000.0) / 1000000.0; +static npy_float64 total_seconds(PyObject *td) { + npy_float64 double_val; + PyObject *value = PyObject_CallMethod(td, "total_seconds", NULL); + double_val = PyFloat_AS_DOUBLE(value); + Py_DECREF(value); + return double_val; } static PyObject *get_item(PyObject *obj, Py_ssize_t i) { From 0309daeb778c0704bda7fa597135d33b3e1914ed Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 29 Aug 2017 04:10:15 -0600 Subject: [PATCH 111/141] CLN: Move test_intersect_str_dates (#17366) Moves test_intersect_str_dates from tests/indexes/test_range.py to tests/indexes/test_base.py. 
--- pandas/tests/indexes/test_base.py | 9 +++++++++ pandas/tests/indexes/test_range.py | 9 --------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index ef36e4a91aa1c..07e98c326bcaa 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -663,6 +663,15 @@ def test_intersection(self): intersect = first.intersection(second) assert intersect.name is None + def test_intersect_str_dates(self): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + i1 = Index(dt_dates, dtype=object) + i2 = Index(['aa'], dtype=object) + res = i2.intersection(i1) + + assert len(res) == 0 + def test_union(self): first = self.strIndex[5:20] second = self.strIndex[:10] diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 566354da4870d..5ecf467b57fc5 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -639,15 +639,6 @@ def test_intersection(self): expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) - def test_intersect_str_dates(self): - dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] - - i1 = Index(dt_dates, dtype=object) - i2 = Index(['aa'], dtype=object) - res = i2.intersection(i1) - - assert len(res) == 0 - def test_union_noncomparable(self): from datetime import datetime, timedelta # corner case, non-Int64Index From c523bfcfb91f6c9775af33c0a809dadd12ab80c1 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 29 Aug 2017 05:52:51 -0700 Subject: [PATCH 112/141] BUG: Respect dups in reindexing CategoricalIndex (#17355) Even when the indexer is identical to the index's elements, we should still return duplicates when the indexer contains duplicates. Closes gh-17323.
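A minimal sketch of the behavior under test, mirroring the new test below (the index contents are illustrative):

    import numpy as np
    import pandas as pd

    ci = pd.CategoricalIndex(list("aabbca"), categories=list("cab"))
    oidx = pd.Index(np.array(ci))

    # the finder matches the index element-wise but contains duplicates,
    # so the unique fast path must not be taken
    finder = list("aabbca")
    expected = oidx.get_indexer_non_unique(finder)[0]

    np.testing.assert_array_equal(ci.get_indexer(finder), expected)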
--- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/category.py | 2 +- pandas/tests/indexes/test_category.py | 22 +++++++++++++++++----- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 7748f437347a6..f67afbe5882e0 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -359,6 +359,7 @@ Indexing - Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`) - Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) - Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) +- Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) I/O ^^^ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index f22407308e094..0681202289311 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -487,7 +487,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) target = ibase._ensure_index(target) - if self.equals(target): + if self.is_unique and self.equals(target): return np.arange(len(self), dtype='intp') if method == 'pad' or method == 'backfill': diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 64bd6df361aeb..05d31af57b36c 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -365,18 +365,18 @@ def test_astype(self): tm.assert_index_equal(result, expected) def test_reindex_base(self): - - # determined by cat ordering - idx = self.create_index() + # Determined by cat ordering. + idx = CategoricalIndex(list("cab"), categories=list("cab")) expected = np.arange(len(idx), dtype=np.intp) actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) - with tm.assert_raises_regex(ValueError, 'Invalid fill method'): - idx.get_indexer(idx, method='invalid') + with tm.assert_raises_regex(ValueError, "Invalid fill method"): + idx.get_indexer(idx, method="invalid") def test_reindexing(self): + np.random.seed(123456789) ci = self.create_index() oidx = Index(np.array(ci)) @@ -388,6 +388,18 @@ def test_reindexing(self): actual = ci.get_indexer(finder) tm.assert_numpy_array_equal(expected, actual) + # see gh-17323 + # + # Even when indexer is equal to the + # members in the index, we should + # respect duplicates instead of taking + # the fast-track path. 
+ for finder in [list("aabbca"), list("aababca")]: + expected = oidx.get_indexer_non_unique(finder)[0] + + actual = ci.get_indexer(finder) + tm.assert_numpy_array_equal(expected, actual) + def test_reindex_dtype(self): c = CategoricalIndex(['a', 'b', 'c', 'a']) res, indexer = c.reindex(['a', 'c']) From 5a6f2ac325544587ca360e0277befce37137d225 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 29 Aug 2017 06:23:38 -0700 Subject: [PATCH 113/141] Unify Index._dir_* with Series implementation (#17117) --- pandas/core/accessor.py | 35 +++++++++++++++++++++++++++++++++++ pandas/core/base.py | 22 +++------------------- pandas/core/generic.py | 5 +++-- pandas/core/indexes/base.py | 9 +++++++-- pandas/core/series.py | 17 +++-------------- pandas/core/strings.py | 20 ++------------------ 6 files changed, 53 insertions(+), 55 deletions(-) create mode 100644 pandas/core/accessor.py diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py new file mode 100644 index 0000000000000..9f8556d1e6961 --- /dev/null +++ b/pandas/core/accessor.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +""" + +accessor.py contains base classes for implementing accessor properties +that can be mixed into or pinned onto other pandas classes. + +""" + + +class DirNamesMixin(object): + _accessors = frozenset([]) + + def _dir_deletions(self): + """ delete unwanted __dir__ for this object """ + return self._accessors + + def _dir_additions(self): + """ add additional __dir__ for this object """ + rv = set() + for accessor in self._accessors: + try: + getattr(self, accessor) + rv.add(accessor) + except AttributeError: + pass + return rv + + def __dir__(self): + """ + Provide method name lookup and completion Only provide 'public' methods + """ + rv = set(dir(type(self))) + rv = (rv - self._dir_deletions()) | self._dir_additions() + return sorted(rv) diff --git a/pandas/core/base.py b/pandas/core/base.py index a7c991dc8d257..d60a8515dc920 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -19,6 +19,7 @@ from pandas.util._decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError +from pandas.core.accessor import DirNamesMixin _shared_docs = dict() _indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='', @@ -73,7 +74,7 @@ def __repr__(self): return str(self) -class PandasObject(StringMixin): +class PandasObject(StringMixin, DirNamesMixin): """baseclass for various pandas objects""" @@ -92,23 +93,6 @@ def __unicode__(self): # Should be overwritten by base classes return object.__repr__(self) - def _dir_additions(self): - """ add addtional __dir__ for this object """ - return set() - - def _dir_deletions(self): - """ delete unwanted __dir__ for this object """ - return set() - - def __dir__(self): - """ - Provide method name lookup and completion - Only provide 'public' methods - """ - rv = set(dir(type(self))) - rv = (rv - self._dir_deletions()) | self._dir_additions() - return sorted(rv) - def _reset_cache(self, key=None): """ Reset cached properties. If ``key`` is passed, only clears that key. @@ -141,7 +125,7 @@ class NoNewAttributesMixin(object): Prevents additional attributes via xxx.attribute = "something" after a call to `self.__freeze()`. Mainly used to prevent the user from using - wrong attrirbutes on a accessor (`Series.cat/.str/.dt`). + wrong attributes on an accessor (`Series.cat/.str/.dt`). If you really want to add a new attribute at a later time, you need to use `object.__setattr__(self, key, value)`. 
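To see how the relocated helpers behave, here is a minimal sketch of the mixin contract (the Demo class is hypothetical, written only for illustration):

    from pandas.core.accessor import DirNamesMixin

    class Demo(DirNamesMixin):
        _accessors = frozenset(['str'])

        @property
        def str(self):
            # Mimics an accessor that is invalid for this object's dtype.
            raise AttributeError("accessor unavailable")

    # _dir_deletions() removes 'str' from dir(type(self)); _dir_additions()
    # re-adds it only if getattr() succeeds, so the broken accessor is
    # hidden from tab completion.
    print('str' in dir(Demo()))  # False

Hoisting this logic into DirNamesMixin lets Index reuse the implementation that was previously duplicated on PandasObject (removed above) and on Series (removed later in this patch).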
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f8366c804e3e7..cdb08d8887e05 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -192,8 +192,9 @@ def __unicode__(self): def _dir_additions(self): """ add the string-like attributes from the info_axis """ - return set([c for c in self._info_axis - if isinstance(c, string_types) and isidentifier(c)]) + additions = set([c for c in self._info_axis + if isinstance(c, string_types) and isidentifier(c)]) + return super(NDFrame, self)._dir_additions().union(additions) @property def _constructor_sliced(self): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a21e6df3ffc93..31cf1e48b8529 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -56,7 +56,7 @@ import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing from pandas.core.ops import _comp_method_OBJECT_ARRAY -from pandas.core.strings import StringAccessorMixin +from pandas.core import strings from pandas.core.config import get_option @@ -102,7 +102,7 @@ def _new_Index(cls, d): return cls.__new__(cls, **d) -class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): +class Index(IndexOpsMixin, PandasObject): """ Immutable ndarray implementing an ordered, sliceable set. The basic object storing axis labels for all pandas objects @@ -155,6 +155,11 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): _engine_type = libindex.ObjectEngine + _accessors = frozenset(['str']) + + # String Methods + str = base.AccessorProperty(strings.StringMethods) + def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): diff --git a/pandas/core/series.py b/pandas/core/series.py index 75dc3d6403650..6905fc1aced74 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -114,8 +114,7 @@ def wrapper(self): # Series class -class Series(base.IndexOpsMixin, strings.StringAccessorMixin, - generic.NDFrame,): +class Series(base.IndexOpsMixin, generic.NDFrame): """ One-dimensional ndarray with axis labels (including time series). 
@@ -2923,18 +2922,8 @@ def to_period(self, freq=None, copy=True): # Categorical methods cat = base.AccessorProperty(CategoricalAccessor) - def _dir_deletions(self): - return self._accessors - - def _dir_additions(self): - rv = set() - for accessor in self._accessors: - try: - getattr(self, accessor) - rv.add(accessor) - except AttributeError: - pass - return rv + # String Methods + str = base.AccessorProperty(strings.StringMethods) # ---------------------------------------------------------------------- # Add plotting methods to Series diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 0b1db0277eee3..2f95e510bba5e 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -16,7 +16,7 @@ from pandas.core.algorithms import take_1d import pandas.compat as compat -from pandas.core.base import AccessorProperty, NoNewAttributesMixin +from pandas.core.base import NoNewAttributesMixin from pandas.util._decorators import Appender import re import pandas._libs.lib as lib @@ -1920,20 +1920,4 @@ def _make_accessor(cls, data): message = ("Can only use .str accessor with Index, not " "MultiIndex") raise AttributeError(message) - return StringMethods(data) - - -class StringAccessorMixin(object): - """ Mixin to add a `.str` acessor to the class.""" - - str = AccessorProperty(StringMethods) - - def _dir_additions(self): - return set() - - def _dir_deletions(self): - try: - getattr(self, 'str') - except AttributeError: - return set(['str']) - return set() + return cls(data) From ce8ccba4a4393cde5822b0de0face430fdd8fd1f Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 29 Aug 2017 19:03:17 +0200 Subject: [PATCH 114/141] BUG: make order of index from pd.concat deterministic (#17364) closes #17344 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/common.py | 14 ++++++++++++++ pandas/core/indexes/api.py | 9 ++------- pandas/tests/reshape/test_concat.py | 13 ++++++++++++- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f67afbe5882e0..226dd99101a06 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -407,6 +407,7 @@ Reshaping - Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`) - :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`). - Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) +- Bug in :func:`concat` where order of result index was unpredictable if it contained non-comparable elements (:issue:`17344`) Numeric ^^^^^^^ diff --git a/pandas/core/common.py b/pandas/core/common.py index 44cb36b8a3207..515a401096120 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -629,3 +629,17 @@ def _random_state(state=None): else: raise ValueError("random_state must be an integer, a numpy " "RandomState, or None") + + +def _get_distinct_objs(objs): + """ + Return a list with distinct elements of "objs" (different ids). + Preserves order. 
+ """ + ids = set() + res = [] + for obj in objs: + if not id(obj) in ids: + ids.add(id(obj)) + res.append(obj) + return res diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index db73a6878258a..323d50166e7b6 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -23,8 +23,7 @@ 'PeriodIndex', 'DatetimeIndex', '_new_Index', 'NaT', '_ensure_index', '_get_na_value', '_get_combined_index', - '_get_objs_combined_axis', - '_get_distinct_indexes', '_union_indexes', + '_get_objs_combined_axis', '_union_indexes', '_get_consensus_names', '_all_indexes_same'] @@ -41,7 +40,7 @@ def _get_objs_combined_axis(objs, intersect=False, axis=0): def _get_combined_index(indexes, intersect=False): # TODO: handle index names! - indexes = _get_distinct_indexes(indexes) + indexes = com._get_distinct_objs(indexes) if len(indexes) == 0: return Index([]) if len(indexes) == 1: @@ -55,10 +54,6 @@ def _get_combined_index(indexes, intersect=False): return _ensure_index(union) -def _get_distinct_indexes(indexes): - return list(dict((id(x), x) for x in indexes).values()) - - def _union_indexes(indexes): if len(indexes) == 0: raise AssertionError('Must have at least 1 Index to union') diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 52cd18126859a..6e646f9b29442 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -5,7 +5,7 @@ from numpy.random import randn from datetime import datetime -from pandas.compat import StringIO, iteritems +from pandas.compat import StringIO, iteritems, PY2 import pandas as pd from pandas import (DataFrame, concat, read_csv, isna, Series, date_range, @@ -1944,6 +1944,17 @@ def test_concat_categoricalindex(self): index=exp_idx) tm.assert_frame_equal(result, exp) + def test_concat_order(self): + # GH 17344 + dfs = [pd.DataFrame(index=range(3), columns=['a', 1, None])] + dfs += [pd.DataFrame(index=range(3), columns=[None, 1, 'a']) + for i in range(100)] + result = pd.concat(dfs).columns + expected = dfs[0].columns + if PY2: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) @pytest.mark.parametrize('dt', np.sctypes['float']) From a585e0994aaf727d30663623e4d9c347d82ead6b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 29 Aug 2017 10:04:07 -0700 Subject: [PATCH 115/141] Fix typo that causes several NaT methods to have incorrect docstrings (#17327) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/_libs/tslib.pyx | 7 ++++--- pandas/tests/scalar/test_nat.py | 5 +++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 226dd99101a06..60090ab5b82ef 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -426,3 +426,4 @@ Categorical Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) +- Several ``NaT`` method docstrings (e.g. 
:func:`NaT.ctime`) were incorrect (:issue:`17327`) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b5aca2e3ec309..5dd30072fb7aa 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # cython: profile=False import warnings @@ -3922,7 +3923,7 @@ for _method_name in _nat_methods: def f(*args, **kwargs): return NaT f.__name__ = func_name - f.__doc__ = _get_docstring(_method_name) + f.__doc__ = _get_docstring(func_name) return f setattr(NaTType, _method_name, _make_nat_func(_method_name)) @@ -3934,7 +3935,7 @@ for _method_name in _nan_methods: def f(*args, **kwargs): return np.nan f.__name__ = func_name - f.__doc__ = _get_docstring(_method_name) + f.__doc__ = _get_docstring(func_name) return f setattr(NaTType, _method_name, _make_nan_func(_method_name)) @@ -3952,7 +3953,7 @@ for _maybe_method_name in dir(NaTType): def f(*args, **kwargs): raise ValueError("NaTType does not support " + func_name) f.__name__ = func_name - f.__doc__ = _get_docstring(_method_name) + f.__doc__ = _get_docstring(func_name) return f setattr(NaTType, _maybe_method_name, diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 5f247cae1099b..6f852f2b394e1 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -247,3 +247,8 @@ def test_nat_arithmetic_index(): tm.assert_index_equal(right + left, exp) tm.assert_index_equal(left - right, exp) tm.assert_index_equal(right - left, exp) + + +def test_nat_pinned_docstrings(): + # GH17327 + assert NaT.ctime.__doc__ == datetime.ctime.__doc__ From 8199559d374a987506aaf7c1bba35f772da6d30a Mon Sep 17 00:00:00 2001 From: cbertinato Date: Wed, 30 Aug 2017 06:19:44 -0400 Subject: [PATCH 116/141] CLN: replace %s syntax with .format in io/formats/format.py (#17358) Progress toward issue #16130. Converted old string formatting to new string formatting in io/formats/format.py. 
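For reviewers unfamiliar with the pattern, the conversion is mechanical; a standalone sketch (not taken from the diff):

    n = 4
    old = 'Length: %d' % n                      # old %-style formatting
    new = 'Length: {length}'.format(length=n)   # new str.format style
    assert old == new

    # Literal braces must now be doubled, which is why the LaTeX-heavy
    # lines below read like '\\begin{{tabular}}{{{fmt}}}'.format(fmt=...).
    assert '\\begin{{tabular}}{{{fmt}}}'.format(fmt='lr') == '\\begin{tabular}{lr}'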
--- pandas/io/formats/format.py | 165 ++++++++++++++++++++---------------- 1 file changed, 93 insertions(+), 72 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 733fd3bd39b52..6a98497aa1bfe 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -47,6 +47,7 @@ import itertools import csv +from functools import partial common_docstring = """ Parameters @@ -109,7 +110,7 @@ def _get_footer(self): if self.length: if footer: footer += ', ' - footer += "Length: %d" % len(self.categorical) + footer += "Length: {length}".format(length=len(self.categorical)) level_info = self.categorical._repr_categories_info() @@ -135,7 +136,7 @@ def to_string(self): fmt_values = self._get_formatted_values() - result = ['%s' % i for i in fmt_values] + result = [u('{i}').format(i=i) for i in fmt_values] result = [i.strip() for i in result] result = u(', ').join(result) result = [u('[') + result + u(']')] @@ -191,7 +192,7 @@ def _get_footer(self): footer = u('') if getattr(self.series.index, 'freq', None) is not None: - footer += 'Freq: %s' % self.series.index.freqstr + footer += 'Freq: {freq}'.format(freq=self.series.index.freqstr) if self.name is not False and name is not None: if footer: @@ -199,20 +200,21 @@ def _get_footer(self): series_name = pprint_thing(name, escape_chars=('\t', '\r', '\n')) - footer += ("Name: %s" % series_name) if name is not None else "" + footer += ((u"Name: {sname}".format(sname=series_name)) + if name is not None else "") if (self.length is True or (self.length == 'truncate' and self.truncate_v)): if footer: footer += ', ' - footer += 'Length: %d' % len(self.series) + footer += 'Length: {length}'.format(length=len(self.series)) if self.dtype is not False and self.dtype is not None: name = getattr(self.tr_series.dtype, 'name', None) if name: if footer: footer += ', ' - footer += 'dtype: %s' % pprint_thing(name) + footer += u'dtype: {typ}'.format(typ=pprint_thing(name)) # level infos are added to the end and in a new line, like it is done # for Categoricals @@ -509,8 +511,10 @@ def _to_str_columns(self): else: if is_list_like(self.header): if len(self.header) != len(self.columns): - raise ValueError(('Writing %d cols but got %d aliases' - % (len(self.columns), len(self.header)))) + raise ValueError(('Writing {ncols} cols but got {nalias} ' + 'aliases' + .format(ncols=len(self.columns), + nalias=len(self.header)))) str_columns = [[label] for label in self.header] else: str_columns = self._get_formatted_column_labels(frame) @@ -578,10 +582,10 @@ def to_string(self): frame = self.frame if len(frame.columns) == 0 or len(frame.index) == 0: - info_line = (u('Empty %s\nColumns: %s\nIndex: %s') % - (type(self.frame).__name__, - pprint_thing(frame.columns), - pprint_thing(frame.index))) + info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}') + .format(name=type(self.frame).__name__, + col=pprint_thing(frame.columns), + idx=pprint_thing(frame.index))) text = info_line else: @@ -630,8 +634,8 @@ def to_string(self): self.buf.writelines(text) if self.should_show_dimensions: - self.buf.write("\n\n[%d rows x %d columns]" % - (len(frame), len(frame.columns))) + self.buf.write("\n\n[{nrows} rows x {ncols} columns]" + .format(nrows=len(frame), ncols=len(frame.columns))) def _join_multiline(self, *strcols): lwidth = self.line_width @@ -805,7 +809,8 @@ def _get_formatted_index(self, frame): # empty space for columns if show_col_names: - col_header = ['%s' % x for x in self._get_column_name_list()] + col_header = ['{x}'.format(x=x) + for x 
in self._get_column_name_list()] else: col_header = [''] * columns.nlevels @@ -861,9 +866,10 @@ def write_result(self, buf): # string representation of the columns if len(self.frame.columns) == 0 or len(self.frame.index) == 0: - info_line = (u('Empty %s\nColumns: %s\nIndex: %s') % - (type(self.frame).__name__, self.frame.columns, - self.frame.index)) + info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}') + .format(name=type(self.frame).__name__, + col=self.frame.columns, + idx=self.frame.index)) strcols = [[info_line]] else: strcols = self.fmt._to_str_columns() @@ -906,14 +912,16 @@ def get_col_type(dtype): column_format = index_format + column_format elif not isinstance(column_format, compat.string_types): # pragma: no cover - raise AssertionError('column_format must be str or unicode, not %s' - % type(column_format)) + raise AssertionError('column_format must be str or unicode, ' + 'not {typ}'.format(typ=type(column_format))) if not self.longtable: - buf.write('\\begin{tabular}{%s}\n' % column_format) + buf.write('\\begin{{tabular}}{{{fmt}}}\n' + .format(fmt=column_format)) buf.write('\\toprule\n') else: - buf.write('\\begin{longtable}{%s}\n' % column_format) + buf.write('\\begin{{longtable}}{{{fmt}}}\n' + .format(fmt=column_format)) buf.write('\\toprule\n') ilevels = self.frame.index.nlevels @@ -948,7 +956,7 @@ def get_col_type(dtype): crow = [x if x else '{}' for x in row] if self.bold_rows and self.fmt.index: # bold row labels - crow = ['\\textbf{%s}' % x + crow = ['\\textbf{{{x}}}'.format(x=x) if j < ilevels and x.strip() not in ['', '{}'] else x for j, x in enumerate(crow)] if i < clevels and self.fmt.header and self.multicolumn: @@ -986,9 +994,9 @@ def _format_multicolumn(self, row, ilevels): def append_col(): # write multicolumn if needed if ncol > 1: - row2.append('\\multicolumn{{{0:d}}}{{{1:s}}}{{{2:s}}}' - .format(ncol, self.multicolumn_format, - coltext.strip())) + row2.append('\\multicolumn{{{ncol:d}}}{{{fmt:s}}}{{{txt:s}}}' + .format(ncol=ncol, fmt=self.multicolumn_format, + txt=coltext.strip())) # don't modify where not needed else: row2.append(coltext) @@ -1027,8 +1035,8 @@ def _format_multirow(self, row, ilevels, i, rows): break if nrow > 1: # overwrite non-multirow entry - row[j] = '\\multirow{{{0:d}}}{{*}}{{{1:s}}}'.format( - nrow, row[j].strip()) + row[j] = '\\multirow{{{nrow:d}}}{{*}}{{{row:s}}}'.format( + nrow=nrow, row=row[j].strip()) # save when to end the current block with \cline self.clinebuf.append([i + nrow - 1, j + 1]) return row @@ -1039,7 +1047,8 @@ def _print_cline(self, buf, i, icol): """ for cl in self.clinebuf: if cl[0] == i: - buf.write('\cline{{{0:d}-{1:d}}}\n'.format(cl[1], icol)) + buf.write('\cline{{{cl:d}-{icol:d}}}\n' + .format(cl=cl[1], icol=icol)) # remove entries that have been written to buffer self.clinebuf = [x for x in self.clinebuf if x[0] != i] @@ -1076,7 +1085,8 @@ def write(self, s, indent=0): def write_th(self, s, indent=0, tags=None): if self.fmt.col_space is not None and self.fmt.col_space > 0: tags = (tags or "") - tags += 'style="min-width: %s;"' % self.fmt.col_space + tags += ('style="min-width: {colspace};"' + .format(colspace=self.fmt.col_space)) return self._write_cell(s, kind='th', indent=indent, tags=tags) @@ -1085,9 +1095,9 @@ def write_td(self, s, indent=0, tags=None): def _write_cell(self, s, kind='td', indent=0, tags=None): if tags is not None: - start_tag = '<%s %s>' % (kind, tags) + start_tag = '<{kind} {tags}>'.format(kind=kind, tags=tags) else: - start_tag = '<%s>' % kind + start_tag = 
'<{kind}>'.format(kind=kind) if self.escape: # escape & first to prevent double escaping of & @@ -1096,7 +1106,8 @@ def _write_cell(self, s, kind='td', indent=0, tags=None): else: esc = {} rs = pprint_thing(s, escape_chars=esc).strip() - self.write('%s%s' % (start_tag, rs, kind), indent) + self.write(u'{start}{rs}' + .format(start=start_tag, rs=rs, kind=kind), indent) def write_tr(self, line, indent=0, indent_delta=4, header=False, align=None, tags=None, nindex_levels=0): @@ -1106,7 +1117,8 @@ def write_tr(self, line, indent=0, indent_delta=4, header=False, if align is None: self.write('', indent) else: - self.write('' % align, indent) + self.write('' + .format(align=align), indent) indent += indent_delta for i, s in enumerate(line): @@ -1146,8 +1158,8 @@ def write_result(self, buf): if isinstance(self.classes, str): self.classes = self.classes.split() if not isinstance(self.classes, (list, tuple)): - raise AssertionError('classes must be list or tuple, ' - 'not %s' % type(self.classes)) + raise AssertionError('classes must be list or tuple, not {typ}' + .format(typ=type(self.classes))) _classes.extend(self.classes) if self.notebook: @@ -1159,12 +1171,11 @@ def write_result(self, buf): except (ImportError, AttributeError): pass - self.write(''.format(div_style)) + self.write(''.format(style=div_style)) self.write_style() - self.write('
`` rows and ``<td>`` elements within each ``<tr>`` or ``<th>``
- element in the table. ``<td>`` stands for "table data". + element in the table. ``<td>`` stands for "table data". This function + attempts to properly handle ``colspan`` and ``rowspan`` attributes. + If the function has a ``<thead>`` argument, it is used to construct + the header, otherwise the function attempts to find the header within + the body (putting rows with only ``<th>
`` elements into the header). + + .. versionadded:: 0.21.0 Similar to :func:`~pandas.read_csv` the `header` argument is applied **after** `skiprows` is applied. diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 6fc080c8d9090..c016106dfcc80 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -385,7 +385,7 @@ def test_thousands_macau_stats(self): attrs={'class': 'style1'}) df = dfs[all_non_nan_table_index] - assert not any(s.isna().any() for _, s in df.iteritems()) + assert not any(s.isnull().any() for _, s in df.iteritems()) @pytest.mark.slow def test_thousands_macau_index_col(self): @@ -394,7 +394,7 @@ def test_thousands_macau_index_col(self): dfs = self.read_html(macau_data, index_col=0, header=0) df = dfs[all_non_nan_table_index] - assert not any(s.isna().any() for _, s in df.iteritems()) + assert not any(s.isnull().any() for _, s in df.iteritems()) def test_empty_tables(self): """ @@ -640,6 +640,118 @@ def test_different_number_of_rows(self): res = self.read_html(out, index_col=0)[0] tm.assert_frame_equal(expected, res) + def test_colspan_rowspan_are_1(self):
+        expected = """
+        <table>
+            <thead>
+                <tr>
+                    <th>X</th>
+                    <th>Y</th>
+                    <th>Z</th>
+                    <th>W</th>
+                </tr>
+            </thead>
+        </table>
+        """
+        out = """
+        <table>
+            <thead>
+                <tr>
+                    <th colspan="1">X</th>
+                    <th rowspan="1">Y</th>
+                    <th rowspan="1" colspan="1">Z</th>
+                    <th>W</th>
+                </tr>
+            </thead>
+        </table>
+        """
+        expected = self.read_html(expected)[0]
+        res = self.read_html(out)[0]
+        tm.assert_frame_equal(expected, res)
+
+    def test_colspan_rowspan_are_more_than_1(self):
+        expected = """
+        <table>
+            <thead>
+                <tr>
+                    <th>X</th>
+                    <th>X</th>
+                    <th>Y</th>
+                    <th>Z</th>
+                    <th>W</th>
+                </tr>
+            </thead>
+            <tr>
+                <td>1</td>
+                <td>2</td>
+                <td>2</td>
+                <td>3</td>
+            </tr>
+        </table>
+        """
+        out = """
+        <table>
+            <thead>
+                <tr>
+                    <th colspan="2">X</th>
+                    <th>Y</th>
+                    <th>Z</th>
+                    <th>W</th>
+                </tr>
+            </thead>
+            <tr>
+                <td>1</td>
+                <td colspan="2">2</td>
+                <td>3</td>
+            </tr>
+        </table>
+        """
+        expected = self.read_html(expected)[0]
+        res = self.read_html(out)[0]
+        tm.assert_frame_equal(expected, res)
+
+    def test_header_should_be_inferred_from_th_elements(self):
+        expected = """
+        <table>
+            <thead>
+                <tr>
+                    <th>X</th>
+                    <th>X</th>
+                    <th>Y</th>
+                    <th>Z</th>
+                    <th>W</th>
+                </tr>
+            </thead>
+            <tr>
+                <td>1</td>
+                <td>2</td>
+                <td>3</td>
+                <td>4</td>
+                <td>5</td>
+            </tr>
+        </table>
+        """
+        out = """
+        <table>
+            <tr>
+                <th>X</th>
+                <th>X</th>
+                <th>Y</th>
+                <th>Z</th>
+                <th>W</th>
+            </tr>
+            <tr>
+                <td>1</td>
+                <td>2</td>
+                <td>3</td>
+                <td>4</td>
+                <td>5</td>
+            </tr>
+        </table>
""" + expected = self.read_html(expected)[0] # header is explicit + res = self.read_html(out)[0] # infer header + tm.assert_frame_equal(expected, res) + res2 = self.read_html(out, header=0)[0] # manually set header + tm.assert_frame_equal(expected, res2) + def test_parse_dates_list(self): df = DataFrame({'date': date_range('1/1/2001', periods=10)}) expected = df.to_html() @@ -657,14 +769,6 @@ def test_parse_dates_combine(self): newdf = DataFrame({'datetime': raw_dates}) tm.assert_frame_equal(newdf, res[0]) - def test_computer_sales_page(self): - data = os.path.join(DATA_PATH, 'computer_sales_page.html') - with tm.assert_raises_regex(ParserError, - r"Passed header=\[0,1\] are " - r"too many rows for this " - r"multi_index of columns"): - self.read_html(data, header=[0, 1]) - def test_wikipedia_states_table(self): data = os.path.join(DATA_PATH, 'wikipedia_states.html') assert os.path.isfile(data), '%r is not a file' % data @@ -891,7 +995,7 @@ def test_computer_sales_page(self): def test_invalid_flavor(): url = 'google.com' with pytest.raises(ValueError): - read_html(url, 'google', flavor='not a* valid**++ flaver') + read_html(url, 'google', flavor='not a* valid**++ flavor') def get_elements_from_file(url, element='table'): @@ -939,6 +1043,7 @@ def test_same_ordering(): class ErrorThread(threading.Thread): + def run(self): try: super(ErrorThread, self).run() From 1aacf1736bc71b67c9e64d84bc40a7982b1b0f34 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 26 Jul 2017 16:43:49 -0700 Subject: [PATCH 016/141] TST: Check more error messages in tests (#17075) --- pandas/tests/frame/test_validate.py | 51 +++++---- pandas/tests/indexing/test_interval.py | 4 +- pandas/tests/io/msgpack/test_except.py | 21 ++-- pandas/tests/io/msgpack/test_limits.py | 29 +++-- pandas/tests/io/msgpack/test_sequnpack.py | 28 +++-- pandas/tests/io/sas/test_sas.py | 12 ++- pandas/tests/scalar/test_interval.py | 122 ++++++++++++---------- pandas/tests/series/test_validate.py | 39 ++++--- 8 files changed, 167 insertions(+), 139 deletions(-) diff --git a/pandas/tests/frame/test_validate.py b/pandas/tests/frame/test_validate.py index d6065e6042908..2de0e866f6e70 100644 --- a/pandas/tests/frame/test_validate.py +++ b/pandas/tests/frame/test_validate.py @@ -1,34 +1,33 @@ from pandas.core.frame import DataFrame import pytest +import pandas.util.testing as tm -class TestDataFrameValidate(object): - """Tests for error handling related to data types of method arguments.""" - df = DataFrame({'a': [1, 2], 'b': [3, 4]}) - - def test_validate_bool_args(self): - # Tests for error handling related to boolean arguments. 
- invalid_values = [1, "True", [1, 2, 3], 5.0] - - for value in invalid_values: - with pytest.raises(ValueError): - self.df.query('a > b', inplace=value) - - with pytest.raises(ValueError): - self.df.eval('a + b', inplace=value) +@pytest.fixture +def dataframe(): + return DataFrame({'a': [1, 2], 'b': [3, 4]}) - with pytest.raises(ValueError): - self.df.set_index(keys=['a'], inplace=value) - with pytest.raises(ValueError): - self.df.reset_index(inplace=value) - - with pytest.raises(ValueError): - self.df.dropna(inplace=value) - - with pytest.raises(ValueError): - self.df.drop_duplicates(inplace=value) +class TestDataFrameValidate(object): + """Tests for error handling related to data types of method arguments.""" - with pytest.raises(ValueError): - self.df.sort_values(by=['a'], inplace=value) + @pytest.mark.parametrize("func", ["query", "eval", "set_index", + "reset_index", "dropna", + "drop_duplicates", "sort_values"]) + @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, dataframe, func, inplace): + msg = "For argument \"inplace\" expected type bool" + kwargs = dict(inplace=inplace) + + if func == "query": + kwargs["expr"] = "a > b" + elif func == "eval": + kwargs["expr"] = "a + b" + elif func == "set_index": + kwargs["keys"] = ["a"] + elif func == "sort_values": + kwargs["by"] = ["a"] + + with tm.assert_raises_regex(ValueError, msg): + getattr(dataframe, func)(**kwargs) diff --git a/pandas/tests/indexing/test_interval.py b/pandas/tests/indexing/test_interval.py index 2552fc066cc87..be6e5e1cffb2e 100644 --- a/pandas/tests/indexing/test_interval.py +++ b/pandas/tests/indexing/test_interval.py @@ -109,10 +109,10 @@ def test_with_slices(self): # slice of interval with pytest.raises(NotImplementedError): - result = s.loc[Interval(3, 6):] + s.loc[Interval(3, 6):] with pytest.raises(NotImplementedError): - result = s[Interval(3, 6):] + s[Interval(3, 6):] expected = s.iloc[3:5] result = s[[Interval(3, 6)]] diff --git a/pandas/tests/io/msgpack/test_except.py b/pandas/tests/io/msgpack/test_except.py index 6246e0777daee..5a803c5eba34b 100644 --- a/pandas/tests/io/msgpack/test_except.py +++ b/pandas/tests/io/msgpack/test_except.py @@ -1,9 +1,11 @@ # coding: utf-8 -import pytest - +from datetime import datetime from pandas.io.msgpack import packb, unpackb +import pytest +import pandas.util.testing as tm + class DummyException(Exception): pass @@ -12,12 +14,13 @@ class DummyException(Exception): class TestExceptions(object): def test_raise_on_find_unsupported_value(self): - import datetime - pytest.raises(TypeError, packb, datetime.datetime.now()) + msg = "can\'t serialize datetime" + with tm.assert_raises_regex(TypeError, msg): + packb(datetime.now()) def test_raise_from_object_hook(self): - def hook(obj): - raise DummyException + def hook(_): + raise DummyException() pytest.raises(DummyException, unpackb, packb({}), object_hook=hook) pytest.raises(DummyException, unpackb, packb({'fizz': 'buzz'}), @@ -30,5 +33,7 @@ def hook(obj): packb({'fizz': {'buzz': 'spam'}}), object_pairs_hook=hook) - def test_invalidvalue(self): - pytest.raises(ValueError, unpackb, b'\xd9\x97#DL_') + def test_invalid_value(self): + msg = "Unpack failed: error" + with tm.assert_raises_regex(ValueError, msg): + unpackb(b"\xd9\x97#DL_") diff --git a/pandas/tests/io/msgpack/test_limits.py b/pandas/tests/io/msgpack/test_limits.py index 07044dbb7e5de..e4abd4ddb8d13 100644 --- a/pandas/tests/io/msgpack/test_limits.py +++ b/pandas/tests/io/msgpack/test_limits.py @@ -1,10 +1,10 @@ # 
coding: utf-8 from __future__ import (absolute_import, division, print_function, unicode_literals) +from pandas.io.msgpack import packb, unpackb, Packer, Unpacker, ExtType import pytest - -from pandas.io.msgpack import packb, unpackb, Packer, Unpacker, ExtType +import pandas.util.testing as tm class TestLimits(object): @@ -39,7 +39,10 @@ def test_max_str_len(self): unpacker = Unpacker(max_str_len=2, encoding='utf-8') unpacker.feed(packed) - pytest.raises(ValueError, unpacker.unpack) + + msg = "3 exceeds max_str_len" + with tm.assert_raises_regex(ValueError, msg): + unpacker.unpack() def test_max_bin_len(self): d = b'x' * 3 @@ -51,7 +54,10 @@ def test_max_bin_len(self): unpacker = Unpacker(max_bin_len=2) unpacker.feed(packed) - pytest.raises(ValueError, unpacker.unpack) + + msg = "3 exceeds max_bin_len" + with tm.assert_raises_regex(ValueError, msg): + unpacker.unpack() def test_max_array_len(self): d = [1, 2, 3] @@ -63,7 +69,10 @@ def test_max_array_len(self): unpacker = Unpacker(max_array_len=2) unpacker.feed(packed) - pytest.raises(ValueError, unpacker.unpack) + + msg = "3 exceeds max_array_len" + with tm.assert_raises_regex(ValueError, msg): + unpacker.unpack() def test_max_map_len(self): d = {1: 2, 3: 4, 5: 6} @@ -75,7 +84,10 @@ def test_max_map_len(self): unpacker = Unpacker(max_map_len=2) unpacker.feed(packed) - pytest.raises(ValueError, unpacker.unpack) + + msg = "3 exceeds max_map_len" + with tm.assert_raises_regex(ValueError, msg): + unpacker.unpack() def test_max_ext_len(self): d = ExtType(42, b"abc") @@ -87,4 +99,7 @@ def test_max_ext_len(self): unpacker = Unpacker(max_ext_len=2) unpacker.feed(packed) - pytest.raises(ValueError, unpacker.unpack) + + msg = "4 exceeds max_ext_len" + with tm.assert_raises_regex(ValueError, msg): + unpacker.unpack() diff --git a/pandas/tests/io/msgpack/test_sequnpack.py b/pandas/tests/io/msgpack/test_sequnpack.py index 1178176c2c557..dc6fc5ef916b4 100644 --- a/pandas/tests/io/msgpack/test_sequnpack.py +++ b/pandas/tests/io/msgpack/test_sequnpack.py @@ -1,28 +1,26 @@ # coding: utf-8 -import pytest - from pandas import compat from pandas.io.msgpack import Unpacker, BufferFull from pandas.io.msgpack import OutOfData +import pytest +import pandas.util.testing as tm + class TestPack(object): - def test_partialdata(self): + def test_partial_data(self): unpacker = Unpacker() - unpacker.feed(b'\xa5') - pytest.raises(StopIteration, next, iter(unpacker)) - unpacker.feed(b'h') - pytest.raises(StopIteration, next, iter(unpacker)) - unpacker.feed(b'a') - pytest.raises(StopIteration, next, iter(unpacker)) - unpacker.feed(b'l') - pytest.raises(StopIteration, next, iter(unpacker)) - unpacker.feed(b'l') - pytest.raises(StopIteration, next, iter(unpacker)) - unpacker.feed(b'o') - assert next(iter(unpacker)) == b'hallo' + msg = "No more data to unpack" + + for data in [b"\xa5", b"h", b"a", b"l", b"l"]: + unpacker.feed(data) + with tm.assert_raises_regex(StopIteration, msg): + next(iter(unpacker)) + + unpacker.feed(b"o") + assert next(iter(unpacker)) == b"hallo" def test_foobar(self): unpacker = Unpacker(read_size=3, use_list=1) diff --git a/pandas/tests/io/sas/test_sas.py b/pandas/tests/io/sas/test_sas.py index 617df99b99f0b..b85f6b6bbd5ce 100644 --- a/pandas/tests/io/sas/test_sas.py +++ b/pandas/tests/io/sas/test_sas.py @@ -1,14 +1,16 @@ -import pytest - from pandas.compat import StringIO from pandas import read_sas +import pandas.util.testing as tm + class TestSas(object): def test_sas_buffer_format(self): - - # GH14947 + # see gh-14947 b = StringIO("") - with 
pytest.raises(ValueError): + + msg = ("If this is a buffer object rather than a string " + "name, you must specify a format string") + with tm.assert_raises_regex(ValueError, msg): read_sas(b) diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/test_interval.py index e06f7cb34eb52..d431db0b4ca4f 100644 --- a/pandas/tests/scalar/test_interval.py +++ b/pandas/tests/scalar/test_interval.py @@ -1,42 +1,49 @@ from __future__ import division -import pytest from pandas import Interval + +import pytest import pandas.util.testing as tm +@pytest.fixture +def interval(): + return Interval(0, 1) + + class TestInterval(object): - def setup_method(self, method): - self.interval = Interval(0, 1) - def test_properties(self): - assert self.interval.closed == 'right' - assert self.interval.left == 0 - assert self.interval.right == 1 - assert self.interval.mid == 0.5 + def test_properties(self, interval): + assert interval.closed == 'right' + assert interval.left == 0 + assert interval.right == 1 + assert interval.mid == 0.5 - def test_repr(self): - assert repr(self.interval) == "Interval(0, 1, closed='right')" - assert str(self.interval) == "(0, 1]" + def test_repr(self, interval): + assert repr(interval) == "Interval(0, 1, closed='right')" + assert str(interval) == "(0, 1]" interval_left = Interval(0, 1, closed='left') assert repr(interval_left) == "Interval(0, 1, closed='left')" assert str(interval_left) == "[0, 1)" - def test_contains(self): - assert 0.5 in self.interval - assert 1 in self.interval - assert 0 not in self.interval - pytest.raises(TypeError, lambda: self.interval in self.interval) - - interval = Interval(0, 1, closed='both') - assert 0 in interval + def test_contains(self, interval): + assert 0.5 in interval assert 1 in interval - - interval = Interval(0, 1, closed='neither') assert 0 not in interval - assert 0.5 in interval - assert 1 not in interval + + msg = "__contains__ not defined for two intervals" + with tm.assert_raises_regex(TypeError, msg): + interval in interval + + interval_both = Interval(0, 1, closed='both') + assert 0 in interval_both + assert 1 in interval_both + + interval_neither = Interval(0, 1, closed='neither') + assert 0 not in interval_neither + assert 0.5 in interval_neither + assert 1 not in interval_neither def test_equal(self): assert Interval(0, 1) == Interval(0, 1, closed='right') @@ -54,74 +61,79 @@ def test_comparison(self): assert Interval(0, 1) > Interval(-1, 2) assert Interval(0, 1) >= Interval(0, 1) - def test_hash(self): + def test_hash(self, interval): # should not raise - hash(self.interval) + hash(interval) - def test_math_add(self): + def test_math_add(self, interval): expected = Interval(1, 2) - actual = self.interval + 1 + actual = interval + 1 assert expected == actual expected = Interval(1, 2) - actual = 1 + self.interval + actual = 1 + interval assert expected == actual - actual = self.interval + actual = interval actual += 1 assert expected == actual - with pytest.raises(TypeError): - self.interval + Interval(1, 2) + msg = "unsupported operand type\(s\) for \+" + with tm.assert_raises_regex(TypeError, msg): + interval + Interval(1, 2) - with pytest.raises(TypeError): - self.interval + 'foo' + with tm.assert_raises_regex(TypeError, msg): + interval + 'foo' - def test_math_sub(self): + def test_math_sub(self, interval): expected = Interval(-1, 0) - actual = self.interval - 1 + actual = interval - 1 assert expected == actual - actual = self.interval + actual = interval actual -= 1 assert expected == actual - with 
pytest.raises(TypeError): - self.interval - Interval(1, 2) + msg = "unsupported operand type\(s\) for -" + with tm.assert_raises_regex(TypeError, msg): + interval - Interval(1, 2) - with pytest.raises(TypeError): - self.interval - 'foo' + with tm.assert_raises_regex(TypeError, msg): + interval - 'foo' - def test_math_mult(self): + def test_math_mult(self, interval): expected = Interval(0, 2) - actual = self.interval * 2 + actual = interval * 2 assert expected == actual expected = Interval(0, 2) - actual = 2 * self.interval + actual = 2 * interval assert expected == actual - actual = self.interval + actual = interval actual *= 2 assert expected == actual - with pytest.raises(TypeError): - self.interval * Interval(1, 2) + msg = "unsupported operand type\(s\) for \*" + with tm.assert_raises_regex(TypeError, msg): + interval * Interval(1, 2) - with pytest.raises(TypeError): - self.interval * 'foo' + msg = "can\'t multiply sequence by non-int" + with tm.assert_raises_regex(TypeError, msg): + interval * 'foo' - def test_math_div(self): + def test_math_div(self, interval): expected = Interval(0, 0.5) - actual = self.interval / 2.0 + actual = interval / 2.0 assert expected == actual - actual = self.interval + actual = interval actual /= 2.0 assert expected == actual - with pytest.raises(TypeError): - self.interval / Interval(1, 2) + msg = "unsupported operand type\(s\) for /" + with tm.assert_raises_regex(TypeError, msg): + interval / Interval(1, 2) - with pytest.raises(TypeError): - self.interval / 'foo' + with tm.assert_raises_regex(TypeError, msg): + interval / 'foo' diff --git a/pandas/tests/series/test_validate.py b/pandas/tests/series/test_validate.py index 134fa0a38f618..a0cde5f81d021 100644 --- a/pandas/tests/series/test_validate.py +++ b/pandas/tests/series/test_validate.py @@ -1,30 +1,27 @@ -import pytest from pandas.core.series import Series +import pytest +import pandas.util.testing as tm -class TestSeriesValidate(object): - """Tests for error handling related to data types of method arguments.""" - s = Series([1, 2, 3, 4, 5]) - - def test_validate_bool_args(self): - # Tests for error handling related to boolean arguments. 
- invalid_values = [1, "True", [1, 2, 3], 5.0] - for value in invalid_values: - with pytest.raises(ValueError): - self.s.reset_index(inplace=value) +@pytest.fixture +def series(): + return Series([1, 2, 3, 4, 5]) - with pytest.raises(ValueError): - self.s._set_name(name='hello', inplace=value) - with pytest.raises(ValueError): - self.s.sort_values(inplace=value) +class TestSeriesValidate(object): + """Tests for error handling related to data types of method arguments.""" - with pytest.raises(ValueError): - self.s.sort_index(inplace=value) + @pytest.mark.parametrize("func", ["reset_index", "_set_name", + "sort_values", "sort_index", + "rename", "dropna"]) + @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, series, func, inplace): + msg = "For argument \"inplace\" expected type bool" + kwargs = dict(inplace=inplace) - with pytest.raises(ValueError): - self.s.rename(inplace=value) + if func == "_set_name": + kwargs["name"] = "hello" - with pytest.raises(ValueError): - self.s.dropna(inplace=value) + with tm.assert_raises_regex(ValueError, msg): + getattr(series, func)(**kwargs) From 359890fe7b3995f6054330af4e26103ada8ec3d3 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Wed, 26 Jul 2017 19:49:19 -0400 Subject: [PATCH 017/141] BUG: Respect dtype when calling pivot_table with margins=True closes #17013 This fix actually exposed an occurrence of #17035 in an existing test (as well as in one I added). Author: Pietro Battiston Closes #17062 from toobaz/pivot_margin_int and squashes the following commits: 2737600 [Pietro Battiston] Removed now obsolete workaround 956c4f9 [Pietro Battiston] BUG: respect dtype when calling pivot_table with margins=True --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/reshape/pivot.py | 14 ++++---- pandas/tests/reshape/test_pivot.py | 58 ++++++++++++++++++++++++++---- 3 files changed, 59 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4b2dad30bdd05..64873efc0ec19 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -312,6 +312,7 @@ Reshaping - Bug in merging with categorical dtypes with datetimelikes incorrectly raised a ``TypeError`` (:issue:`16900`) - Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`) - Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) +- Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`) Numeric ^^^^^^^ diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index c2fb81178433e..fe525eb0a3c87 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -178,7 +178,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', data = data[data.notna().all(axis=1)] table = _add_margins(table, data, values, rows=index, cols=columns, aggfunc=aggfunc, - margins_name=margins_name) + margins_name=margins_name, fill_value=fill_value) # discard the top level if values_passed and not values_multi and not table.empty and \ @@ -199,7 +199,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', def _add_margins(table, data, values, rows, cols, aggfunc, - margins_name='All'): + margins_name='All', fill_value=None): if not isinstance(margins_name, compat.string_types): raise ValueError('margins_name argument must be 
a string') @@ -240,8 +240,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, if not isinstance(marginal_result_set, tuple): return marginal_result_set result, margin_keys, row_margin = marginal_result_set - - row_margin = row_margin.reindex(result.columns) + row_margin = row_margin.reindex(result.columns, fill_value=fill_value) # populate grand margin for k in margin_keys: if isinstance(k, compat.string_types): @@ -253,6 +252,9 @@ def _add_margins(table, data, values, rows, cols, aggfunc, row_names = result.index.names try: + for dtype in set(result.dtypes): + cols = result.select_dtypes([dtype]).columns + margin_dummy[cols] = margin_dummy[cols].astype(dtype) result = result.append(margin_dummy) except TypeError: @@ -524,10 +526,6 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, margins=margins, margins_name=margins_name, dropna=dropna, **kwargs) - # GH 17013: - if values is None and margins: - table = table.fillna(0).astype(np.int64) - # Post-process if normalize is not False: table = _normalize(table, normalize=normalize, margins=margins, diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 5e5852ac5381d..ee6c32cd0a208 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -459,6 +459,41 @@ def _check_output(result, values_col, index=['A', 'B'], tm.assert_frame_equal(result['SALARY'], expected['SALARY']) + def test_margins_dtype(self): + # GH 17013 + + df = self.data.copy() + df[['D', 'E', 'F']] = np.arange(len(df) * 3).reshape(len(df), 3) + + mi_val = list(product(['bar', 'foo'], ['one', 'two'])) + [('All', '')] + mi = MultiIndex.from_tuples(mi_val, names=('A', 'B')) + expected = DataFrame({'dull': [12, 21, 3, 9, 45], + 'shiny': [33, 0, 36, 51, 120]}, + index=mi).rename_axis('C', axis=1) + expected['All'] = expected['dull'] + expected['shiny'] + + result = df.pivot_table(values='D', index=['A', 'B'], + columns='C', margins=True, + aggfunc=np.sum, fill_value=0) + + tm.assert_frame_equal(expected, result) + + @pytest.mark.xfail(reason='GH 17035 (len of floats is casted back to ' + 'floats)') + def test_margins_dtype_len(self): + mi_val = list(product(['bar', 'foo'], ['one', 'two'])) + [('All', '')] + mi = MultiIndex.from_tuples(mi_val, names=('A', 'B')) + expected = DataFrame({'dull': [1, 1, 2, 1, 5], + 'shiny': [2, 0, 2, 2, 6]}, + index=mi).rename_axis('C', axis=1) + expected['All'] = expected['dull'] + expected['shiny'] + + result = self.data.pivot_table(values='D', index=['A', 'B'], + columns='C', margins=True, + aggfunc=len, fill_value=0) + + tm.assert_frame_equal(expected, result) + def test_pivot_integer_columns(self): # caused by upstream bug in unstack @@ -894,6 +929,8 @@ def test_pivot_table_margins_name_with_aggfunc_list(self): expected = pd.DataFrame(table.values, index=ix, columns=cols) tm.assert_frame_equal(table, expected) + @pytest.mark.xfail(reason='GH 17035 (np.mean of ints is casted back to ' + 'ints)') def test_categorical_margins(self): # GH 10989 df = pd.DataFrame({'x': np.arange(8), @@ -904,14 +941,23 @@ def test_categorical_margins(self): expected.index = Index([0, 1, 'All'], name='y') expected.columns = Index([0, 1, 'All'], name='z') - data = df.copy() - table = data.pivot_table('x', 'y', 'z', margins=True) + table = df.pivot_table('x', 'y', 'z', margins=True) tm.assert_frame_equal(table, expected) - data = df.copy() - data.y = data.y.astype('category') - data.z = data.z.astype('category') - table = data.pivot_table('x', 'y', 'z', margins=True) + 
@pytest.mark.xfail(reason='GH 17035 (np.mean of ints is casted back to ' + 'ints)') + def test_categorical_margins_category(self): + df = pd.DataFrame({'x': np.arange(8), + 'y': np.arange(8) // 4, + 'z': np.arange(8) % 2}) + + expected = pd.DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) + expected.index = Index([0, 1, 'All'], name='y') + expected.columns = Index([0, 1, 'All'], name='z') + + df.y = df.y.astype('category') + df.z = df.z.astype('category') + table = df.pivot_table('x', 'y', 'z', margins=True) tm.assert_frame_equal(table, expected) def test_categorical_aggfunc(self): From 3fd26121c7e8cd9532471e395c6fa13f217f311a Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 26 Jul 2017 22:30:56 -0700 Subject: [PATCH 018/141] MAINT: Add missing space in parsers.pyx "2< heuristic" --> "2 < heuristic" --- pandas/_libs/parsers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 543a943aea311..29ff9c401df48 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -563,7 +563,7 @@ cdef class TextReader: # compute buffer_lines as function of table width heuristic = 2**20 // self.table_width self.buffer_lines = 1 - while self.buffer_lines * 2< heuristic: + while self.buffer_lines * 2 < heuristic: self.buffer_lines *= 2 def __init__(self, *args, **kwargs): From 76249bf96df738543f30d244495972fbdf484e6a Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 26 Jul 2017 22:48:56 -0700 Subject: [PATCH 019/141] MAINT: Add missing paren around print statement Stray verbose print statement in parsers.pyx was bare without any parentheses. --- pandas/_libs/parsers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 29ff9c401df48..077c355e785a3 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1013,7 +1013,7 @@ cdef class TextReader: cdef _end_clock(self, what): if self.verbose: elapsed = time.time() - self.clocks.pop(-1) - print '%s took: %.2f ms' % (what, elapsed * 1000) + print('%s took: %.2f ms' % (what, elapsed * 1000)) def set_noconvert(self, i): self.noconvert.add(i) From 77d16d4f2716dfe97e13ce70b938d7a50d1eee5d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 27 Jul 2017 05:57:20 -0400 Subject: [PATCH 020/141] DOC: fix typos in missing.rst xref #16972 --- doc/source/missing_data.rst | 12 ++++++------ pandas/core/frame.py | 2 +- pandas/core/generic.py | 4 ++-- pandas/core/indexes/base.py | 4 ++-- pandas/core/series.py | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index e40b7d460fef8..5c10df25051a2 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -36,7 +36,7 @@ When / why does data become missing? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Some might quibble over our usage of *missing*. By "missing" we simply mean -**na** or "not present for whatever reason". Many data sets simply arrive with +**NA** or "not present for whatever reason". Many data sets simply arrive with missing data, either because it exists and was not collected or it never existed. For example, in a collection of financial time series, some of the time series might start on different dates. Thus, values prior to the start date @@ -63,12 +63,12 @@ to handling missing data. 
While ``NaN`` is the default missing value marker for reasons of computational speed and convenience, we need to be able to easily detect this value with data of different types: floating point, integer, boolean, and general object. In many cases, however, the Python ``None`` will -arise and we wish to also consider that "missing" or "na". +arise and we wish to also consider that "missing" or "not available" or "NA". .. note:: Prior to version v0.10.0 ``inf`` and ``-inf`` were also - considered to be "na" in computations. This is no longer the case by + considered to be "NA" in computations. This is no longer the case by default; use the ``mode.use_inf_as_na`` option to recover it. .. _missing.isna: @@ -206,7 +206,7 @@ with missing data. Filling missing values: fillna ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The **fillna** function can "fill in" NA values with non-na data in a couple +The **fillna** function can "fill in" NA values with non-NA data in a couple of ways, which we illustrate: **Replace NA with a scalar value** @@ -220,7 +220,7 @@ of ways, which we illustrate: **Fill gaps forward or backward** Using the same filling arguments as :ref:`reindexing `, we -can propagate non-na values forward or backward: +can propagate non-NA values forward or backward: .. ipython:: python @@ -540,7 +540,7 @@ String/Regular Expression Replacement `__ if this is unclear. -Replace the '.' with ``nan`` (str -> str) +Replace the '.' with ``NaN`` (str -> str) .. ipython:: python diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6c72fa648559a..e546e96f253c7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3213,7 +3213,7 @@ def isna(self): def isnull(self): return super(DataFrame, self).isnull() - @Appender(_shared_docs['isna'] % _shared_doc_kwargs) + @Appender(_shared_docs['notna'] % _shared_doc_kwargs) def notna(self): return super(DataFrame, self).notna() diff --git a/pandas/core/generic.py b/pandas/core/generic.py index abccd76b2fbcb..fbd26655798bd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4534,7 +4534,7 @@ def asof(self, where, subset=None): # Action Methods _shared_docs['isna'] = """ - Return a boolean same-sized object indicating if the values are na. + Return a boolean same-sized object indicating if the values are NA. See Also -------- @@ -4553,7 +4553,7 @@ def isnull(self): _shared_docs['notna'] = """ Return a boolean same-sized object indicating if the values are - not na. + not NA. 
See Also -------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index fd9abcfb726bf..411428e001c81 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1852,7 +1852,7 @@ def isna(self): Returns ------- - a boolean array of whether my values are na + a boolean array of whether my values are NA See also -------- @@ -1870,7 +1870,7 @@ def notna(self): Returns ------- - a boolean array of whether my values are not na + a boolean array of whether my values are not NA See also -------- diff --git a/pandas/core/series.py b/pandas/core/series.py index fb5819b2748a0..60d268c89a9d7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2792,7 +2792,7 @@ def isna(self): def isnull(self): return super(Series, self).isnull() - @Appender(generic._shared_docs['isna'] % _shared_doc_kwargs) + @Appender(generic._shared_docs['notna'] % _shared_doc_kwargs) def notna(self): return super(Series, self).notna() From bd50a4fdc0bd731a78c6a424b502fc04f6a15a99 Mon Sep 17 00:00:00 2001 From: John Owens Date: Thu, 27 Jul 2017 08:29:43 -0700 Subject: [PATCH 021/141] in python 3, lambdas no longer take tuples as args. thanks pep 3113. --- pandas/io/html.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index b2561c519f71f..38da7bb7ef4ce 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -468,10 +468,10 @@ def _expand_colspan_rowspan(self, rows, fill_rowspan=True): # maybe this can be done with a list comprehension, dunno cols = zip( list(flatten( - lmap(lambda (text, nc): [text] * nc, + lmap(lambda text_nc: [text_nc[0]] * text_nc[1], zip(cols_text, col_colspans)))), list(flatten( - lmap(lambda (nc, nr): [nr] * nc, + lmap(lambda nc_nr: [nc_nr[1]] * nc_nr[0], zip(col_colspans, col_rowspans)))) ) # cols is now a list of (text, number of rows) From 452e08d72a26368e62d8ff2c6ca16eca6fb7184c Mon Sep 17 00:00:00 2001 From: John Owens Date: Thu, 27 Jul 2017 09:52:41 -0700 Subject: [PATCH 022/141] fixing lint error --- pandas/tests/io/test_html.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index c016106dfcc80..66dc353ce7065 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -28,7 +28,6 @@ from pandas.io.common import URLError, urlopen, file_path_to_url import pandas.io.html from pandas.io.html import read_html -from pandas._libs.parsers import ParserError import pandas.util.testing as tm from pandas.util.testing import makeCustomDataframe as mkdf, network From ecfaa4cefe6e1bbd451a5eaeebf00b81faa49b73 Mon Sep 17 00:00:00 2001 From: John Owens Date: Thu, 27 Jul 2017 10:08:43 -0700 Subject: [PATCH 023/141] in python3, zip does not return a list, so list(zip(...)) --- pandas/io/html.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 38da7bb7ef4ce..9467a32fe670e 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -466,13 +466,13 @@ def _expand_colspan_rowspan(self, rows, fill_rowspan=True): for col in extracted_row] # expand cols using col_colspans # maybe this can be done with a list comprehension, dunno - cols = zip( + cols = list(zip( list(flatten( lmap(lambda text_nc: [text_nc[0]] * text_nc[1], - zip(cols_text, col_colspans)))), + list(zip(cols_text, col_colspans))))), list(flatten( lmap(lambda nc_nr: [nc_nr[1]] * nc_nr[0], - zip(col_colspans, col_rowspans)))) + list(zip(col_colspans, col_rowspans)))))) ) # cols is now a list of (text, 
number of rows) # now insert any previous rowspans From 69cd83cec11093c3553abf279bebf8ad2b33fc0a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 29 Jul 2017 23:58:03 +0200 Subject: [PATCH 024/141] DOC: further clean-up null/na changes (#17113) --- doc/source/basics.rst | 4 +-- doc/source/io.rst | 5 ---- doc/source/missing_data.rst | 2 +- doc/source/whatsnew/v0.10.0.txt | 46 +++++++++++++++++++++++++++------ doc/source/whatsnew/v0.4.x.txt | 2 +- pandas/core/config_init.py | 4 +-- 6 files changed, 44 insertions(+), 19 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index c8138d795b836..fe20a7eb2b786 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -511,7 +511,7 @@ optional ``level`` parameter which applies only if the object has a :header: "Function", "Description" :widths: 20, 80 - ``count``, Number of non-na observations + ``count``, Number of non-NA observations ``sum``, Sum of values ``mean``, Mean of values ``mad``, Mean absolute deviation @@ -541,7 +541,7 @@ will exclude NAs on Series input by default: np.mean(df['one'].values) ``Series`` also has a method :meth:`~Series.nunique` which will return the -number of unique non-na values: +number of unique non-NA values: .. ipython:: python diff --git a/doc/source/io.rst b/doc/source/io.rst index 149c86aead135..bf68a0cae1d27 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -137,7 +137,6 @@ usecols : array-like or callable, default ``None`` Using this parameter results in much faster parsing time and lower memory usage. as_recarray : boolean, default ``False`` - .. deprecated:: 0.18.2 Please call ``pd.read_csv(...).to_records()`` instead. @@ -193,7 +192,6 @@ skiprows : list-like or integer, default ``None`` skipfooter : int, default ``0`` Number of lines at bottom of file to skip (unsupported with engine='c'). skip_footer : int, default ``0`` - .. deprecated:: 0.19.0 Use the ``skipfooter`` parameter instead, as they are identical @@ -208,13 +206,11 @@ low_memory : boolean, default ``True`` use the ``chunksize`` or ``iterator`` parameter to return the data in chunks. (Only valid with C parser) buffer_lines : int, default None - .. deprecated:: 0.19.0 Argument removed because its value is not respected by the parser compact_ints : boolean, default False - .. deprecated:: 0.19.0 Argument moved to ``pd.to_numeric`` @@ -223,7 +219,6 @@ compact_ints : boolean, default False parser will attempt to cast it as the smallest integer ``dtype`` possible, either signed or unsigned depending on the specification from the ``use_unsigned`` parameter. use_unsigned : boolean, default False - .. deprecated:: 0.18.2 Argument moved to ``pd.to_numeric`` diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 5c10df25051a2..d54288baa389b 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -36,7 +36,7 @@ When / why does data become missing? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Some might quibble over our usage of *missing*. By "missing" we simply mean -**NA** or "not present for whatever reason". Many data sets simply arrive with +**NA** ("not available") or "not present for whatever reason". Many data sets simply arrive with missing data, either because it exists and was not collected or it never existed. For example, in a collection of financial time series, some of the time series might start on different dates. 
Thus, values prior to the start date diff --git a/doc/source/whatsnew/v0.10.0.txt b/doc/source/whatsnew/v0.10.0.txt index cf5369466308c..f0db1d82252c1 100644 --- a/doc/source/whatsnew/v0.10.0.txt +++ b/doc/source/whatsnew/v0.10.0.txt @@ -128,15 +128,45 @@ labeled the aggregated group with the end of the interval: the next day). ``notnull``. That they ever were was a relic of early pandas. This behavior can be re-enabled globally by the ``mode.use_inf_as_null`` option: -.. ipython:: python +.. code-block:: ipython - s = pd.Series([1.5, np.inf, 3.4, -np.inf]) - pd.isnull(s) - s.fillna(0) - pd.set_option('use_inf_as_null', True) - pd.isnull(s) - s.fillna(0) - pd.reset_option('use_inf_as_null') + In [6]: s = pd.Series([1.5, np.inf, 3.4, -np.inf]) + + In [7]: pd.isnull(s) + Out[7]: + 0 False + 1 False + 2 False + 3 False + Length: 4, dtype: bool + + In [8]: s.fillna(0) + Out[8]: + 0 1.500000 + 1 inf + 2 3.400000 + 3 -inf + Length: 4, dtype: float64 + + In [9]: pd.set_option('use_inf_as_null', True) + + In [10]: pd.isnull(s) + Out[10]: + 0 False + 1 True + 2 False + 3 True + Length: 4, dtype: bool + + In [11]: s.fillna(0) + Out[11]: + 0 1.5 + 1 0.0 + 2 3.4 + 3 0.0 + Length: 4, dtype: float64 + + In [12]: pd.reset_option('use_inf_as_null') - Methods with the ``inplace`` option now all return ``None`` instead of the calling object. E.g. code written like ``df = df.fillna(0, inplace=True)`` diff --git a/doc/source/whatsnew/v0.4.x.txt b/doc/source/whatsnew/v0.4.x.txt index 237ea84425051..ed9352059a6dc 100644 --- a/doc/source/whatsnew/v0.4.x.txt +++ b/doc/source/whatsnew/v0.4.x.txt @@ -9,7 +9,7 @@ New Features - Added Python 3 support using 2to3 (:issue:`200`) - :ref:`Added ` ``name`` attribute to ``Series``, now prints as part of ``Series.__repr__`` -- :ref:`Added ` instance methods ``isnull`` and ``notnull`` to +- :ref:`Added ` instance methods ``isnull`` and ``notnull`` to Series (:issue:`209`, :issue:`203`) - :ref:`Added ` ``Series.align`` method for aligning two series with choice of join method (ENH56_) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 76e30a6fb9d52..3e753aacf7c71 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -398,8 +398,8 @@ def table_schema_cb(key): use_inf_as_na_doc = """ : boolean - True means treat None, NaN, INF, -INF as na (old way), - False means None and NaN are null, but INF, -INF are not na + True means treat None, NaN, INF, -INF as NA (old way), + False means None and NaN are null, but INF, -INF are not NA (new way). 
""" From 1e5cfa11fc72a785166640061fbdd337f12a98b3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 30 Jul 2017 02:43:25 -0700 Subject: [PATCH 025/141] BUG: Allow pd.unique to accept tuple of strings (#17108) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/algorithms.py | 2 ++ pandas/tests/test_algos.py | 9 +++++++++ 3 files changed, 12 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 64873efc0ec19..ea3167738d1a3 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -328,3 +328,4 @@ Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) - Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) +- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4ca658b35a276..f2359f3ff1a9d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -170,6 +170,8 @@ def _ensure_arraylike(values): ABCIndexClass, ABCSeries)): inferred = lib.infer_dtype(values) if inferred in ['mixed', 'string', 'unicode']: + if isinstance(values, tuple): + values = list(values) values = lib.list_to_object_array(values) else: values = np.asarray(values) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 0e86ec123efea..b26089ea7a822 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -415,6 +415,15 @@ def test_order_of_appearance(self): expected = pd.Categorical(list('abc')) tm.assert_categorical_equal(result, expected) + @pytest.mark.parametrize("arg ,expected", [ + (('1', '1', '2'), np.array(['1', '2'], dtype=object)), + (('foo',), np.array(['foo'], dtype=object)) + ]) + def test_tuple_with_strings(self, arg, expected): + # see GH 17108 + result = pd.unique(arg) + tm.assert_numpy_array_equal(result, expected) + class TestIsin(object): From c502dba2b3b6c93841115319cf4bda619683505e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 30 Jul 2017 15:54:02 -0700 Subject: [PATCH 026/141] BUG: Allow Series with same name with crosstab (#16028) Closes gh-13279 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/reshape/pivot.py | 13 +++++++++++-- pandas/tests/reshape/test_pivot.py | 10 ++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index ea3167738d1a3..13d953a2cabdb 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -313,6 +313,7 @@ Reshaping - Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`) - Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) - Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`) +- Bug in ``pd.crosstab()`` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`) Numeric ^^^^^^^ diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index fe525eb0a3c87..d4ea49c130add 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -148,8 +148,17 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', table = agged if table.index.nlevels > 1: - to_unstack = [agged.index.names[i] or i - for i in 
range(len(index), len(keys))] + # Related GH #17123 + # If index_names are integers, determine whether the integers refer + # to the level position or name. + index_names = agged.index.names[:len(index)] + to_unstack = [] + for i in range(len(index), len(keys)): + name = agged.index.names[i] + if name is None or name in index_names: + to_unstack.append(i) + else: + to_unstack.append(name) table = agged.unstack(to_unstack) if not dropna: diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index ee6c32cd0a208..879ac96680fbb 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1513,6 +1513,16 @@ def test_crosstab_with_numpy_size(self): columns=expected_column) tm.assert_frame_equal(result, expected) + def test_crosstab_dup_index_names(self): + # GH 13279 + s = pd.Series(range(3), name='foo') + result = pd.crosstab(s, s) + expected_index = pd.Index(range(3), name='foo') + expected = pd.DataFrame(np.eye(3, dtype=np.int64), + index=expected_index, + columns=expected_index) + tm.assert_frame_equal(result, expected) + class TestPivotAnnual(object): """ From 2155c3eec5b17c080b3ec38c0ed532213a4d2a34 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 1 Aug 2017 14:19:16 -0400 Subject: [PATCH 027/141] COMPAT: make sure use_inf_as_null is deprecated (#17126) closes #17115 --- pandas/core/config_init.py | 7 +++++-- pandas/tests/series/test_missing.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 3e753aacf7c71..875ab8249f953 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -412,8 +412,11 @@ def use_inf_as_na_cb(key): _use_inf_as_na(key) -cf.register_option('mode.use_inf_as_na', False, use_inf_as_na_doc, - cb=use_inf_as_na_cb) +with cf.config_prefix('mode'): + cf.register_option('use_inf_as_na', False, use_inf_as_na_doc, + cb=use_inf_as_na_cb) + cf.register_option('use_inf_as_null', False, use_inf_as_null_doc, + cb=use_inf_as_na_cb) cf.deprecate_option('mode.use_inf_as_null', msg=use_inf_as_null_doc, rkey='mode.use_inf_as_na') diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 2d20ac9685914..01bf7274fd384 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -410,6 +410,22 @@ def test_isna_for_inf(self): tm.assert_series_equal(r, e) tm.assert_series_equal(dr, de) + @tm.capture_stdout + def test_isnull_for_inf_deprecated(self): + # gh-17115 + s = Series(['a', np.inf, np.nan, 1.0]) + with tm.assert_produces_warning(DeprecationWarning, + check_stacklevel=False): + pd.set_option('mode.use_inf_as_null', True) + r = s.isna() + dr = s.dropna() + pd.reset_option('mode.use_inf_as_null') + + e = Series([False, True, True, False]) + de = Series(['a', 1.0], index=[0, 3]) + tm.assert_series_equal(r, e) + tm.assert_series_equal(dr, de) + def test_fillna(self): ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5)) From 3ed9f5398c973b5778f12031ce9377ef6a27aff4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 1 Aug 2017 16:09:22 -0400 Subject: [PATCH 028/141] CI: bump version of xlsxwriter to 0.5.2 (#17142) --- ci/install_travis.sh | 3 --- ci/requirements-2.7.pip | 2 ++ ci/requirements-2.7.run | 6 ++---- ci/requirements-2.7_COMPAT.pip | 2 ++ ci/requirements-2.7_COMPAT.run | 7 ++----- ci/requirements-2.7_LOCALE.pip | 2 ++ ci/requirements-2.7_LOCALE.run | 5 +---- ci/requirements-2.7_SLOW.run | 1 - ci/requirements-3.6.run | 2 +- 
ci/requirements-3.6_LOCALE.run | 2 +- ci/requirements-3.6_LOCALE_SLOW.run | 2 +- doc/source/install.rst | 3 ++- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/nanops.py | 19 +++++++++++++++++-- 14 files changed, 34 insertions(+), 23 deletions(-) diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 8cf6f2ce636da..dcc1656ce3dd7 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -52,9 +52,6 @@ conda update -q conda echo echo "[add channels]" -# add the pandas channel to take priority -# to add extra packages -conda config --add channels pandas || exit 1 conda config --remove channels defaults || exit 1 conda config --add channels defaults || exit 1 diff --git a/ci/requirements-2.7.pip b/ci/requirements-2.7.pip index eb796368e7820..876d9e978fa84 100644 --- a/ci/requirements-2.7.pip +++ b/ci/requirements-2.7.pip @@ -1,5 +1,7 @@ blosc pandas-gbq +html5lib +beautifulsoup4 pathlib backports.lzma py diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run index 62e31e4ae24e3..7152cb2c8b605 100644 --- a/ci/requirements-2.7.run +++ b/ci/requirements-2.7.run @@ -10,13 +10,11 @@ xlrd=0.9.2 sqlalchemy=0.9.6 lxml=3.2.1 scipy -xlsxwriter=0.4.6 +xlsxwriter=0.5.2 s3fs bottleneck -psycopg2=2.5.2 +psycopg2 patsy pymysql=0.6.3 -html5lib=1.0b2 -beautiful-soup=4.2.1 jinja2=2.8 xarray=0.8.0 diff --git a/ci/requirements-2.7_COMPAT.pip b/ci/requirements-2.7_COMPAT.pip index 9533a630d06a4..13cd35a923124 100644 --- a/ci/requirements-2.7_COMPAT.pip +++ b/ci/requirements-2.7_COMPAT.pip @@ -1,2 +1,4 @@ +html5lib==1.0b2 +beautifulsoup4==4.2.0 openpyxl argparse diff --git a/ci/requirements-2.7_COMPAT.run b/ci/requirements-2.7_COMPAT.run index d27b6a72c2d15..b94f4ab7b27d1 100644 --- a/ci/requirements-2.7_COMPAT.run +++ b/ci/requirements-2.7_COMPAT.run @@ -4,13 +4,10 @@ pytz=2013b scipy=0.11.0 xlwt=0.7.5 xlrd=0.9.2 -bottleneck=0.8.0 numexpr=2.2.2 pytables=3.0.0 -html5lib=1.0b2 -beautiful-soup=4.2.0 -psycopg2=2.5.1 +psycopg2 pymysql=0.6.0 sqlalchemy=0.7.8 -xlsxwriter=0.4.6 +xlsxwriter=0.5.2 jinja2=2.8 diff --git a/ci/requirements-2.7_LOCALE.pip b/ci/requirements-2.7_LOCALE.pip index cf8e6b8b3d3a6..1b825bbf492ca 100644 --- a/ci/requirements-2.7_LOCALE.pip +++ b/ci/requirements-2.7_LOCALE.pip @@ -1 +1,3 @@ +html5lib==1.0b2 +beautifulsoup4==4.2.1 blosc diff --git a/ci/requirements-2.7_LOCALE.run b/ci/requirements-2.7_LOCALE.run index 5d7cc31b7d55e..8e360cf74b081 100644 --- a/ci/requirements-2.7_LOCALE.run +++ b/ci/requirements-2.7_LOCALE.run @@ -3,12 +3,9 @@ pytz=2013b numpy=1.8.2 xlwt=0.7.5 openpyxl=1.6.2 -xlsxwriter=0.4.6 +xlsxwriter=0.5.2 xlrd=0.9.2 -bottleneck=0.8.0 matplotlib=1.3.1 sqlalchemy=0.8.1 -html5lib=1.0b2 lxml=3.2.1 scipy -beautiful-soup=4.2.1 diff --git a/ci/requirements-2.7_SLOW.run b/ci/requirements-2.7_SLOW.run index c2d2a14285ad6..0a549554f5219 100644 --- a/ci/requirements-2.7_SLOW.run +++ b/ci/requirements-2.7_SLOW.run @@ -13,7 +13,6 @@ pytables sqlalchemy lxml s3fs -bottleneck psycopg2 pymysql html5lib diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index ef66ebeb336f3..00db27d3f2704 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -16,7 +16,7 @@ sqlalchemy pymysql feather-format pyarrow -# psycopg2 (not avail on defaults ATM) +psycopg2 beautifulsoup4 s3fs xarray diff --git a/ci/requirements-3.6_LOCALE.run b/ci/requirements-3.6_LOCALE.run index ae456f4f9f38a..ad54284c6f7e3 100644 --- a/ci/requirements-3.6_LOCALE.run +++ b/ci/requirements-3.6_LOCALE.run @@ -15,7 +15,7 @@ jinja2 sqlalchemy pymysql # feather-format (not available on defaults 
ATM)
-# psycopg2 (not avail on defaults ATM)
+psycopg2
 beautifulsoup4
 s3fs
 xarray
diff --git a/ci/requirements-3.6_LOCALE_SLOW.run b/ci/requirements-3.6_LOCALE_SLOW.run
index 28131031f0bbd..ad54284c6f7e3 100644
--- a/ci/requirements-3.6_LOCALE_SLOW.run
+++ b/ci/requirements-3.6_LOCALE_SLOW.run
@@ -15,7 +15,7 @@ jinja2
 sqlalchemy
 pymysql
 # feather-format (not available on defaults ATM)
-# psycopg2 (not available on defaults ATM)
+psycopg2
 beautifulsoup4
 s3fs
 xarray
diff --git a/doc/source/install.rst b/doc/source/install.rst
index 48d51e1200447..c185a7cf4b875 100644
--- a/doc/source/install.rst
+++ b/doc/source/install.rst
@@ -217,7 +217,8 @@ Recommended Dependencies
   If installed, must be Version 2.4.6 or higher.
 
 * `bottleneck `__: for accelerating certain types of ``nan``
-  evaluations. ``bottleneck`` uses specialized cython routines to achieve large speedups.
+  evaluations. ``bottleneck`` uses specialized cython routines to achieve large speedups. If installed,
+  must be Version 1.0.0 or higher.
 
 .. note::
 
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 13d953a2cabdb..9af36e632f03d 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -206,6 +206,7 @@ Other API Changes
 ^^^^^^^^^^^^^^^^^
 
 - Support has been dropped for Python 3.4 (:issue:`15251`)
+- Support has been dropped for bottleneck < 1.0.0 (:issue:`15214`)
 - The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`)
 - Accessing a non-existent attribute on a closed :class:`~pandas.HDFStore` will now raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`)
 
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 5bebb8eb65b23..e2777cb56374e 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -1,6 +1,8 @@
 import itertools
 import functools
 import operator
+import warnings
+from distutils.version import LooseVersion
 
 import numpy as np
 from pandas import compat
@@ -20,11 +22,24 @@
 from pandas.core.config import get_option
 from pandas.core.common import _values_from_object
 
+_BOTTLENECK_INSTALLED = False
+_MIN_BOTTLENECK_VERSION = '1.0.0'
+
 try:
     import bottleneck as bn
-    _BOTTLENECK_INSTALLED = True
+    ver = bn.__version__
+    _BOTTLENECK_INSTALLED = ver >= LooseVersion(_MIN_BOTTLENECK_VERSION)
+
+    if not _BOTTLENECK_INSTALLED:
+        warnings.warn(
+            "The installed version of bottleneck {ver} is not supported "
+            "in pandas and will not be used\nThe minimum supported "
+            "version is {min_ver}\n".format(
+                ver=ver, min_ver=_MIN_BOTTLENECK_VERSION), UserWarning)
+
 except ImportError:  # pragma: no cover
-    _BOTTLENECK_INSTALLED = False
+    pass
+
 
 _USE_BOTTLENECK = False

From 9a50c218126ac34a9a99a015a03d4c784ede9793 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Tue, 1 Aug 2017 15:33:00 -0700
Subject: [PATCH 029/141] DOC: Clean up instructions in ISSUE_TEMPLATE (#17146)

---
 .github/ISSUE_TEMPLATE.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 1f614b54b1f71..6ab03c9907475 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -8,11 +8,18 @@
 
 [this should explain **why** the current behaviour is a problem and why the expected output is a better solution.]
 
+**Note**: We receive a lot of issues on our GitHub tracker, so it is very possible that your issue has been posted before.
+Please check first before submitting so that we do not have to handle and close duplicates!
+ +**Note**: Many problems can be resolved by simply upgrading `pandas` to the latest version. Before submitting, please check +if that solution works for you. If possible, you may want to check if `master` addresses this issue, but that is not necessary. + #### Expected Output #### Output of ``pd.show_versions()``
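For example, a reporter can generate that section with two lines (a minimal
sketch using only the public pandas API):

    import pandas as pd

    print(pd.__version__)  # quick check against the latest release
    pd.show_versions()     # full environment report to paste into the issue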
-# Paste the output here pd.show_versions() here + +[paste the output of ``pd.show_versions()`` here below this line]
From 5759effd43546df1fe3b4d3a3a2532d3f45aa059 Mon Sep 17 00:00:00 2001 From: Floris Kint Date: Tue, 1 Aug 2017 15:36:40 -0700 Subject: [PATCH 030/141] Add missing space to the NotImplementedError's message for compound dtypes (#17140) --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fbd26655798bd..ec44dce0da9bc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -152,7 +152,7 @@ def _validate_dtype(self, dtype): # a compound dtype if dtype.kind == 'V': raise NotImplementedError("compound dtypes are not implemented" - "in the {0} constructor" + " in the {0} constructor" .format(self.__class__.__name__)) return dtype From 3855039c69a0648d4e17886ba889eeec12907353 Mon Sep 17 00:00:00 2001 From: jebob Date: Tue, 1 Aug 2017 23:38:18 +0100 Subject: [PATCH 031/141] DOC: (de)type the return value of concat (#17079) (#17119) --- pandas/core/reshape/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 96603b6adc3b0..e199ec2710367 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -65,7 +65,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, Returns ------- - concatenated : type of objects + concatenated : object, type of objs Notes ----- From d7cb6279a0b48af43da849c65fe55c989ed19c0e Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 1 Aug 2017 15:44:09 -0700 Subject: [PATCH 032/141] BUG: Thoroughly dedup column names in read_csv (#17095) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/parsers.py | 18 +++++++++++------- pandas/tests/io/parser/mangle_dupes.py | 24 +++++++++++++++++++++++- 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9af36e632f03d..aadb82b8a4973 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -280,6 +280,7 @@ I/O ^^^ - Bug in :func:`read_csv` in which columns were not being thoroughly de-duplicated (:issue:`17060`) +- Bug in :func:`read_csv` in which specified column names were not being thoroughly de-duplicated (:issue:`17095`) - Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) - Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696`, :issue:`16798`). - Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`). diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index ea0bb104338b6..41b0cdd6dd250 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1318,14 +1318,18 @@ def _maybe_dedup_names(self, names): # would be nice! 
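# A worked example of the renaming below, mirroring the new tests: with
# mangle_dupe_cols, names ['a', 'a', 'a.1'] become ['a', 'a.1', 'a.1.1'];
# the second 'a' is first renamed to 'a.1', so the pre-existing 'a.1'
# then collides and the while loop bumps it to 'a.1.1'.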
if self.mangle_dupe_cols: names = list(names) # so we can index - counts = {} + counts = defaultdict(int) for i, col in enumerate(names): - cur_count = counts.get(col, 0) + cur_count = counts[col] - if cur_count > 0: - names[i] = '%s.%d' % (col, cur_count) + while cur_count > 0: + counts[col] = cur_count + 1 + col = '%s.%d' % (col, cur_count) + cur_count = counts[col] + + names[i] = col counts[col] = cur_count + 1 return names @@ -2330,15 +2334,15 @@ def _infer_columns(self): this_columns.append(c) if not have_mi_columns and self.mangle_dupe_cols: - counts = {} + counts = defaultdict(int) for i, col in enumerate(this_columns): - cur_count = counts.get(col, 0) + cur_count = counts[col] while cur_count > 0: counts[col] = cur_count + 1 col = "%s.%d" % (col, cur_count) - cur_count = counts.get(col, 0) + cur_count = counts[col] this_columns[i] = col counts[col] = cur_count + 1 diff --git a/pandas/tests/io/parser/mangle_dupes.py b/pandas/tests/io/parser/mangle_dupes.py index 70ecfe51c0f09..e2efb1377f8b0 100644 --- a/pandas/tests/io/parser/mangle_dupes.py +++ b/pandas/tests/io/parser/mangle_dupes.py @@ -25,7 +25,7 @@ def test_basic(self): mangle_dupe_cols=True) assert list(df.columns) == expected - def test_thorough_mangle(self): + def test_thorough_mangle_columns(self): # see gh-17060 data = "a,a,a.1\n1,2,3" df = self.read_csv(StringIO(data), sep=",", mangle_dupe_cols=True) @@ -40,3 +40,25 @@ def test_thorough_mangle(self): df = self.read_csv(StringIO(data), sep=",", mangle_dupe_cols=True) assert list(df.columns) == ["a", "a.1", "a.3", "a.1.1", "a.2", "a.2.1", "a.3.1"] + + def test_thorough_mangle_names(self): + # see gh-17095 + data = "a,b,b\n1,2,3" + names = ["a.1", "a.1", "a.1.1"] + df = self.read_csv(StringIO(data), sep=",", names=names, + mangle_dupe_cols=True) + assert list(df.columns) == ["a.1", "a.1.1", "a.1.1.1"] + + data = "a,b,c,d,e,f\n1,2,3,4,5,6" + names = ["a", "a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1"] + df = self.read_csv(StringIO(data), sep=",", names=names, + mangle_dupe_cols=True) + assert list(df.columns) == ["a", "a.1", "a.1.1", "a.1.1.1", + "a.1.1.1.1", "a.1.1.1.1.1"] + + data = "a,b,c,d,e,f,g\n1,2,3,4,5,6,7" + names = ["a", "a", "a.3", "a.1", "a.2", "a", "a"] + df = self.read_csv(StringIO(data), sep=",", names=names, + mangle_dupe_cols=True) + assert list(df.columns) == ["a", "a.1", "a.3", "a.1.1", + "a.2", "a.2.1", "a.3.1"] From 9d32df6134454e212a0c62427f98d313ec1fd29e Mon Sep 17 00:00:00 2001 From: Alan Yee Date: Wed, 2 Aug 2017 02:46:59 -0700 Subject: [PATCH 033/141] DOC: Additions/updates to documentation (#17150) --- README.md | 58 +++++++++++++++++++++--------------------- doc/source/gotchas.rst | 16 ++++++------ 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index dc74828ba9863..ac043f5586498 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@
Conda - + conda default downloads
Conda-forge - + conda-forge downloads
' % (self.border, - ' '.join(_classes)), - indent) + self.write('
<table border="{border}" class="dataframe {cls}">'
+                   .format(border=self.border, cls=' '.join(_classes)), indent)
 
         indent += self.indent_delta
         indent = self._write_header(indent)
@@ -1173,8 +1184,10 @@ def write_result(self, buf):
         self.write('</table>', indent)
 
         if self.should_show_dimensions:
             by = chr(215) if compat.PY3 else unichr(215)  # ×
-            self.write(u('<p>%d rows %s %d columns</p>') %
-                       (len(frame), by, len(frame.columns)))
+            self.write(u('<p>{rows} rows {by} {cols} columns</p>
') + .format(rows=len(frame), + by=by, + cols=len(frame.columns))) if self.notebook: self.write('') @@ -1199,7 +1212,7 @@ def _column_header(): row.append(single_column_table(self.columns.names)) else: row.append('') - style = "text-align: %s;" % self.fmt.justify + style = "text-align: {just};".format(just=self.fmt.justify) row.extend([single_column_table(c, self.fmt.justify, style) for c in self.columns]) else: @@ -1214,7 +1227,7 @@ def _column_header(): indent += self.indent_delta if isinstance(self.columns, MultiIndex): - template = 'colspan="%d" halign="left"' + template = 'colspan="{span:d}" halign="left"' if self.fmt.sparsify: # GH3547 @@ -1282,7 +1295,7 @@ def _column_header(): for i, v in enumerate(values): if i in records: if records[i] > 1: - tags[j] = template % records[i] + tags[j] = template.format(span=records[i]) else: continue j += 1 @@ -1372,7 +1385,7 @@ def _write_regular_rows(self, fmt_values, indent): nindex_levels=1) def _write_hierarchical_rows(self, fmt_values, indent): - template = 'rowspan="%d" valign="top"' + template = 'rowspan="{span}" valign="top"' truncate_h = self.fmt.truncate_h truncate_v = self.fmt.truncate_v @@ -1447,7 +1460,7 @@ def _write_hierarchical_rows(self, fmt_values, indent): for records, v in zip(level_lengths, idx_values[i]): if i in records: if records[i] > 1: - tags[j] = template % records[i] + tags[j] = template.format(span=records[i]) else: sparse_offset += 1 continue @@ -1615,8 +1628,9 @@ def _save_header(self): return if has_aliases: if len(header) != len(cols): - raise ValueError(('Writing %d cols but got %d aliases' - % (len(cols), len(header)))) + raise ValueError(('Writing {ncols} cols but got {nalias} ' + 'aliases'.format(ncols=len(cols), + nalias=len(header)))) else: write_cols = header else: @@ -1790,8 +1804,9 @@ def _format_strings(self): if self.float_format is None: float_format = get_option("display.float_format") if float_format is None: - fmt_str = '%% .%dg' % get_option("display.precision") - float_format = lambda x: fmt_str % x + fmt_str = ('{{x: .{prec:d}g}}' + .format(prec=get_option("display.precision"))) + float_format = lambda x: fmt_str.format(x=x) else: float_format = self.float_format @@ -1807,10 +1822,10 @@ def _format(x): return 'NaT' return self.na_rep elif isinstance(x, PandasObject): - return '%s' % x + return u'{x}'.format(x=x) else: # object dtype - return '%s' % formatter(x) + return u'{x}'.format(x=formatter(x)) vals = self.values if isinstance(vals, Index): @@ -1824,11 +1839,11 @@ def _format(x): fmt_values = [] for i, v in enumerate(vals): if not is_float_type[i] and leading_space: - fmt_values.append(' %s' % _format(v)) + fmt_values.append(u' {v}'.format(v=_format(v))) elif is_float_type[i]: fmt_values.append(float_format(v)) else: - fmt_values.append(' %s' % _format(v)) + fmt_values.append(u' {v}'.format(v=_format(v))) return fmt_values @@ -1864,7 +1879,7 @@ def _value_formatter(self, float_format=None, threshold=None): # because str(0.0) = '0.0' while '%g' % 0.0 = '0' if float_format: def base_formatter(v): - return (float_format % v) if notna(v) else self.na_rep + return float_format(value=v) if notna(v) else self.na_rep else: def base_formatter(v): return str(v) if notna(v) else self.na_rep @@ -1925,10 +1940,14 @@ def format_values_with(float_format): # There is a special default string when we are fixed-width # The default is otherwise to use str instead of a formatting string - if self.float_format is None and self.fixed_width: - float_format = '%% .%df' % self.digits + if self.float_format is None: 
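#           (a sketch of what the branches below produce: with digits=6 the
#            fixed-width partial renders 1.5 as ' 1.500000', while a
#            user-supplied '%.2f' is wrapped in a lambda so both can be
#            called uniformly as float_format(value=v))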
+ if self.fixed_width: + float_format = partial('{value: .{digits:d}f}'.format, + digits=self.digits) + else: + float_format = self.float_format else: - float_format = self.float_format + float_format = lambda value: self.float_format % value formatted_values = format_values_with(float_format) @@ -1955,7 +1974,8 @@ def format_values_with(float_format): (abs_vals > 0)).any() if has_small_values or (too_long and has_large_values): - float_format = '%% .%de' % self.digits + float_format = partial('{value: .{digits:d}e}'.format, + digits=self.digits) formatted_values = format_values_with(float_format) return formatted_values @@ -1971,7 +1991,7 @@ def _format_strings(self): class IntArrayFormatter(GenericArrayFormatter): def _format_strings(self): - formatter = self.formatter or (lambda x: '% d' % x) + formatter = self.formatter or (lambda x: '{x: d}'.format(x=x)) fmt_values = [formatter(x) for x in self.values] return fmt_values @@ -2023,7 +2043,7 @@ def _format_strings(self): # periods may contains different freq values = Index(self.values, dtype='object').to_native_types() - formatter = self.formatter or (lambda x: '%s' % x) + formatter = self.formatter or (lambda x: '{x}'.format(x=x)) fmt_values = [formatter(x) for x in values] return fmt_values @@ -2223,7 +2243,7 @@ def _formatter(x): x = Timedelta(x) result = x._repr_base(format=format) if box: - result = "'{0}'".format(result) + result = "'{res}'".format(res=result) return result return _formatter @@ -2278,12 +2298,12 @@ def _cond(values): def single_column_table(column, align=None, style=None): table = '
<table'
     if align is not None:
-        table += (' align="%s"' % align)
+        table += (' align="{align}"'.format(align=align))
     if style is not None:
-        table += (' style="%s"' % style)
+        table += (' style="{style}"'.format(style=style))
     table += '><tbody>'
     for i in column:
-        table += ('<tr><td>%s</td></tr>' % str(i))
+        table += ('<tr><td>{i!s}</td></tr>'.format(i=i))
     table += '</tbody></table>'
     return table
 
 
@@ -2291,7 +2311,7 @@ def single_column_table(column, align=None, style=None):
 def single_row_table(row):  # pragma: no cover
     table = '<table><tbody><tr>'
     for i in row:
-        table += ('<td>%s</td>' % str(i))
+        table += ('<td>{i!s}</td>'.format(i=i))
     table += '</tr></tbody></table>
' return table @@ -2385,18 +2405,19 @@ def __call__(self, num): prefix = self.ENG_PREFIXES[int_pow10] else: if int_pow10 < 0: - prefix = 'E-%02d' % (-int_pow10) + prefix = 'E-{pow10:02d}'.format(pow10=-int_pow10) else: - prefix = 'E+%02d' % int_pow10 + prefix = 'E+{pow10:02d}'.format(pow10=int_pow10) mant = sign * dnum / (10**pow10) if self.accuracy is None: # pragma: no cover - format_str = u("% g%s") + format_str = u("{mant: g}{prefix}") else: - format_str = (u("%% .%if%%s") % self.accuracy) + format_str = (u("{{mant: .{acc:d}f}}{{prefix}}") + .format(acc=self.accuracy)) - formatted = format_str % (mant, prefix) + formatted = format_str.format(mant=mant, prefix=prefix) return formatted # .strip() From 6ec10441974a5825727363bd2546c9778b6da91a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 30 Aug 2017 05:50:04 -0500 Subject: [PATCH 117/141] PKG: Added pyproject.toml for PEP 518 (#16745) Declaring build-time requirements: https://www.python.org/dev/peps/pep-0518/ --- MANIFEST.in | 1 + doc/source/whatsnew/v0.21.0.txt | 1 + pyproject.toml | 9 +++++++++ 3 files changed, 11 insertions(+) create mode 100644 pyproject.toml diff --git a/MANIFEST.in b/MANIFEST.in index 8bd83a7d56948..1a6b831c1b975 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,6 +3,7 @@ include LICENSE include RELEASE.md include README.rst include setup.py +include pyproject.toml graft doc prune doc/build diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 60090ab5b82ef..f3a726b67fee7 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -112,6 +112,7 @@ Other Enhancements ^^^^^^^^^^^^^^^^^^ - The ``validate`` argument for :func:`merge` function now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception of type ``MergeError`` will be raised. For more, see :ref:`here ` (:issue:`16270`) +- Added support for `PEP 518 `_ to the build system (:issue:`16745`) - :func:`Series.to_dict` and :func:`DataFrame.to_dict` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`) - :func:`RangeIndex.append` now returns a ``RangeIndex`` object when possible (:issue:`16212`) - :func:`Series.rename_axis` and :func:`DataFrame.rename_axis` with ``inplace=True`` now return ``None`` while renaming the axis inplace. 
(:issue:`15704`) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000..f0d57d1d808a2 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,9 @@ +[build-system] +requires = [ + "wheel", + "setuptools", + "Cython", # required for VCS build, optional for released source + "numpy==1.9.3; python_version=='3.5'", + "numpy==1.12.1; python_version=='3.6'", + "numpy==1.13.1; python_version>='3.7'", +] From c33af561d95a0b3a18c6ff55ccbe004341c6e367 Mon Sep 17 00:00:00 2001 From: iulia Date: Wed, 30 Aug 2017 19:39:45 +0300 Subject: [PATCH 118/141] DOC: Update Overview page in documentation (#17368) * Update Overview page in documentation * DOC Revise Overview page * DOC Make further revisions in Overview webpage * Update overview.rst Remove references to Panel --- doc/source/overview.rst | 71 ++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/doc/source/overview.rst b/doc/source/overview.rst index 92caeec319169..00a71603e1261 100644 --- a/doc/source/overview.rst +++ b/doc/source/overview.rst @@ -6,7 +6,11 @@ Package overview **************** -:mod:`pandas` consists of the following things +:mod:`pandas` is an open source, BSD-licensed library providing high-performance, +easy-to-use data structures and data analysis tools for the `Python `__ +programming language. + +:mod:`pandas` consists of the following elements * A set of labeled array data structures, the primary of which are Series and DataFrame @@ -21,27 +25,23 @@ Package overview * Memory-efficient "sparse" versions of the standard data structures for storing data that is mostly missing or mostly constant (some fixed value) * Moving window statistics (rolling mean, rolling standard deviation, etc.) - * Static and moving window linear and `panel regression - `__ -Data structures at a glance ---------------------------- +Data Structures +--------------- .. csv-table:: :header: "Dimensions", "Name", "Description" :widths: 15, 20, 50 - 1, Series, "1D labeled homogeneously-typed array" - 2, DataFrame, "General 2D labeled, size-mutable tabular structure with - potentially heterogeneously-typed columns" - 3, Panel, "General 3D labeled, also size-mutable array" + 1, "Series", "1D labeled homogeneously-typed array" + 2, "DataFrame", "General 2D labeled, size-mutable tabular structure with potentially heterogeneously-typed column" -Why more than 1 data structure? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Why more than one data structure? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The best way to think about the pandas data structures is as flexible containers for lower dimensional data. For example, DataFrame is a container -for Series, and Panel is a container for DataFrame objects. We would like to be +for Series, and Series is a container for scalars. We would like to be able to insert and remove objects from these containers in a dictionary-like fashion. @@ -85,36 +85,41 @@ The first stop for pandas issues and ideas is the `Github Issue Tracker pandas community experts can answer through `Stack Overflow `__. -Longer discussions occur on the `developer mailing list -`__, and commercial support -inquiries for Lambda Foundry should be sent to: support@lambdafoundry.com +Community +--------- -Credits -------- +pandas is actively supported today by a community of like-minded individuals around +the world who contribute their valuable time and energy to help make open source +pandas possible. Thanks to `all of our contributors `__. 
+
+If you're interested in contributing, please
+visit `Contributing to pandas webpage `__.
+
+pandas is a `NumFOCUS `__ sponsored project.
+This will help ensure the success of development of pandas as a world-class open-source
+project, and makes it possible to `donate `__ to the project.
+
+Project Governance
+------------------
+
+The governance process that pandas project has used informally since its inception in 2008 is formalized in `Project Governance documents `__ .
+The documents clarify how decisions are made and how the various elements of our community interact, including the relationship between open source collaborative development and work that may be funded by for-profit or non-profit entities.
+
+Wes McKinney is the Benevolent Dictator for Life (BDFL).
 
 Development Team
------------------
+-----------------
+
+The list of the Core Team members and more detailed information can be found on the `people’s page `__ of the governance repo.
+
 
-pandas is a part of the PyData project. The PyData Development Team is a
-collection of developers focused on the improvement of Python's data
-libraries. The core team that coordinates development can be found on `Github
-`__. If you're interested in contributing, please
-visit the `project website `__.
+Institutional Partners
+----------------------
+
+The information about current institutional partners can be found on `pandas website page `__
 
 License
 -------
 
 .. literalinclude:: ../../LICENSE
+

From 0f8205cc501af248f98a6b430bc61d760c4cf678 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Wed, 30 Aug 2017 15:30:53 -0500
Subject: [PATCH 119/141] API: Have MultiIndex constructors always return a MI
 (#17236)

* API: Have MultiIndex constructors return MI

This removes the special case for MultiIndex constructors returning
an Index if all the levels are length-1. Now this will return a
MultiIndex with a single level.

This is a backwards incompatible change, with no clear method for
deprecation, so we're making a clean break.

Closes #17178

* fixup!
API: Have MultiIndex constructors return MI * Update for comments --- doc/source/whatsnew/v0.21.0.txt | 24 +++++++++++ pandas/core/frame.py | 11 ++--- pandas/core/indexes/api.py | 12 ++++-- pandas/core/indexes/base.py | 69 ++++++++++++++++++++++++++++++ pandas/core/indexes/multi.py | 10 ----- pandas/core/reshape/reshape.py | 21 ++++++--- pandas/core/sparse/scipy_sparse.py | 6 ++- pandas/core/strings.py | 7 ++- pandas/io/parsers.py | 13 +++--- pandas/tests/indexes/test_base.py | 18 +++++++- pandas/tests/indexes/test_multi.py | 20 ++++----- pandas/util/testing.py | 4 ++ 12 files changed, 170 insertions(+), 45 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f3a726b67fee7..29d57df43e941 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -275,6 +275,30 @@ named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement. +.. _whatsnew_210.api.multiindex_single: + +MultiIndex Constructor with a Single Level +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``MultiIndex`` constructors no longer squeeze a MultiIndex with all +length-one levels down to a regular ``Index``. This affects all the +``MultiIndex`` constructors. (:issue:`17178`) + +Previous behavior: + +.. code-block:: ipython + + In [2]: pd.MultiIndex.from_tuples([('a',), ('b',)]) + Out[2]: Index(['a', 'b'], dtype='object') + +Length 1 levels are no longer special-cased. They behave exactly as if you had +length 2+ levels, so a :class:`MultiIndex` is always returned from all of the +``MultiIndex`` constructors: + +.. ipython:: python + + pd.MultiIndex.from_tuples([('a',), ('b',)]) + .. 
_whatsnew_0210.api:
 
 Other API Changes
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index b5b3df64d24c0..5991ec825c841 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -67,7 +67,8 @@
                                    _dict_compat,
                                    standardize_mapping)
 from pandas.core.generic import NDFrame, _shared_docs
-from pandas.core.index import Index, MultiIndex, _ensure_index
+from pandas.core.index import (Index, MultiIndex, _ensure_index,
+                               _ensure_index_from_sequences)
 from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
                                   check_bool_indexer)
 from pandas.core.internals import (BlockManager,
@@ -1155,9 +1156,9 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
             else:
                 try:
                     to_remove = [arr_columns.get_loc(field) for field in index]
-
-                    result_index = MultiIndex.from_arrays(
-                        [arrays[i] for i in to_remove], names=index)
+                    index_data = [arrays[i] for i in to_remove]
+                    result_index = _ensure_index_from_sequences(index_data,
+                                                                names=index)
 
                     exclude.update(index)
                 except Exception:
@@ -3000,7 +3001,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
                 to_remove.append(col)
             arrays.append(level)
 
-        index = MultiIndex.from_arrays(arrays, names=names)
+        index = _ensure_index_from_sequences(arrays, names)
 
         if verify_integrity and not index.is_unique:
             duplicates = index.get_duplicates()
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 323d50166e7b6..d20a0b0a2c73d 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -1,6 +1,9 @@
-from pandas.core.indexes.base import (Index, _new_Index,  # noqa
-                                      _ensure_index, _get_na_value,
-                                      InvalidIndexError)
+from pandas.core.indexes.base import (Index,
+                                      _new_Index,
+                                      _ensure_index,
+                                      _ensure_index_from_sequences,
+                                      _get_na_value,
+                                      InvalidIndexError)  # noqa
 from pandas.core.indexes.category import CategoricalIndex  # noqa
 from pandas.core.indexes.multi import MultiIndex  # noqa
 from pandas.core.indexes.interval import IntervalIndex  # noqa
@@ -22,7 +25,8 @@
        'InvalidIndexError', 'TimedeltaIndex',
        'PeriodIndex', 'DatetimeIndex', '_new_Index', 'NaT',
-       '_ensure_index', '_get_na_value', '_get_combined_index',
+       '_ensure_index', '_ensure_index_from_sequences', '_get_na_value',
+       '_get_combined_index',
        '_get_objs_combined_axis', '_union_indexes',
        '_get_consensus_names', '_all_indexes_same']
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 31cf1e48b8529..6a30eaefaaae7 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4012,7 +4012,76 @@ def invalid_op(self, other=None):
 Index._add_comparison_methods()
 
 
+def _ensure_index_from_sequences(sequences, names=None):
+    """Construct an index from sequences of data.
+
+    A single sequence returns an Index. Many sequences return a
+    MultiIndex.
+ + Parameters + ---------- + sequences : sequence of sequences + names : sequence of str + + Returns + ------- + index : Index or MultiIndex + + Examples + -------- + >>> _ensure_index_from_sequences([[1, 2, 3]], names=['name']) + Int64Index([1, 2, 3], dtype='int64', name='name') + + >>> _ensure_index_from_sequences([['a', 'a'], ['a', 'b']], + names=['L1', 'L2']) + MultiIndex(levels=[['a'], ['a', 'b']], + labels=[[0, 0], [0, 1]], + names=['L1', 'L2']) + + See Also + -------- + _ensure_index + """ + from .multi import MultiIndex + + if len(sequences) == 1: + if names is not None: + names = names[0] + return Index(sequences[0], name=names) + else: + return MultiIndex.from_arrays(sequences, names=names) + + def _ensure_index(index_like, copy=False): + """ + Ensure that we have an index from some index-like object + + Parameters + ---------- + index : sequence + An Index or other sequence + copy : bool + + Returns + ------- + index : Index or MultiIndex + + Examples + -------- + >>> _ensure_index(['a', 'b']) + Index(['a', 'b'], dtype='object') + + >>> _ensure_index([('a', 'a'), ('b', 'c')]) + Index([('a', 'a'), ('b', 'c')], dtype='object') + + >>> _ensure_index([['a', 'a'], ['b', 'c']]) + MultiIndex(levels=[['a'], ['b', 'c']], + labels=[[0, 0], [0, 1]]) + + See Also + -------- + _ensure_index_from_sequences + """ if isinstance(index_like, Index): if copy: index_like = index_like.copy() diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ea45b4700172f..d7d5b6d128a2c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -91,12 +91,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, raise ValueError('Length of levels and labels must be the same.') if len(levels) == 0: raise ValueError('Must pass non-zero number of levels/labels') - if len(levels) == 1: - if names: - name = names[0] - else: - name = None - return Index(levels[0], name=name, copy=True).take(labels[0]) result = object.__new__(MultiIndex) @@ -1084,10 +1078,6 @@ def from_arrays(cls, arrays, sortorder=None, names=None): MultiIndex.from_product : Make a MultiIndex from cartesian product of iterables """ - if len(arrays) == 1: - name = None if names is None else names[0] - return Index(arrays[0], name=name) - # Check if lengths of all arrays are equal or not, # raise ValueError, if not for i in range(1, len(arrays)): diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 455da9246783c..b4abba8026b35 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -31,7 +31,7 @@ from pandas.core.frame import _shared_docs from pandas.util._decorators import Appender -from pandas.core.index import MultiIndex, _get_na_value +from pandas.core.index import Index, MultiIndex, _get_na_value class _Unstacker(object): @@ -311,10 +311,14 @@ def _unstack_multiple(data, clocs): recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels, xnull=False) - dummy_index = MultiIndex(levels=rlevels + [obs_ids], - labels=rlabels + [comp_ids], - names=rnames + ['__placeholder__'], - verify_integrity=False) + if rlocs == []: + # Everything is in clocs, so the dummy df has a regular index + dummy_index = Index(obs_ids, name='__placeholder__') + else: + dummy_index = MultiIndex(levels=rlevels + [obs_ids], + labels=rlabels + [comp_ids], + names=rnames + ['__placeholder__'], + verify_integrity=False) if isinstance(data, Series): dummy = data.copy() @@ -446,7 +450,12 @@ def _slow_pivot(index, columns, values): def unstack(obj, 
level, fill_value=None): if isinstance(level, (tuple, list)): - return _unstack_multiple(obj, level) + if len(level) != 1: + # _unstack_multiple only handles MultiIndexes, + # and isn't needed for a single level + return _unstack_multiple(obj, level) + else: + level = level[0] if isinstance(obj, DataFrame): if isinstance(obj.index, MultiIndex): diff --git a/pandas/core/sparse/scipy_sparse.py b/pandas/core/sparse/scipy_sparse.py index ea108e3e89935..d2b9583d8efe5 100644 --- a/pandas/core/sparse/scipy_sparse.py +++ b/pandas/core/sparse/scipy_sparse.py @@ -71,7 +71,11 @@ def robust_get_level_values(i): labels_to_i = Series(labels_to_i) if len(subset) > 1: labels_to_i.index = MultiIndex.from_tuples(labels_to_i.index) - labels_to_i.index.names = [index.names[i] for i in subset] + labels_to_i.index.names = [index.names[i] for i in subset] + else: + labels_to_i.index = Index(x[0] for x in labels_to_i.index) + labels_to_i.index.name = index.names[subset[0]] + labels_to_i.name = 'value' return (labels_to_i) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 2f95e510bba5e..48bc2ee05dd68 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1452,7 +1452,12 @@ def cons_row(x): if expand: result = list(result) - return MultiIndex.from_tuples(result, names=name) + out = MultiIndex.from_tuples(result, names=name) + if out.nlevels == 1: + # We had all tuples of length-one, which are + # better represented as a regular Index. + out = out.get_level_values(0) + return out else: return Index(result, name=name) else: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a9821be3fa5e2..8b1a921536a1d 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -23,7 +23,8 @@ is_scalar, is_categorical_dtype) from pandas.core.dtypes.missing import isna from pandas.core.dtypes.cast import astype_nansafe -from pandas.core.index import Index, MultiIndex, RangeIndex +from pandas.core.index import (Index, MultiIndex, RangeIndex, + _ensure_index_from_sequences) from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.categorical import Categorical @@ -1444,7 +1445,8 @@ def _agg_index(self, index, try_parse_dates=True): arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues) arrays.append(arr) - index = MultiIndex.from_arrays(arrays, names=self.index_names) + names = self.index_names + index = _ensure_index_from_sequences(arrays, names) return index @@ -1808,7 +1810,7 @@ def read(self, nrows=None): try_parse_dates=True) arrays.append(values) - index = MultiIndex.from_arrays(arrays) + index = _ensure_index_from_sequences(arrays) if self.usecols is not None: names = self._filter_usecols(names) @@ -3138,9 +3140,8 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None): if index_col is None or index_col is False: index = Index([]) else: - index = [Series([], dtype=dtype[index_name]) - for index_name in index_names] - index = MultiIndex.from_arrays(index, names=index_names) + data = [Series([], dtype=dtype[name]) for name in index_names] + index = _ensure_index_from_sequences(data, names=index_names) index_col.sort() for i, n in enumerate(index_col): columns.pop(n - i) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 07e98c326bcaa..aa32e75ba0d58 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -17,7 +17,7 @@ DataFrame, Float64Index, Int64Index, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, isna) -from pandas.core.index import 
_get_combined_index +from pandas.core.index import _get_combined_index, _ensure_index_from_sequences from pandas.util.testing import assert_almost_equal from pandas.compat.numpy import np_datetime64_compat @@ -2112,3 +2112,19 @@ def test_intersect_str_dates(self): res = i2.intersection(i1) assert len(res) == 0 + + +class TestIndexUtils(object): + + @pytest.mark.parametrize('data, names, expected', [ + ([[1, 2, 3]], None, Index([1, 2, 3])), + ([[1, 2, 3]], ['name'], Index([1, 2, 3], name='name')), + ([['a', 'a'], ['c', 'd']], None, + MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]])), + ([['a', 'a'], ['c', 'd']], ['L1', 'L2'], + MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]], + names=['L1', 'L2'])), + ]) + def test_ensure_index_from_sequences(self, data, names, expected): + result = _ensure_index_from_sequences(data, names) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index c66775f4690cc..798d244468961 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -537,15 +537,12 @@ def test_astype(self): self.index.astype(np.dtype(int)) def test_constructor_single_level(self): - single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]], names=['first']) - assert isinstance(single_level, Index) - assert not isinstance(single_level, MultiIndex) - assert single_level.name == 'first' - - single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]]) - assert single_level.name is None + result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], + labels=[[0, 1, 2, 3]], names=['first']) + assert isinstance(result, MultiIndex) + expected = Index(['foo', 'bar', 'baz', 'qux'], name='first') + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ['first'] def test_constructor_no_levels(self): tm.assert_raises_regex(ValueError, "non-zero number " @@ -768,8 +765,9 @@ def test_from_arrays_empty(self): # 1 level result = MultiIndex.from_arrays(arrays=[[]], names=['A']) + assert isinstance(result, MultiIndex) expected = Index([], name='A') - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result.levels[0], expected) # N levels for N in [2, 3]: @@ -830,7 +828,7 @@ def test_from_product_empty(self): # 1 level result = MultiIndex.from_product([[]], names=['A']) expected = pd.Index([], name='A') - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result.levels[0], expected) # 2 levels l1 = [[], ['foo', 'bar', 'baz'], []] diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 5a17cb6d7dc47..7dac83953ad8f 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1909,7 +1909,11 @@ def keyfunc(x): # convert tuples to index if nentries == 1: + # we have a single level of tuples, i.e. a regular Index index = Index(tuples[0], name=names[0]) + elif nlevels == 1: + name = None if names is None else names[0] + index = Index((x[0] for x in tuples), name=name) else: index = MultiIndex.from_tuples(tuples, names=names) return index From 54f68b425454671ae8fdffd81b82f58ef5b5d9e8 Mon Sep 17 00:00:00 2001 From: cbertinato Date: Thu, 31 Aug 2017 06:24:23 -0400 Subject: [PATCH 120/141] CLN: replace %s syntax with .format in io/formats/css.py, excel.py, printing.py, style.py, and terminal.py (#17387) Progress toward issue #16130. 
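For example, the transformation applied throughout these modules looks like
this (a generic sketch with made-up values rather than a literal line from
the diff):

    border, classes = 1, ['dataframe']
    # before: '%s %s' % (border, ' '.join(classes))
    after = '{border} {cls}'.format(border=border, cls=' '.join(classes))
    assert after == '1 dataframe'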
Converted old string formatting to new string formatting in io/formats/css.py, excel.py, printing.py, style.py, and terminal.py --- pandas/io/formats/css.py | 32 +++++++++++++------------ pandas/io/formats/excel.py | 18 ++++++++------ pandas/io/formats/printing.py | 31 ++++++++++++------------ pandas/io/formats/style.py | 45 ++++++++++++++++++++--------------- pandas/io/formats/terminal.py | 2 +- 5 files changed, 71 insertions(+), 57 deletions(-) diff --git a/pandas/io/formats/css.py b/pandas/io/formats/css.py index d12d2373e1190..429c98b579ca0 100644 --- a/pandas/io/formats/css.py +++ b/pandas/io/formats/css.py @@ -94,12 +94,13 @@ def __call__(self, declarations_str, inherited=None): # 3. TODO: resolve other font-relative units for side in self.SIDES: - prop = 'border-%s-width' % side + prop = 'border-{side}-width'.format(side=side) if prop in props: props[prop] = self.size_to_pt( props[prop], em_pt=font_size, conversions=self.BORDER_WIDTH_RATIOS) - for prop in ['margin-%s' % side, 'padding-%s' % side]: + for prop in ['margin-{side}'.format(side=side), + 'padding-{side}'.format(side=side)]: if prop in props: # TODO: support % props[prop] = self.size_to_pt( @@ -152,7 +153,8 @@ def __call__(self, declarations_str, inherited=None): def size_to_pt(self, in_val, em_pt=None, conversions=UNIT_RATIOS): def _error(): - warnings.warn('Unhandled size: %r' % in_val, CSSWarning) + warnings.warn('Unhandled size: {val!r}'.format(val=in_val), + CSSWarning) return self.size_to_pt('1!!default', conversions=conversions) try: @@ -185,10 +187,10 @@ def _error(): val = round(val, 5) if int(val) == val: - size_fmt = '%d' + size_fmt = '{fmt:d}pt'.format(fmt=int(val)) else: - size_fmt = '%f' - return (size_fmt + 'pt') % val + size_fmt = '{fmt:f}pt'.format(fmt=val) + return size_fmt def atomize(self, declarations): for prop, value in declarations: @@ -215,19 +217,19 @@ def expand(self, prop, value): try: mapping = self.SIDE_SHORTHANDS[len(tokens)] except KeyError: - warnings.warn('Could not expand "%s: %s"' % (prop, value), - CSSWarning) + warnings.warn('Could not expand "{prop}: {val}"' + .format(prop=prop, val=value), CSSWarning) return for key, idx in zip(self.SIDES, mapping): - yield prop_fmt % key, tokens[idx] + yield prop_fmt.format(key), tokens[idx] return expand - expand_border_color = _side_expander('border-%s-color') - expand_border_style = _side_expander('border-%s-style') - expand_border_width = _side_expander('border-%s-width') - expand_margin = _side_expander('margin-%s') - expand_padding = _side_expander('padding-%s') + expand_border_color = _side_expander('border-{:s}-color') + expand_border_style = _side_expander('border-{:s}-style') + expand_border_width = _side_expander('border-{:s}-width') + expand_margin = _side_expander('margin-{:s}') + expand_padding = _side_expander('padding-{:s}') def parse(self, declarations_str): """Generates (prop, value) pairs from declarations @@ -245,4 +247,4 @@ def parse(self, declarations_str): yield prop, val else: warnings.warn('Ill-formatted attribute: expected a colon ' - 'in %r' % decl, CSSWarning) + 'in {decl!r}'.format(decl=decl), CSSWarning) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 80c3880d39dfd..ab689d196f4b6 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -132,10 +132,12 @@ def build_alignment(self, props): def build_border(self, props): return {side: { - 'style': self._border_style(props.get('border-%s-style' % side), - props.get('border-%s-width' % side)), + 'style': 
self._border_style(props.get('border-{side}-style' + .format(side=side)), + props.get('border-{side}-width' + .format(side=side))), 'color': self.color_to_excel( - props.get('border-%s-color' % side)), + props.get('border-{side}-color'.format(side=side))), } for side in ['top', 'right', 'bottom', 'left']} def _border_style(self, style, width): @@ -302,7 +304,8 @@ def color_to_excel(self, val): try: return self.NAMED_COLORS[val] except KeyError: - warnings.warn('Unhandled colour format: %r' % val, CSSWarning) + warnings.warn('Unhandled colour format: {val!r}'.format(val=val), + CSSWarning) class ExcelFormatter(object): @@ -369,7 +372,7 @@ def _format_value(self, val): if lib.isposinf_scalar(val): val = self.inf_rep elif lib.isneginf_scalar(val): - val = '-%s' % self.inf_rep + val = '-{inf}'.format(inf=self.inf_rep) elif self.float_format is not None: val = float(self.float_format % val) return val @@ -434,8 +437,9 @@ def _format_header_regular(self): colnames = self.columns if has_aliases: if len(self.header) != len(self.columns): - raise ValueError('Writing %d cols but got %d aliases' % - (len(self.columns), len(self.header))) + raise ValueError('Writing {cols} cols but got {alias} ' + 'aliases'.format(cols=len(self.columns), + alias=len(self.header))) else: colnames = self.header diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index cbad603630bd3..e0f53f671017a 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -102,9 +102,9 @@ def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): bounds length of printed sequence, depending on options """ if isinstance(seq, set): - fmt = u("{%s}") + fmt = u("{{{body}}}") else: - fmt = u("[%s]") if hasattr(seq, '__setitem__') else u("(%s)") + fmt = u("[{body}]") if hasattr(seq, '__setitem__') else u("({body})") if max_seq_items is False: nitems = len(seq) @@ -123,7 +123,7 @@ def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): elif isinstance(seq, tuple) and len(seq) == 1: body += ',' - return fmt % body + return fmt.format(body=body) def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): @@ -131,10 +131,10 @@ def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): internal. pprinter for iterables. you should probably use pprint_thing() rather then calling this directly. 
""" - fmt = u("{%s}") + fmt = u("{{{things}}}") pairs = [] - pfmt = u("%s: %s") + pfmt = u("{key}: {val}") if max_seq_items is False: nitems = len(seq) @@ -142,16 +142,17 @@ def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): nitems = max_seq_items or get_option("max_seq_items") or len(seq) for k, v in list(seq.items())[:nitems]: - pairs.append(pfmt % - (pprint_thing(k, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds), - pprint_thing(v, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds))) + pairs.append( + pfmt.format( + key=pprint_thing(k, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds), + val=pprint_thing(v, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds))) if nitems < len(seq): - return fmt % (", ".join(pairs) + ", ...") + return fmt.format(things=", ".join(pairs) + ", ...") else: - return fmt % ", ".join(pairs) + return fmt.format(things=", ".join(pairs)) def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False, @@ -221,10 +222,10 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): max_seq_items=max_seq_items) elif isinstance(thing, compat.string_types) and quote_strings: if compat.PY3: - fmt = "'%s'" + fmt = u("'{thing}'") else: - fmt = "u'%s'" - result = fmt % as_escaped_unicode(thing) + fmt = u("u'{thing}'") + result = fmt.format(thing=as_escaped_unicode(thing)) else: result = as_escaped_unicode(thing) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 445fceb4b8146..87d672197be30 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -230,7 +230,7 @@ def format_attr(pair): # ... except maybe the last for columns.names name = self.data.columns.names[r] cs = [BLANK_CLASS if name is None else INDEX_NAME_CLASS, - "level%s" % r] + "level{lvl}".format(lvl=r)] name = BLANK_VALUE if name is None else name row_es.append({"type": "th", "value": name, @@ -240,7 +240,8 @@ def format_attr(pair): if clabels: for c, value in enumerate(clabels[r]): - cs = [COL_HEADING_CLASS, "level%s" % r, "col%s" % c] + cs = [COL_HEADING_CLASS, "level{lvl}".format(lvl=r), + "col{col}".format(col=c)] cs.extend(cell_context.get( "col_headings", {}).get(r, {}).get(c, [])) es = { @@ -264,7 +265,7 @@ def format_attr(pair): for c, name in enumerate(self.data.index.names): cs = [INDEX_NAME_CLASS, - "level%s" % c] + "level{lvl}".format(lvl=c)] name = '' if name is None else name index_header_row.append({"type": "th", "value": name, "class": " ".join(cs)}) @@ -281,7 +282,8 @@ def format_attr(pair): for r, idx in enumerate(self.data.index): row_es = [] for c, value in enumerate(rlabels[r]): - rid = [ROW_HEADING_CLASS, "level%s" % c, "row%s" % r] + rid = [ROW_HEADING_CLASS, "level{lvl}".format(lvl=c), + "row{row}".format(row=r)] es = { "type": "th", "is_visible": _is_visible(r, c, idx_lengths), @@ -298,7 +300,8 @@ def format_attr(pair): row_es.append(es) for c, col in enumerate(self.data.columns): - cs = [DATA_CLASS, "row%s" % r, "col%s" % c] + cs = [DATA_CLASS, "row{row}".format(row=r), + "col{col}".format(col=c)] cs.extend(cell_context.get("data", {}).get(r, {}).get(c, [])) formatter = self._display_funcs[(r, c)] value = self.data.iloc[r, c] @@ -317,7 +320,8 @@ def format_attr(pair): else: props.append(['', '']) cellstyle.append({'props': props, - 'selector': "row%s_col%s" % (r, c)}) + 'selector': "row{row}_col{col}" + .format(row=r, col=c)}) body.append(row_es) return dict(head=head, cellstyle=cellstyle, body=body, uuid=uuid, @@ -512,22 +516,23 @@ def _apply(self, func, axis=0, subset=None, **kwargs): result = 
func(data, **kwargs) if not isinstance(result, pd.DataFrame): raise TypeError( - "Function {!r} must return a DataFrame when " - "passed to `Styler.apply` with axis=None".format(func)) + "Function {func!r} must return a DataFrame when " + "passed to `Styler.apply` with axis=None" + .format(func=func)) if not (result.index.equals(data.index) and result.columns.equals(data.columns)): - msg = ('Result of {!r} must have identical index and columns ' - 'as the input'.format(func)) + msg = ('Result of {func!r} must have identical index and ' + 'columns as the input'.format(func=func)) raise ValueError(msg) result_shape = result.shape expected_shape = self.data.loc[subset].shape if result_shape != expected_shape: - msg = ("Function {!r} returned the wrong shape.\n" - "Result has shape: {}\n" - "Expected shape: {}".format(func, - result.shape, - expected_shape)) + msg = ("Function {func!r} returned the wrong shape.\n" + "Result has shape: {res}\n" + "Expected shape: {expect}".format(func=func, + res=result.shape, + expect=expected_shape)) raise ValueError(msg) self._update_ctx(result) return self @@ -771,7 +776,8 @@ def set_table_styles(self, table_styles): @staticmethod def _highlight_null(v, null_color): - return 'background-color: %s' % null_color if pd.isna(v) else '' + return ('background-color: {color}'.format(color=null_color) + if pd.isna(v) else '') def highlight_null(self, null_color='red'): """ @@ -839,7 +845,8 @@ def _background_gradient(s, cmap='PuBu', low=0, high=0): # https://github.com/matplotlib/matplotlib/issues/5427 normed = norm(s.values) c = [colors.rgb2hex(x) for x in plt.cm.get_cmap(cmap)(normed)] - return ['background-color: %s' % color for color in c] + return ['background-color: {color}'.format(color=color) + for color in c] def set_properties(self, subset=None, **kwargs): """ @@ -1182,6 +1189,6 @@ def _maybe_wrap_formatter(formatter): elif callable(formatter): return formatter else: - msg = "Expected a template string or callable, got {} instead".format( - formatter) + msg = ("Expected a template string or callable, got {formatter} " + "instead".format(formatter=formatter)) raise TypeError(msg) diff --git a/pandas/io/formats/terminal.py b/pandas/io/formats/terminal.py index 30bd1d16b538a..4bcb28fa59b86 100644 --- a/pandas/io/formats/terminal.py +++ b/pandas/io/formats/terminal.py @@ -124,4 +124,4 @@ def ioctl_GWINSZ(fd): if __name__ == "__main__": sizex, sizey = get_terminal_size() - print('width = %s height = %s' % (sizex, sizey)) + print('width = {w} height = {h}'.format(w=sizex, h=sizey)) From b717ebc52c442ee66a1baa2199e1290be152b18e Mon Sep 17 00:00:00 2001 From: Sylvia Date: Thu, 31 Aug 2017 06:35:51 -0400 Subject: [PATCH 121/141] BUG: not correctly using OrderedDict in test_series_apply (#17384) in Python versions <3.6 this syntax will result in an unordered dict --- pandas/tests/series/test_apply.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index e3be5427588b3..d0693984689a6 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -317,9 +317,9 @@ def test_non_callable_aggregates(self): # test when mixed w/ callable reducers result = s.agg(['size', 'count', 'mean']) - expected = Series(OrderedDict({'size': 3.0, - 'count': 2.0, - 'mean': 1.5})) + expected = Series(OrderedDict([('size', 3.0), + ('count', 2.0), + ('mean', 1.5)])) assert_series_equal(result[expected.index], expected)
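For context on this fix: an ``OrderedDict`` only preserves the order in which it receives items, and on Python < 3.6 a dict literal has no defined order, so the old code scrambled the keys before the ``OrderedDict`` ever saw them. A minimal illustration in plain Python, separate from the patch itself:

    from collections import OrderedDict

    # Seeded from a dict literal: on Python < 3.6 the literal itself is
    # unordered, so the items arrive already scrambled.
    maybe_scrambled = OrderedDict({'size': 3.0, 'count': 2.0, 'mean': 1.5})

    # Seeded from a list of (key, value) tuples: insertion order is
    # explicit, so 'size', 'count', 'mean' is kept on every version.
    ordered = OrderedDict([('size', 3.0), ('count', 2.0), ('mean', 1.5)])

From b61af0edbabb26973228f1441f745420be088d95 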
Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 31 Aug 2017 03:37:58 -0700 Subject: [PATCH 122/141] Remove boxplot from _dataframe_apply_whitelist (#17381) --- pandas/core/groupby.py | 11 ++++++----- pandas/tests/groupby/test_whitelist.py | 1 - 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index c23b00dc740a4..248f3b2095a78 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -63,6 +63,8 @@ import pandas.core.common as com from pandas.core.config import option_context +from pandas.plotting._core import boxplot_frame_groupby + from pandas._libs import lib, groupby as libgroupby, Timestamp, NaT, iNaT from pandas._libs.lib import count_level_2d @@ -168,8 +170,9 @@ {'nlargest', 'nsmallest'}) - {'boxplot'}) | frozenset(['dtype', 'unique']) -_dataframe_apply_whitelist = (_common_apply_whitelist | - frozenset(['dtypes', 'corrwith'])) +_dataframe_apply_whitelist = ((_common_apply_whitelist | + frozenset(['dtypes', 'corrwith'])) - + {'boxplot'}) _cython_transforms = frozenset(['cumprod', 'cumsum', 'shift', 'cummin', 'cummax']) @@ -4280,9 +4283,7 @@ def groupby_series(obj, col=None): results.index = _default_index(len(results)) return results - -from pandas.plotting._core import boxplot_frame_groupby # noqa -DataFrameGroupBy.boxplot = boxplot_frame_groupby + boxplot = boxplot_frame_groupby class PanelGroupBy(NDFrameGroupBy): diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index 2c8bf57f20fae..1c5161d2ffb43 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -42,7 +42,6 @@ 'pct_change', 'skew', 'plot', - 'boxplot', 'hist', 'median', 'dtypes', From c80e8d0f47ffc8774d9ead7a06a927766dd55169 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 1 Sep 2017 05:19:30 -0700 Subject: [PATCH 123/141] API: Localize Series when calling to_datetime with utc=True (#6415) (#17109) --- doc/source/whatsnew/v0.21.0.txt | 30 +++++++++++ pandas/core/tools/datetimes.py | 2 +- pandas/io/sql.py | 13 ++--- pandas/tests/indexes/datetimes/test_tools.py | 56 ++++++++++++++++---- pandas/tests/io/test_sql.py | 23 +++++--- pandas/tests/test_multilevel.py | 2 +- 6 files changed, 101 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 29d57df43e941..9fca1041bc4b6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -299,6 +299,36 @@ length 2+ levels, so a :class:`MultiIndex` is always returned from all of the pd.MultiIndex.from_tuples([('a',), ('b',)]) +.. _whatsnew_0210.api.utc_localization_with_series: + +UTC Localization with Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime` will correctly localize ``Series`` with a ``datetime64[ns, UTC]`` dtype to be consistent with how list-like and ``Index`` data are handled. (:issue:`6415`). + + Previous Behavior + + .. ipython:: python + + s = Series(['20130101 00:00:00'] * 3) + + .. code-block:: ipython + + In [12]: pd.to_datetime(s, utc=True) + Out[12]: + 0 2013-01-01 + 1 2013-01-01 + 2 2013-01-01 + dtype: datetime64[ns] + + New Behavior + + .. 
ipython:: python + + pd.to_datetime(s, utc=True) + +Additionally, DataFrames with datetime columns that were parsed by :func:`read_sql_table` and :func:`read_sql_query` will also be localized to UTC only if the original SQL columns were timezone aware datetime columns. + .. _whatsnew_0210.api: Other API Changes diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index c0f234a36803d..9ff0275a7c370 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -516,7 +516,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): result = arg elif isinstance(arg, ABCSeries): from pandas import Series - values = _convert_listlike(arg._values, False, format) + values = _convert_listlike(arg._values, True, format) result = Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, MutableMapping)): result = _assemble_from_unit_mappings(arg, errors=errors) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 9aa47e5c69850..9c6d01d236c57 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -99,24 +99,24 @@ def _convert_params(sql, params): return args -def _handle_date_column(col, format=None): +def _handle_date_column(col, utc=None, format=None): if isinstance(format, dict): return to_datetime(col, errors='ignore', **format) else: if format in ['D', 's', 'ms', 'us', 'ns']: - return to_datetime(col, errors='coerce', unit=format, utc=True) + return to_datetime(col, errors='coerce', unit=format, utc=utc) elif (issubclass(col.dtype.type, np.floating) or issubclass(col.dtype.type, np.integer)): # parse dates as timestamp format = 's' if format is None else format - return to_datetime(col, errors='coerce', unit=format, utc=True) + return to_datetime(col, errors='coerce', unit=format, utc=utc) elif is_datetime64tz_dtype(col): # coerce to UTC timezone # GH11216 return (to_datetime(col, errors='coerce') .astype('datetime64[ns, UTC]')) else: - return to_datetime(col, errors='coerce', format=format, utc=True) + return to_datetime(col, errors='coerce', format=format, utc=utc) def _parse_date_columns(data_frame, parse_dates): @@ -821,8 +821,9 @@ def _harmonize_columns(self, parse_dates=None): if (col_type is datetime or col_type is date or col_type is DatetimeTZDtype): - self.frame[col_name] = _handle_date_column(df_col) - + # Convert tz-aware Datetime SQL columns to UTC + utc = col_type is DatetimeTZDtype + self.frame[col_name] = _handle_date_column(df_col, utc=utc) elif col_type is float: # floats support NA, can always convert! 
self.frame[col_name] = df_col.astype(col_type, copy=False) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 50669ee357bbd..089d74a1d69b8 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -260,15 +260,53 @@ def test_to_datetime_tz_pytz(self): dtype='datetime64[ns, UTC]', freq=None) tm.assert_index_equal(result, expected) - def test_to_datetime_utc_is_true(self): - # See gh-11934 - start = pd.Timestamp('2014-01-01', tz='utc') - end = pd.Timestamp('2014-01-03', tz='utc') - date_range = pd.bdate_range(start, end) - - result = pd.to_datetime(date_range, utc=True) - expected = pd.DatetimeIndex(data=date_range) - tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("init_constructor, end_constructor, test_method", + [(Index, DatetimeIndex, tm.assert_index_equal), + (list, DatetimeIndex, tm.assert_index_equal), + (np.array, DatetimeIndex, tm.assert_index_equal), + (Series, Series, tm.assert_series_equal)]) + def test_to_datetime_utc_true(self, + init_constructor, + end_constructor, + test_method): + # See gh-11934 & gh-6415 + data = ['20100102 121314', '20100102 121315'] + expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'), + pd.Timestamp('2010-01-02 12:13:15', tz='utc')] + + result = pd.to_datetime(init_constructor(data), + format='%Y%m%d %H%M%S', + utc=True) + expected = end_constructor(expected_data) + test_method(result, expected) + + # Test scalar case as well + for scalar, expected in zip(data, expected_data): + result = pd.to_datetime(scalar, format='%Y%m%d %H%M%S', utc=True) + assert result == expected + + def test_to_datetime_utc_true_with_series_single_value(self): + # GH 15760 UTC=True with Series + ts = 1.5e18 + result = pd.to_datetime(pd.Series([ts]), utc=True) + expected = pd.Series([pd.Timestamp(ts, tz='utc')]) + tm.assert_series_equal(result, expected) + + def test_to_datetime_utc_true_with_series_tzaware_string(self): + ts = '2013-01-01 00:00:00-01:00' + expected_ts = '2013-01-01 01:00:00' + data = pd.Series([ts] * 3) + result = pd.to_datetime(data, utc=True) + expected = pd.Series([pd.Timestamp(expected_ts, tz='utc')] * 3) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('date, dtype', + [('2013-01-01 01:00:00', 'datetime64[ns]'), + ('2013-01-01 01:00:00', 'datetime64[ns, UTC]')]) + def test_to_datetime_utc_true_with_series_datetime_ns(self, date, dtype): + expected = pd.Series([pd.Timestamp('2013-01-01 01:00:00', tz='UTC')]) + result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True) + tm.assert_series_equal(result, expected) def test_to_datetime_tz_psycopg2(self): diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a7c42391effe6..93eb0ff0ac1f2 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -602,7 +602,7 @@ def test_execute_sql(self): tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) def test_date_parsing(self): - # Test date parsing in read_sq + # Test date parsing in read_sql # No Parsing df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn) assert not issubclass(df.DateCol.dtype.type, np.datetime64) @@ -1271,11 +1271,13 @@ def check(col): # "2000-01-01 00:00:00-08:00" should convert to # "2000-01-01 08:00:00" - assert col[0] == Timestamp('2000-01-01 08:00:00', tz='UTC') - # "2000-06-01 00:00:00-07:00" should convert to # "2000-06-01 07:00:00" - assert col[1] == Timestamp('2000-06-01 07:00:00', tz='UTC') + # GH 6415 + 
expected_data = [Timestamp('2000-01-01 08:00:00', tz='UTC'), + Timestamp('2000-06-01 07:00:00', tz='UTC')] + expected = Series(expected_data, name=col.name) + tm.assert_series_equal(col, expected) else: raise AssertionError("DateCol loaded with incorrect type " @@ -1298,6 +1300,9 @@ def check(col): self.conn, parse_dates=['DateColWithTz']) if not hasattr(df, 'DateColWithTz'): pytest.skip("no column with datetime with time zone") + col = df.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + assert str(col.dt.tz) == 'UTC' check(df.DateColWithTz) df = pd.concat(list(pd.read_sql_query("select * from types_test_data", @@ -1307,9 +1312,9 @@ def check(col): assert is_datetime64tz_dtype(col.dtype) assert str(col.dt.tz) == 'UTC' expected = sql.read_sql_table("types_test_data", self.conn) - tm.assert_series_equal(df.DateColWithTz, - expected.DateColWithTz - .astype('datetime64[ns, UTC]')) + col = expected.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz) # xref #7139 # this might or might not be converted depending on the postgres driver @@ -1388,8 +1393,10 @@ def test_datetime_date(self): df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) df.to_sql('test_date', self.conn, index=False) res = read_sql_table('test_date', self.conn) + result = res['a'] + expected = to_datetime(df['a']) # comes back as datetime64 - tm.assert_series_equal(res['a'], to_datetime(df['a'])) + tm.assert_series_equal(result, expected) def test_datetime_time(self): # test support for datetime.time diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index a765e2c4ca1bf..6976fe162c5d5 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2137,7 +2137,7 @@ def test_set_index_datetime(self): '2011-07-19 08:00:00', '2011-07-19 09:00:00'], 'value': range(6)}) df.index = pd.to_datetime(df.pop('datetime'), utc=True) - df.index = df.index.tz_localize('UTC').tz_convert('US/Pacific') + df.index = df.index.tz_convert('US/Pacific') expected = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00', From 3a0dc929441dc91c0d656c6249ae5610d4e36a22 Mon Sep 17 00:00:00 2001 From: jschendel Date: Fri, 1 Sep 2017 08:52:44 -0600 Subject: [PATCH 124/141] TST: Enable tests in test_tools.py (#17405) Enabled tests that currently aren't running. Small fix to make sure all tests pass. Verified that the raised messages match expectations for TestToDatetimeUnit::test_frame. 
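For context, the reason these tests were silently not running: pytest only collects test classes whose names match its default ``python_classes`` pattern (``Test*``), so the un-prefixed classes renamed in the diff below were never collected. A minimal illustration, separate from the patch itself:

    # Under pytest's default collection rules only the second class runs.

    class TimeConversionFormats(object):       # no "Test" prefix: skipped
        def test_would_fail(self):
            assert False                       # never executed by pytest

    class TestTimeConversionFormats(object):   # collected and run
        def test_passes(self):
            assert True
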
--- pandas/core/tools/datetimes.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 24 +++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 9ff0275a7c370..9dde26f43ad33 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -605,7 +605,7 @@ def f(value): if len(excess): raise ValueError("extra keys have been passed " "to the datetime assemblage: " - "[{excess}]".format(','.join(excess=excess))) + "[{excess}]".format(excess=','.join(excess))) def coerce(values): # we allow coercion to if errors allows diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 089d74a1d69b8..5152c1019d8de 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -25,7 +25,7 @@ compat) -class TimeConversionFormats(object): +class TestTimeConversionFormats(object): def test_to_datetime_format(self): values = ['1/1/2000', '1/2/2000', '1/3/2000'] @@ -372,7 +372,7 @@ def test_datetime_invalid_datatype(self): pd.to_datetime(pd.to_datetime) -class ToDatetimeUnit(object): +class TestToDatetimeUnit(object): def test_unit(self): # GH 11758 @@ -566,7 +566,10 @@ def test_dataframe(self): df2 = DataFrame({'year': [2015, 2016], 'month': [2, 20], 'day': [4, 5]}) - with pytest.raises(ValueError): + + msg = ("cannot assemble the datetimes: time data .+ does not " + "match format '%Y%m%d' \(match\)") + with tm.assert_raises_regex(ValueError, msg): to_datetime(df2) result = to_datetime(df2, errors='coerce') expected = Series([Timestamp('20150204 00:00:00'), @@ -574,26 +577,31 @@ def test_dataframe(self): assert_series_equal(result, expected) # extra columns - with pytest.raises(ValueError): + msg = ("extra keys have been passed to the datetime assemblage: " + "\[foo\]") + with tm.assert_raises_regex(ValueError, msg): df2 = df.copy() df2['foo'] = 1 to_datetime(df2) # not enough + msg = ('to assemble mappings requires at least that \[year, month, ' + 'day\] be specified: \[.+\] is missing') for c in [['year'], ['year', 'month'], ['year', 'month', 'second'], ['month', 'day'], ['year', 'day', 'second']]: - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): to_datetime(df[c]) # duplicates + msg = 'cannot assemble with duplicate keys' df2 = DataFrame({'year': [2015, 2016], 'month': [2, 20], 'day': [4, 5]}) df2.columns = ['year', 'year', 'day'] - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): to_datetime(df2) df2 = DataFrame({'year': [2015, 2016], @@ -601,7 +609,7 @@ def test_dataframe(self): 'day': [4, 5], 'hour': [4, 5]}) df2.columns = ['year', 'month', 'day', 'day'] - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): to_datetime(df2) def test_dataframe_dtypes(self): @@ -632,7 +640,7 @@ def test_dataframe_dtypes(self): to_datetime(df) -class ToDatetimeMisc(object): +class TestToDatetimeMisc(object): def test_index_to_datetime(self): idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) From 365f2fe95dd781b760b77738c4055cfb531ee9a5 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Fri, 1 Sep 2017 17:36:00 +0100 Subject: [PATCH 125/141] TST: remove tests and docs for legacy (pre 0.12) hdf5 support (#17404) --- doc/source/io.rst | 38 ------------------ doc/source/whatsnew/v0.21.0.txt | 2 + .../tests/io/data/legacy_hdf/legacy_0.10.h5 | Bin 238321 -> 0 bytes .../io/data/legacy_hdf/legacy_table_0.11.h5 | Bin 293877 
-> 0 bytes pandas/tests/io/test_pytables.py | 34 +--------------- 5 files changed, 3 insertions(+), 71 deletions(-) delete mode 100644 pandas/tests/io/data/legacy_hdf/legacy_0.10.h5 delete mode 100644 pandas/tests/io/data/legacy_hdf/legacy_table_0.11.h5 diff --git a/doc/source/io.rst b/doc/source/io.rst index e338407361705..f55c72bae5a20 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4419,44 +4419,6 @@ Now you can import the ``DataFrame`` into R: starting point if you have stored multiple ``DataFrame`` objects to a single HDF5 file. -Backwards Compatibility -''''''''''''''''''''''' - -0.10.1 of ``HDFStore`` can read tables created in a prior version of pandas, -however query terms using the -prior (undocumented) methodology are unsupported. ``HDFStore`` will -issue a warning if you try to use a legacy-format file. You must -read in the entire file and write it out using the new format, using the -method ``copy`` to take advantage of the updates. The group attribute -``pandas_version`` contains the version information. ``copy`` takes a -number of options, please see the docstring. - - -.. ipython:: python - :suppress: - - import os - legacy_file_path = os.path.abspath('source/_static/legacy_0.10.h5') - -.. ipython:: python - :okwarning: - - # a legacy store - legacy_store = pd.HDFStore(legacy_file_path,'r') - legacy_store - - # copy (and return the new handle) - new_store = legacy_store.copy('store_new.h5') - new_store - new_store.close() - -.. ipython:: python - :suppress: - - legacy_store.close() - import os - os.remove('store_new.h5') - Performance ''''''''''' diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9fca1041bc4b6..f632f64a2841e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -372,6 +372,8 @@ Removal of prior version deprecations/changes - ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) - :func:`eval` and :func:`DataFrame.eval` have changed the default of ``inplace`` from ``None`` to ``False`` (:issue:`11149`) - The function ``get_offset_name`` has been dropped in favor of the ``.freqstr`` attribute for an offset (:issue:`11834`) +- pandas no longer tests for compatibility with hdf5-files created with pandas < 0.11 (:issue:`17404`). + .. 
_whatsnew_0210.performance:

diff --git a/pandas/tests/io/data/legacy_hdf/legacy_0.10.h5 b/pandas/tests/io/data/legacy_hdf/legacy_0.10.h5
deleted file mode 100644
index b1439ef16361abbc0756fbf7d344fd65d8a1a473..0000000000000000000000000000000000000000
GIT binary patch
[base85 binary patch data omitted: deletes pandas/tests/io/data/legacy_hdf/legacy_0.10.h5 (238321 bytes) and pandas/tests/io/data/legacy_hdf/legacy_table_0.11.h5 (293877 bytes)]
diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index f33ba7627101e..b5ecc4d34cd08 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4599,41 +4599,13 @@ def test_legacy_table_read(self): expected = df2[df2.index > df2.index[2]] assert_frame_equal(expected, result) - def test_legacy_0_10_read(self): - # legacy from 0.10 - with catch_warnings(record=True): - path = tm.get_data_path('legacy_hdf/legacy_0.10.h5') - with ensure_clean_store(path, mode='r') as store: - str(store) - for k in store.keys(): - store.select(k) - - def test_legacy_0_11_read(self): - # legacy from 0.11 - path = os.path.join('legacy_hdf', 'legacy_table_0.11.h5') - with ensure_clean_store(tm.get_data_path(path), mode='r') as store: - str(store) - assert 'df' in store - assert 'df1' in store - assert 'mi' in store - df = store.select('df') - df1 = store.select('df1') - mi = store.select('mi') - assert isinstance(df, DataFrame) - assert isinstance(df1, DataFrame) - assert isinstance(mi, DataFrame) - def test_copy(self): with catch_warnings(record=True): - def do_copy(f=None, new_f=None, keys=None, + def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): try: - if f is None: - f = tm.get_data_path(os.path.join('legacy_hdf', - 'legacy_0.10.h5')) - store = HDFStore(f, 'r') if new_f is None: @@ -4671,10 +4643,6 @@ def do_copy(f=None, new_f=None, keys=None, pass safe_remove(new_f) - do_copy() - do_copy(keys=['/a', '/b', '/df1_mixed']) - do_copy(propindexes=False) - # new table df = tm.makeDataFrame() From d99432366c429026f0c6c748ded5d9b553960021 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 1 Sep 2017 10:11:40 -0700 Subject: [PATCH 126/141] Tslib unused (#17402) --- pandas/_libs/tslib.pyx | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 5dd30072fb7aa..50e0b77c6d3a0 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -2622,8 +2622,6 @@ cdef class _Timedelta(timedelta): int ndim if isinstance(other, _Timedelta): - if isinstance(other, _NaT): - return _cmp_nat_dt(other, self, _reverse_ops[op]) ots = other elif isinstance(other, timedelta): ots = Timedelta(other) @@ -3882,7 +3880,7 @@ fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond', 'nanosecond', 'week', 'dayofyear', 'weekofyear', 'days_in_month', 'daysinmonth', 'dayofweek', 'weekday_name', 'days', 'seconds', 'microseconds', - 'nanoseconds', 'qyear', 'quarter'] + 'nanoseconds', 'qyear'] for field in fields: prop = property(fget=lambda self: np.nan) setattr(NaTType, field, prop) @@ -4620,7 +4618,6 @@ def build_field_sarray(ndarray[int64_t] dtindex): """ cdef: Py_ssize_t i, count = 0 - int isleap pandas_datetimestruct dts ndarray[int32_t] years, months, days, hours, minutes, seconds, mus @@ -5270,7 +5267,6 @@ cpdef _isleapyear_arr(ndarray years): def monthrange(int64_t year, int64_t month): cdef: int64_t days - int64_t day_of_week if month < 1 or month > 12: raise ValueError("bad month number 0; must be 1-12") From e94e572455fb4fa817f8f6326a173a08786ef1b7 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Sat, 2 Sep 2017 12:50:55 +0100 Subject: [PATCH 127/141] DOC: Cleaned references to pandas ` .. 
ipython:: python diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 3c6572229802d..4652ccbf0ad34 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -73,7 +73,7 @@ index is passed, one will be created having values ``[0, ..., len(data) - 1]``. .. note:: - Starting in v0.8.0, pandas supports non-unique index values. If an operation + pandas supports non-unique index values. If an operation that does not support duplicate index values is attempted, an exception will be raised at that time. The reason for being lazy is nearly all performance-based (there are many instances in computations, like parts of GroupBy, where the index @@ -698,7 +698,7 @@ DataFrame in tabular form, though it won't always fit the console width: print(baseball.iloc[-20:, :12].to_string()) -New since 0.10.0, wide DataFrames will now be printed across multiple rows by +Wide DataFrames will be printed across multiple rows by default: .. ipython:: python @@ -845,19 +845,16 @@ DataFrame objects with mixed-type columns, all of the data will get upcasted to .. note:: - Unfortunately Panel, being less commonly used than Series and DataFrame, + Panel, being less commonly used than Series and DataFrame, has been slightly neglected feature-wise. A number of methods and options - available in DataFrame are not available in Panel. This will get worked - on, of course, in future releases. And faster if you join me in working on - the codebase. + available in DataFrame are not available in Panel. .. _dsintro.to_panel: From DataFrame using ``to_panel`` method ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This method was introduced in v0.7 to replace ``LongPanel.to_long``, and converts -a DataFrame with a two-level index to a Panel. +``to_panel`` converts a DataFrame with a two-level index to a Panel. .. ipython:: python :okwarning: diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 937d682d238b3..53c0b771555f8 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -140,7 +140,7 @@ columns: In [5]: grouped = df.groupby(get_letter_type, axis=1) -Starting with 0.8, pandas Index objects now support duplicate values. If a +pandas Index objects support duplicate values. If a non-unique index is used as the group key in a groupby operation, all values for the same index value will be considered to be in one group and thus the output of aggregation functions will only contain unique index values: @@ -288,8 +288,6 @@ chosen level: s.sum(level='second') -.. versionadded:: 0.6 - Grouping with multiple levels is supported. .. ipython:: python diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 53a259ad6eb15..4687e46490562 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -66,8 +66,6 @@ See the :ref:`cookbook` for some advanced strategies Different Choices for Indexing ------------------------------ -.. versionadded:: 0.11.0 - Object selection has had a number of user-requested additions in order to support more explicit location based indexing. Pandas now supports three types of multi-axis indexing. diff --git a/doc/source/io.rst b/doc/source/io.rst index f55c72bae5a20..f68358764a40e 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -364,7 +364,7 @@ warn_bad_lines : boolean, default ``True`` Specifying column data types '''''''''''''''''''''''''''' -Starting with v0.10, you can indicate the data type for the whole DataFrame or +You can indicate the data type for the whole DataFrame or individual columns: .. 
ipython:: python @@ -3346,7 +3346,7 @@ Read/Write API '''''''''''''' ``HDFStore`` supports an top-level API using ``read_hdf`` for reading and ``to_hdf`` for writing, -similar to how ``read_csv`` and ``to_csv`` work. (new in 0.11.0) +similar to how ``read_csv`` and ``to_csv`` work. .. ipython:: python @@ -3791,7 +3791,7 @@ indexed dimension as the ``where``. .. note:: - Indexes are automagically created (starting ``0.10.1``) on the indexables + Indexes are automagically created on the indexables and any data columns you specify. This behavior can be turned off by passing ``index=False`` to ``append``. @@ -3878,7 +3878,7 @@ create a new table!) Iterator ++++++++ -Starting in ``0.11.0``, you can pass, ``iterator=True`` or ``chunksize=number_in_a_chunk`` +You can pass ``iterator=True`` or ``chunksize=number_in_a_chunk`` to ``select`` and ``select_as_multiple`` to return an iterator on the results. The default is 50,000 rows returned in a chunk. @@ -3986,8 +3986,8 @@ of rows in an object. Multiple Table Queries ++++++++++++++++++++++ -New in 0.10.1 are the methods ``append_to_multiple`` and -``select_as_multiple``, that can perform appending/selecting from +The methods ``append_to_multiple`` and +``select_as_multiple`` can perform appending/selecting from multiple tables at once. The idea is to have one table (call it the selector table) that you index most/all of the columns, and perform your queries. The other table(s) are data tables with an index matching the @@ -4291,7 +4291,7 @@ Pass ``min_itemsize`` on the first table creation to a-priori specify the minimu ``min_itemsize`` can be an integer, or a dict mapping a column name to an integer. You can pass ``values`` as a key to allow all *indexables* or *data_columns* to have this min_itemsize. -Starting in 0.11.0, passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. +Passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. .. note:: diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index d54288baa389b..64a321d67a825 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -67,9 +67,8 @@ arise and we wish to also consider that "missing" or "not available" or "NA". .. note:: - Prior to version v0.10.0 ``inf`` and ``-inf`` were also - considered to be "NA" in computations. This is no longer the case by - default; use the ``mode.use_inf_as_na`` option to recover it. + If you want to consider ``inf`` and ``-inf`` to be "NA" in computations, + you can set ``pandas.options.mode.use_inf_as_na = True``. .. _missing.isna: @@ -485,8 +484,8 @@ respectively: Replacing Generic Values ~~~~~~~~~~~~~~~~~~~~~~~~ -Often times we want to replace arbitrary values with other values. New in v0.8 -is the ``replace`` method in Series/DataFrame that provides an efficient yet +Often times we want to replace arbitrary values with other values. The +``replace`` method in Series/DataFrame provides an efficient yet flexible way to perform such replacements. For a Series, you can replace a single value or a list of values by another diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index ce4a920ad77b5..aded5e4402df2 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1069,8 +1069,7 @@ Offset Aliases ~~~~~~~~~~~~~~ A number of string aliases are given to useful common time series -frequencies. 
We will refer to these aliases as *offset aliases* -(referred to as *time rules* prior to v0.8.0). +frequencies. We will refer to these aliases as *offset aliases*. .. csv-table:: :header: "Alias", "Description" diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index fb799c642131d..c637246537ca1 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -306,8 +306,6 @@ subplots: df.diff().hist(color='k', alpha=0.5, bins=50) -.. versionadded:: 0.10.0 - The ``by`` keyword can be specified to plot grouped histograms: .. ipython:: python @@ -831,8 +829,6 @@ and take a :class:`Series` or :class:`DataFrame` as an argument. Scatter Matrix Plot ~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.7.3 - You can create a scatter plot matrix using the ``scatter_matrix`` method in ``pandas.plotting``: @@ -859,8 +855,6 @@ You can create a scatter plot matrix using the Density Plot ~~~~~~~~~~~~ -.. versionadded:: 0.8.0 - You can create density plots using the :meth:`Series.plot.kde` and :meth:`DataFrame.plot.kde` methods. .. ipython:: python From 6a02ffa51c4361a26e1bc6cbe3f06cb05118419f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 4 Sep 2017 16:32:34 -0700 Subject: [PATCH 128/141] Remove unused _day and _month attrs (#17431) closes #17429 --- pandas/_libs/tslib.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 50e0b77c6d3a0..8fbc606ccdfe2 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -829,8 +829,6 @@ class NaTType(_NaT): cdef _NaT base base = _NaT.__new__(cls, 1, 1, 1) - base._day = -1 - base._month = -1 base.value = NPY_NAT return base From 519c57f6a7baeaad14235a02f195c7d95ed42f5e Mon Sep 17 00:00:00 2001 From: topper-123 Date: Tue, 5 Sep 2017 11:30:31 +0100 Subject: [PATCH 129/141] DOC: Clean-up references to v12 to v14 (both included) (#17420) --- doc/source/advanced.rst | 21 ++--------- doc/source/basics.rst | 10 +----- doc/source/comparison_with_r.rst | 4 --- doc/source/cookbook.rst | 2 +- doc/source/enhancingperf.rst | 36 ++++++------------- doc/source/groupby.rst | 19 ---------- doc/source/indexing.rst | 23 ++---------- doc/source/install.rst | 2 +- doc/source/io.rst | 61 +++++++++----------------------- doc/source/merging.rst | 2 -- doc/source/missing_data.rst | 9 ----- doc/source/options.rst | 2 +- doc/source/text.rst | 2 -- doc/source/timedeltas.rst | 2 -- doc/source/timeseries.rst | 10 +++--- doc/source/visualization.rst | 16 --------- 16 files changed, 43 insertions(+), 178 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 711c3e9a95d05..4af476cd5a7e1 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -270,9 +270,6 @@ Passing a list of labels or tuples works similar to reindexing: Using slicers ~~~~~~~~~~~~~ -.. versionadded:: 0.14.0 - -In 0.14.0 we added a new way to slice multi-indexed objects. You can slice a multi-index by providing multiple indexers. You can provide any of the selectors as if you are indexing by label, see :ref:`Selection by Label `, @@ -384,7 +381,7 @@ selecting data at a particular level of a MultiIndex easier. .. ipython:: python - # using the slicers (new in 0.14.0) + # using the slicers df.loc[(slice(None),'one'),:] You can also select on the columns with :meth:`~pandas.MultiIndex.xs`, by @@ -397,7 +394,7 @@ providing the axis argument .. 
ipython:: python - # using the slicers (new in 0.14.0) + # using the slicers df.loc[:,(slice(None),'one')] :meth:`~pandas.MultiIndex.xs` also allows selection with multiple keys @@ -408,11 +405,9 @@ providing the axis argument .. ipython:: python - # using the slicers (new in 0.14.0) + # using the slicers df.loc[:,('bar','one')] -.. versionadded:: 0.13.0 - You can pass ``drop_level=False`` to :meth:`~pandas.MultiIndex.xs` to retain the level that was selected @@ -743,16 +738,6 @@ Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``ND Float64Index ~~~~~~~~~~~~ -.. note:: - - As of 0.14.0, ``Float64Index`` is backed by a native ``float64`` dtype - array. Prior to 0.14.0, ``Float64Index`` was backed by an ``object`` dtype - array. Using a ``float64`` dtype in the backend speeds up arithmetic - operations by about 30x and boolean indexing operations on the - ``Float64Index`` itself are about 2x as fast. - -.. versionadded:: 0.13.0 - By default a ``Float64Index`` will be automatically created when passing floating, or mixed-integer-floating values in index creation. This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the same. diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 35eb14eda238f..5880703b1d271 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -347,7 +347,7 @@ That is because NaNs do not compare as equals: np.nan == np.nan -So, as of v0.13.1, NDFrames (such as Series, DataFrames, and Panels) +So, NDFrames (such as Series, DataFrames, and Panels) have an :meth:`~DataFrame.equals` method for testing equality, with NaNs in corresponding locations treated as equal. @@ -1104,10 +1104,6 @@ Applying with a ``Panel`` will pass a ``Series`` to the applied function. If the function returns a ``Series``, the result of the application will be a ``Panel``. If the applied function reduces to a scalar, the result of the application will be a ``DataFrame``. -.. note:: - - Prior to 0.13.1 ``apply`` on a ``Panel`` would only work on ``ufuncs`` (e.g. ``np.sum/np.max``). - .. ipython:: python import pandas.util.testing as tm @@ -1800,8 +1796,6 @@ Series has the :meth:`~Series.searchsorted` method, which works similar to smallest / largest values ~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14.0 - ``Series`` has the :meth:`~Series.nsmallest` and :meth:`~Series.nlargest` methods which return the smallest or largest :math:`n` values. For a large ``Series`` this can be much faster than sorting the entire Series and calling ``head(n)`` on the result. @@ -2168,8 +2162,6 @@ Selecting columns based on ``dtype`` .. _basics.selectdtypes: -.. versionadded:: 0.14.1 - The :meth:`~DataFrame.select_dtypes` method implements subsetting of columns based on their ``dtype``. diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst index 194e022e34c7c..f895cdc25e620 100644 --- a/doc/source/comparison_with_r.rst +++ b/doc/source/comparison_with_r.rst @@ -247,8 +247,6 @@ For more details and examples see :ref:`the reshaping documentation |subset|_ ~~~~~~~~~~ -.. versionadded:: 0.13 - The :meth:`~pandas.DataFrame.query` method is similar to the base R ``subset`` function. In R you might want to get the rows of a ``data.frame`` where one column's values are less than another column's values: @@ -277,8 +275,6 @@ For more details and examples see :ref:`the query documentation |with|_ ~~~~~~~~ -.. 
versionadded:: 0.13 - An expression using a data.frame called ``df`` in R with the columns ``a`` and ``b`` would be evaluated using ``with`` like so: diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 32e7a616fe856..f51c3e679b36f 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -818,7 +818,7 @@ The :ref:`Concat ` docs. The :ref:`Join ` d df1 = pd.DataFrame(np.random.randn(6, 3), index=rng, columns=['A', 'B', 'C']) df2 = df1.copy() -ignore_index is needed in pandas < v0.13, and depending on df construction +Depending on df construction, ``ignore_index`` may be needed .. ipython:: python diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst index 685a8690a53d5..264bd1de1fc77 100644 --- a/doc/source/enhancingperf.rst +++ b/doc/source/enhancingperf.rst @@ -213,17 +213,18 @@ the rows, applying our ``integrate_f_typed``, and putting this in the zeros arra .. warning:: - In 0.13.0 since ``Series`` has internaly been refactored to no longer sub-class ``ndarray`` - but instead subclass ``NDFrame``, you can **not pass** a ``Series`` directly as a ``ndarray`` typed parameter - to a cython function. Instead pass the actual ``ndarray`` using the ``.values`` attribute of the Series. + You can **not pass** a ``Series`` directly as a ``ndarray`` typed parameter + to a cython function. Instead pass the actual ``ndarray`` using the + ``.values`` attribute of the Series. The reason is that the cython + definition is specific to an ndarray and not the passed Series. - Prior to 0.13.0 + So, do not do this: .. code-block:: python apply_integrate_f(df['a'], df['b'], df['N']) - Use ``.values`` to get the underlying ``ndarray`` + But rather, use ``.values`` to get the underlying ``ndarray`` .. code-block:: python @@ -399,10 +400,8 @@ Read more in the `numba docs `__. .. _enhancingperf.eval: -Expression Evaluation via :func:`~pandas.eval` (Experimental) -------------------------------------------------------------- - -.. versionadded:: 0.13 +Expression Evaluation via :func:`~pandas.eval` +----------------------------------------------- The top-level function :func:`pandas.eval` implements expression evaluation of :class:`~pandas.Series` and :class:`~pandas.DataFrame` objects. @@ -539,10 +538,8 @@ Now let's do the same thing but with comparisons: of type ``bool`` or ``np.bool_``. Again, you should perform these kinds of operations in plain Python. -The ``DataFrame.eval`` method (Experimental) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. versionadded:: 0.13 +The ``DataFrame.eval`` method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In addition to the top level :func:`pandas.eval` function you can also evaluate an expression in the "context" of a :class:`~pandas.DataFrame`. @@ -646,19 +643,6 @@ whether the query modifies the original frame. Local Variables ~~~~~~~~~~~~~~~ -In pandas version 0.14 the local variable API has changed. In pandas 0.13.x, -you could refer to local variables the same way you would in standard Python. -For example, - -.. code-block:: python - - df = pd.DataFrame(np.random.randn(5, 2), columns=['a', 'b']) - newcol = np.random.randn(len(df)) - df.eval('b + newcol') - - UndefinedVariableError: name 'newcol' is not defined - -As you can see from the exception generated, this syntax is no longer allowed. You must *explicitly reference* any local variable that you want to use in an expression by placing the ``@`` character in front of the name. 
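For illustration, a minimal sketch of the ``@`` rule just described (the frame and the local variable are toy values, not taken from the surrounding docs; without the ``@`` prefix the expression raises ``UndefinedVariableError``, as the removed doc text below shows):

.. code-block:: python

   import numpy as np
   import pandas as pd

   df = pd.DataFrame(np.random.randn(5, 2), columns=['a', 'b'])
   newcol = np.random.randn(len(df))  # a local variable, not a column

   # prefix the local with '@'; a bare ``newcol`` inside the expression
   # would not be found, since eval only sees column names by default
   df.eval('b + @newcol')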
For example, diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 53c0b771555f8..e1231b9a4a200 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -766,8 +766,6 @@ missing values with the ``ffill()`` method. Filtration ---------- -.. versionadded:: 0.12 - The ``filter`` method returns a subset of the original object. Suppose we want to take only elements that belong to groups with a group sum greater than 2. @@ -858,8 +856,6 @@ In this example, we chopped the collection of time series into yearly chunks then independently called :ref:`fillna ` on the groups. -.. versionadded:: 0.14.1 - The ``nlargest`` and ``nsmallest`` methods work on ``Series`` style groupbys: .. ipython:: python @@ -1048,19 +1044,6 @@ Just like for a DataFrame or Series you can call head and tail on a groupby: This shows the first or last n rows from each group. -.. warning:: - - Before 0.14.0 this was implemented with a fall-through apply, - so the result would incorrectly respect the as_index flag: - - .. code-block:: python - - >>> g.head(1): # was equivalent to g.apply(lambda x: x.head(1)) - A B - A - 1 0 1 2 - 5 2 5 6 - .. _groupby.nth: Taking the nth row of each group @@ -1113,8 +1096,6 @@ You can also select multiple rows from each group by specifying multiple nth val Enumerate group items ~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.13.0 - To see the order in which each row appears within its group, use the ``cumcount`` method: diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 4687e46490562..a6e7df57be4e5 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -248,8 +248,6 @@ as an attribute: - In any of these cases, standard indexing will still work, e.g. ``s['1']``, ``s['min']``, and ``s['index']`` will access the corresponding element or column. - - The ``Series/Panel`` accesses are available starting in 0.13.0. - If you are using the IPython environment, you may also use tab-completion to see these accessible attributes. @@ -529,7 +527,6 @@ Out of range slice indexes are handled gracefully just as in Python/Numpy. .. ipython:: python # these are allowed in python/numpy. - # Only works in Pandas starting from v0.14.0. x = list('abcdef') x x[4:10] @@ -539,14 +536,8 @@ Out of range slice indexes are handled gracefully just as in Python/Numpy. s.iloc[4:10] s.iloc[8:10] -.. note:: - - Prior to v0.14.0, ``iloc`` would not accept out of bounds indexers for - slices, e.g. a value that exceeds the length of the object being indexed. - - -Note that this could result in an empty axis (e.g. an empty DataFrame being -returned) +Note that using slices that go out of bounds can result in +an empty axis (e.g. an empty DataFrame being returned) .. ipython:: python @@ -745,8 +736,6 @@ Finally, one can also set a seed for ``sample``'s random number generator using Setting With Enlargement ------------------------ -.. versionadded:: 0.13 - The ``.loc/[]`` operations can perform enlargement when setting a non-existant key for that axis. In the ``Series`` case this is effectively an appending operation @@ -1020,8 +1009,6 @@ partial setting via ``.loc`` (but on the contents rather than the axis labels) df2[ df2[1:4] > 0 ] = 3 df2 -.. versionadded:: 0.13 - Where can also accept ``axis`` and ``level`` parameters to align the input when performing the ``where``. @@ -1064,8 +1051,6 @@ as condition and ``other`` argument. The :meth:`~pandas.DataFrame.query` Method (Experimental) --------------------------------------------------------- -.. 
versionadded:: 0.13 - :class:`~pandas.DataFrame` objects have a :meth:`~pandas.DataFrame.query` method that allows selection using an expression. @@ -1506,8 +1491,6 @@ The name, if set, will be shown in the console display: Setting metadata ~~~~~~~~~~~~~~~~ -.. versionadded:: 0.13.0 - Indexes are "mostly immutable", but it is possible to set and change their metadata, like the index ``name`` (or, for ``MultiIndex``, ``levels`` and ``labels``). @@ -1790,7 +1773,7 @@ Evaluation order matters Furthermore, in chained expressions, the order may determine whether a copy is returned or not. If an expression will set values on a copy of a slice, then a ``SettingWithCopy`` -exception will be raised (this raise/warn behavior is new starting in 0.13.0) +warning will be issued. You can control the action of a chained assignment via the option ``mode.chained_assignment``, which can take the values ``['raise','warn',None]``, where showing a warning is the default. diff --git a/doc/source/install.rst b/doc/source/install.rst index f92c43839ee31..8dc8224ea6cb2 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -107,7 +107,7 @@ following command:: To install a specific pandas version:: - conda install pandas=0.13.1 + conda install pandas=0.20.3 To install other packages, IPython for example:: diff --git a/doc/source/io.rst b/doc/source/io.rst index f68358764a40e..33523ea171f3a 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1310,8 +1310,6 @@ column widths for contiguous columns: The parser will take care of extra white spaces around the columns so it's ok to have extra separation between the columns in the file. -.. versionadded:: 0.13.0 - By default, ``read_fwf`` will try to infer the file's ``colspecs`` by using the first 100 rows of the file. It can do it only in cases when the columns are aligned and correctly separated by the provided ``delimiter`` (default delimiter @@ -1407,8 +1405,7 @@ Reading columns with a ``MultiIndex`` By specifying list of row locations for the ``header`` argument, you can read in a ``MultiIndex`` for the columns. Specifying non-consecutive -rows will skip the intervening rows. In order to have the pre-0.13 behavior -of tupleizing columns, specify ``tupleize_cols=True``. +rows will skip the intervening rows. .. ipython:: python @@ -1418,7 +1415,7 @@ of tupleizing columns, specify ``tupleize_cols=True``. print(open('mi.csv').read()) pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1]) -Starting in 0.13.0, ``read_csv`` will be able to interpret a more common format +``read_csv`` is also able to interpret a more common format of multi-columns indices. .. ipython:: python @@ -2012,8 +2009,6 @@ The speedup is less noticeable for smaller datasets: Normalization ''''''''''''' -.. versionadded:: 0.13.0 - pandas provides a utility function to take a dict or list of dicts and *normalize* this semi-structured data into a flat table. @@ -2198,8 +2193,6 @@ Reading HTML Content We **highly encourage** you to read the :ref:`HTML Table Parsing gotchas ` below regarding the issues surrounding the BeautifulSoup4/html5lib/lxml parsers. -.. versionadded:: 0.12.0 - The top-level :func:`~pandas.io.html.read_html` function can accept an HTML string/file/URL and will parse HTML tables into list of pandas DataFrames. Let's look at a few examples. 
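As a minimal sketch of the simplest case (this assumes an HTML parser such as lxml or BeautifulSoup4 is installed, and relies on the colspan expansion and header inference added earlier in this patch series):

.. code-block:: python

   import pandas as pd

   html = '''<table>
     <tr><th>A</th><th colspan="2">B</th></tr>
     <tr><td>1</td><td>2</td><td>3</td></tr>
   </table>'''

   # the <th> row is inferred as the header and the colspan is expanded,
   # even though the table has no explicit <thead>
   [df] = pd.read_html(html)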
@@ -2653,10 +2646,6 @@ of sheet names can simply be passed to ``read_excel`` with no loss in performanc # equivalent using the read_excel function data = read_excel('path_to_file.xls', ['Sheet1', 'Sheet2'], index_col=None, na_values=['NA']) -.. versionadded:: 0.12 - -``ExcelFile`` has been moved to the top level namespace. - .. versionadded:: 0.17 ``read_excel`` can take an ``ExcelFile`` object as input @@ -2716,9 +2705,6 @@ Using a list to get multiple sheets: ``read_excel`` can read more than one sheet, by setting ``sheet_name`` to either a list of sheet names, a list of sheet positions, or ``None`` to read all sheets. - -.. versionadded:: 0.13 - Sheets can be specified by sheet index or sheet name, using an integer or string, respectively. @@ -2866,9 +2852,9 @@ Files with a ``.xls`` extension will be written using ``xlwt`` and those with a ``.xlsx`` extension will be written using ``xlsxwriter`` (if available) or ``openpyxl``. -The DataFrame will be written in a way that tries to mimic the REPL output. One -difference from 0.12.0 is that the ``index_label`` will be placed in the second -row instead of the first. You can get the previous behaviour by setting the +The DataFrame will be written in a way that tries to mimic the REPL output. +The ``index_label`` will be placed in the second +row instead of the first. You can place it in the first row by setting the ``merge_cells`` option in ``to_excel()`` to ``False``: .. code-block:: python @@ -2945,8 +2931,6 @@ Added support for Openpyxl >= 2.2 Excel writer engines '''''''''''''''''''' -.. versionadded:: 0.13 - ``pandas`` chooses an Excel writer via two methods: 1. the ``engine`` keyword argument @@ -3074,14 +3058,19 @@ any pickled pandas object (or any other pickled object) from file: Loading pickled data received from untrusted sources can be unsafe. - See: http://docs.python.org/2.7/library/pickle.html + See: https://docs.python.org/3.6/library/pickle.html .. warning:: - Several internal refactorings, 0.13 (:ref:`Series Refactoring `), and 0.15 (:ref:`Index Refactoring `), - preserve compatibility with pickles created prior to these versions. However, these must - be read with ``pd.read_pickle``, rather than the default python ``pickle.load``. - See `this question `__ + Several internal refactorings have been done while still preserving + compatibility with pickles created with older versions of pandas. However, + for such cases, pickled dataframes, series etc, must be read with + ``pd.read_pickle``, rather than ``pickle.load``. + + See `here `__ + and `here `__ + for some examples of compatibility-breaking changes. See + `this question `__ for a detailed explanation. .. _io.pickle.compression: @@ -3150,9 +3139,7 @@ The default is to 'infer msgpack ------- -.. versionadded:: 0.13.0 - -Starting in 0.13.0, pandas is supporting the ``msgpack`` format for +pandas supports the ``msgpack`` format for object serialization. This is a lightweight portable binary format, similar to binary JSON, that is highly space efficient, and provides good performance both on the writing (serialization), and reading (deserialization). @@ -3424,10 +3411,6 @@ This is also true for the major axis of a ``Panel``: Fixed Format '''''''''''' -.. note:: - - This was prior to 0.13.0 the ``Storer`` format. - The examples above show storing using ``put``, which write the HDF5 to ``PyTables`` in a fixed array format, called the ``fixed`` format. These types of stores are **not** appendable once written (though you can simply remove them and rewrite). 
Nor are they **queryable**; they must be @@ -3460,8 +3443,6 @@ other sessions. In addition, delete & query type operations are supported. This format is specified by ``format='table'`` or ``format='t'`` to ``append`` or ``put`` or ``to_hdf`` -.. versionadded:: 0.13 - This format can be set as an option as well ``pd.set_option('io.hdf.default_format','table')`` to enable ``put/append/to_hdf`` to by default store in the ``table`` format. @@ -3765,9 +3746,7 @@ space. These are in terms of the total number of rows in a table. Using timedelta64[ns] +++++++++++++++++++++ -.. versionadded:: 0.13 - -Beginning in 0.13.0, you can store and query using the ``timedelta64[ns]`` type. Terms can be +You can store and query using the ``timedelta64[ns]`` type. Terms can be specified in the format: ``()``, where float may be signed (and fractional), and unit can be ``D,s,ms,us,ns`` for the timedelta. Here's an example: @@ -3889,8 +3868,6 @@ The default is 50,000 rows returned in a chunk. .. note:: - .. versionadded:: 0.12.0 - You can also use the iterator with ``read_hdf`` which will open, then automatically close the store when finished iterating. @@ -4603,8 +4580,6 @@ included in Python's standard library by default. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. -.. versionadded:: 0.14.0 - If SQLAlchemy is not installed, a fallback is only provided for sqlite (and for mysql for backwards compatibility, but this is deprecated and will be removed in a future version). @@ -4937,8 +4912,6 @@ Full documentation can be found `here `__ Stata Format ------------ -.. versionadded:: 0.12.0 - .. _io.stata_writer: Writing to Stata format diff --git a/doc/source/merging.rst b/doc/source/merging.rst index d956f1ca54e6b..a5ee1b1a9384c 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -1053,8 +1053,6 @@ As you can see, this drops any rows where there was no match. Joining a single Index to a Multi-index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14.0 - You can join a singly-indexed ``DataFrame`` with a level of a multi-indexed ``DataFrame``. The level will match on the name of the index of the singly-indexed frame against a level name of the multi-indexed frame. diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 64a321d67a825..65b411ccd4af2 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -263,8 +263,6 @@ and ``bfill()`` is equivalent to ``fillna(method='bfill')`` Filling with a PandasObject ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.12 - You can also fillna using a dict or Series that is alignable. The labels of the dict or index of the Series must match the columns of the frame you wish to fill. The use case of this is to fill a DataFrame with the mean of that column. @@ -280,8 +278,6 @@ use case of this is to fill a DataFrame with the mean of that column. dff.fillna(dff.mean()) dff.fillna(dff.mean()['B':'C']) -.. versionadded:: 0.13 - Same result as above, but is aligning the 'fill' value which is a Series in this case. @@ -320,11 +316,6 @@ examined :ref:`in the API `. Interpolation ~~~~~~~~~~~~~ -.. versionadded:: 0.13.0 - - :meth:`~pandas.DataFrame.interpolate`, and :meth:`~pandas.Series.interpolate` have - revamped interpolation methods and functionality. - .. versionadded:: 0.17.0 The ``limit_direction`` keyword argument was added. 
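A minimal sketch of the keyword described in that hunk (toy data; behaviour as documented for pandas >= 0.17.0):

.. code-block:: python

   import numpy as np
   import pandas as pd

   ser = pd.Series([np.nan, 1.0, np.nan, np.nan, 3.0, np.nan])

   ser.interpolate(limit=1)                          # fills forward only
   ser.interpolate(limit=1, limit_direction='both')  # fills in both directions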
diff --git a/doc/source/options.rst b/doc/source/options.rst index 51d02bc89692a..1592caf90546c 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -306,7 +306,7 @@ display.float_format None The callable should accept a fl See core.format.EngFormatter for an example. display.large_repr truncate For DataFrames exceeding max_rows/max_cols, the repr (and HTML repr) can show - a truncated table (the default from 0.13), + a truncated table (the default), or switch to the view from df.info() (the behaviour in earlier versions of pandas). allowable settings, ['truncate', 'info'] diff --git a/doc/source/text.rst b/doc/source/text.rst index e3e4b24d17f44..85b8aa6aa1857 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -211,8 +211,6 @@ Extracting Substrings Extract first match in each subject (extract) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. versionadded:: 0.13.0 - .. warning:: In version 0.18.0, ``extract`` gained the ``expand`` argument. When diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst index 07effcfdff33b..daa2c262c8c86 100644 --- a/doc/source/timedeltas.rst +++ b/doc/source/timedeltas.rst @@ -242,8 +242,6 @@ Numeric reduction operation for ``timedelta64[ns]`` will return ``Timedelta`` ob Frequency Conversion -------------------- -.. versionadded:: 0.13 - Timedelta Series, ``TimedeltaIndex``, and ``Timedelta`` scalars can be converted to other 'frequencies' by dividing by another timedelta, or by astyping to a specific timedelta type. These operations yield Series and propagate ``NaT`` -> ``nan``. Note that division by the numpy scalar is true division, while astyping is equivalent of floor division. diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index aded5e4402df2..c86c58c3183f6 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -177,7 +177,7 @@ you can pass the ``dayfirst`` flag: .. note:: Specifying a ``format`` argument will potentially speed up the conversion - considerably and on versions later then 0.13.0 explicitly specifying + considerably and explicitly specifying a format string of '%Y%m%d' takes a faster path still. If you pass a single string to ``to_datetime``, it returns single ``Timestamp``. @@ -1946,9 +1946,11 @@ These can easily be converted to a ``PeriodIndex`` Time Zone Handling ------------------ -Pandas provides rich support for working with timestamps in different time zones using ``pytz`` and ``dateutil`` libraries. -``dateutil`` support is new in 0.14.1 and currently only supported for fixed offset and tzfile zones. The default library is ``pytz``. -Support for ``dateutil`` is provided for compatibility with other applications e.g. if you use ``dateutil`` in other python packages. +Pandas provides rich support for working with timestamps in different time +zones using ``pytz`` and ``dateutil`` libraries. ``dateutil`` currently is only +supported for fixed offset and tzfile zones. The default library is ``pytz``. +Support for ``dateutil`` is provided for compatibility with other +applications e.g. if you use ``dateutil`` in other python packages. Working with Time Zones ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index c637246537ca1..839390c8778aa 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -512,8 +512,6 @@ Compare to: Area Plot ~~~~~~~~~ -.. versionadded:: 0.14 - You can create area plots with :meth:`Series.plot.area` and :meth:`DataFrame.plot.area`. 
Area plots are stacked by default. To produce stacked area plot, each column must be either all positive or all negative values. @@ -550,8 +548,6 @@ To produce an unstacked plot, pass ``stacked=False``. Alpha value is set to 0.5 Scatter Plot ~~~~~~~~~~~~ -.. versionadded:: 0.13 - Scatter plot can be drawn by using the :meth:`DataFrame.plot.scatter` method. Scatter plot requires numeric columns for x and y axis. These can be specified by ``x`` and ``y`` keywords each. @@ -619,8 +615,6 @@ See the :meth:`scatter ` method and the Hexagonal Bin Plot ~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14 - You can create hexagonal bin plots with :meth:`DataFrame.plot.hexbin`. Hexbin plots can be a useful alternative to scatter plots if your data are too dense to plot each point individually. @@ -682,8 +676,6 @@ See the :meth:`hexbin ` method and the Pie plot ~~~~~~~~ -.. versionadded:: 0.14 - You can create a pie plot with :meth:`DataFrame.plot.pie` or :meth:`Series.plot.pie`. If your data includes any ``NaN``, they will be automatically filled with 0. A ``ValueError`` will be raised if there are any negative values in your data. @@ -1365,8 +1357,6 @@ Another option is passing an ``ax`` argument to :meth:`Series.plot` to plot on a Plotting With Error Bars ~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14 - Plotting with error bars is now supported in the :meth:`DataFrame.plot` and :meth:`Series.plot` Horizontal and vertical errorbars can be supplied to the ``xerr`` and ``yerr`` keyword arguments to :meth:`~DataFrame.plot()`. The error values can be specified using a variety of formats. @@ -1407,8 +1397,6 @@ Here is an example of one way to easily plot group means with standard deviation Plotting Tables ~~~~~~~~~~~~~~~ -.. versionadded:: 0.14 - Plotting with matplotlib table is now supported in :meth:`DataFrame.plot` and :meth:`Series.plot` with a ``table`` keyword. The ``table`` keyword can accept ``bool``, :class:`DataFrame` or :class:`Series`. The simple way to draw a table is to specify ``table=True``. Data will be transposed to meet matplotlib's default layout. .. ipython:: python @@ -1585,10 +1573,6 @@ available in matplotlib. Although this formatting does not provide the same level of refinement you would get when plotting via pandas, it can be faster when plotting a large number of points. -.. note:: - - The speed up for large data sets only applies to pandas 0.14.0 and later. - .. 
ipython:: python :suppress: From f22b8954d0bd8777cc15f5e49e77bc821b0bd0b4 Mon Sep 17 00:00:00 2001 From: s-weigand Date: Wed, 6 Sep 2017 14:03:39 +0200 Subject: [PATCH 130/141] BUG: Plotting Timedelta on y-axis #16953 (#17430) * implemented fix for GH issue #16953 * added tests for fix of issue #16953 * changed comments for git issue to pandas style GH# * changed linelength in tests, so all lines are less than 80 characters * added whatsnew entry * swaped conversion and filtering of values, for plot to also work with object dtypes * refomated code, so len(line) < 80 * changed whatsnew with timedelta and datetime dtypes * added support for datetimetz and extended tests * added reason to pytest.mark.xfail --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/plotting/_core.py | 8 ++- pandas/tests/plotting/test_frame.py | 76 +++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f632f64a2841e..e2b109ea17486 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -433,7 +433,7 @@ I/O Plotting ^^^^^^^^ - Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`) - +- Bug when plotting ``timedelta`` and ``datetime`` dtypes on y-axis (:issue:`16953`) Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index e5b9497993172..a0b7e93efd05c 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -342,7 +342,13 @@ def _compute_plot_data(self): label = 'None' data = data.to_frame(name=label) - numeric_data = data._convert(datetime=True)._get_numeric_data() + # GH16953, _convert is needed as fallback, for ``Series`` + # with ``dtype == object`` + data = data._convert(datetime=True, timedelta=True) + numeric_data = data.select_dtypes(include=[np.number, + "datetime", + "datetimetz", + "timedelta"]) try: is_empty = numeric_data.empty diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 67098529a0111..f3b287a8889c3 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -380,6 +380,82 @@ def test_subplots_timeseries(self): self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) + def test_subplots_timeseries_y_axis(self): + # GH16953 + data = {"numeric": np.array([1, 2, 5]), + "timedelta": [pd.Timedelta(-10, unit="s"), + pd.Timedelta(10, unit="m"), + pd.Timedelta(10, unit="h")], + "datetime_no_tz": [pd.to_datetime("2017-08-01 00:00:00"), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00")], + "datetime_all_tz": [pd.to_datetime("2017-08-01 00:00:00", + utc=True), + pd.to_datetime("2017-08-01 02:00:00", + utc=True), + pd.to_datetime("2017-08-02 00:00:00", + utc=True)], + "text": ["This", "should", "fail"]} + testdata = DataFrame(data) + + ax_numeric = testdata.plot(y="numeric") + assert (ax_numeric.get_lines()[0].get_data()[1] == + testdata["numeric"].values).all() + ax_timedelta = testdata.plot(y="timedelta") + assert (ax_timedelta.get_lines()[0].get_data()[1] == + testdata["timedelta"].values).all() + ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") + assert (ax_datetime_no_tz.get_lines()[0].get_data()[1] == + testdata["datetime_no_tz"].values).all() + ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") + assert (ax_datetime_all_tz.get_lines()[0].get_data()[1] == + testdata["datetime_all_tz"].values).all() + with 
pytest.raises(TypeError):
+            testdata.plot(y="text")
+
+    @pytest.mark.xfail(reason='not supported for period, categorical, '
+                              'datetime_mixed_tz')
+    def test_subplots_timeseries_y_axis_not_supported(self):
+        """
+        This test will fail for:
+            period:
+                since period isn't yet implemented in ``select_dtypes``
+                and because it will need a custom value converter +
+                tick formatter (as was done for x-axis plots)
+
+            categorical:
+                because it will need a custom value converter +
+                tick formatter (also doesn't work for x-axis, as of now)
+
+            datetime_mixed_tz:
+                because of the way pandas handles ``Series`` of
+                ``datetime`` objects with different timezones;
+                generally, converting ``datetime`` objects to a tz-aware
+                form could help with this problem
+        """
+        data = {"numeric": np.array([1, 2, 5]),
+                "period": [pd.Period('2017-08-01 00:00:00', freq='H'),
+                           pd.Period('2017-08-01 02:00', freq='H'),
+                           pd.Period('2017-08-02 00:00:00', freq='H')],
+                "categorical": pd.Categorical(["c", "b", "a"],
+                                              categories=["a", "b", "c"],
+                                              ordered=False),
+                "datetime_mixed_tz": [pd.to_datetime("2017-08-01 00:00:00",
+                                                     utc=True),
+                                      pd.to_datetime("2017-08-01 02:00:00"),
+                                      pd.to_datetime("2017-08-02 00:00:00")]}
+        testdata = pd.DataFrame(data)
+        ax_period = testdata.plot(x="numeric", y="period")
+        assert (ax_period.get_lines()[0].get_data()[1] ==
+                testdata["period"].values).all()
+        ax_categorical = testdata.plot(x="numeric", y="categorical")
+        assert (ax_categorical.get_lines()[0].get_data()[1] ==
+                testdata["categorical"].values).all()
+        ax_datetime_mixed_tz = testdata.plot(x="numeric",
+                                             y="datetime_mixed_tz")
+        assert (ax_datetime_mixed_tz.get_lines()[0].get_data()[1] ==
+                testdata["datetime_mixed_tz"].values).all()
+
     @pytest.mark.slow
     def test_subplots_layout(self):
         # GH 6667

From 8edd85aa0ba2cc0a0f8e610d442d81e7b11e88d4 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Wed, 6 Sep 2017 08:23:06 -0400
Subject: [PATCH 131/141] COMPAT: handle pyarrow deprecation of
 timestamps_to_ms in .from_pandas with pyarrow < 0.6.0 (#17447)

closes #17438
---
 ci/requirements-3.5.sh          |  2 +-
 doc/source/whatsnew/v0.21.0.txt |  2 +-
 pandas/io/parquet.py            | 18 ++++++++++++++----
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/ci/requirements-3.5.sh b/ci/requirements-3.5.sh
index 33db9c28c78a9..d694ad3679ac1 100644
--- a/ci/requirements-3.5.sh
+++ b/ci/requirements-3.5.sh
@@ -8,4 +8,4 @@ echo "install 35"
 conda remove -n pandas python-dateutil --force
 pip install python-dateutil

-conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1
+conda install -n pandas -c conda-forge feather-format pyarrow=0.5.0
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index e2b109ea17486..6214236b41e7c 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -126,7 +126,7 @@ Other Enhancements
 - :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`)
 - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`)
 - :func:`read_html` handles colspan and rowspan arguments and attempts to infer a header if the header is not explicitly specified (:issue:`17054`)
-- Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here <io.parquet>`.
+- Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here <io.parquet>`.
(:issue:`15838`, :issue:`17438`) - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) - `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). - :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`). diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 09603fd6fdcce..4b507b7f5df6f 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -58,13 +58,23 @@ def __init__(self): "\nor via pip\n" "pip install -U pyarrow\n") + self._pyarrow_lt_050 = LooseVersion(pyarrow.__version__) < '0.5.0' + self._pyarrow_lt_060 = LooseVersion(pyarrow.__version__) < '0.6.0' self.api = pyarrow - def write(self, df, path, compression='snappy', **kwargs): + def write(self, df, path, compression='snappy', + coerce_timestamps='ms', **kwargs): path, _, _ = get_filepath_or_buffer(path) - table = self.api.Table.from_pandas(df, timestamps_to_ms=True) - self.api.parquet.write_table( - table, path, compression=compression, **kwargs) + if self._pyarrow_lt_060: + table = self.api.Table.from_pandas(df, timestamps_to_ms=True) + self.api.parquet.write_table( + table, path, compression=compression, **kwargs) + + else: + table = self.api.Table.from_pandas(df) + self.api.parquet.write_table( + table, path, compression=compression, + coerce_timestamps=coerce_timestamps, **kwargs) def read(self, path): path, _, _ = get_filepath_or_buffer(path) From 047727ac90f990d085e56be91701a1b394c3ef78 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Wed, 6 Sep 2017 15:55:12 +0100 Subject: [PATCH 132/141] DOC/TST: Add examples to MultiIndex.get_level_values + related changes (#17414) --- pandas/core/indexes/base.py | 12 ++++++++++-- pandas/core/indexes/multi.py | 23 +++++++++++++++++++++-- pandas/tests/indexes/test_base.py | 6 ++++++ 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6a30eaefaaae7..a9098126a38e3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2529,15 +2529,23 @@ def set_value(self, arr, key, value): def _get_level_values(self, level): """ Return an Index of values for requested level, equal to the length - of the index + of the index. Parameters ---------- - level : int + level : int or str + ``level`` is either the integer position of the level in the + MultiIndex, or the name of the level. Returns ------- values : Index + ``self``, as there is only one level in the Index. + + See also + --------- + pandas.MultiIndex.get_level_values : get values for a level of a + MultiIndex """ self._validate_index_level(level) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d7d5b6d128a2c..8b2cf0e7c0b40 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -882,15 +882,34 @@ def _get_level_values(self, level): def get_level_values(self, level): """ Return vector of label values for requested level, - equal to the length of the index + equal to the length of the index. Parameters ---------- - level : int or level name + level : int or str + ``level`` is either the integer position of the level in the + MultiIndex, or the name of the level. Returns ------- values : Index + ``values`` is a level of this MultiIndex converted to + a single :class:`Index` (or subclass thereof). 
+ + Examples + --------- + + Create a MultiIndex: + + >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def'))) + >>> mi.names = ['level_1', 'level_2'] + + Get level values by supplying level as either integer or name: + + >>> mi.get_level_values(0) + Index(['a', 'b', 'c'], dtype='object', name='level_1') + >>> mi.get_level_values('level_2') + Index(['d', 'e', 'f'], dtype='object', name='level_2') """ level = self._get_level_number(level) values = self._get_level_values(level) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index aa32e75ba0d58..f96dbdcfb8acf 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1438,6 +1438,12 @@ def test_get_level_values(self): result = self.strIndex.get_level_values(0) tm.assert_index_equal(result, self.strIndex) + # test for name (GH 17414) + index_with_name = self.strIndex.copy() + index_with_name.name = 'a' + result = index_with_name.get_level_values('a') + tm.assert_index_equal(result, index_with_name) + def test_slice_keep_name(self): idx = Index(['a', 'b'], name='asdf') assert idx.name == idx[1:].name From 91a2300fd88165374e1dc4991440bf359b947392 Mon Sep 17 00:00:00 2001 From: John Owens Date: Wed, 6 Sep 2017 14:57:23 -0700 Subject: [PATCH 133/141] documentation changes only --- pandas/io/html.py | 60 +++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 9467a32fe670e..9a2a5ec153db6 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -204,7 +204,7 @@ def _parse_tables(self, doc, match, attrs): Parameters ---------- - doc : tree-like + doc : parser object The DOM from which to parse the table element. match : str or regular expression @@ -221,7 +221,7 @@ def _parse_tables(self, doc, match, attrs): Returns ------- - tables : list of node-like + tables : list of HTML table elements A list of elements to be parsed into raw data. """ raise AbstractMethodError(self) @@ -231,7 +231,7 @@ def _text_getter(self, obj): Parameters ---------- - obj : node-like + obj : an HTML element A DOM node. Returns @@ -246,16 +246,16 @@ def _equals_tag(self, obj, tag): Parameters ---------- - obj : node-like + obj : an HTML element A DOM node. - tag : string + tag : str Tag to be checked for equality Returns ------- boolean - Does the object match tag 'tag'? + boolean indicating if the object is equal to tag 'tag' """ raise AbstractMethodError(self) @@ -265,16 +265,16 @@ def _contains_tag(self, obj, tag): Parameters ---------- - obj : node-like + obj : an HTML element A DOM node. - tag : string + tag : str Tag to be found in this DOM Returns ------- boolean - Does the object contain tag 'tag'? + boolean indicating if the object contains tag 'tag' """ raise AbstractMethodError(self) @@ -283,11 +283,11 @@ def _extract_td(self, obj): Parameters ---------- - obj : node-like + obj : an HTML row element Returns ------- - columns : list of node-like + columns : list of HTML td elements These are the elements of each row, i.e., the columns. """ raise AbstractMethodError(self) @@ -297,13 +297,13 @@ def _extract_tr(self, table): Parameters ---------- - table : node-like + table : an HTML table element A table element that contains row elements. Returns ------- - rows : list of node-like - A list row elements of a table, usually or or ... element. 
""" raise AbstractMethodError(self) @@ -327,12 +327,12 @@ def _extract_tbody(self, table): Parameters ---------- - table : node-like + table : an HTML table element A table element that contains row elements. Returns ------- - tbody : node-like + tbody : an HTML tbody element A ... element. """ raise AbstractMethodError(self) @@ -342,12 +342,12 @@ def _extract_tfoot(self, table): Parameters ---------- - table : node-like + table : an HTML table element A table element that contains row elements. Returns ------- - tfoot : node-like + tfoot : an HTML tfoot element A ... element. """ raise AbstractMethodError(self) @@ -384,17 +384,17 @@ def _parse_raw_thead_tbody_tfoot(self, table_html): Parameters ---------- - table_html : node-like + table_html : an HTML table element A single table element Returns ------- - header, body, footer - header : list of list of node-like + tuple of (header, body, footer) + header : list of list of HTML header elements List of rows, each of which is a list of parsed header elements - body : list of list of node-like + body : list of list of HTML body elements List of rows, each of which is a list of parsed body elements - footer : list of list of node-like + footer : list of list of HTML footer elements List of rows, each of which is a list of parsed footer elements """ header_rows = [] @@ -441,8 +441,8 @@ def _expand_colspan_rowspan(self, rows, fill_rowspan=True): Parameters ---------- - rows : list of list of node-like - List of rows, each of which is a list of nodes + rows : list of list of HTML td elements + List of rows, each of which is a list of elements in that row fill_rowspan : boolean Should a rowspan fill every item in the rowspan (True) or only the @@ -450,9 +450,9 @@ def _expand_colspan_rowspan(self, rows, fill_rowspan=True): Returns ------- - res : list of list of node-like - List of rows, each of which is a list of nodes, respecting - colspan/rowspan + res : list of list of HTML td elements + List of rows, each of which is a list of elements in that row, + respecting colspan/rowspan """ res = [] saved_span = [] From 41058ab9a224c8782806243c030e30df83d5f0c1 Mon Sep 17 00:00:00 2001 From: John Owens Date: Thu, 7 Sep 2017 09:00:13 -0700 Subject: [PATCH 134/141] documentation changes only --- pandas/io/html.py | 61 ++++++++++++++++------------------------------- 1 file changed, 21 insertions(+), 40 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 9a2a5ec153db6..d49058ec67d94 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -204,8 +204,7 @@ def _parse_tables(self, doc, match, attrs): Parameters ---------- - doc : parser object - The DOM from which to parse the table element. + doc : the DOM from which to parse the table element. match : str or regular expression The text to search for in the DOM tree. @@ -231,8 +230,7 @@ def _text_getter(self, obj): Parameters ---------- - obj : an HTML element - A DOM node. + obj : a DOM node. Returns ------- @@ -246,8 +244,7 @@ def _equals_tag(self, obj, tag): Parameters ---------- - obj : an HTML element - A DOM node. + obj : a DOM node. tag : str Tag to be checked for equality @@ -265,8 +262,7 @@ def _contains_tag(self, obj, tag): Parameters ---------- - obj : an HTML element - A DOM node. + obj : a DOM node. tag : str Tag to be found in this DOM @@ -287,8 +283,7 @@ def _extract_td(self, obj): Returns ------- - columns : list of HTML td elements - These are the elements of each row, i.e., the columns. 
From 41058ab9a224c8782806243c030e30df83d5f0c1 Mon Sep 17 00:00:00 2001
From: John Owens
Date: Thu, 7 Sep 2017 09:00:13 -0700
Subject: [PATCH 134/141] documentation changes only

---
 pandas/io/html.py | 61 ++++++++++++++++-------------------------------
 1 file changed, 21 insertions(+), 40 deletions(-)

diff --git a/pandas/io/html.py b/pandas/io/html.py
index 9a2a5ec153db6..d49058ec67d94 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -204,8 +204,7 @@ def _parse_tables(self, doc, match, attrs):
 
         Parameters
         ----------
-        doc : parser object
-            The DOM from which to parse the table element.
+        doc : the DOM from which to parse the table element.
 
         match : str or regular expression
             The text to search for in the DOM tree.
@@ -231,8 +230,7 @@ def _text_getter(self, obj):
 
         Parameters
         ----------
-        obj : an HTML element
-            A DOM node.
+        obj : a DOM node.
 
         Returns
         -------
@@ -246,8 +244,7 @@ def _equals_tag(self, obj, tag):
 
         Parameters
         ----------
-        obj : an HTML element
-            A DOM node.
+        obj : a DOM node.
 
         tag : str
             Tag to be checked for equality
@@ -265,8 +262,7 @@ def _contains_tag(self, obj, tag):
 
         Parameters
         ----------
-        obj : an HTML element
-            A DOM node.
+        obj : a DOM node.
 
         tag : str
             Tag to be found in this DOM
@@ -287,8 +283,7 @@ def _extract_td(self, obj):
 
         Returns
         -------
-        columns : list of HTML td elements
-            These are the elements of each row, i.e., the columns.
+        columns : list of HTML td elements (i.e., the columns in the row)
         """
         raise AbstractMethodError(self)
@@ -297,13 +292,11 @@ def _extract_tr(self, table):
 
         Parameters
         ----------
-        table : an HTML table element
-            A table element that contains row elements.
+        table : a table element that contains row elements.
 
         Returns
         -------
-        rows : list of HTML row elements
-            A list of row elements of a table, usually <tr> or <th> elements.
+        rows : a list of row elements of a table, usually <tr> or <th> elements.
         """
         raise AbstractMethodError(self)
@@ -312,13 +305,11 @@ def _extract_thead(self, table):
 
         Parameters
         ----------
-        table : an HTML table element
-            A table element that contains row elements.
+        table : a table element that contains row elements.
 
         Returns
         -------
-        thead : an HTML thead element
-            A <thead>...</thead> element.
+        thead : an HTML <thead>...</thead> element.
         """
         raise AbstractMethodError(self)
@@ -327,13 +318,11 @@ def _extract_tbody(self, table):
 
         Parameters
         ----------
-        table : an HTML table element
-            A table element that contains row elements.
+        table : a table element that contains row elements.
 
         Returns
         -------
-        tbody : an HTML tbody element
-            A <tbody>...</tbody> element.
+        tbody : an HTML <tbody>...</tbody> element.
         """
         raise AbstractMethodError(self)
@@ -342,13 +331,11 @@ def _extract_tfoot(self, table):
 
         Parameters
         ----------
-        table : an HTML table element
-            A table element that contains row elements.
+        table : a table element that contains row elements.
 
         Returns
         -------
-        tfoot : an HTML tfoot element
-            A <tfoot>...</tfoot> element.
+        tfoot : an HTML <tfoot>...</tfoot> element.
         """
         raise AbstractMethodError(self)
@@ -357,7 +344,7 @@ def _build_doc(self):
 
         Returns
         -------
-        obj : tree-like
+        obj : the DOM from which to parse the table element.
         """
         raise AbstractMethodError(self)
@@ -384,18 +371,14 @@ def _parse_raw_thead_tbody_tfoot(self, table_html):
 
         Parameters
         ----------
-        table_html : an HTML table element
-            A single table element
+        table_html : a single HTML table element.
 
         Returns
         -------
         tuple of (header, body, footer)
-        header : list of list of HTML header elements
-            List of rows, each of which is a list of parsed header elements
-        body : list of list of HTML body elements
-            List of rows, each of which is a list of parsed body elements
-        footer : list of list of HTML footer elements
-            List of rows, each of which is a list of parsed footer elements
+        header : list of rows, each of which is a list of parsed header elements
+        body : list of rows, each of which is a list of parsed body elements
+        footer : list of rows, each of which is a list of parsed footer elements
         """
         header_rows = []
         body_rows = []
@@ -441,17 +424,15 @@ def _expand_colspan_rowspan(self, rows, fill_rowspan=True):
 
         Parameters
         ----------
-        rows : list of list of HTML td elements
-            List of rows, each of which is a list of elements in that row
+        rows : list of rows, each of which is a list of elements in that row
 
         fill_rowspan : boolean
             Should a rowspan fill every item in the rowspan (True) or only the
-            bottommost element (False)?
+            bottommost element (False)? Default is True.
 
         Returns
         -------
-        res : list of list of HTML td elements
-            List of rows, each of which is a list of elements in that row,
+        res : list of rows, each of which is a list of elements in that row,
             respecting colspan/rowspan
         """
         res = []
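[Annotation] The abstract hooks documented in this patch map almost one-to-one onto BeautifulSoup calls. A hypothetical minimal stand-in follows, purely to show the shape of the interface; the class name is invented here, and pandas' real BeautifulSoup- and lxml-backed subclasses of _HtmlFrameParser do considerably more.

    from bs4 import BeautifulSoup

    class MySoupTableParser:
        # Illustrative implementations of the hooks named in the docstrings.
        def _text_getter(self, obj):
            return obj.text

        def _equals_tag(self, obj, tag):
            return obj.name == tag

        def _contains_tag(self, obj, tag):
            return obj.find(tag) is not None

        def _extract_td(self, row):
            # "td" in the method name is historical; header cells count too.
            return row.find_all(['td', 'th'])

        def _extract_tr(self, table):
            return table.find_all('tr')

    # usage sketch:
    # soup = BeautifulSoup('<table><tr><td>x</td></tr></table>', 'html.parser')
    # MySoupTableParser()._extract_tr(soup.table)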
elements. + rows : a list of row elements of a table, usually
+ elements. """ raise AbstractMethodError(self) @@ -376,9 +377,11 @@ def _parse_raw_thead_tbody_tfoot(self, table_html): Returns ------- tuple of (header, body, footer) - header : list of rows, each of which is a list of parsed header elements + header : list of rows, each of which is a list of parsed header + elements body : list of rows, each of which is a list of parsed body elements - footer : list of rows, each of which is a list of parsed footer elements + footer : list of rows, each of which is a list of parsed footer + elements """ header_rows = [] body_rows = [] From 14235ec3b83c53f9b9fe97baac3df03d9dce1613 Mon Sep 17 00:00:00 2001 From: John Owens Date: Fri, 8 Sep 2017 15:02:04 -0700 Subject: [PATCH 136/141] more documentation edits --- pandas/io/html.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 849042d073603..79f720a6a102f 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -220,8 +220,7 @@ def _parse_tables(self, doc, match, attrs): Returns ------- - tables : list of HTML table elements - A list of elements to be parsed into raw data. + tables : A list of HTML
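[Annotation] For concreteness, the tuple these docstrings describe has this rough shape for a table with a one-row <thead>, two body rows, and no <tfoot>. Cell values are shown as plain strings purely for illustration; the real lists hold parsed, span-expanded elements.

    header = [['Name', 'Age']]   # one header row
    body = [['alice', '10'],
            ['bob', '11']]       # two body rows
    footer = []                  # no <tfoot>, so no footer rows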
elements to be parsed into raw data. """ raise AbstractMethodError(self) @@ -251,7 +250,7 @@ def _equals_tag(self, obj, tag): Returns ------- - boolean + is_tag_equal : boolean boolean indicating if the object is equal to tag 'tag' """ raise AbstractMethodError(self) @@ -377,11 +376,11 @@ def _parse_raw_thead_tbody_tfoot(self, table_html): Returns ------- tuple of (header, body, footer) - header : list of rows, each of which is a list of parsed header - elements + header : list of rows, each of which is a list of parsed + header elements body : list of rows, each of which is a list of parsed body elements - footer : list of rows, each of which is a list of parsed footer - elements + footer : list of rows, each of which is a list of parsed + footer elements """ header_rows = [] body_rows = [] @@ -395,7 +394,7 @@ def _parse_raw_thead_tbody_tfoot(self, table_html): footer_rows = self._extract_tr( self._extract_tfoot(table_html)[0]) else: - # otherwise we need to split the body into header/body/foot + # Otherwise we need to split the body into header/body/foot. body_rows = self._extract_tr(table_html) if body_rows == []: # empty table, just return nothing @@ -413,8 +412,9 @@ def _parse_raw_thead_tbody_tfoot(self, table_html): header = self._expand_colspan_rowspan(header_rows, fill_rowspan=False) body = self._expand_colspan_rowspan(body_rows, fill_rowspan=True) footer = self._expand_colspan_rowspan(footer_rows, fill_rowspan=False) - # the below line is lifted from _parse_raw_tfoot. Not sure what it - # does. + + # The below line is lifted from _parse_raw_tfoot. Not sure what + # it does. footer = np.atleast_1d(np.array(footer).squeeze( )) if footer and len(footer) == 1 else footer return header, body, footer From 196c8357756fa849ba917725cd515b432c443546 Mon Sep 17 00:00:00 2001 From: John Owens Date: Sat, 9 Sep 2017 07:46:46 -0700 Subject: [PATCH 137/141] minor documentation edits --- pandas/io/html.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 79f720a6a102f..543d82195306a 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -220,7 +220,7 @@ def _parse_tables(self, doc, match, attrs): Returns ------- - tables : A list of HTML
elements to be parsed into raw data. + tables : list of HTML
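[Annotation] The header/body/footer split that the "Otherwise we need to split the body" branch performs reduces to a simple peeling loop. A sketch of the idea, under the assumption that a hypothetical predicate row_is_all_th stands in for the patch's actual check over _extract_td:

    def split_header_body_footer(body_rows, row_is_all_th):
        # Peel leading all-<th> rows off the top into the inferred header...
        header_rows = []
        while body_rows and row_is_all_th(body_rows[0]):
            header_rows.append(body_rows.pop(0))
        # ...and move a trailing all-<th> row to the footer, mirroring
        # the footer_rows.insert(0, body_rows.pop()) seen in the diffs.
        footer_rows = []
        if body_rows and row_is_all_th(body_rows[-1]):
            footer_rows.insert(0, body_rows.pop())
        return header_rows, body_rows, footer_rows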
From 196c8357756fa849ba917725cd515b432c443546 Mon Sep 17 00:00:00 2001
From: John Owens
Date: Sat, 9 Sep 2017 07:46:46 -0700
Subject: [PATCH 137/141] minor documentation edits

---
 pandas/io/html.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/html.py b/pandas/io/html.py
index 79f720a6a102f..543d82195306a 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -220,7 +220,7 @@ def _parse_tables(self, doc, match, attrs):
 
         Returns
         -------
-        tables : A list of HTML <table> elements to be parsed into raw data.
+        tables : list of HTML <table> elements to be parsed into raw data.
         """
         raise AbstractMethodError(self)
@@ -295,7 +295,7 @@ def _extract_tr(self, table):
 
         Returns
         -------
-        rows : a list of row elements of a table, usually <tr> or <th>
+        rows : list of row elements of a table, usually <tr> or <th>
            elements.
         """
         raise AbstractMethodError(self)
elements. """ raise AbstractMethodError(self) From fed4b039d6da42e69fa7321728631ef2571f5a7f Mon Sep 17 00:00:00 2001 From: John Owens Date: Sat, 9 Sep 2017 12:28:17 -0700 Subject: [PATCH 138/141] better return type explanation in code, added issue number to tests --- pandas/io/html.py | 2 +- pandas/tests/io/test_html.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 543d82195306a..23d1dddb94fd9 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -268,7 +268,7 @@ def _contains_tag(self, obj, tag): Returns ------- - boolean + does_tag_contain : boolean boolean indicating if the object contains tag 'tag' """ raise AbstractMethodError(self) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 66dc353ce7065..ce12df60c565a 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -640,6 +640,7 @@ def test_different_number_of_rows(self): tm.assert_frame_equal(expected, res) def test_colspan_rowspan_are_1(self): + # GH17054 expected = """ @@ -669,6 +670,7 @@ def test_colspan_rowspan_are_1(self): tm.assert_frame_equal(expected, res) def test_colspan_rowspan_are_more_than_1(self): + # GH17054 expected = """
@@ -711,6 +713,7 @@ def test_colspan_rowspan_are_more_than_1(self): tm.assert_frame_equal(expected, res) def test_header_should_be_inferred_from_th_elements(self): + # GH17054 expected = """
From c2d9cc65534193bf51d1af077d596589ca05bd26 Mon Sep 17 00:00:00 2001 From: John Owens Date: Mon, 18 Sep 2017 15:31:20 -0700 Subject: [PATCH 139/141] cleaning up legacy documentation issues --- pandas/io/html.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index 23d1dddb94fd9..c151962379acd 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -196,6 +196,12 @@ def __init__(self, io, match, attrs, encoding): self.encoding = encoding def parse_tables(self): + """Parse and return all tables from the DOM. + + Returns + ------- + tables : list of parsed (header, body, footer) tuples from tables + """ tables = self._parse_tables(self._build_doc(), self.match, self.attrs) return (self._build_table(table) for table in tables) @@ -211,12 +217,11 @@ def _parse_tables(self, doc, match, attrs): attrs : dict A dictionary of table attributes that can be used to disambiguate - mutliple tables on a page. + multiple tables on a page. Raises ------ - ValueError - * If `match` does not match any text in the document. + ValueError : If `match` does not match any text in the document. Returns ------- From d4b213b1b7cece32fa9c299b0cb170d5ce8f59a7 Mon Sep 17 00:00:00 2001 From: John Owens Date: Mon, 18 Sep 2017 15:33:23 -0700 Subject: [PATCH 140/141] remove 'if' --- pandas/io/html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index c151962379acd..225fa91f90a1e 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -221,7 +221,7 @@ def _parse_tables(self, doc, match, attrs): Raises ------ - ValueError : If `match` does not match any text in the document. + ValueError : `match` does not match any text in the document. Returns ------- From b16f6d5677a8757847bf39819e0e0cfa18b3e3e7 Mon Sep 17 00:00:00 2001 From: John Owens Date: Mon, 18 Sep 2017 15:37:02 -0700 Subject: [PATCH 141/141] newlines for clarity --- pandas/io/html.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/io/html.py b/pandas/io/html.py index 225fa91f90a1e..d0021e9cc93d0 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -387,14 +387,17 @@ def _parse_raw_thead_tbody_tfoot(self, table_html): footer : list of rows, each of which is a list of parsed footer elements """ + header_rows = [] body_rows = [] footer_rows = [] + # first, are there thead and tbody elements in the table? if (self._contains_tag(table_html, 'thead') and self._contains_tag(table_html, 'tbody')): header_rows = self._extract_tr(self._extract_thead(table_html)[0]) body_rows = self._extract_tr(self._extract_tbody(table_html)[0]) + if self._contains_tag(table_html, 'tfoot'): footer_rows = self._extract_tr( self._extract_tfoot(table_html)[0]) @@ -414,6 +417,7 @@ def _parse_raw_thead_tbody_tfoot(self, table_html): self._extract_td(body_rows[-1])): # this row should be a footer row, move it from body to footer footer_rows.insert(0, body_rows.pop()) + header = self._expand_colspan_rowspan(header_rows, fill_rowspan=False) body = self._expand_colspan_rowspan(body_rows, fill_rowspan=True) footer = self._expand_colspan_rowspan(footer_rows, fill_rowspan=False) @@ -443,6 +447,7 @@ def _expand_colspan_rowspan(self, rows, fill_rowspan=True): res : list of rows, each of which is a list of elements in that row, respecting colspan/rowspan """ + res = [] saved_span = [] for row in rows: