self.write('</table>', indent)
if self.fmt.show_dimensions:
- by = chr(215) if compat.PY3 else unichr(215) # ×
+ by = chr(215) if compat.PY3 else unichr(215)  # ×
self.write(u('<p>%d rows %s %d columns</p>') %
- (len(frame), by, len(frame.columns)) )
+ (len(frame), by, len(frame.columns)))
_put_lines(buf, self.elements)
def _write_header(self, indent):
@@ -783,8 +784,9 @@ def _column_header():
align=align)
if self.fmt.has_index_names:
- row = [x if x is not None else '' for x in self.frame.index.names] \
- + [''] * min(len(self.columns), self.max_cols)
+ row = [
+ x if x is not None else '' for x in self.frame.index.names
+ ] + [''] * min(len(self.columns), self.max_cols)
self.write_tr(row, indent, self.indent_delta, header=True)
indent -= self.indent_delta
@@ -851,7 +853,7 @@ def _write_hierarchical_rows(self, fmt_values, indent):
truncate = (len(frame) > self.max_rows)
idx_values = frame.index[:nrows].format(sparsify=False, adjoin=False,
- names=False)
+ names=False)
idx_values = lzip(*idx_values)
if self.fmt.sparsify:
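
The format.py hunk above only re-wraps the code that emits the HTML dimensions footer. As a rough aid, here is a standalone sketch of the string that footer ends up being, using toy values rather than anything from pandas internals:

    # Standalone illustration of the "N rows x M columns" footer built when
    # show_dimensions is enabled; nrows/ncols are made-up toy values.
    nrows, ncols = 60, 4
    by = chr(215)  # the multiplication sign placed between the two counts
    footer = '<p>%d rows %s %d columns</p>' % (nrows, by, ncols)
    print(footer)  # <p>60 rows × 4 columns</p>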
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d0a1511ec1cca..93587cd11b597 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -432,8 +432,9 @@ def _repr_fits_horizontal_(self, ignore_width=False):
def _info_repr(self):
"""True if the repr should show the info view."""
info_repr_option = (get_option("display.large_repr") == "info")
- return info_repr_option and not \
- (self._repr_fits_horizontal_() and self._repr_fits_vertical_())
+ return info_repr_option and not (
+ self._repr_fits_horizontal_() and self._repr_fits_vertical_()
+ )
def __unicode__(self):
"""
@@ -486,8 +487,7 @@ def _repr_html_(self):
- return ('<div style="max-height:1000px;'
- 'max-width:1500px;overflow:auto;">\n' +
- self.to_html(max_rows=max_rows, max_cols=max_cols,
- show_dimensions=True) + '\n</div>')
+ return ('<div style="max-height:1000px;'
+ 'max-width:1500px;overflow:auto;">\n' +
+ self.to_html(max_rows=max_rows, max_cols=max_cols,
+              show_dimensions=True) + '\n</div>')
else:
return None
@@ -1283,7 +1283,8 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
index_names=index_names,
header=header, index=index,
line_width=line_width,
- max_rows=max_rows, max_cols=max_cols,
+ max_rows=max_rows,
+ max_cols=max_cols,
show_dimensions=show_dimensions)
formatter.to_string()
@@ -1310,7 +1311,8 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None,
escape : boolean, default True
Convert the characters <, >, and & to HTML-safe sequences.
max_rows : int, optional
- Maximum number of rows to show before truncating. If None, show all.
+ Maximum number of rows to show before truncating. If None, show
+ all.
max_cols : int, optional
Maximum number of columns to show before truncating. If None, show
all.
@@ -1336,7 +1338,8 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None,
header=header, index=index,
bold_rows=bold_rows,
escape=escape,
- max_rows=max_rows, max_cols=max_cols,
+ max_rows=max_rows,
+ max_cols=max_cols,
show_dimensions=show_dimensions)
formatter.to_html(classes=classes)
@@ -1904,7 +1907,8 @@ def _ensure_valid_index(self, value):
if not isinstance(value, Series):
raise ValueError('Cannot set a frame with no defined index '
- 'and a value that cannot be converted to a Series')
+ 'and a value that cannot be converted to a '
+ 'Series')
self._data.set_axis(1, value.index.copy(), check_axis=False)
def _set_item(self, key, value):
@@ -4597,7 +4601,7 @@ def extract_index(data):
def _prep_ndarray(values, copy=True):
- if not isinstance(values, (np.ndarray,Series)):
+ if not isinstance(values, (np.ndarray, Series)):
if len(values) == 0:
return np.empty((0, 0), dtype=object)
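
The frame.py changes above are line-wrapping only; the _info_repr predicate they touch decides when the info view replaces the normal repr. A hedged stand-in for that boolean, written as plain Python rather than the pandas method:

    # Stand-in for the check _info_repr() performs: show the info view only
    # when display.large_repr is "info" and the frame does not fit both
    # horizontally and vertically.
    def info_repr(large_repr, fits_horizontal, fits_vertical):
        info_repr_option = (large_repr == "info")
        return info_repr_option and not (fits_horizontal and fits_vertical)

    print(info_repr("info", False, True))       # True  -> info view
    print(info_repr("truncate", False, False))  # False -> normal repr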
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 4089b13fca5c7..624384e484dc0 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -42,8 +42,8 @@ def is_dictlike(x):
def _single_replace(self, to_replace, method, inplace, limit):
if self.ndim != 1:
- raise TypeError('cannot replace {0} with method {1} on a {2}'.format(to_replace,
- method,type(self).__name__))
+ raise TypeError('cannot replace {0} with method {1} on a {2}'
+ .format(to_replace, method, type(self).__name__))
orig_dtype = self.dtype
result = self if inplace else self.copy()
@@ -2047,7 +2047,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
# passing a single value that is scalar like
# when value is None (GH5319), for compat
if not is_dictlike(to_replace) and not is_dictlike(regex):
- to_replace = [ to_replace ]
+ to_replace = [to_replace]
if isinstance(to_replace, (tuple, list)):
return _single_replace(self, to_replace, method, inplace,
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 7a7fe32963457..960baa503036c 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -649,9 +649,9 @@ def _index_with_as_index(self, b):
original = self.obj.index
gp = self.grouper
levels = chain((gp.levels[i][gp.labels[i][b]]
- for i in range(len(gp.groupings))),
- (original.get_level_values(i)[b]
- for i in range(original.nlevels)))
+ for i in range(len(gp.groupings))),
+ (original.get_level_values(i)[b]
+ for i in range(original.nlevels)))
new = MultiIndex.from_arrays(list(levels))
new.names = gp.names + original.names
return new
@@ -2161,7 +2161,6 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
else:
key_index = Index(keys, name=key_names[0])
-
# make Nones an empty object
if com._count_not_none(*values) != len(values):
v = None
@@ -2170,14 +2169,20 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
break
if v is None:
return DataFrame()
- values = [ x if x is not None else v._constructor(**v._construct_axes_dict()) for x in values ]
+ values = [
+ x if x is not None else
+ v._constructor(**v._construct_axes_dict())
+ for x in values
+ ]
v = values[0]
if isinstance(v, (np.ndarray, Series)):
if isinstance(v, Series):
applied_index = self.obj._get_axis(self.axis)
- all_indexed_same = _all_indexes_same([x.index for x in values ])
+ all_indexed_same = _all_indexes_same([
+ x.index for x in values
+ ])
singular_series = (len(values) == 1 and
applied_index.nlevels == 1)
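
The _wrap_applied_output hunk above re-wraps the idiom that backfills None results returned by the applied function. A minimal analogue with plain lists standing in for pandas objects (not the groupby code itself):

    # Any None in the applied results is replaced by an "empty" object built
    # from the type of the first non-None value.
    values = [[1, 2], None, [3]]
    v = next(x for x in values if x is not None)
    values = [x if x is not None else type(v)() for x in values]
    print(values)  # [[1, 2], [], [3]]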
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 08f935539ecfc..a4e273c43e483 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -830,7 +830,9 @@ def _reindex(keys, level=None):
# see GH5553, make sure we use the right indexer
new_indexer = np.arange(len(indexer))
- new_indexer[cur_indexer] = np.arange(len(result._get_axis(axis)))
+ new_indexer[cur_indexer] = np.arange(
+ len(result._get_axis(axis))
+ )
new_indexer[missing_indexer] = -1
# we have a non_unique selector, need to use the original
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index e8b18ae93b287..471136dc2386b 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -3480,7 +3480,10 @@ def _delete_from_block(self, i, item):
super(SingleBlockManager, self)._delete_from_block(i, item)
# reset our state
- self._block = self.blocks[0] if len(self.blocks) else make_block(np.array([],dtype=self._block.dtype),[],[])
+ self._block = (
+ self.blocks[0] if len(self.blocks) else
+ make_block(np.array([], dtype=self._block.dtype), [], [])
+ )
self._values = self._block.values
def get_slice(self, slobj, raise_on_error=False):
diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
index d421fa36326aa..1244d0140a01b 100644
--- a/pandas/core/reshape.py
+++ b/pandas/core/reshape.py
@@ -786,6 +786,7 @@ def lreshape(data, groups, dropna=True, label=None):
return DataFrame(mdata, columns=id_cols + pivot_cols)
+
def wide_to_long(df, stubnames, i, j):
"""
Wide panel to long format. Less flexible but more user-friendly than melt.
@@ -848,8 +849,8 @@ def get_var_names(df, regex):
def melt_stub(df, stub, i, j):
varnames = get_var_names(df, "^"+stub)
- newdf = melt(df, id_vars=i, value_vars=varnames,
- value_name=stub, var_name=j)
+ newdf = melt(df, id_vars=i, value_vars=varnames, value_name=stub,
+ var_name=j)
newdf_j = newdf[j].str.replace(stub, "")
try:
newdf_j = newdf_j.astype(int)
@@ -870,6 +871,7 @@ def melt_stub(df, stub, i, j):
newdf = newdf.merge(new, how="outer", on=id_vars + [j], copy=False)
return newdf.set_index([i, j])
+
def convert_dummies(data, cat_variables, prefix_sep='_'):
"""
Compute DataFrame with specified columns converted to dummy variables (0 /
diff --git a/pandas/io/auth.py b/pandas/io/auth.py
index 15e3eb70d91b2..74b6b13000108 100644
--- a/pandas/io/auth.py
+++ b/pandas/io/auth.py
@@ -117,6 +117,7 @@ def init_service(http):
"""
return gapi.build('analytics', 'v3', http=http)
+
def reset_default_token_store():
import os
os.remove(DEFAULT_TOKEN_FILE)
diff --git a/pandas/io/clipboard.py b/pandas/io/clipboard.py
index 13135d255d9e2..143b507c41c3f 100644
--- a/pandas/io/clipboard.py
+++ b/pandas/io/clipboard.py
@@ -2,6 +2,7 @@
from pandas import compat, get_option, DataFrame
from pandas.compat import StringIO
+
def read_clipboard(**kwargs): # pragma: no cover
"""
Read text from clipboard and pass to read_table. See read_table for the
@@ -20,7 +21,10 @@ def read_clipboard(**kwargs): # pragma: no cover
# try to decode (if needed on PY3)
if compat.PY3:
try:
- text = compat.bytes_to_str(text,encoding=kwargs.get('encoding') or get_option('display.encoding'))
+ text = compat.bytes_to_str(
+ text, encoding=(kwargs.get('encoding') or
+ get_option('display.encoding'))
+ )
except:
pass
return read_table(StringIO(text), **kwargs)
@@ -58,7 +62,7 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover
if sep is None:
sep = '\t'
buf = StringIO()
- obj.to_csv(buf,sep=sep, **kwargs)
+ obj.to_csv(buf, sep=sep, **kwargs)
clipboard_set(buf.getvalue())
return
except:
@@ -70,4 +74,3 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover
else:
objstr = str(obj)
clipboard_set(objstr)
-
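
For reference, a hedged usage sketch of the clipboard round trip being tidied above; it needs a working system clipboard backend, so treat it as illustrative rather than something that runs everywhere:

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
    df.to_clipboard(excel=True, sep='\t')  # tab-separated, spreadsheet-friendly
    roundtrip = pd.read_clipboard(sep='\t')
    print(roundtrip)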
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 6b8186e253199..d6b2827f94d36 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -13,7 +13,8 @@
_urlopen = urlopen
from urllib.parse import urlparse as parse_url
import urllib.parse as compat_parse
- from urllib.parse import uses_relative, uses_netloc, uses_params, urlencode, urljoin
+ from urllib.parse import (uses_relative, uses_netloc, uses_params,
+ urlencode, urljoin)
from urllib.error import URLError
from http.client import HTTPException
else:
@@ -72,8 +73,8 @@ def _is_s3_url(url):
def maybe_read_encoded_stream(reader, encoding=None):
- """ read an encoded stream from the reader and transform the bytes to unicode
- if required based on the encoding
+ """read an encoded stream from the reader and transform the bytes to
+ unicode if required based on the encoding
Parameters
----------
@@ -84,7 +85,7 @@ def maybe_read_encoded_stream(reader, encoding=None):
-------
a tuple of (a stream of decoded bytes, the encoding which was used)
- """
+ """
if compat.PY3 or encoding is not None: # pragma: no cover
if encoding:
@@ -97,6 +98,7 @@ def maybe_read_encoded_stream(reader, encoding=None):
encoding = None
return reader, encoding
+
def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
"""
If the filepath_or_buffer is a url, translate and return the buffer
@@ -114,7 +116,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
if _is_url(filepath_or_buffer):
req = _urlopen(str(filepath_or_buffer))
- return maybe_read_encoded_stream(req,encoding)
+ return maybe_read_encoded_stream(req, encoding)
if _is_s3_url(filepath_or_buffer):
try:
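
The reflowed maybe_read_encoded_stream docstring above describes wrapping a byte stream in a decoder when an encoding is known. A standard-library sketch of that idea, under the assumption that the encoding came from, say, HTTP response headers (this is not the pandas implementation itself):

    import codecs
    from io import BytesIO

    raw = BytesIO(u'h\xe4user,1\n'.encode('latin-1'))
    encoding = 'latin-1'  # e.g. taken from the HTTP response headers
    reader = codecs.getreader(encoding or 'utf-8')(raw)
    print(reader.read())  # häuser,1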
diff --git a/pandas/io/data.py b/pandas/io/data.py
index cf49515cac576..a3968446930e8 100644
--- a/pandas/io/data.py
+++ b/pandas/io/data.py
@@ -469,6 +469,7 @@ def fetch_data(url, name):
axis=1, join='outer')
return df
+
def get_data_famafrench(name):
# path of zip files
zip_file_url = ('http://mba.tuck.dartmouth.edu/pages/faculty/'
diff --git a/pandas/io/date_converters.py b/pandas/io/date_converters.py
index ef92b8692c07f..3ffcef4b21552 100644
--- a/pandas/io/date_converters.py
+++ b/pandas/io/date_converters.py
@@ -26,7 +26,7 @@ def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col,
minute_col = _maybe_cast(minute_col)
second_col = _maybe_cast(second_col)
return lib.try_parse_datetime_components(year_col, month_col, day_col,
- hour_col, minute_col, second_col)
+ hour_col, minute_col, second_col)
def generic_parser(parse_func, *cols):
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
index b97c9da0b0d18..ad7c37fba4c2f 100644
--- a/pandas/io/excel.py
+++ b/pandas/io/excel.py
@@ -547,8 +547,8 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0):
colletter = get_column_letter(col)
xcell = wks.cell("%s%s" % (colletter, row))
for field in style.__fields__:
- xcell.style.__setattr__(field, \
- style.__getattribute__(field))
+ xcell.style.__setattr__(
+ field, style.__getattribute__(field))
@classmethod
def _convert_to_style(cls, style_dict):
@@ -778,10 +778,10 @@ def _convert_to_style(self, style_dict, num_format_str=None):
alignment = style_dict.get('alignment')
if alignment:
if (alignment.get('horizontal')
- and alignment['horizontal'] == 'center'):
+ and alignment['horizontal'] == 'center'):
xl_format.set_align('center')
if (alignment.get('vertical')
- and alignment['vertical'] == 'top'):
+ and alignment['vertical'] == 'top'):
xl_format.set_align('top')
# Map the cell borders to XlsxWriter border properties.
diff --git a/pandas/io/ga.py b/pandas/io/ga.py
index 4391b2637a837..f002994888932 100644
--- a/pandas/io/ga.py
+++ b/pandas/io/ga.py
@@ -48,8 +48,8 @@
%s
""" % _QUERY_PARAMS
-_GA_READER_DOC = """Given query parameters, return a DataFrame with all the data
-or an iterator that returns DataFrames containing chunks of the data
+_GA_READER_DOC = """Given query parameters, return a DataFrame with all the
+data or an iterator that returns DataFrames containing chunks of the data
Parameters
----------
@@ -89,12 +89,14 @@
Local host redirect if unspecified
"""
+
def reset_token_store():
"""
Deletes the default token store
"""
auth.reset_default_token_store()
+
@Substitution(extras=_AUTH_PARAMS)
@Appender(_GA_READER_DOC)
def read_ga(metrics, dimensions, start_date, **kwargs):
@@ -185,9 +187,8 @@ def _init_service(self, secrets):
return auth.init_service(http)
def get_account(self, name=None, id=None, **kwargs):
- """
- Retrieve an account that matches the name, id, or some account attribute
- specified in **kwargs
+ """ Retrieve an account that matches the name, id, or some account
+ attribute specified in **kwargs
Parameters
----------
@@ -385,6 +386,7 @@ def _maybe_add_arg(query, field, data, prefix='ga'):
data = ','.join(['%s:%s' % (prefix, x) for x in data])
query[field] = data
+
def _get_match(obj_store, name, id, **kwargs):
key, val = None, None
if len(kwargs) > 0:
diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py
index 2d490ec071b4e..010277533589c 100644
--- a/pandas/io/gbq.py
+++ b/pandas/io/gbq.py
@@ -38,7 +38,8 @@
# These are some custom exceptions that the
# to_gbq() method can throw
-class SchemaMissing(PandasError,IOError):
+
+class SchemaMissing(PandasError, IOError):
"""
Raised when attempting to write a DataFrame to
a new table in Google BigQuery without specifying
@@ -46,14 +47,16 @@ class SchemaMissing(PandasError,IOError):
"""
pass
-class InvalidSchema(PandasError,IOError):
+
+class InvalidSchema(PandasError, IOError):
"""
Raised when attempting to write a DataFrame to
Google BigQuery with an invalid table schema.
"""
pass
-class TableExistsFail(PandasError,IOError):
+
+class TableExistsFail(PandasError, IOError):
"""
Raised when attempting to write a DataFrame to
an existing Google BigQuery table without specifying
@@ -61,7 +64,8 @@ class TableExistsFail(PandasError,IOError):
"""
pass
-class InvalidColumnOrder(PandasError,IOError):
+
+class InvalidColumnOrder(PandasError, IOError):
"""
Raised when the provided column order for output
results DataFrame does not match the schema
@@ -83,6 +87,7 @@ def _authenticate():
"""
return bq.Client.Get()
+
def _parse_entry(field_value, field_type):
"""
Given a value and the corresponding BigQuery data type,
@@ -147,10 +152,7 @@ def _parse_page(raw_page, col_names, col_types, col_dtypes):
page_row_count = len(raw_page)
# Place to hold the results for a page of data
- page_array = np.zeros(
- (page_row_count,),
- dtype=zip(col_names,col_dtypes)
- )
+ page_array = np.zeros((page_row_count,), dtype=zip(col_names, col_dtypes))
for row_num, raw_row in enumerate(raw_page):
entries = raw_row.get('f', [])
# Iterate over each entry - setting proper field types
@@ -163,6 +165,7 @@ def _parse_page(raw_page, col_names, col_types, col_dtypes):
return page_array
+
def _parse_data(client, job, index_col=None, col_order=None):
"""
Iterate through the query results and piece together the
@@ -196,9 +199,9 @@ def _parse_data(client, job, index_col=None, col_order=None):
Notes:
-----
- This script relies on Google being consistent with their
+ This script relies on Google being consistent with their
pagination API. We are using the most flexible iteration method
- that we could find in the bq.py/bigquery_client.py API's, but
+ that we could find in the bq.py/bigquery_client.py API's, but
these have undergone large amounts of change recently.
We have encountered bugs with this functionality, see:
@@ -209,10 +212,11 @@ def _parse_data(client, job, index_col=None, col_order=None):
# see: http://pandas.pydata.org/pandas-docs/dev/missing_data.html#missing-data-casting-rules-and-indexing
dtype_map = {'INTEGER': np.dtype(float),
'FLOAT': np.dtype(float),
- 'TIMESTAMP': 'M8[ns]'} # This seems to be buggy without nanosecond indicator
+ 'TIMESTAMP': 'M8[ns]'} # This seems to be buggy without
+ # nanosecond indicator
# We first need the schema to get information about the columns of
- # our dataframe.
+ # our dataframe.
table_dict = job['configuration']['query']['destinationTable']
fields = client.GetTableSchema(table_dict)['fields']
@@ -226,23 +230,23 @@ def _parse_data(client, job, index_col=None, col_order=None):
# TODO: Do this in one clean step
for field in fields:
col_types.append(field['type'])
- # Note the encoding... numpy doesn't like titles that are UTF8, which is the return
- # type from the API
+ # Note the encoding... numpy doesn't like titles that are UTF8, which
+ # is the return type from the API
col_names.append(field['name'].encode('ascii', 'ignore'))
- # Note, it would be nice to use 'str' types, but BigQuery doesn't have a fixed length
- # in mind - just maxes out at 64k
- col_dtypes.append(dtype_map.get(field['type'],object))
+ # Note, it would be nice to use 'str' types, but BigQuery doesn't have
+ # a fixed length in mind - just maxes out at 64k
+ col_dtypes.append(dtype_map.get(field['type'], object))
-
# How many columns are there
num_columns = len(col_names)
-
+
# Iterate over the result rows.
# Since Google's API now requires pagination of results,
- # we do that here. The following is repurposed from
+ # we do that here. The following is repurposed from
# bigquery_client.py :: Client._JobTableReader._ReadOnePage
- # TODO: Enable Reading From Table, see Client._TableTableReader._ReadOnePage
+ # TODO: Enable Reading From Table,
+ # see Client._TableTableReader._ReadOnePage
# Initially, no page token is set
page_token = None
@@ -254,13 +258,12 @@ def _parse_data(client, job, index_col=None, col_order=None):
total_rows = max_rows
# This is the starting row for a particular page...
- # is ignored if page_token is present, though
+ # is ignored if page_token is present, though
# it may be useful if we wish to implement SQL like LIMITs
# with minimums
start_row = 0
- # Keep our page DataFrames until the end when we
- # concatentate them
+ # Keep our page DataFrames until the end when we concatenate them
dataframe_list = list()
current_job = job['jobReference']
@@ -298,7 +301,8 @@ def _parse_data(client, job, index_col=None, col_order=None):
start_row += len(raw_page)
if total_rows > 0:
completed = (100 * start_row) / total_rows
- logger.info('Remaining Rows: ' + str(total_rows - start_row) + '(' + str(completed) + '% Complete)')
+ logger.info('Remaining Rows: ' + str(total_rows - start_row) + '('
+ + str(completed) + '% Complete)')
else:
logger.info('No Rows')
@@ -308,8 +312,9 @@ def _parse_data(client, job, index_col=None, col_order=None):
# but we felt it was still a good idea.
if not page_token and not raw_page and start_row != total_rows:
raise bigquery_client.BigqueryInterfaceError(
- ("Not enough rows returned by server. Expected: {0}" + \
- " Rows, But Recieved {1}").format(total_rows, start_row))
+ 'Not enough rows returned by server. Expected: {0} Rows, But '
+ 'Received {1}'.format(total_rows, start_row)
+ )
# Build final dataframe
final_df = concat(dataframe_list, ignore_index=True)
@@ -320,14 +325,19 @@ def _parse_data(client, job, index_col=None, col_order=None):
final_df.set_index(index_col, inplace=True)
col_names.remove(index_col)
else:
- raise InvalidColumnOrder('Index column "{0}" does not exist in DataFrame.'.format(index_col))
+ raise InvalidColumnOrder(
+ 'Index column "{0}" does not exist in DataFrame.'
+ .format(index_col)
+ )
# Change the order of columns in the DataFrame based on provided list
if col_order is not None:
if sorted(col_order) == sorted(col_names):
final_df = final_df[col_order]
else:
- raise InvalidColumnOrder('Column order does not match this DataFrame.')
+ raise InvalidColumnOrder(
+ 'Column order does not match this DataFrame.'
+ )
# Downcast floats to integers and objects to booleans
# if there are no NaN's. This is presently due to a
@@ -335,13 +345,15 @@ def _parse_data(client, job, index_col=None, col_order=None):
final_df._data = final_df._data.downcast(dtypes='infer')
return final_df
-def to_gbq(dataframe, destination_table, schema=None, col_order=None, if_exists='fail', **kwargs):
- """Write a DataFrame to a Google BigQuery table.
-
- If the table exists, the DataFrame will be appended. If not, a new table
- will be created, in which case the schema will have to be specified. By default,
- rows will be written in the order they appear in the DataFrame, though
- the user may specify an alternative order.
+
+def to_gbq(dataframe, destination_table, schema=None, col_order=None,
+ if_exists='fail', **kwargs):
+ """Write a DataFrame to a Google BigQuery table.
+
+ If the table exists, the DataFrame will be appended. If not, a new table
+ will be created, in which case the schema will have to be specified. By
+ default, rows will be written in the order they appear in the DataFrame,
+ though the user may specify an alternative order.
Parameters
----------
@@ -350,9 +362,11 @@ def to_gbq(dataframe, destination_table, schema=None, col_order=None, if_exists=
destination_table : string
name of table to be written, in the form 'dataset.tablename'
schema : sequence (optional)
- list of column types in order for data to be inserted, e.g. ['INTEGER', 'TIMESTAMP', 'BOOLEAN']
+ list of column types in order for data to be inserted,
+ e.g. ['INTEGER', 'TIMESTAMP', 'BOOLEAN']
col_order : sequence (optional)
- order which columns are to be inserted, e.g. ['primary_key', 'birthday', 'username']
+ order which columns are to be inserted,
+ e.g. ['primary_key', 'birthday', 'username']
if_exists : {'fail', 'replace', 'append'} (optional)
- fail: If table exists, do nothing.
- replace: If table exists, drop it, recreate it, and insert data.
@@ -362,42 +376,50 @@ def to_gbq(dataframe, destination_table, schema=None, col_order=None, if_exists=
Raises
------
SchemaMissing :
- Raised if the 'if_exists' parameter is set to 'replace', but no schema is specified
+ Raised if the 'if_exists' parameter is set to 'replace', but no schema
+ is specified
TableExists :
- Raised if the specified 'destination_table' exists but the 'if_exists' parameter is set to 'fail' (the default)
+ Raised if the specified 'destination_table' exists but the 'if_exists'
+ parameter is set to 'fail' (the default)
InvalidSchema :
Raised if the 'schema' parameter does not match the provided DataFrame
"""
if not _BQ_INSTALLED:
if sys.version_info >= (3, 0):
- raise NotImplementedError('gbq module does not support Python 3 yet')
+ raise NotImplementedError('gbq module does not support Python 3 '
+ 'yet')
else:
raise ImportError('Could not import Google BigQuery Client.')
if not _BQ_VALID_VERSION:
- raise ImportError("pandas requires bigquery >= 2.0.17 for Google BigQuery "
- "support, current version " + _BQ_VERSION)
+ raise ImportError("pandas requires bigquery >= 2.0.17 for Google "
+ "BigQuery support, current version " + _BQ_VERSION)
- ALLOWED_TYPES = ['STRING', 'INTEGER', 'FLOAT', 'BOOLEAN', 'TIMESTAMP', 'RECORD']
+ ALLOWED_TYPES = ['STRING', 'INTEGER', 'FLOAT', 'BOOLEAN', 'TIMESTAMP',
+ 'RECORD']
if if_exists == 'replace' and schema is None:
- raise SchemaMissing('Cannot replace a table without specifying the data schema')
+ raise SchemaMissing('Cannot replace a table without specifying the '
+ 'data schema')
else:
client = _authenticate()
table_reference = client.GetTableReference(destination_table)
if client.TableExists(table_reference):
if if_exists == 'fail':
- raise TableExistsFail('Cannot overwrite existing tables if \'if_exists="fail"\'')
+ raise TableExistsFail('Cannot overwrite existing tables if '
+ '\'if_exists="fail"\'')
else:
- # Build up a string representation of the
+ # Build up a string representation of the
# table's schema. Since the table already
# exists, we ask ask the API for it, which
# is returned in a list of dictionaries
# describing column data. Iterate over these
# and build up a string of form:
# "col_name1 : col_type1, col_name2 : col_type2..."
- schema_full = client.GetTableSchema(dict(table_reference))['fields']
+ schema_full = client.GetTableSchema(
+ dict(table_reference)
+ )['fields']
schema = ''
for count, row in enumerate(schema_full):
if count > 0:
@@ -406,11 +428,13 @@ def to_gbq(dataframe, destination_table, schema=None, col_order=None, if_exists=
else:
logger.info('Creating New Table')
if schema is None:
- raise SchemaMissing('Cannot create a new table without specifying the data schema')
+ raise SchemaMissing('Cannot create a new table without '
+ 'specifying the data schema')
else:
columns = dataframe.columns
if len(schema) != len(columns):
- raise InvalidSchema('Incorrect number of columns in schema')
+ raise InvalidSchema('Incorrect number of columns in '
+ 'schema')
else:
schema_string = ''
for count, name in enumerate(columns):
@@ -420,7 +444,9 @@ def to_gbq(dataframe, destination_table, schema=None, col_order=None, if_exists=
if column_type in ALLOWED_TYPES:
schema_string += name + ':' + schema[count].lower()
else:
- raise InvalidSchema('Invalid Type: ' + column_type + ". Must be one of: " + str(ALLOWED_TYPES))
+ raise InvalidSchema('Invalid Type: ' + column_type
+ + ". Must be one of: " +
+ str(ALLOWED_TYPES))
schema = schema_string
opts = kwargs
@@ -437,18 +463,22 @@ def to_gbq(dataframe, destination_table, schema=None, col_order=None, if_exists=
with tempfile.NamedTemporaryFile() as csv_file:
dataframe.to_csv(csv_file.name, index=False, encoding='utf-8')
- job = client.Load(table_reference, csv_file.name, schema=schema, **opts)
+ job = client.Load(table_reference, csv_file.name, schema=schema,
+ **opts)
-def read_gbq(query, project_id = None, destination_table = None, index_col=None, col_order=None, **kwargs):
+
+def read_gbq(query, project_id=None, destination_table=None, index_col=None,
+ col_order=None, **kwargs):
"""Load data from Google BigQuery.
-
- The main method a user calls to load data from Google BigQuery into a pandas DataFrame.
- This is a simple wrapper for Google's bq.py and bigquery_client.py, which we use
- to get the source data. Because of this, this script respects the user's bq settings
- file, '~/.bigqueryrc', if it exists. Such a file can be generated using 'bq init'. Further,
- additional parameters for the query can be specified as either ``**kwds`` in the command,
- or using FLAGS provided in the 'gflags' module. Particular options can be found in
- bigquery_client.py.
+
+ The main method a user calls to load data from Google BigQuery into a
+ pandas DataFrame. This is a simple wrapper for Google's bq.py and
+ bigquery_client.py, which we use to get the source data. Because of this,
+ this script respects the user's bq settings file, '~/.bigqueryrc', if it
+ exists. Such a file can be generated using 'bq init'. Further, additional
+ parameters for the query can be specified as either ``**kwds`` in the
+ command, or using FLAGS provided in the 'gflags' module. Particular options
+ can be found in bigquery_client.py.
Parameters
----------
@@ -464,8 +494,8 @@ def read_gbq(query, project_id = None, destination_table = None, index_col=None,
DataFrame
destination_table : string (optional)
If provided, send the results to the given table.
- **kwargs :
- To be passed to bq.Client.Create(). Particularly: 'trace',
+ **kwargs :
+ To be passed to bq.Client.Create(). Particularly: 'trace',
'sync', 'api', 'api_version'
Returns
@@ -476,13 +506,14 @@ def read_gbq(query, project_id = None, destination_table = None, index_col=None,
"""
if not _BQ_INSTALLED:
if sys.version_info >= (3, 0):
- raise NotImplementedError('gbq module does not support Python 3 yet')
+ raise NotImplementedError('gbq module does not support Python 3 '
+ 'yet')
else:
raise ImportError('Could not import Google BigQuery Client.')
if not _BQ_VALID_VERSION:
- raise ImportError("pandas requires bigquery >= 2.0.17 for Google BigQuery "
- "support, current version " + _BQ_VERSION)
+ raise ImportError('pandas requires bigquery >= 2.0.17 for Google '
+ 'BigQuery support, current version ' + _BQ_VERSION)
query_args = kwargs
query_args['project_id'] = project_id
@@ -493,5 +524,5 @@ def read_gbq(query, project_id = None, destination_table = None, index_col=None,
client = _authenticate()
job = client.Query(**query_args)
-
+
return _parse_data(client, job, index_col=index_col, col_order=col_order)
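
Most of the gbq.py changes are line-wrapping inside to_gbq/read_gbq, so here is a hedged usage sketch based only on the signatures and docstrings shown above. The dataset, table and project names are made up, and running it requires Google's bq client plus valid credentials:

    import pandas as pd
    from pandas.io import gbq

    df = pd.DataFrame({'name': ['a', 'b'], 'count': [1, 2]},
                      columns=['name', 'count'])
    gbq.to_gbq(df, 'my_dataset.my_table',
               schema=['STRING', 'INTEGER'],  # required when creating a new table
               if_exists='fail')
    result = gbq.read_gbq('SELECT name, count FROM my_dataset.my_table',
                          project_id='my-project-id')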
diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index 08299738f31a2..5d392e94106e9 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -49,7 +49,8 @@
from pandas.compat import u, PY3
from pandas import (
Timestamp, Period, Series, DataFrame, Panel, Panel4D,
- Index, MultiIndex, Int64Index, PeriodIndex, DatetimeIndex, Float64Index, NaT
+ Index, MultiIndex, Int64Index, PeriodIndex, DatetimeIndex, Float64Index,
+ NaT
)
from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel
from pandas.sparse.array import BlockIndex, IntIndex
@@ -87,7 +88,8 @@ def to_msgpack(path_or_buf, *args, **kwargs):
args : an object or objects to serialize
append : boolean whether to append to an existing msgpack
(default is False)
- compress : type of compressor (zlib or blosc), default to None (no compression)
+ compress : type of compressor (zlib or blosc), default to None (no
+ compression)
"""
global compressor
compressor = kwargs.pop('compress', None)
@@ -111,6 +113,7 @@ def writer(fh):
else:
writer(path_or_buf)
+
def read_msgpack(path_or_buf, iterator=False, **kwargs):
"""
Load msgpack pandas object from the specified
@@ -153,7 +156,7 @@ def read(fh):
return read(fh)
# treat as a string-like
- if not hasattr(path_or_buf,'read'):
+ if not hasattr(path_or_buf, 'read'):
try:
fh = compat.BytesIO(path_or_buf)
@@ -230,6 +233,7 @@ def convert(values):
# ndarray (on original dtype)
return v.tostring()
+
def unconvert(values, dtype, compress=None):
if dtype == np.object_:
@@ -251,7 +255,8 @@ def unconvert(values, dtype, compress=None):
return np.frombuffer(values, dtype=dtype)
# from a string
- return np.fromstring(values.encode('latin1'),dtype=dtype)
+ return np.fromstring(values.encode('latin1'), dtype=dtype)
+
def encode(obj):
"""
@@ -264,11 +269,11 @@ def encode(obj):
return {'typ': 'period_index',
'klass': obj.__class__.__name__,
'name': getattr(obj, 'name', None),
- 'freq': getattr(obj,'freqstr',None),
+ 'freq': getattr(obj, 'freqstr', None),
'dtype': obj.dtype.num,
'data': convert(obj.asi8)}
elif isinstance(obj, DatetimeIndex):
- tz = getattr(obj,'tz',None)
+ tz = getattr(obj, 'tz', None)
# store tz info and data as UTC
if tz is not None:
@@ -279,8 +284,8 @@ def encode(obj):
'name': getattr(obj, 'name', None),
'dtype': obj.dtype.num,
'data': convert(obj.asi8),
- 'freq': getattr(obj,'freqstr',None),
- 'tz': tz }
+ 'freq': getattr(obj, 'freqstr', None),
+ 'tz': tz}
elif isinstance(obj, MultiIndex):
return {'typ': 'multi_index',
'klass': obj.__class__.__name__,
@@ -295,7 +300,9 @@ def encode(obj):
'data': convert(obj.values)}
elif isinstance(obj, Series):
if isinstance(obj, SparseSeries):
- raise NotImplementedError("msgpack sparse series is not implemented")
+ raise NotImplementedError(
+ 'msgpack sparse series is not implemented'
+ )
#d = {'typ': 'sparse_series',
# 'klass': obj.__class__.__name__,
# 'dtype': obj.dtype.num,
@@ -316,7 +323,9 @@ def encode(obj):
'compress': compressor}
elif issubclass(tobj, NDFrame):
if isinstance(obj, SparseDataFrame):
- raise NotImplementedError("msgpack sparse frame is not implemented")
+ raise NotImplementedError(
+ 'msgpack sparse frame is not implemented'
+ )
#d = {'typ': 'sparse_dataframe',
# 'klass': obj.__class__.__name__,
# 'columns': obj.columns}
@@ -326,7 +335,9 @@ def encode(obj):
# for name, ss in compat.iteritems(obj)])
#return d
elif isinstance(obj, SparsePanel):
- raise NotImplementedError("msgpack sparse frame is not implemented")
+ raise NotImplementedError(
+ 'msgpack sparse frame is not implemented'
+ )
#d = {'typ': 'sparse_panel',
# 'klass': obj.__class__.__name__,
# 'items': obj.items}
@@ -353,7 +364,8 @@ def encode(obj):
'compress': compressor
} for b in data.blocks]}
- elif isinstance(obj, (datetime, date, np.datetime64, timedelta, np.timedelta64)):
+ elif isinstance(obj, (datetime, date, np.datetime64, timedelta,
+ np.timedelta64)):
if isinstance(obj, Timestamp):
tz = obj.tzinfo
if tz is not None:
@@ -436,18 +448,22 @@ def decode(obj):
return Period(ordinal=obj['ordinal'], freq=obj['freq'])
elif typ == 'index':
dtype = dtype_for(obj['dtype'])
- data = unconvert(obj['data'], np.typeDict[obj['dtype']], obj.get('compress'))
+ data = unconvert(obj['data'], np.typeDict[obj['dtype']],
+ obj.get('compress'))
return globals()[obj['klass']](data, dtype=dtype, name=obj['name'])
elif typ == 'multi_index':
- data = unconvert(obj['data'], np.typeDict[obj['dtype']], obj.get('compress'))
- data = [ tuple(x) for x in data ]
+ data = unconvert(obj['data'], np.typeDict[obj['dtype']],
+ obj.get('compress'))
+ data = [tuple(x) for x in data]
return globals()[obj['klass']].from_tuples(data, names=obj['names'])
elif typ == 'period_index':
data = unconvert(obj['data'], np.int64, obj.get('compress'))
- return globals()[obj['klass']](data, name=obj['name'], freq=obj['freq'])
+ return globals()[obj['klass']](data, name=obj['name'],
+ freq=obj['freq'])
elif typ == 'datetime_index':
data = unconvert(obj['data'], np.int64, obj.get('compress'))
- result = globals()[obj['klass']](data, freq=obj['freq'], name=obj['name'])
+ result = globals()[obj['klass']](data, freq=obj['freq'],
+ name=obj['name'])
tz = obj['tz']
# reverse tz conversion
@@ -457,13 +473,17 @@ def decode(obj):
elif typ == 'series':
dtype = dtype_for(obj['dtype'])
index = obj['index']
- return globals()[obj['klass']](unconvert(obj['data'], dtype, obj['compress']), index=index, name=obj['name'])
+ return globals()[obj['klass']](unconvert(obj['data'], dtype,
+ obj['compress']),
+ index=index, name=obj['name'])
elif typ == 'block_manager':
axes = obj['axes']
def create_block(b):
dtype = dtype_for(b['dtype'])
- return make_block(unconvert(b['values'], dtype, b['compress']).reshape(b['shape']), b['items'], axes[0], klass=getattr(internals, b['klass']))
+ return make_block(unconvert(b['values'], dtype, b['compress'])
+ .reshape(b['shape']), b['items'], axes[0],
+ klass=getattr(internals, b['klass']))
blocks = [create_block(b) for b in obj['blocks']]
return globals()[obj['klass']](BlockManager(blocks, axes))
@@ -479,21 +499,29 @@ def create_block(b):
return np.timedelta64(int(obj['data']))
#elif typ == 'sparse_series':
# dtype = dtype_for(obj['dtype'])
- # return globals(
- # )[obj['klass']](unconvert(obj['sp_values'], dtype, obj['compress']), sparse_index=obj['sp_index'],
- # index=obj['index'], fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
+ # return globals()[obj['klass']](
+ # unconvert(obj['sp_values'], dtype, obj['compress']),
+ # sparse_index=obj['sp_index'], index=obj['index'],
+ # fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
#elif typ == 'sparse_dataframe':
- # return globals()[obj['klass']](obj['data'],
- # columns=obj['columns'], default_fill_value=obj['default_fill_value'], default_kind=obj['default_kind'])
+ # return globals()[obj['klass']](
+ # obj['data'], columns=obj['columns'],
+ # default_fill_value=obj['default_fill_value'],
+ # default_kind=obj['default_kind']
+ # )
#elif typ == 'sparse_panel':
- # return globals()[obj['klass']](obj['data'],
- # items=obj['items'], default_fill_value=obj['default_fill_value'], default_kind=obj['default_kind'])
+ # return globals()[obj['klass']](
+ # obj['data'], items=obj['items'],
+ # default_fill_value=obj['default_fill_value'],
+ # default_kind=obj['default_kind'])
elif typ == 'block_index':
- return globals()[obj['klass']](obj['length'], obj['blocs'], obj['blengths'])
+ return globals()[obj['klass']](obj['length'], obj['blocs'],
+ obj['blengths'])
elif typ == 'int_index':
return globals()[obj['klass']](obj['length'], obj['indices'])
elif typ == 'ndarray':
- return unconvert(obj['data'], np.typeDict[obj['dtype']], obj.get('compress')).reshape(obj['shape'])
+ return unconvert(obj['data'], np.typeDict[obj['dtype']],
+ obj.get('compress')).reshape(obj['shape'])
elif typ == 'np_scalar':
if obj.get('sub_typ') == 'np_complex':
return c2f(obj['real'], obj['imag'], obj['dtype'])
@@ -585,7 +613,7 @@ def __iter__(self):
try:
path_exists = os.path.exists(self.path)
- except (TypeError):
+ except TypeError:
path_exists = False
if path_exists:
@@ -595,7 +623,7 @@ def __iter__(self):
else:
- if not hasattr(self.path,'read'):
+ if not hasattr(self.path, 'read'):
fh = compat.BytesIO(self.path)
else:
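
A hedged round-trip sketch for the msgpack helpers reformatted above. to_msgpack/read_msgpack were top-level pandas functions in this era (they have since been removed), and compress is the keyword documented in the hunk:

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2, 3]})
    pd.to_msgpack('frame.msg', df, compress='zlib')  # zlib, blosc, or None
    restored = pd.read_msgpack('frame.msg')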
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index e62ecd5a541df..bd0649a7a85f3 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -30,14 +30,15 @@
Parameters
----------
filepath_or_buffer : string or file handle / StringIO. The string could be
- a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host
- is expected. For instance, a local file could be
+ a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a
+ host is expected. For instance, a local file could be
file ://localhost/path/to/table.csv
%s
lineterminator : string (length 1), default None
Character to break file into lines. Only valid with C parser
quotechar : string
- The character to used to denote the start and end of a quoted item. Quoted items can include the delimiter and it will be ignored.
+ The character to used to denote the start and end of a quoted item. Quoted
+ items can include the delimiter and it will be ignored.
quoting : int
Controls whether quotes should be recognized. Values are taken from
`csv.QUOTE_*` values. Acceptable values are 0, 1, 2, and 3 for
@@ -55,9 +56,9 @@
header : int row number(s) to use as the column names, and the start of the
data. Defaults to 0 if no ``names`` passed, otherwise ``None``. Explicitly
pass ``header=0`` to be able to replace existing names. The header can be
- a list of integers that specify row locations for a multi-index on the columns
- E.g. [0,1,3]. Intervening rows that are not specified will be skipped.
- (E.g. 2 in this example are skipped)
+ a list of integers that specify row locations for a multi-index on the
+ columns E.g. [0,1,3]. Intervening rows that are not specified will be
+ skipped. (E.g. 2 in this example are skipped)
skiprows : list-like or integer
Row numbers to skip (0-indexed) or number of rows to skip (int)
at the start of the file
@@ -251,7 +252,7 @@ def _read(filepath_or_buffer, kwds):
'squeeze': False,
'compression': None,
'mangle_dupe_cols': True,
- 'tupleize_cols':False,
+ 'tupleize_cols': False,
}
@@ -437,9 +438,10 @@ def read_fwf(filepath_or_buffer, colspecs='infer', widths=None, **kwds):
# common NA values
# no longer excluding inf representations
# '1.#INF','-1.#INF', '1.#INF000000',
-_NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN',
- '#N/A','N/A', 'NA', '#NA', 'NULL', 'NaN',
- 'nan', ''])
+_NA_VALUES = set([
+ '-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A', 'N/A', 'NA', '#NA',
+ 'NULL', 'NaN', 'nan', ''
+])
class TextFileReader(object):
@@ -653,14 +655,14 @@ def __init__(self, kwds):
self.na_fvalues = kwds.get('na_fvalues')
self.true_values = kwds.get('true_values')
self.false_values = kwds.get('false_values')
- self.tupleize_cols = kwds.get('tupleize_cols',False)
+ self.tupleize_cols = kwds.get('tupleize_cols', False)
self._date_conv = _make_date_converter(date_parser=self.date_parser,
dayfirst=self.dayfirst)
# validate header options for mi
self.header = kwds.get('header')
- if isinstance(self.header,(list,tuple,np.ndarray)):
+ if isinstance(self.header, (list, tuple, np.ndarray)):
if kwds.get('as_recarray'):
raise ValueError("cannot specify as_recarray when "
"specifying a multi-index header")
@@ -702,7 +704,8 @@ def _should_parse_dates(self, i):
else:
return (j in self.parse_dates) or (name in self.parse_dates)
- def _extract_multi_indexer_columns(self, header, index_names, col_names, passed_names=False):
+ def _extract_multi_indexer_columns(self, header, index_names, col_names,
+ passed_names=False):
""" extract and return the names, index_names, col_names
header is a list-of-lists returned from the parsers """
if len(header) < 2:
@@ -715,8 +718,8 @@ def _extract_multi_indexer_columns(self, header, index_names, col_names, passed_
if ic is None:
ic = []
- if not isinstance(ic, (list,tuple,np.ndarray)):
- ic = [ ic ]
+ if not isinstance(ic, (list, tuple, np.ndarray)):
+ ic = [ic]
sic = set(ic)
# clean the index_names
@@ -726,22 +729,29 @@ def _extract_multi_indexer_columns(self, header, index_names, col_names, passed_
# extract the columns
field_count = len(header[0])
+
def extract(r):
- return tuple([ r[i] for i in range(field_count) if i not in sic ])
- columns = lzip(*[ extract(r) for r in header ])
+ return tuple([r[i] for i in range(field_count) if i not in sic])
+
+ columns = lzip(*[extract(r) for r in header])
names = ic + columns
- # if we find 'Unnamed' all of a single level, then our header was too long
+ # if we find 'Unnamed' all of a single level, then our header was too
+ # long
for n in range(len(columns[0])):
- if all([ 'Unnamed' in c[n] for c in columns ]):
- raise _parser.CParserError("Passed header=[%s] are too many rows for this "
- "multi_index of columns" % ','.join([ str(x) for x in self.header ]))
+ if all(['Unnamed' in c[n] for c in columns]):
+ raise _parser.CParserError(
+ "Passed header=[%s] are too many rows for this "
+ "multi_index of columns"
+ % ','.join([str(x) for x in self.header])
+ )
# clean the column names (if we have an index_col)
if len(ic):
- col_names = [ r[0] if len(r[0]) and 'Unnamed' not in r[0] else None for r in header ]
+ col_names = [r[0] if len(r[0]) and 'Unnamed' not in r[0] else None
+ for r in header]
else:
- col_names = [ None ] * len(header)
+ col_names = [None] * len(header)
passed_names = True
@@ -749,9 +759,10 @@ def extract(r):
def _maybe_make_multi_index_columns(self, columns, col_names=None):
# possibly create a column mi here
- if not self.tupleize_cols and len(columns) and not isinstance(
- columns, MultiIndex) and all([ isinstance(c,tuple) for c in columns]):
- columns = MultiIndex.from_tuples(columns,names=col_names)
+ if (not self.tupleize_cols and len(columns) and
+ not isinstance(columns, MultiIndex) and
+ all([isinstance(c, tuple) for c in columns])):
+ columns = MultiIndex.from_tuples(columns, names=col_names)
return columns
def _make_index(self, data, alldata, columns, indexnamerow=False):
@@ -849,9 +860,8 @@ def _agg_index(self, index, try_parse_dates=True):
if isinstance(self.na_values, dict):
col_name = self.index_names[i]
if col_name is not None:
- col_na_values, col_na_fvalues = _get_na_values(col_name,
- self.na_values,
- self.na_fvalues)
+ col_na_values, col_na_fvalues = _get_na_values(
+ col_name, self.na_values, self.na_fvalues)
arr, _ = self._convert_types(arr, col_na_values | col_na_fvalues)
arrays.append(arr)
@@ -865,14 +875,14 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
result = {}
for c, values in compat.iteritems(dct):
conv_f = None if converters is None else converters.get(c, None)
- col_na_values, col_na_fvalues = _get_na_values(c, na_values, na_fvalues)
+ col_na_values, col_na_fvalues = _get_na_values(c, na_values,
+ na_fvalues)
coerce_type = True
if conv_f is not None:
values = lib.map_infer(values, conv_f)
coerce_type = False
- cvals, na_count = self._convert_types(values,
- set(col_na_values) | col_na_fvalues,
- coerce_type)
+ cvals, na_count = self._convert_types(
+ values, set(col_na_values) | col_na_fvalues, coerce_type)
result[c] = cvals
if verbose and na_count:
print('Filled %d NA values in column %s' % (na_count, str(c)))
@@ -951,8 +961,12 @@ def __init__(self, src, **kwds):
else:
if len(self._reader.header) > 1:
# we have a multi index in the columns
- self.names, self.index_names, self.col_names, passed_names = self._extract_multi_indexer_columns(
- self._reader.header, self.index_names, self.col_names, passed_names)
+ self.names, self.index_names, self.col_names, passed_names = (
+ self._extract_multi_indexer_columns(
+ self._reader.header, self.index_names, self.col_names,
+ passed_names
+ )
+ )
else:
self.names = list(self._reader.header[0])
@@ -963,8 +977,9 @@ def __init__(self, src, **kwds):
else:
self.names = lrange(self._reader.table_width)
- # If the names were inferred (not passed by user) and usedcols is defined,
- # then ensure names refers to the used columns, not the document's columns.
+ # If the names were inferred (not passed by user) and usedcols is
+ # defined, then ensure names refers to the used columns, not the
+ # document's columns.
if self.usecols and passed_names:
col_indices = []
for u in self.usecols:
@@ -972,7 +987,8 @@ def __init__(self, src, **kwds):
col_indices.append(self.names.index(u))
else:
col_indices.append(u)
- self.names = [n for i, n in enumerate(self.names) if i in col_indices]
+ self.names = [n for i, n in enumerate(self.names)
+ if i in col_indices]
if len(self.names) < len(self.usecols):
raise ValueError("Usecols do not match names.")
@@ -982,11 +998,12 @@ def __init__(self, src, **kwds):
if not self._has_complex_date_col:
if (self._reader.leading_cols == 0 and
- _is_index_col(self.index_col)):
+ _is_index_col(self.index_col)):
self._name_processed = True
(index_names, self.names,
- self.index_col) = _clean_index_names(self.names, self.index_col)
+ self.index_col) = _clean_index_names(self.names,
+ self.index_col)
if self.index_names is None:
self.index_names = index_names
@@ -1265,8 +1282,11 @@ def __init__(self, f, **kwds):
# The original set is stored in self.original_columns.
if len(self.columns) > 1:
# we are processing a multi index column
- self.columns, self.index_names, self.col_names, _ = self._extract_multi_indexer_columns(
- self.columns, self.index_names, self.col_names)
+ self.columns, self.index_names, self.col_names, _ = (
+ self._extract_multi_indexer_columns(
+ self.columns, self.index_names, self.col_names
+ )
+ )
# Update list of original names to include all indices.
self.num_original_columns = len(self.columns)
else:
@@ -1291,7 +1311,8 @@ def __init__(self, f, **kwds):
self._no_thousands_columns = None
def _set_no_thousands_columns(self):
- # Create a set of column ids that are not to be stripped of thousands operators.
+ # Create a set of column ids that are not to be stripped of thousands
+ # operators.
noconvert_columns = set()
def _set(x):
@@ -1478,7 +1499,8 @@ def _infer_columns(self):
for i, c in enumerate(line):
if c == '':
if have_mi_columns:
- this_columns.append('Unnamed: %d_level_%d' % (i, level))
+ this_columns.append('Unnamed: %d_level_%d'
+ % (i, level))
else:
this_columns.append('Unnamed: %d' % i)
unnamed_count += 1
@@ -1494,16 +1516,17 @@ def _infer_columns(self):
counts[col] = cur_count + 1
elif have_mi_columns:
- # if we have grabbed an extra line, but its not in our format
- # so save in the buffer, and create an blank extra line for the rest of the
- # parsing code
+ # if we have grabbed an extra line, but its not in our
+ # format so save in the buffer, and create an blank extra
+ # line for the rest of the parsing code
if hr == header[-1]:
lc = len(this_columns)
- ic = len(self.index_col) if self.index_col is not None else 0
+ ic = (len(self.index_col)
+ if self.index_col is not None else 0)
if lc != unnamed_count and lc-ic > unnamed_count:
clear_buffer = False
- this_columns = [ None ] * lc
- self.buf = [ self.buf[-1] ]
+ this_columns = [None] * lc
+ self.buf = [self.buf[-1]]
columns.append(this_columns)
if len(columns) == 1:
@@ -1513,17 +1536,19 @@ def _infer_columns(self):
self._clear_buffer()
if names is not None:
- if (self.usecols is not None and len(names) != len(self.usecols)) \
- or (self.usecols is None and len(names) != len(columns[0])):
-
+ if ((self.usecols is not None
+ and len(names) != len(self.usecols))
+ or (self.usecols is None
+ and len(names) != len(columns[0]))):
raise ValueError('Number of passed names did not match '
- 'number of header fields in the file')
+ 'number of header fields in the file')
if len(columns) > 1:
raise TypeError('Cannot pass names with multi-index '
'columns')
if self.usecols is not None:
- # Set _use_cols. We don't store columns because they are overwritten.
+ # Set _use_cols. We don't store columns because they are
+ # overwritten.
self._handle_usecols(columns, names)
else:
self._col_indices = None
@@ -1538,9 +1563,9 @@ def _infer_columns(self):
num_original_columns = ncols
if not names:
if self.prefix:
- columns = [ ['X%d' % i for i in range(ncols)] ]
+ columns = [['X%d' % i for i in range(ncols)]]
else:
- columns = [ lrange(ncols) ]
+ columns = [lrange(ncols)]
columns = self._handle_usecols(columns, columns[0])
else:
if self.usecols is None or len(names) == num_original_columns:
@@ -1548,8 +1573,10 @@ def _infer_columns(self):
num_original_columns = len(names)
else:
if self.usecols and len(names) != len(self.usecols):
- raise ValueError('Number of passed names did not match '
- 'number of header fields in the file')
+ raise ValueError(
+ 'Number of passed names did not match number of '
+ 'header fields in the file'
+ )
# Ignore output but set used columns.
self._handle_usecols([names], names)
columns = [names]
@@ -1566,7 +1593,8 @@ def _handle_usecols(self, columns, usecols_key):
if self.usecols is not None:
if any([isinstance(u, string_types) for u in self.usecols]):
if len(columns) > 1:
- raise ValueError("If using multiple headers, usecols must be integers.")
+ raise ValueError("If using multiple headers, usecols must "
+ "be integers.")
col_indices = []
for u in self.usecols:
if isinstance(u, string_types):
@@ -1576,7 +1604,8 @@ def _handle_usecols(self, columns, usecols_key):
else:
col_indices = self.usecols
- columns = [[n for i, n in enumerate(column) if i in col_indices] for column in columns]
+ columns = [[n for i, n in enumerate(column) if i in col_indices]
+ for column in columns]
self._col_indices = col_indices
return columns
@@ -1640,8 +1669,9 @@ def _check_thousands(self, lines):
for i, x in enumerate(l):
if (not isinstance(x, compat.string_types) or
self.thousands not in x or
- (self._no_thousands_columns and i in self._no_thousands_columns) or
- nonnum.search(x.strip())):
+ (self._no_thousands_columns
+ and i in self._no_thousands_columns)
+ or nonnum.search(x.strip())):
rl.append(x)
else:
rl.append(x.replace(self.thousands, ''))
@@ -1746,9 +1776,14 @@ def _rows_to_cols(self, content):
if self.usecols:
if self._implicit_index:
- zipped_content = [a for i, a in enumerate(zipped_content) if i < len(self.index_col) or i - len(self.index_col) in self._col_indices]
+ zipped_content = [
+ a for i, a in enumerate(zipped_content)
+ if (i < len(self.index_col)
+ or i - len(self.index_col) in self._col_indices)
+ ]
else:
- zipped_content = [a for i, a in enumerate(zipped_content) if i in self._col_indices]
+ zipped_content = [a for i, a in enumerate(zipped_content)
+ if i in self._col_indices]
return zipped_content
def _get_lines(self, rows=None):
@@ -1802,8 +1837,8 @@ def _get_lines(self, rows=None):
except csv.Error as inst:
if 'newline inside string' in str(inst):
row_num = str(self.pos + rows)
- msg = ('EOF inside string starting with line '
- + row_num)
+ msg = ('EOF inside string starting with '
+ 'line ' + row_num)
raise Exception(msg)
raise
except StopIteration:
@@ -1948,7 +1983,9 @@ def _clean_na_values(na_values, keep_default_na=True):
for k, v in compat.iteritems(na_values):
v = set(list(v)) | _NA_VALUES
na_values[k] = v
- na_fvalues = dict([ (k, _floatify_na_values(v)) for k, v in na_values.items() ])
+ na_fvalues = dict([
+ (k, _floatify_na_values(v)) for k, v in na_values.items()
+ ])
else:
if not com.is_list_like(na_values):
na_values = [na_values]
@@ -1987,7 +2024,8 @@ def _clean_index_names(columns, index_col):
index_names.append(name)
# hack
- if isinstance(index_names[0], compat.string_types) and 'Unnamed' in index_names[0]:
+ if isinstance(index_names[0], compat.string_types)\
+ and 'Unnamed' in index_names[0]:
index_names[0] = None
return index_names, columns, index_col
@@ -2071,10 +2109,13 @@ def _get_col_names(colspec, columns):
def _concat_date_cols(date_cols):
if len(date_cols) == 1:
if compat.PY3:
- return np.array([compat.text_type(x) for x in date_cols[0]], dtype=object)
+ return np.array([compat.text_type(x) for x in date_cols[0]],
+ dtype=object)
else:
- return np.array([str(x) if not isinstance(x, compat.string_types) else x
- for x in date_cols[0]], dtype=object)
+ return np.array([
+ str(x) if not isinstance(x, compat.string_types) else x
+ for x in date_cols[0]
+ ], dtype=object)
rs = np.array([' '.join([compat.text_type(y) for y in x])
for x in zip(*date_cols)], dtype=object)
@@ -2101,9 +2142,9 @@ def __init__(self, f, colspecs, delimiter, comment):
for colspec in self.colspecs:
if not (isinstance(colspec, (tuple, list)) and
- len(colspec) == 2 and
- isinstance(colspec[0], (int, np.integer)) and
- isinstance(colspec[1], (int, np.integer))):
+ len(colspec) == 2 and
+ isinstance(colspec[0], (int, np.integer)) and
+ isinstance(colspec[1], (int, np.integer))):
raise TypeError('Each column specification must be '
'2 element tuple or list of integers')
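
The reflowed read_csv docstring above notes that header may be a list of row numbers, which builds a MultiIndex on the columns. A small, hedged example of that behaviour with inline data:

    from io import StringIO
    import pandas as pd

    data = "A,A,B\nx,y,z\n1,2,3\n4,5,6\n"
    df = pd.read_csv(StringIO(data), header=[0, 1])
    print(df.columns)  # MultiIndex: ('A', 'x'), ('A', 'y'), ('B', 'z')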
diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py
index 97633873e7b40..915c1e9ae1574 100644
--- a/pandas/io/pickle.py
+++ b/pandas/io/pickle.py
@@ -1,5 +1,6 @@
from pandas.compat import cPickle as pkl, pickle_compat as pc, PY3
+
def to_pickle(obj, path):
"""
Pickle (serialize) object to input file path
@@ -19,8 +20,8 @@ def read_pickle(path):
Load pickled pandas object (or any other pickled object) from the specified
file path
- Warning: Loading pickled data received from untrusted sources can be unsafe.
- See: http://docs.python.org/2.7/library/pickle.html
+ Warning: Loading pickled data received from untrusted sources can be
+ unsafe. See: http://docs.python.org/2.7/library/pickle.html
Parameters
----------
@@ -38,10 +39,10 @@ def try_read(path, encoding=None):
# pass encoding only if its not None as py2 doesn't handle
# the param
try:
- with open(path,'rb') as fh:
+ with open(path, 'rb') as fh:
return pc.load(fh, encoding=encoding, compat=False)
except:
- with open(path,'rb') as fh:
+ with open(path, 'rb') as fh:
return pc.load(fh, encoding=encoding, compat=True)
try:
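
For completeness, the pickle helpers above in a hedged round trip; the docstring's warning about unpickling untrusted data still applies, and the file name is a throwaway:

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2, 3]})
    df.to_pickle('frame.pkl')
    restored = pd.read_pickle('frame.pkl')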
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index db2028c70dc20..6ebc33afdd43d 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -74,10 +74,11 @@ def _ensure_term(where):
create the terms here with a frame_level=2 (we are 2 levels down)
"""
- # only consider list/tuple here as an ndarray is automaticaly a coordinate list
- if isinstance(where, (list,tuple)):
+ # only consider list/tuple here as an ndarray is automaticaly a coordinate
+ # list
+ if isinstance(where, (list, tuple)):
where = [w if not maybe_expression(w) else Term(w, scope_level=2)
- for w in where if w is not None ]
+ for w in where if w is not None]
elif maybe_expression(where):
where = Term(where, scope_level=2)
return where
@@ -124,11 +125,11 @@ class DuplicateWarning(Warning):
# formats
_FORMAT_MAP = {
- u('f') : 'fixed',
- u('fixed') : 'fixed',
- u('t') : 'table',
- u('table') : 'table',
- }
+ u('f'): 'fixed',
+ u('fixed'): 'fixed',
+ u('t'): 'table',
+ u('table'): 'table',
+}
format_deprecate_doc = """
the table keyword has been deprecated
@@ -169,7 +170,7 @@ class DuplicateWarning(Warning):
# table class map
_TABLE_MAP = {
u('generic_table'): 'GenericTable',
- u('appendable_series') : 'AppendableSeriesTable',
+ u('appendable_series'): 'AppendableSeriesTable',
u('appendable_multiseries'): 'AppendableMultiSeriesTable',
u('appendable_frame'): 'AppendableFrameTable',
u('appendable_multiframe'): 'AppendableMultiFrameTable',
@@ -202,8 +203,10 @@ class DuplicateWarning(Warning):
with config.config_prefix('io.hdf'):
config.register_option('dropna_table', True, dropna_doc,
validator=config.is_bool)
- config.register_option('default_format', None, format_doc,
- validator=config.is_one_of_factory(['fixed','table',None]))
+ config.register_option(
+ 'default_format', None, format_doc,
+ validator=config.is_one_of_factory(['fixed', 'table', None])
+ )
# oh the troubles to reduce import time
_table_mod = None
@@ -271,7 +274,7 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None,
def read_hdf(path_or_buf, key, **kwargs):
- """ read from the store, closeit if we opened it
+ """ read from the store, close it if we opened it
Retrieve pandas object stored in file, optionally based on where
criteria
@@ -281,12 +284,16 @@ def read_hdf(path_or_buf, key, **kwargs):
path_or_buf : path (string), or buffer to read from
key : group identifier in the store
where : list of Term (or convertable) objects, optional
- start : optional, integer (defaults to None), row number to start selection
- stop : optional, integer (defaults to None), row number to stop selection
- columns : optional, a list of columns that if not None, will limit the return columns
+ start : optional, integer (defaults to None), row number to start
+ selection
+ stop : optional, integer (defaults to None), row number to stop
+ selection
+ columns : optional, a list of columns that if not None, will limit the
+ return columns
iterator : optional, boolean, return an iterator, default False
chunksize : optional, nrows to include in iteration, return an iterator
- auto_close : optional, boolean, should automatically close the store when finished, default is False
+ auto_close : optional, boolean, should automatically close the store
+ when finished, default is False
Returns
-------
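A minimal usage sketch of read_hdf with the parameters documented above (file, key and query are illustrative; the string where form assumes a pandas version that accepts query expressions):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'A': np.random.randn(100), 'B': np.random.randn(100)},
                      index=pd.date_range('2013-01-01', periods=100))
    df.to_hdf('demo.h5', 'df', format='table', data_columns=['A'])

    # whole object, then a restricted read using where/columns/start/stop
    full = pd.read_hdf('demo.h5', 'df')
    part = pd.read_hdf('demo.h5', 'df', where='A > 0', columns=['A'],
                       start=0, stop=50)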
@@ -442,8 +449,8 @@ def __unicode__(self):
pprint_thing(s or 'invalid_HDFStore node'))
except Exception as detail:
keys.append(k)
- values.append(
- "[invalid_HDFStore node: %s]" % pprint_thing(detail))
+ values.append("[invalid_HDFStore node: %s]"
+ % pprint_thing(detail))
output += adjoin(12, keys, values)
else:
@@ -456,7 +463,8 @@ def __unicode__(self):
def keys(self):
"""
Return a (potentially unordered) list of the keys corresponding to the
- objects stored in the HDFStore. These are ABSOLUTE path-names (e.g. have the leading '/'
+ objects stored in the HDFStore. These are ABSOLUTE path-names (e.g.
+ have the leading '/')
"""
return [n._v_pathname for n in self.groups()]
@@ -482,15 +490,18 @@ def open(self, mode='a', **kwargs):
if self._mode != mode:
- # if we are chaning a write mode to read, ok
+ # if we are changing a write mode to read, ok
if self._mode in ['a', 'w'] and mode in ['r', 'r+']:
pass
elif mode in ['w']:
# this would truncate, raise here
if self.is_open:
- raise PossibleDataLossError("Re-opening the file [{0}] with mode [{1}] "
- "will delete the current file!".format(self._path, self._mode))
+ raise PossibleDataLossError(
+ "Re-opening the file [{0}] with mode [{1}] "
+ "will delete the current file!"
+ .format(self._path, self._mode)
+ )
self._mode = mode
@@ -588,10 +599,12 @@ def select(self, key, where=None, start=None, stop=None, columns=None,
where : list of Term (or convertable) objects, optional
start : integer (defaults to None), row number to start selection
stop : integer (defaults to None), row number to stop selection
- columns : a list of columns that if not None, will limit the return columns
+ columns : a list of columns that if not None, will limit the return
+ columns
iterator : boolean, return an iterator, default False
chunksize : nrows to include in iteration, return an iterator
- auto_close : boolean, should automatically close the store when finished, default is False
+ auto_close : boolean, should automatically close the store when
+ finished, default is False
Returns
-------
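A sketch of select with these options (key, condition and column subset are illustrative):

    import numpy as np
    import pandas as pd

    store = pd.HDFStore('demo.h5', mode='w')
    store.append('df', pd.DataFrame(np.random.randn(20, 3), columns=list('ABC')),
                 data_columns=['A'])

    # rows where the data column A is positive, returning only column B
    out = store.select('df', where='A > 0', columns=['B'])
    store.close()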
@@ -636,16 +649,20 @@ def select_as_coordinates(
stop : integer (defaults to None), row number to stop selection
"""
where = _ensure_term(where)
- return self.get_storer(key).read_coordinates(where=where, start=start, stop=stop, **kwargs)
+ return self.get_storer(key).read_coordinates(where=where, start=start,
+ stop=stop, **kwargs)
def unique(self, key, column, **kwargs):
warnings.warn("unique(key,column) is deprecated\n"
- "use select_column(key,column).unique() instead",FutureWarning)
- return self.get_storer(key).read_column(column=column, **kwargs).unique()
+ "use select_column(key,column).unique() instead",
+ FutureWarning)
+ return self.get_storer(key).read_column(column=column,
+ **kwargs).unique()
def select_column(self, key, column, **kwargs):
"""
- return a single column from the table. This is generally only useful to select an indexable
+ return a single column from the table. This is generally only useful to
+ select an indexable
Parameters
----------
@@ -654,8 +671,10 @@ def select_column(self, key, column, **kwargs):
Exceptions
----------
- raises KeyError if the column is not found (or key is not a valid store)
- raises ValueError if the column can not be extracted indivually (it is part of a data block)
+ raises KeyError if the column is not found (or key is not a valid
+ store)
+ raises ValueError if the column can not be extracted individually (it
+ is part of a data block)
"""
return self.get_storer(key).read_column(column=column, **kwargs)
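A sketch of select_column, which returns one indexable or data column as a Series (names are illustrative):

    import numpy as np
    import pandas as pd

    store = pd.HDFStore('demo.h5', mode='w')
    store.append('df', pd.DataFrame({'A': np.arange(5.), 'B': list('abcde')}),
                 data_columns=['B'])

    idx = store.select_column('df', 'index')   # the stored index
    col = store.select_column('df', 'B')       # works because B is a data column
    store.close()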
@@ -668,7 +687,8 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None,
Parameters
----------
keys : a list of the tables
- selector : the table to apply the where criteria (defaults to keys[0] if not supplied)
+ selector : the table to apply the where criteria (defaults to keys[0]
+ if not supplied)
columns : the columns I want back
start : integer (defaults to None), row number to start selection
stop : integer (defaults to None), row number to stop selection
@@ -677,7 +697,8 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None,
Exceptions
----------
- raise if any of the keys don't refer to tables or if they are not ALL THE SAME DIMENSIONS
+ raise if any of the keys don't refer to tables or if they are not ALL
+ THE SAME DIMENSIONS
"""
# default to single select
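A sketch of select_as_multiple under the constraints above: the tables must be row-aligned, and the where criteria are evaluated against the selector table (names are illustrative):

    import numpy as np
    import pandas as pd

    store = pd.HDFStore('demo.h5', mode='w')
    store.append('df1', pd.DataFrame(np.random.randn(10, 2), columns=['A', 'B']),
                 data_columns=['A'])
    store.append('df2', pd.DataFrame(np.random.randn(10, 2), columns=['C', 'D']))

    # rows picked on df1, then gathered from both tables and concatenated
    out = store.select_as_multiple(['df1', 'df2'], where='A > 0',
                                   selector='df1')
    store.close()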
@@ -708,8 +729,9 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None,
raise TypeError("Invalid table [%s]" % k)
if not t.is_table:
raise TypeError(
- "object [%s] is not a table, and cannot be used in all select as multiple" %
- t.pathname)
+ "object [%s] is not a table, and cannot be used in all "
+ "select as multiple" % t.pathname
+ )
if nrows is None:
nrows = t.nrows
@@ -735,12 +757,16 @@ def func(_start, _stop):
axis = list(set([t.non_index_axes[0][0] for t in tbls]))[0]
# concat and return
- return concat(objs, axis=axis, verify_integrity=False).consolidate()
+ return concat(objs, axis=axis,
+ verify_integrity=False).consolidate()
if iterator or chunksize is not None:
- return TableIterator(self, func, nrows=nrows, start=start, stop=stop, chunksize=chunksize, auto_close=auto_close)
+ return TableIterator(self, func, nrows=nrows, start=start,
+ stop=stop, chunksize=chunksize,
+ auto_close=auto_close)
- return TableIterator(self, func, nrows=nrows, start=start, stop=stop, auto_close=auto_close).get_values()
+ return TableIterator(self, func, nrows=nrows, start=start, stop=stop,
+ auto_close=auto_close).get_values()
def put(self, key, value, format=None, append=False, **kwargs):
"""
@@ -754,11 +780,12 @@ def put(self, key, value, format=None, append=False, **kwargs):
fixed(f) : Fixed format
Fast writing/reading. Not-appendable, nor searchable
table(t) : Table format
- Write as a PyTables Table structure which may perform worse but
- allow more flexible operations like searching / selecting subsets
- of the data
+ Write as a PyTables Table structure which may perform
+ worse but allow more flexible operations like searching
+ / selecting subsets of the data
append : boolean, default False
- This will force Table format, append the input data to the existing.
+ This will force Table format, append the input data to the
+ existing.
encoding : default None, provide an encoding for strings
"""
if format is None:
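The fixed versus table trade-off described above, as a short sketch:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(5, 2), columns=['A', 'B'])
    store = pd.HDFStore('demo.h5', mode='w')

    store.put('df_fixed', df)                  # fast, but not appendable or queryable
    store.put('df_table', df, format='table')  # slower, but supports where selection

    store.close()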
@@ -816,7 +843,8 @@ def remove(self, key, where=None, start=None, stop=None):
'can only remove with where on objects written as tables')
return s.delete(where=where, start=start, stop=stop)
- def append(self, key, value, format=None, append=True, columns=None, dropna=None, **kwargs):
+ def append(self, key, value, format=None, append=True, columns=None,
+ dropna=None, **kwargs):
"""
Append to Table in file. Node must already exist and be Table
format.
@@ -827,18 +855,20 @@ def append(self, key, value, format=None, append=True, columns=None, dropna=None
value : {Series, DataFrame, Panel, Panel4D}
format: 'table' is the default
table(t) : table format
- Write as a PyTables Table structure which may perform worse but
- allow more flexible operations like searching / selecting subsets
- of the data
- append : boolean, default True, append the input data to the existing
- data_columns : list of columns to create as data columns, or True to use all columns
+ Write as a PyTables Table structure which may perform
+ worse but allow more flexible operations like searching
+ / selecting subsets of the data
+ append : boolean, default True, append the input data to the
+ existing
+ data_columns : list of columns to create as data columns, or True to
+ use all columns
min_itemsize : dict of columns that specify minimum string sizes
nan_rep : string to use as string nan represenation
chunksize : size to chunk the writing
expectedrows : expected TOTAL row size of this table
encoding : default None, provide an encoding for strings
- dropna : boolean, default True, do not write an ALL nan row to the store
- settable by the option 'io.hdf.dropna_table'
+ dropna : boolean, default True, do not write an ALL nan row to
+ the store, settable by the option 'io.hdf.dropna_table'
Notes
-----
Does *not* check if data being appended overlaps with existing
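A sketch of append with data_columns and min_itemsize as documented above (the size of 30 is an arbitrary illustration):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'A': np.random.randn(6), 'B': ['short'] * 6})
    store = pd.HDFStore('demo.h5', mode='w')

    # the first call creates the table; later calls append rows to it
    store.append('df', df, data_columns=['B'], min_itemsize={'B': 30})
    store.append('df', df)

    store.close()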
@@ -853,21 +883,24 @@ def append(self, key, value, format=None, append=True, columns=None, dropna=None
if format is None:
format = get_option("io.hdf.default_format") or 'table'
kwargs = self._validate_format(format, kwargs)
- self._write_to_group(key, value, append=append, dropna=dropna, **kwargs)
+ self._write_to_group(key, value, append=append, dropna=dropna,
+ **kwargs)
- def append_to_multiple(self, d, value, selector, data_columns=None, axes=None, dropna=True, **kwargs):
+ def append_to_multiple(self, d, value, selector, data_columns=None,
+ axes=None, dropna=True, **kwargs):
"""
Append to multiple tables
Parameters
----------
- d : a dict of table_name to table_columns, None is acceptable as the values of
- one node (this will get all the remaining columns)
+ d : a dict of table_name to table_columns, None is acceptable as the
+ values of one node (this will get all the remaining columns)
value : a pandas object
- selector : a string that designates the indexable table; all of its columns will
- be designed as data_columns, unless data_columns is passed, in which
- case these are used
- data_columns : list of columns to create as data columns, or True to use all columns
+ selector : a string that designates the indexable table; all of its
+ columns will be designated as data_columns, unless data_columns is
+ passed, in which case these are used
+ data_columns : list of columns to create as data columns, or True to
+ use all columns
dropna : if evaluates to True, drop rows from all tables if any single
row in each table has all NaN
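A sketch of append_to_multiple: the dict maps table names to column lists, with None collecting the remaining columns, and select_as_multiple reassembles the split (names are illustrative):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(8, 4), columns=list('ABCD'))
    store = pd.HDFStore('demo.h5', mode='w')

    store.append_to_multiple({'df_sel': ['A', 'B'], 'df_rest': None},
                             df, selector='df_sel')

    out = store.select_as_multiple(['df_sel', 'df_rest'], where='A > 0',
                                   selector='df_sel')
    store.close()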
@@ -879,15 +912,18 @@ def append_to_multiple(self, d, value, selector, data_columns=None, axes=None, d
if axes is not None:
raise TypeError("axes is currently not accepted as a parameter to"
" append_to_multiple; you can create the "
- "tables indepdently instead")
+ "tables independently instead")
if not isinstance(d, dict):
raise ValueError(
- "append_to_multiple must have a dictionary specified as the way to split the value")
+ "append_to_multiple must have a dictionary specified as the "
+ "way to split the value"
+ )
if selector not in d:
raise ValueError(
- "append_to_multiple requires a selector that is in passed dict")
+ "append_to_multiple requires a selector that is in passed dict"
+ )
# figure out the splitting axis (the non_index_axis)
axis = list(set(range(value.ndim)) - set(_AXES_MAP[type(value)]))[0]
@@ -899,7 +935,9 @@ def append_to_multiple(self, d, value, selector, data_columns=None, axes=None, d
if v is None:
if remain_key is not None:
raise ValueError(
- "append_to_multiple can only have one value in d that is None")
+ "append_to_multiple can only have one value in d that "
+ "is None"
+ )
remain_key = k
else:
remain_values.extend(v)
@@ -952,15 +990,23 @@ def create_table_index(self, key, **kwargs):
return
if not s.is_table:
- raise TypeError("cannot create table index on a Fixed format store")
+ raise TypeError(
+ "cannot create table index on a Fixed format store")
s.create_index(**kwargs)
def groups(self):
- """ return a list of all the top-level nodes (that are not themselves a pandas storage object) """
+ """return a list of all the top-level nodes (that are not themselves a
+ pandas storage object)
+ """
_tables()
self._check_if_open()
- return [g for g in self._handle.walkNodes() if getattr(g._v_attrs, 'pandas_type', None) or getattr(
- g, 'table', None) or (isinstance(g, _table_mod.table.Table) and g._v_name != u('table'))]
+ return [
+ g for g in self._handle.walkNodes()
+ if (getattr(g._v_attrs, 'pandas_type', None) or
+ getattr(g, 'table', None) or
+ (isinstance(g, _table_mod.table.Table) and
+ g._v_name != u('table')))
+ ]
def get_node(self, key):
""" return the node with the key or None if it does not exist """
@@ -981,16 +1027,16 @@ def get_storer(self, key):
s.infer_axes()
return s
- def copy(
- self, file, mode='w', propindexes=True, keys=None, complib = None, complevel = None,
- fletcher32=False, overwrite=True):
+ def copy(self, file, mode='w', propindexes=True, keys=None, complib=None,
+ complevel=None, fletcher32=False, overwrite=True):
""" copy the existing store to a new file, upgrading in place
Parameters
----------
propindexes: restore indexes in copied file (defaults to True)
keys : list of keys to include in the copy (defaults to all)
- overwrite : overwrite (remove and replace) existing nodes in the new store (default is True)
+ overwrite : overwrite (remove and replace) existing nodes in the
+ new store (default is True)
mode, complib, complevel, fletcher32 same as in HDFStore.__init__
Returns
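A sketch of copy, which returns the new store already open (paths are illustrative and 'demo.h5' is assumed to exist):

    import pandas as pd

    store = pd.HDFStore('demo.h5')
    new_store = store.copy('demo_copy.h5', mode='w', propindexes=True,
                           overwrite=True)

    new_store.close()
    store.close()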
@@ -1022,8 +1068,11 @@ def copy(
index = False
if propindexes:
index = [a.name for a in s.axes if a.is_indexed]
- new_store.append(k, data, index=index, data_columns=getattr(
- s, 'data_columns', None), encoding=s.encoding)
+ new_store.append(
+ k, data, index=index,
+ data_columns=getattr(s, 'data_columns', None),
+ encoding=s.encoding
+ )
else:
new_store.put(k, data, encoding=s.encoding)
@@ -1039,10 +1088,10 @@ def _validate_format(self, format, kwargs):
kwargs = kwargs.copy()
# table arg
- table = kwargs.pop('table',None)
+ table = kwargs.pop('table', None)
if table is not None:
- warnings.warn(format_deprecate_doc,FutureWarning)
+ warnings.warn(format_deprecate_doc, FutureWarning)
if table:
format = 'table'
@@ -1053,17 +1102,21 @@ def _validate_format(self, format, kwargs):
try:
kwargs['format'] = _FORMAT_MAP[format.lower()]
except:
- raise TypeError("invalid HDFStore format specified [{0}]".format(format))
+ raise TypeError("invalid HDFStore format specified [{0}]"
+ .format(format))
return kwargs
- def _create_storer(self, group, format=None, value=None, append=False, **kwargs):
+ def _create_storer(self, group, format=None, value=None, append=False,
+ **kwargs):
""" return a suitable class to operate """
def error(t):
raise TypeError(
- "cannot properly create the storer for: [%s] [group->%s,value->%s,format->%s,append->%s,kwargs->%s]" %
- (t, group, type(value), format, append, kwargs))
+ "cannot properly create the storer for: [%s] [group->%s,"
+ "value->%s,format->%s,append->%s,kwargs->%s]"
+ % (t, group, type(value), format, append, kwargs)
+ )
pt = _ensure_decoded(getattr(group._v_attrs, 'pandas_type', None))
tt = _ensure_decoded(getattr(group._v_attrs, 'table_type', None))
@@ -1073,12 +1126,14 @@ def error(t):
if value is None:
_tables()
- if getattr(group, 'table', None) or isinstance(group, _table_mod.table.Table):
+ if (getattr(group, 'table', None) or
+ isinstance(group, _table_mod.table.Table)):
pt = u('frame_table')
tt = u('generic_table')
else:
raise TypeError(
- "cannot create a storer if the object is not existing nor a value are passed")
+ "cannot create a storer if the object is not existing "
+ "nor a value are passed")
else:
try:
@@ -1104,14 +1159,14 @@ def error(t):
if value is not None:
if pt == u('series_table'):
- index = getattr(value,'index',None)
+ index = getattr(value, 'index', None)
if index is not None:
if index.nlevels == 1:
tt = u('appendable_series')
elif index.nlevels > 1:
tt = u('appendable_multiseries')
elif pt == u('frame_table'):
- index = getattr(value,'index',None)
+ index = getattr(value, 'index', None)
if index is not None:
if index.nlevels == 1:
tt = u('appendable_frame')
@@ -1138,8 +1193,7 @@ def error(t):
except:
error('_TABLE_MAP')
- def _write_to_group(
- self, key, value, format, index=True, append=False,
+ def _write_to_group(self, key, value, format, index=True, append=False,
complib=None, encoding=None, **kwargs):
group = self.get_node(key)
@@ -1150,7 +1204,7 @@ def _write_to_group(
# we don't want to store a table node at all if are object is 0-len
# as there are not dtypes
- if getattr(value,'empty',None) and (format == 'table' or append):
+ if getattr(value, 'empty', None) and (format == 'table' or append):
return
if group is None:
@@ -1175,7 +1229,8 @@ def _write_to_group(
if append:
# raise if we are trying to append to a Fixed format,
# or a table that exists (and we are putting)
- if not s.is_table or (s.is_table and format == 'fixed' and s.is_exists):
+ if (not s.is_table or
+ (s.is_table and format == 'fixed' and s.is_exists)):
raise ValueError('Can only append to Tables')
if not s.is_exists:
s.set_object_info()
@@ -1183,7 +1238,9 @@ def _write_to_group(
s.set_object_info()
if not s.is_table and complib:
- raise ValueError('Compression not supported on Fixed format stores')
+ raise ValueError(
+ 'Compression not supported on Fixed format stores'
+ )
# write the object
s.write(obj=value, append=append, complib=complib, **kwargs)
@@ -1210,8 +1267,8 @@ class TableIterator(object):
start : the passed start value (default is None)
stop : the passed stop value (default is None)
chunksize : the passed chunking valeu (default is 50000)
- auto_close : boolean, automatically close the store at the end of iteration,
- default is False
+ auto_close : boolean, automatically close the store at the end of
+ iteration, default is False
kwargs : the passed kwargs
"""
@@ -1274,10 +1331,9 @@ class IndexCol(StringMixin):
is_data_indexable = True
_info_fields = ['freq', 'tz', 'index_name']
- def __init__(
- self, values=None, kind=None, typ=None, cname=None, itemsize=None,
- name=None, axis=None, kind_attr=None, pos=None, freq=None, tz=None,
- index_name=None, **kwargs):
+ def __init__(self, values=None, kind=None, typ=None, cname=None,
+ itemsize=None, name=None, axis=None, kind_attr=None, pos=None,
+ freq=None, tz=None, index_name=None, **kwargs):
self.values = values
self.kind = kind
self.typ = typ
@@ -1335,7 +1391,8 @@ def __unicode__(self):
def __eq__(self, other):
""" compare 2 col items """
- return all([getattr(self, a, None) == getattr(other, a, None) for a in ['name', 'cname', 'axis', 'pos']])
+ return all([getattr(self, a, None) == getattr(other, a, None)
+ for a in ['name', 'cname', 'axis', 'pos']])
def __ne__(self, other):
return not self.__eq__(other)
@@ -1353,7 +1410,7 @@ def copy(self):
return new_self
def infer(self, table):
- """ infer this column from the table: create and return a new object """
+ """infer this column from the table: create and return a new object"""
new_self = self.copy()
new_self.set_table(table)
new_self.get_attr()
@@ -1420,7 +1477,8 @@ def __iter__(self):
def maybe_set_size(self, min_itemsize=None, **kwargs):
""" maybe set a string col itemsize:
- min_itemsize can be an interger or a dict with this columns name with an integer size """
+ min_itemsize can be an integer or a dict with this column's name
+ with an integer size """
if _ensure_decoded(self.kind) == u('string'):
if isinstance(min_itemsize, dict):
@@ -1446,10 +1504,11 @@ def validate_col(self, itemsize=None):
if itemsize is None:
itemsize = self.itemsize
if c.itemsize < itemsize:
- raise ValueError("Trying to store a string with len [%s] in [%s] column but\n"
- "this column has a limit of [%s]!\n"
- "Consider using min_itemsize to preset the sizes on these columns"
- % (itemsize, self.cname, c.itemsize))
+ raise ValueError(
+ "Trying to store a string with len [%s] in [%s] "
+ "column but\nthis column has a limit of [%s]!\n"
+ "Consider using min_itemsize to preset the sizes on "
+ "these columns" % (itemsize, self.cname, c.itemsize))
return c.itemsize
return None
@@ -1484,9 +1543,10 @@ def update_info(self, info):
setattr(self, key, None)
else:
- raise ValueError("invalid info for [%s] for [%s]"""
- ", existing_value [%s] conflicts with new value [%s]" % (self.name,
- key, existing_value, value))
+ raise ValueError(
+ "invalid info for [%s] for [%s], existing_value [%s] "
+ "conflicts with new value [%s]"
+ % (self.name, key, existing_value, value))
else:
if value is not None or existing_value is not None:
idx[key] = value
@@ -1537,7 +1597,8 @@ class DataCol(IndexCol):
----------
data : the actual data
- cname : the column name in the table to hold the data (typeically values)
+ cname : the column name in the table to hold the data (typically
+ values)
"""
is_an_indexable = False
is_data_indexable = False
@@ -1574,11 +1635,14 @@ def __init__(self, values=None, kind=None, typ=None,
self.set_data(data)
def __unicode__(self):
- return "name->%s,cname->%s,dtype->%s,shape->%s" % (self.name, self.cname, self.dtype, self.shape)
+ return "name->%s,cname->%s,dtype->%s,shape->%s" % (
+ self.name, self.cname, self.dtype, self.shape
+ )
def __eq__(self, other):
""" compare 2 col items """
- return all([getattr(self, a, None) == getattr(other, a, None) for a in ['name', 'cname', 'dtype', 'pos']])
+ return all([getattr(self, a, None) == getattr(other, a, None)
+ for a in ['name', 'cname', 'dtype', 'pos']])
def set_data(self, data, dtype=None):
self.data = data
@@ -1644,7 +1708,9 @@ def set_atom(self, block, existing_col, min_itemsize,
# if this block has more than one timezone, raise
if len(set([r.tzinfo for r in rvalues])) != 1:
raise TypeError(
- "too many timezones in this block, create separate data columns")
+ "too many timezones in this block, create separate "
+ "data columns"
+ )
# convert this column to datetime64[ns] utc, and save the tz
index = DatetimeIndex(rvalues)
@@ -1707,9 +1773,11 @@ def set_atom_string(
col = block.get(item)
inferred_type = lib.infer_dtype(col.ravel())
if inferred_type != 'string':
- raise TypeError("Cannot serialize the column [%s] because\n"
- "its data contents are [%s] object dtype" %
- (item, inferred_type))
+ raise TypeError(
+ "Cannot serialize the column [%s] because\n"
+ "its data contents are [%s] object dtype"
+ % (item, inferred_type)
+ )
# itemsize is the maximum length of a string (along any dimension)
itemsize = lib.max_len_string_array(com._ensure_object(data.ravel()))
@@ -1781,7 +1849,7 @@ def cvalues(self):
return self.data
def validate_attr(self, append):
- """ validate that we have the same order as the existing & same dtype """
+ """validate that we have the same order as the existing & same dtype"""
if append:
existing_fields = getattr(self.attrs, self.kind_attr, None)
if (existing_fields is not None and
@@ -1792,11 +1860,13 @@ def validate_attr(self, append):
existing_dtype = getattr(self.attrs, self.dtype_attr, None)
if (existing_dtype is not None and
existing_dtype != self.dtype):
- raise ValueError("appended items dtype do not match existing items dtype"
- " in table!")
+ raise ValueError("appended items dtype do not match existing "
+ "items dtype in table!")
def convert(self, values, nan_rep, encoding):
- """ set the data from this selection (and convert to the correct dtype if we can) """
+ """set the data from this selection (and convert to the correct dtype
+ if we can)
+ """
try:
values = values[self.cname]
except:
@@ -1829,9 +1899,10 @@ def convert(self, values, nan_rep, encoding):
try:
self.data = np.array(
[date.fromordinal(v) for v in self.data], dtype=object)
- except (ValueError):
+ except ValueError:
self.data = np.array(
- [date.fromtimestamp(v) for v in self.data], dtype=object)
+ [date.fromtimestamp(v) for v in self.data],
+ dtype=object)
elif dtype == u('datetime'):
self.data = np.array(
[datetime.fromtimestamp(v) for v in self.data],
@@ -1914,7 +1985,8 @@ def __init__(self, parent, group, encoding=None, **kwargs):
@property
def is_old_version(self):
- return self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1
+ return (self.version[0] <= 0 and self.version[1] <= 10 and
+ self.version[2] < 1)
def set_version(self):
""" compute and set our version """
@@ -1929,7 +2001,8 @@ def set_version(self):
@property
def pandas_type(self):
- return _ensure_decoded(getattr(self.group._v_attrs, 'pandas_type', None))
+ return _ensure_decoded(getattr(self.group._v_attrs,
+ 'pandas_type', None))
@property
def format_type(self):
@@ -2041,7 +2114,9 @@ def write(self, **kwargs):
"cannot write on an abstract storer: sublcasses should implement")
def delete(self, where=None, **kwargs):
- """ support fully deleting the node in its entirety (only) - where specification must be None """
+ """support fully deleting the node in its entirety (only) - where
+ specification must be None
+ """
if where is None:
self._handle.removeNode(self.group, recursive=True)
return None
@@ -2052,8 +2127,7 @@ def delete(self, where=None, **kwargs):
class GenericFixed(Fixed):
""" a generified fixed version """
- _index_type_map = {DatetimeIndex: 'datetime',
- PeriodIndex: 'period'}
+ _index_type_map = {DatetimeIndex: 'datetime', PeriodIndex: 'period'}
_reverse_index_map = dict([(v, k)
for k, v in compat.iteritems(_index_type_map)])
attributes = []
@@ -2078,11 +2152,13 @@ def f(values, freq=None, tz=None):
def validate_read(self, kwargs):
if kwargs.get('columns') is not None:
- raise TypeError("cannot pass a column specification when reading a Fixed format store."
- "this store must be selected in its entirety")
+ raise TypeError("cannot pass a column specification when reading "
+ "a Fixed format store. this store must be "
+ "selected in its entirety")
if kwargs.get('where') is not None:
- raise TypeError("cannot pass a where specification when reading from a Fixed format store."
- "this store must be selected in its entirety")
+ raise TypeError("cannot pass a where specification when reading "
+ "from a Fixed format store. this store must be "
+ "selected in its entirety")
@property
def is_exists(self):
@@ -2246,9 +2322,10 @@ def read_index_node(self, node):
data = node[:]
# If the index was an empty array write_array_empty() will
# have written a sentinel. Here we relace it with the original.
- if 'shape' in node._v_attrs \
- and self._is_empty_array(getattr(node._v_attrs, 'shape')):
- data = np.empty(getattr(node._v_attrs, 'shape'), dtype=getattr(node._v_attrs, 'value_type'))
+ if ('shape' in node._v_attrs and
+ self._is_empty_array(getattr(node._v_attrs, 'shape'))):
+ data = np.empty(getattr(node._v_attrs, 'shape'),
+ dtype=getattr(node._v_attrs, 'value_type'))
kind = _ensure_decoded(node._v_attrs.kind)
name = None
@@ -2268,8 +2345,8 @@ def read_index_node(self, node):
if kind in (u('date'), u('datetime')):
index = factory(
- _unconvert_index(data, kind, encoding=self.encoding), dtype=object,
- **kwargs)
+ _unconvert_index(data, kind, encoding=self.encoding),
+ dtype=object, **kwargs)
else:
index = factory(
_unconvert_index(data, kind, encoding=self.encoding), **kwargs)
@@ -2351,10 +2428,12 @@ def write_array(self, key, value, items=None):
else:
if value.dtype.type == np.datetime64:
self._handle.createArray(self.group, key, value.view('i8'))
- getattr(self.group, key)._v_attrs.value_type = 'datetime64'
+ getattr(
+ self.group, key)._v_attrs.value_type = 'datetime64'
elif value.dtype.type == np.timedelta64:
self._handle.createArray(self.group, key, value.view('i8'))
- getattr(self.group, key)._v_attrs.value_type = 'timedelta64'
+ getattr(
+ self.group, key)._v_attrs.value_type = 'timedelta64'
else:
self._handle.createArray(self.group, key, value)
@@ -2423,7 +2502,8 @@ def read(self, **kwargs):
sp_values = self.read_array('sp_values')
sp_index = self.read_index('sp_index')
return SparseSeries(sp_values, index=index, sparse_index=sp_index,
- kind=self.kind or u('block'), fill_value=self.fill_value,
+ kind=self.kind or u('block'),
+ fill_value=self.fill_value,
name=self.name)
def write(self, obj, **kwargs):
@@ -2596,14 +2676,20 @@ class Table(Fixed):
Attrs in Table Node
-------------------
- These are attributes that are store in the main table node, they are necessary
- to recreate these tables when read back in.
-
- index_axes : a list of tuples of the (original indexing axis and index column)
- non_index_axes: a list of tuples of the (original index axis and columns on a non-indexing axis)
- values_axes : a list of the columns which comprise the data of this table
- data_columns : a list of the columns that we are allowing indexing (these become single columns in values_axes), or True to force all columns
- nan_rep : the string to use for nan representations for string objects
+ These are attributes that are stored in the main table node; they are
+ necessary to recreate these tables when read back in.
+
+ index_axes : a list of tuples of the (original indexing axis and
+ index column)
+ non_index_axes: a list of tuples of the (original index axis and
+ columns on a non-indexing axis)
+ values_axes : a list of the columns which comprise the data of this
+ table
+ data_columns : a list of the columns that we are allowing indexing
+ (these become single columns in values_axes), or True to force all
+ columns
+ nan_rep : the string to use for nan representations for string
+ objects
levels : the names of levels
"""
@@ -2641,14 +2727,10 @@ def __unicode__(self):
if self.is_old_version:
ver = "[%s]" % '.'.join([str(x) for x in self.version])
- return "%-12.12s%s (typ->%s,nrows->%s,ncols->%s,indexers->[%s]%s)" % (self.pandas_type,
- ver,
- self.table_type_short,
- self.nrows,
- self.ncols,
- ','.join(
- [a.name for a in self.index_axes]),
- dc)
+ return "%-12.12s%s (typ->%s,nrows->%s,ncols->%s,indexers->[%s]%s)" % (
+ self.pandas_type, ver, self.table_type_short, self.nrows,
+ self.ncols, ','.join([a.name for a in self.index_axes]), dc
+ )
def __getitem__(self, c):
""" return the axis for c """
@@ -2676,25 +2758,30 @@ def validate(self, other):
oax = ov[i]
if sax != oax:
raise ValueError(
- "invalid combinate of [%s] on appending data [%s] vs current table [%s]" %
- (c, sax, oax))
+ "invalid combinate of [%s] on appending data [%s] "
+ "vs current table [%s]" % (c, sax, oax))
# should never get here
raise Exception(
- "invalid combinate of [%s] on appending data [%s] vs current table [%s]" % (c, sv, ov))
+ "invalid combinate of [%s] on appending data [%s] vs "
+ "current table [%s]" % (c, sv, ov))
@property
def is_multi_index(self):
- """ the levels attribute is 1 or a list in the case of a multi-index """
- return isinstance(self.levels,list)
+ """the levels attribute is 1 or a list in the case of a multi-index"""
+ return isinstance(self.levels, list)
def validate_multiindex(self, obj):
- """ validate that we can store the multi-index; reset and return the new object """
- levels = [ l if l is not None else "level_{0}".format(i) for i, l in enumerate(obj.index.names) ]
+ """validate that we can store the multi-index; reset and return the
+ new object
+ """
+ levels = [l if l is not None else "level_{0}".format(i)
+ for i, l in enumerate(obj.index.names)]
try:
return obj.reset_index(), levels
- except (ValueError):
- raise ValueError("duplicate names/columns in the multi-index when storing as a table")
+ except ValueError:
+ raise ValueError("duplicate names/columns in the multi-index when "
+ "storing as a table")
@property
def nrows_expected(self):
@@ -2738,17 +2825,21 @@ def is_transposed(self):
@property
def data_orientation(self):
- """ return a tuple of my permutated axes, non_indexable at the front """
- return tuple(itertools.chain([int(a[0]) for a in self.non_index_axes], [int(a.axis) for a in self.index_axes]))
+ """return a tuple of my permutated axes, non_indexable at the front"""
+ return tuple(itertools.chain([int(a[0]) for a in self.non_index_axes],
+ [int(a.axis) for a in self.index_axes]))
def queryables(self):
""" return a dict of the kinds allowable columns for this object """
# compute the values_axes queryables
- return dict([(a.cname, a.kind) for a in self.index_axes] +
- [(self.storage_obj_type._AXIS_NAMES[axis], None) for axis, values in self.non_index_axes] +
- [(v.cname, v.kind) for v in self.values_axes if v.name in set(self.data_columns)]
- )
+ return dict(
+ [(a.cname, a.kind) for a in self.index_axes] +
+ [(self.storage_obj_type._AXIS_NAMES[axis], None)
+ for axis, values in self.non_index_axes] +
+ [(v.cname, v.kind) for v in self.values_axes
+ if v.name in set(self.data_columns)]
+ )
def index_cols(self):
""" return a list of my index cols """
@@ -2788,22 +2879,26 @@ def get_attrs(self):
self.levels = getattr(
self.attrs, 'levels', None) or []
t = self.table
- self.index_axes = [a.infer(t)
- for a in self.indexables if a.is_an_indexable]
- self.values_axes = [a.infer(t)
- for a in self.indexables if not a.is_an_indexable]
+ self.index_axes = [
+ a.infer(t) for a in self.indexables if a.is_an_indexable
+ ]
+ self.values_axes = [
+ a.infer(t) for a in self.indexables if not a.is_an_indexable
+ ]
def validate_version(self, where=None):
""" are we trying to operate on an old version? """
if where is not None:
- if self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1:
+ if (self.version[0] <= 0 and self.version[1] <= 10 and
+ self.version[2] < 1):
ws = incompatibility_doc % '.'.join(
[str(x) for x in self.version])
warnings.warn(ws, IncompatibilityWarning)
def validate_min_itemsize(self, min_itemsize):
- """ validate the min_itemisze doesn't contain items that are not in the axes
- this needs data_columns to be defined """
+ """validate the min_itemisze doesn't contain items that are not in the
+ axes this needs data_columns to be defined
+ """
if min_itemsize is None:
return
if not isinstance(min_itemsize, dict):
@@ -2817,8 +2912,8 @@ def validate_min_itemsize(self, min_itemsize):
continue
if k not in q:
raise ValueError(
- "min_itemsize has the key [%s] which is not an axis or data_column" %
- k)
+ "min_itemsize has the key [%s] which is not an axis or "
+ "data_column" % k)
@property
def indexables(self):
@@ -2828,8 +2923,10 @@ def indexables(self):
self._indexables = []
# index columns
- self._indexables.extend([IndexCol(name=name, axis=axis, pos=i)
- for i, (axis, name) in enumerate(self.attrs.index_cols)])
+ self._indexables.extend([
+ IndexCol(name=name, axis=axis, pos=i)
+ for i, (axis, name) in enumerate(self.attrs.index_cols)
+ ])
# values columns
dc = set(self.data_columns)
@@ -2839,7 +2936,8 @@ def f(i, c):
klass = DataCol
if c in dc:
klass = DataIndexableCol
- return klass.create_for_block(i=i, name=c, pos=base_pos + i, version=self.version)
+ return klass.create_for_block(i=i, name=c, pos=base_pos + i,
+ version=self.version)
self._indexables.extend(
[f(i, c) for i, c in enumerate(self.attrs.values_cols)])
@@ -2854,7 +2952,8 @@ def create_index(self, columns=None, optlevel=None, kind=None):
Paramaters
----------
- columns : False (don't create an index), True (create all columns index), None or list_like (the indexers to index)
+ columns : False (don't create an index), True (create all columns
+ index), None or list_like (the indexers to index)
optlevel: optimization level (defaults to 6)
kind : kind of index (defaults to 'medium')
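These parameters surface through HDFStore.create_table_index; a sketch with illustrative settings (index=False at write time defers index creation):

    import numpy as np
    import pandas as pd

    store = pd.HDFStore('demo.h5', mode='w')
    store.append('df', pd.DataFrame({'A': np.arange(10.), 'B': np.arange(10.)}),
                 data_columns=['B'], index=False)

    # build a full PyTables index on column B after the writes are done
    store.create_table_index('df', columns=['B'], optlevel=9, kind='full')
    store.close()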
@@ -2907,7 +3006,9 @@ def create_index(self, columns=None, optlevel=None, kind=None):
v.createIndex(**kw)
def read_axes(self, where, **kwargs):
- """ create and return the axes sniffed from the table: return boolean for success """
+ """create and return the axes sniffed from the table: return boolean
+ for success
+ """
# validate the version
self.validate_version(where)
@@ -2932,15 +3033,18 @@ def get_object(self, obj):
return obj
def validate_data_columns(self, data_columns, min_itemsize):
- """ take the input data_columns and min_itemize and create a data_columns spec """
+ """take the input data_columns and min_itemize and create a data
+ columns spec
+ """
if not len(self.non_index_axes):
return []
axis, axis_labels = self.non_index_axes[0]
- info = self.info.get(axis,dict())
+ info = self.info.get(axis, dict())
if info.get('type') == 'MultiIndex' and data_columns is not None:
- raise ValueError("cannot use a multi-index on axis [{0}] with data_columns".format(axis))
+ raise ValueError("cannot use a multi-index on axis [{0}] with "
+ "data_columns".format(axis))
# evaluate the passed data_columns, True == use all columns
# take only valide axis labels
@@ -2953,8 +3057,10 @@ def validate_data_columns(self, data_columns, min_itemsize):
if isinstance(min_itemsize, dict):
existing_data_columns = set(data_columns)
- data_columns.extend(
- [k for k in min_itemsize.keys() if k != 'values' and k not in existing_data_columns])
+ data_columns.extend([
+ k for k in min_itemsize.keys()
+ if k != 'values' and k not in existing_data_columns
+ ])
# return valid columns in the order of our axis
return [c for c in data_columns if c in axis_labels]
@@ -2962,17 +3068,21 @@ def validate_data_columns(self, data_columns, min_itemsize):
def create_axes(self, axes, obj, validate=True, nan_rep=None,
data_columns=None, min_itemsize=None, **kwargs):
""" create and return the axes
- leagcy tables create an indexable column, indexable index, non-indexable fields
+ legacy tables create an indexable column, indexable index,
+ non-indexable fields
Parameters:
-----------
- axes: a list of the axes in order to create (names or numbers of the axes)
+ axes: a list of the axes in order to create (names or numbers of
+ the axes)
obj : the object to create axes on
- validate: validate the obj against an existiing object already written
+ validate: validate the obj against an existing object already
+ written
min_itemsize: a dict of the min size for a column in bytes
nan_rep : a values to use for string column nan_rep
encoding : the encoding for string values
- data_columns : a list of columns that we want to create separate to allow indexing (or True will force all colummns)
+ data_columns : a list of columns that we want to create separate to
+ allow indexing (or True will force all columns)
"""
@@ -2981,8 +3091,9 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
try:
axes = _AXES_MAP[type(obj)]
except:
- raise TypeError("cannot properly create the storer for: [group->%s,value->%s]" %
- (self.group._v_name, type(obj)))
+ raise TypeError("cannot properly create the storer for: "
+ "[group->%s,value->%s]"
+ % (self.group._v_name, type(obj)))
# map axes to numbers
axes = [obj._get_axis_number(a) for a in axes]
@@ -3021,7 +3132,8 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
if i in axes:
name = obj._AXIS_NAMES[i]
index_axes_map[i] = _convert_index(
- a, self.encoding, self.format_type).set_name(name).set_axis(i)
+ a, self.encoding, self.format_type
+ ).set_name(name).set_axis(i)
else:
# we might be able to change the axes on the appending data if
@@ -3037,16 +3149,17 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
append_axis = exist_axis
# the non_index_axes info
- info = _get_info(self.info,i)
+ info = _get_info(self.info, i)
info['names'] = list(a.names)
info['type'] = a.__class__.__name__
self.non_index_axes.append((i, append_axis))
# set axis positions (based on the axes)
- self.index_axes = [index_axes_map[a].set_pos(
- j).update_info(self.info) for j,
- a in enumerate(axes)]
+ self.index_axes = [
+ index_axes_map[a].set_pos(j).update_info(self.info)
+ for j, a in enumerate(axes)
+ ]
j = len(self.index_axes)
# check for column conflicts
@@ -3066,11 +3179,13 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
data_columns = self.validate_data_columns(
data_columns, min_itemsize)
if len(data_columns):
- blocks = block_obj.reindex_axis(Index(axis_labels) - Index(
- data_columns), axis=axis)._data.blocks
+ blocks = block_obj.reindex_axis(
+ Index(axis_labels) - Index(data_columns),
+ axis=axis
+ )._data.blocks
for c in data_columns:
- blocks.extend(block_obj.reindex_axis(
- [c], axis=axis)._data.blocks)
+ blocks.extend(
+ block_obj.reindex_axis([c], axis=axis)._data.blocks)
# reorder the blocks in the same order as the existing_table if we can
if existing_table is not None:
@@ -3097,7 +3212,8 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
name = None
# we have a data_column
- if data_columns and len(b.items) == 1 and b.items[0] in data_columns:
+ if (data_columns and len(b.items) == 1 and
+ b.items[0] in data_columns):
klass = DataIndexableCol
name = b.items[0]
self.data_columns.append(name)
@@ -3108,8 +3224,9 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
try:
existing_col = existing_table.values_axes[i]
except:
- raise ValueError("Incompatible appended table [%s] with existing table [%s]" %
- (blocks, existing_table.values_axes))
+ raise ValueError("Incompatible appended table [%s] with "
+ "existing table [%s]"
+ % (blocks, existing_table.values_axes))
else:
existing_col = None
@@ -3128,9 +3245,12 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
self.values_axes.append(col)
except (NotImplementedError, ValueError, TypeError) as e:
raise e
- except (Exception) as detail:
- raise Exception("cannot find the correct atom type -> [dtype->%s,items->%s] %s" % (
- b.dtype.name, b.items, str(detail)))
+ except Exception as detail:
+ raise Exception(
+ "cannot find the correct atom type -> "
+ "[dtype->%s,items->%s] %s"
+ % (b.dtype.name, b.items, str(detail))
+ )
j += 1
# validate our min_itemsize
@@ -3160,7 +3280,8 @@ def process_filter(field, filt):
# see if the field is the name of an axis
if field == axis_name:
takers = op(axis_values, filt)
- return obj.ix._getitem_axis(takers, axis=axis_number)
+ return obj.ix._getitem_axis(takers,
+ axis=axis_number)
# this might be the name of a file IN an axis
elif field in axis_values:
@@ -3173,7 +3294,8 @@ def process_filter(field, filt):
if isinstance(obj, DataFrame):
axis_number = 1 - axis_number
takers = op(values, filt)
- return obj.ix._getitem_axis(takers, axis=axis_number)
+ return obj.ix._getitem_axis(takers,
+ axis=axis_number)
raise ValueError(
"cannot find the field [%s] for filtering!" % field)
@@ -3182,8 +3304,8 @@ def process_filter(field, filt):
return obj
- def create_description(
- self, complib=None, complevel=None, fletcher32=False, expectedrows=None):
+ def create_description(self, complib=None, complevel=None,
+ fletcher32=False, expectedrows=None):
""" create the description of the table from the axes & values """
# expected rows estimate
@@ -3197,9 +3319,9 @@ def create_description(
if complib:
if complevel is None:
complevel = self._complevel or 9
- filters = _tables().Filters(complevel=complevel,
- complib=complib,
- fletcher32=fletcher32 or self._fletcher32)
+ filters = _tables().Filters(
+ complevel=complevel, complib=complib,
+ fletcher32=fletcher32 or self._fletcher32)
d['filters'] = filters
elif self._filters is not None:
d['filters'] = self._filters
@@ -3207,7 +3329,9 @@ def create_description(
return d
def read_coordinates(self, where=None, start=None, stop=None, **kwargs):
- """ select coordinates (row numbers) from a table; return the coordinates object """
+ """select coordinates (row numbers) from a table; return the
+ coordinates object
+ """
# validate the version
self.validate_version(where)
@@ -3222,7 +3346,9 @@ def read_coordinates(self, where=None, start=None, stop=None, **kwargs):
return Index(self.selection.select_coords())
def read_column(self, column, where=None, **kwargs):
- """ return a single column from the table, generally only indexables are interesting """
+ """return a single column from the table, generally only indexables
+ are interesting
+ """
# validate the version
self.validate_version()
@@ -3241,13 +3367,14 @@ def read_column(self, column, where=None, **kwargs):
if not a.is_data_indexable:
raise ValueError(
- "column [%s] can not be extracted individually; it is not data indexable" %
- column)
+ "column [%s] can not be extracted individually; it is "
+ "not data indexable" % column)
# column must be an indexable or a data column
c = getattr(self.table.cols, column)
a.set_info(self.info)
- return Series(a.convert(c[:], nan_rep=self.nan_rep, encoding=self.encoding).take_data())
+ return Series(a.convert(c[:], nan_rep=self.nan_rep,
+ encoding=self.encoding).take_data())
raise KeyError("column [%s] not found in the table" % column)
@@ -3268,7 +3395,8 @@ def read(self, **kwargs):
def write(self, **kwargs):
""" write in a format that we can search later on (but cannot append
to): write out the indicies and the values using _write_array
- (e.g. a CArray) create an indexing table so that we can search"""
+ (e.g. a CArray) create an indexing table so that we can search
+ """
raise NotImplementedError("WORKTable needs to implement write")
@@ -3279,11 +3407,12 @@ class LegacyTable(Table):
append (but doesn't require them), and stores the data in a format
that can be easily searched
- """
- _indexables = [IndexCol(name='index', axis=1, pos=0),
- IndexCol(name='column', axis=2,
- pos=1, index_kind='columns_kind'),
- DataCol(name='fields', cname='values', kind_attr='fields', pos=2)]
+ """
+ _indexables = [
+ IndexCol(name='index', axis=1, pos=0),
+ IndexCol(name='column', axis=2, pos=1, index_kind='columns_kind'),
+ DataCol(name='fields', cname='values', kind_attr='fields', pos=2)
+ ]
table_type = u('legacy')
ndim = 3
@@ -3291,7 +3420,9 @@ def write(self, **kwargs):
raise TypeError("write operations are not allowed on legacy tables!")
def read(self, where=None, columns=None, **kwargs):
- """ we have n indexable columns, with an arbitrary number of data axes """
+ """we have n indexable columns, with an arbitrary number of data
+ axes
+ """
if not self.read_axes(where=where, **kwargs):
return None
@@ -3395,8 +3526,8 @@ class AppendableTable(LegacyTable):
table_type = u('appendable')
def write(self, obj, axes=None, append=False, complib=None,
- complevel=None, fletcher32=None, min_itemsize=None, chunksize=None,
- expectedrows=None, dropna=True, **kwargs):
+ complevel=None, fletcher32=None, min_itemsize=None,
+ chunksize=None, expectedrows=None, dropna=True, **kwargs):
if not append and self.is_exists:
self._handle.removeNode(self.group, 'table')
@@ -3485,7 +3616,7 @@ def write_data(self, chunksize, dropna=True):
# reshape the values if needed
values = [a.take_data() for a in self.values_axes]
values = [v.transpose(np.roll(np.arange(v.ndim), v.ndim - 1))
- for v in values]
+ for v in values]
bvalues = []
for i, v in enumerate(values):
new_shape = (nrows,) + self.dtype[names[nindexes + i]].shape
@@ -3617,7 +3748,8 @@ def read(self, where=None, columns=None, **kwargs):
if not self.read_axes(where=where, **kwargs):
return None
- info = self.info.get(self.non_index_axes[0][0],dict()) if len(self.non_index_axes) else dict()
+ info = (self.info.get(self.non_index_axes[0][0], dict())
+ if len(self.non_index_axes) else dict())
index = self.index_axes[0].values
frames = []
for a in self.values_axes:
@@ -3630,7 +3762,7 @@ def read(self, where=None, columns=None, **kwargs):
cols = Index(a.values)
names = info.get('names')
if names is not None:
- cols.set_names(names,inplace=True)
+ cols.set_names(names, inplace=True)
if self.is_transposed:
values = a.cvalues
@@ -3679,9 +3811,10 @@ def write(self, obj, data_columns=None, **kwargs):
""" we are going to write this as a frame table """
if not isinstance(obj, DataFrame):
name = obj.name or 'values'
- obj = DataFrame({ name : obj }, index=obj.index)
+ obj = DataFrame({name: obj}, index=obj.index)
obj.columns = [name]
- return super(AppendableSeriesTable, self).write(obj=obj, data_columns=obj.columns, **kwargs)
+ return super(AppendableSeriesTable, self).write(
+ obj=obj, data_columns=obj.columns, **kwargs)
def read(self, columns=None, **kwargs):
@@ -3694,13 +3827,14 @@ def read(self, columns=None, **kwargs):
if is_multi_index:
s.set_index(self.levels, inplace=True)
- s = s.iloc[:,0]
+ s = s.iloc[:, 0]
# remove the default name
if s.name == 'values':
s.name = None
return s
+
class AppendableMultiSeriesTable(AppendableSeriesTable):
""" support the new appendable table formats """
pandas_kind = u('series_table')
@@ -3715,8 +3849,8 @@ def write(self, obj, **kwargs):
obj.columns = cols
return super(AppendableMultiSeriesTable, self).write(obj=obj, **kwargs)
-class GenericTable(AppendableFrameTable):
+class GenericTable(AppendableFrameTable):
""" a table that read/writes the generic pytables table format """
pandas_kind = u('frame_table')
table_type = u('generic_table')
@@ -3756,7 +3890,7 @@ def indexables(self):
for i, n in enumerate(d._v_names):
dc = GenericDataIndexableCol(
- name=n, pos=i, values=[n], version = self.version)
+ name=n, pos=i, values=[n], version=self.version)
self._indexables.append(dc)
return self._indexables
@@ -3786,7 +3920,8 @@ def write(self, obj, data_columns=None, **kwargs):
for n in self.levels:
if n not in data_columns:
data_columns.insert(0, n)
- return super(AppendableMultiFrameTable, self).write(obj=obj, data_columns=data_columns, **kwargs)
+ return super(AppendableMultiFrameTable, self).write(
+ obj=obj, data_columns=data_columns, **kwargs)
def read(self, columns=None, **kwargs):
if columns is not None:
@@ -3798,7 +3933,9 @@ def read(self, columns=None, **kwargs):
df = df.set_index(self.levels)
# remove names for 'level_%d'
- df.index = df.index.set_names([ None if self._re_levels.search(l) else l for l in df.index.names ])
+ df.index = df.index.set_names([
+ None if self._re_levels.search(l) else l for l in df.index.names
+ ])
return df
@@ -3844,11 +3981,12 @@ def _reindex_axis(obj, axis, labels, other=None):
if other is not None:
labels = labels & _ensure_index(other.unique())
if not labels.equals(ax):
- slicer = [ slice(None, None) ] * obj.ndim
+ slicer = [slice(None, None)] * obj.ndim
slicer[axis] = labels
obj = obj.loc[tuple(slicer)]
return obj
+
def _get_info(info, name):
""" get/create the info for this name """
try:
@@ -3857,19 +3995,21 @@ def _get_info(info, name):
idx = info[name] = dict()
return idx
+
def _convert_index(index, encoding=None, format_type=None):
index_name = getattr(index, 'name', None)
if isinstance(index, DatetimeIndex):
converted = index.asi8
return IndexCol(converted, 'datetime64', _tables().Int64Col(),
- freq=getattr(index, 'freq', None), tz=getattr(index, 'tz', None),
+ freq=getattr(index, 'freq', None),
+ tz=getattr(index, 'tz', None),
index_name=index_name)
elif isinstance(index, (Int64Index, PeriodIndex)):
atom = _tables().Int64Col()
return IndexCol(
index.values, 'integer', atom, freq=getattr(index, 'freq', None),
- index_name=index_name)
+ index_name=index_name)
if isinstance(index, MultiIndex):
raise TypeError('MultiIndex not supported here!')
@@ -3881,7 +4021,8 @@ def _convert_index(index, encoding=None, format_type=None):
if inferred_type == 'datetime64':
converted = values.view('i8')
return IndexCol(converted, 'datetime64', _tables().Int64Col(),
- freq=getattr(index, 'freq', None), tz=getattr(index, 'tz', None),
+ freq=getattr(index, 'freq', None),
+ tz=getattr(index, 'tz', None),
index_name=index_name)
elif inferred_type == 'datetime':
converted = np.array([(time.mktime(v.timetuple()) +
@@ -3901,15 +4042,18 @@ def _convert_index(index, encoding=None, format_type=None):
converted = _convert_string_array(values, encoding)
itemsize = converted.dtype.itemsize
return IndexCol(
- converted, 'string', _tables().StringCol(itemsize), itemsize=itemsize,
- index_name=index_name)
+ converted, 'string', _tables().StringCol(itemsize),
+ itemsize=itemsize, index_name=index_name
+ )
elif inferred_type == 'unicode':
if format_type == 'fixed':
atom = _tables().ObjectAtom()
return IndexCol(np.asarray(values, dtype='O'), 'object', atom,
index_name=index_name)
raise TypeError(
- "[unicode] is not supported as a in index type for [{0}] formats".format(format_type))
+ "[unicode] is not supported as a in index type for [{0}] formats"
+ .format(format_type)
+ )
elif inferred_type == 'integer':
# take a guess for now, hope the values fit
@@ -4027,6 +4171,7 @@ def _need_convert(kind):
return True
return False
+
class Selection(object):
"""
@@ -4065,9 +4210,14 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs):
stop = self.table.nrows
self.coordinates = np.arange(start, stop)[where]
elif issubclass(where.dtype.type, np.integer):
- if (self.start is not None and (where < self.start).any()) or (self.stop is not None and (where >= self.stop).any()):
+ if ((self.start is not None and
+ (where < self.start).any()) or
+ (self.stop is not None and
+ (where >= self.stop).any())):
raise ValueError(
- "where must have index locations >= start and < stop")
+ "where must have index locations >= start and "
+ "< stop"
+ )
self.coordinates = where
except:
@@ -4089,21 +4239,27 @@ def generate(self, where):
q = self.table.queryables()
try:
return Expr(where, queryables=q, encoding=self.table.encoding)
- except (NameError) as detail:
-
- # raise a nice message, suggesting that the user should use data_columns
- raise ValueError("The passed where expression: {0}\n"
- " contains an invalid variable reference\n"
- " all of the variable refrences must be a reference to\n"
- " an axis (e.g. 'index' or 'columns'), or a data_column\n"
- " The currently defined references are: {1}\n".format(where,','.join(q.keys())))
+ except NameError as detail:
+ # raise a nice message, suggesting that the user should use
+ # data_columns
+ raise ValueError(
+ "The passed where expression: {0}\n"
+ " contains an invalid variable reference\n"
+ " all of the variable refrences must be a "
+ "reference to\n"
+ " an axis (e.g. 'index' or 'columns'), or a "
+ "data_column\n"
+ " The currently defined references are: {1}\n"
+ .format(where, ','.join(q.keys()))
+ )
def select(self):
"""
generate the selection
"""
if self.condition is not None:
- return self.table.table.readWhere(self.condition.format(), start=self.start, stop=self.stop)
+ return self.table.table.readWhere(self.condition.format(),
+ start=self.start, stop=self.stop)
elif self.coordinates is not None:
return self.table.table.readCoordinates(self.coordinates)
return self.table.table.read(start=self.start, stop=self.stop)
@@ -4115,7 +4271,9 @@ def select_coords(self):
if self.condition is None:
return np.arange(self.table.nrows)
- return self.table.table.getWhereList(self.condition.format(), start=self.start, stop=self.stop, sort=True)
+ return self.table.table.getWhereList(self.condition.format(),
+ start=self.start, stop=self.stop,
+ sort=True)
# utilities ###
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 1d0d1d17ec631..8c172db162cd6 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -2,9 +2,9 @@
Module contains tools for processing Stata files into DataFrames
The StataReader below was originally written by Joe Presbrey as part of PyDTA.
-It has been extended and improved by Skipper Seabold from the Statsmodels project
-who also developed the StataWriter and was finally added to pandas in an once again
-improved version.
+It has been extended and improved by Skipper Seabold from the Statsmodels
+project who also developed the StataWriter and was finally added to pandas in
+a once again improved version.
You can find more information on http://presbrey.mit.edu/PyDTA and
http://statsmodels.sourceforge.net/devel/
@@ -25,7 +25,8 @@
from pandas.io.common import get_filepath_or_buffer
-def read_stata(filepath_or_buffer, convert_dates=True, convert_categoricals=True, encoding=None, index=None):
+def read_stata(filepath_or_buffer, convert_dates=True,
+ convert_categoricals=True, encoding=None, index=None):
"""
Read Stata file into DataFrame
@@ -63,7 +64,8 @@ def _stata_elapsed_date_to_datetime(date, fmt):
Examples
--------
- >>> _stata_elapsed_date_to_datetime(52, "%tw") datetime.datetime(1961, 1, 1, 0, 0)
+ >>> _stata_elapsed_date_to_datetime(52, "%tw")
+ datetime.datetime(1961, 1, 1, 0, 0)
Notes
-----
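As a reading aid for the doctest above, a simplified reimplementation of just the weekly ("%tw") branch, assuming Stata's convention of 52 weeks per year counted from 1960; an illustrative sketch, not the pandas function itself:

    import datetime

    def weekly_elapsed_to_datetime(d):
        # 52 weeks per year, epoch 1960: d=52 lands on 1961-01-01,
        # matching the doctest above.
        year = 1960 + d // 52
        return (datetime.datetime(year, 1, 1) +
                datetime.timedelta(days=7 * (d % 52)))

    weekly_elapsed_to_datetime(52)   # datetime.datetime(1961, 1, 1, 0, 0)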
@@ -199,8 +201,11 @@ def __init__(self, offset, value):
'.' or ('.' + chr(value - offset + 96))
else:
self._str = '.'
- string = property(lambda self: self._str, doc="The Stata representation of the missing value: '.', '.a'..'.z'")
- value = property(lambda self: self._value, doc='The binary representation of the missing value.')
+ string = property(lambda self: self._str,
+ doc="The Stata representation of the missing value: "
+ "'.', '.a'..'.z'")
+ value = property(lambda self: self._value,
+ doc='The binary representation of the missing value.')
def __unicode__(self):
return self.string
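The read-only attributes above use the property(lambda ..., doc=...) idiom. A self-contained sketch of the same pattern, using an illustrative stand-in class rather than the pandas one:

    class _Missing(object):
        def __init__(self, s, v):
            self._str, self._value = s, v

        # Read-only views over the private fields, with docstrings attached.
        string = property(lambda self: self._str,
                          doc="Stata representation, e.g. '.' or '.a'")
        value = property(lambda self: self._value,
                         doc='Binary representation of the missing value.')

    m = _Missing('.a', 101)
    m.string   # '.a'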
@@ -292,19 +297,22 @@ def _decode_bytes(self, str, errors=None):
class StataReader(StataParser):
"""
- Class for working with a Stata dataset. There are two possibilities for usage:
+ Class for working with a Stata dataset. There are two possibilities for
+ usage:
* The from_dta() method on the DataFrame class.
- This will return a DataFrame with the Stata dataset. Note that when using the
- from_dta() method, you will not have access to meta-information like variable
- labels or the data label.
-
- * Work with this object directly. Upon instantiation, the header of the Stata data
- file is read, giving you access to attributes like variable_labels(), data_label(),
- nobs(), ... A DataFrame with the data is returned by the read() method; this will
- also fill up the value_labels. Note that calling the value_labels() method will
- result in an error if the read() method has not been called yet. This is because
- the value labels are stored at the end of a Stata dataset, after the data.
+ This will return a DataFrame with the Stata dataset. Note that when
+ using the from_dta() method, you will not have access to
+ meta-information like variable labels or the data label.
+
+ * Work with this object directly. Upon instantiation, the header of the
+ Stata data file is read, giving you access to attributes like
+ variable_labels(), data_label(), nobs(), ... A DataFrame with the data
+ is returned by the read() method; this will also fill up the
+ value_labels. Note that calling the value_labels() method will result in
+ an error if the read() method has not been called yet. This is because
+ the value labels are stored at the end of a Stata dataset, after the
+ data.
Parameters
----------
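A hedged usage sketch of the second path described above (working with the reader directly); the file path is hypothetical and the call order follows the note about value_labels():

    from pandas.io.stata import StataReader

    reader = StataReader('survey.dta', encoding='cp1252')  # hypothetical path
    print(reader.data_label(), reader.nobs())  # header info, available at once
    df = reader.read()                   # loads the data and the value labels
    labels = reader.value_labels()       # only valid after read(), see above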
@@ -323,7 +331,9 @@ def __init__(self, path_or_buf, encoding='cp1252'):
self._data_read = False
self._value_labels_read = False
if isinstance(path_or_buf, str):
- path_or_buf, encoding = get_filepath_or_buffer(path_or_buf, encoding=self._default_encoding)
+ path_or_buf, encoding = get_filepath_or_buffer(
+ path_or_buf, encoding=self._default_encoding
+ )
if isinstance(path_or_buf, (str, compat.text_type, bytes)):
self.path_or_buf = open(path_or_buf, 'rb')
@@ -334,17 +344,22 @@ def __init__(self, path_or_buf, encoding='cp1252'):
def _read_header(self):
first_char = self.path_or_buf.read(1)
- if struct.unpack('c', first_char)[0] == b'<': # format 117 or higher (XML like)
+ if struct.unpack('c', first_char)[0] == b'<':
+ # format 117 or higher (XML like)
self.path_or_buf.read(27) # stata_dta><header><release>
self.format_version = int(self.path_or_buf.read(3))
if self.format_version not in [117]:
- raise ValueError("Version of given Stata file is not 104, 105, 108, 113 (Stata 8/9), 114 (Stata 10/11), 115 (Stata 12) or 117 (Stata 13)")
+ raise ValueError("Version of given Stata file is not 104, "
+ "105, 108, 113 (Stata 8/9), 114 (Stata "
+ "10/11), 115 (Stata 12) or 117 (Stata 13)")
self.path_or_buf.read(21) # </release><byteorder>
self.byteorder = self.path_or_buf.read(3) == "MSF" and '>' or '<'
self.path_or_buf.read(15) # </byteorder><K>
- self.nvar = struct.unpack(self.byteorder + 'H', self.path_or_buf.read(2))[0]
+ self.nvar = struct.unpack(self.byteorder + 'H',
+ self.path_or_buf.read(2))[0]
self.path_or_buf.read(7) # </K><N>
- self.nobs = struct.unpack(self.byteorder + 'I', self.path_or_buf.read(4))[0]
+ self.nobs = struct.unpack(self.byteorder + 'I',
+ self.path_or_buf.read(4))[0]
self.path_or_buf.read(11) # </N><label>
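The header fields above are read with struct.unpack, prefixing the format code with the detected byte order. A minimal standalone example of the same call pattern, using hypothetical header bytes:

    import struct

    raw = b'\x10\x00'                    # two header bytes (hypothetical)
    byteorder = '<'                      # '<' little-endian, '>' big-endian
    nvar = struct.unpack(byteorder + 'H', raw)[0]   # unsigned 16-bit -> 16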