
Commit 99e9ec0
Merge branch 'master' of https://github.com/pandas-dev/pandas into tslibs-offsets-years
2 parents d3553ab + 9c799e2


44 files changed: +1023 -1121 lines

doc/source/io.rst
+9 -5

@@ -4427,8 +4427,10 @@ Several caveats.

 - This is a newer library, and the format, though stable, is not guaranteed to be backward compatible
   to the earlier versions.
-- The format will NOT write an ``Index``, or ``MultiIndex`` for the ``DataFrame`` and will raise an
-  error if a non-default one is provided. You can simply ``.reset_index()`` in order to store the index.
+- The format will NOT write an ``Index``, or ``MultiIndex`` for the
+  ``DataFrame`` and will raise an error if a non-default one is provided. You
+  can ``.reset_index()`` to store the index or ``.reset_index(drop=True)`` to
+  ignore it.
 - Duplicate column names and non-string columns names are not supported
 - Non supported types include ``Period`` and actual python object types. These will raise a helpful error message
   on an attempt at serialization.

@@ -4491,8 +4493,10 @@ dtypes, including extension dtypes such as datetime with tz.

 Several caveats.

-- The format will NOT write an ``Index``, or ``MultiIndex`` for the ``DataFrame`` and will raise an
-  error if a non-default one is provided. You can simply ``.reset_index(drop=True)`` in order to store the index.
+- The format will NOT write an ``Index``, or ``MultiIndex`` for the
+  ``DataFrame`` and will raise an error if a non-default one is provided. You
+  can ``.reset_index()`` to store the index or ``.reset_index(drop=True)`` to
+  ignore it.
 - Duplicate column names and non-string columns names are not supported
 - Categorical dtypes can be serialized to parquet, but will de-serialize as ``object`` dtype.
 - Non supported types include ``Period`` and actual python object types. These will raise a helpful error message

@@ -4538,7 +4542,7 @@ Read from a parquet file.

    result.dtypes

-Read only certain columns of a parquet file.
+Read only certain columns of a parquet file.

 .. ipython:: python
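The caveat amended above can be seen end to end; a minimal sketch, assuming a pyarrow or fastparquet engine is installed and using placeholder file names:

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2, 3]},
                      index=pd.Index(['x', 'y', 'z'], name='key'))

    # Writing df directly would raise, because its non-default index cannot
    # be stored by the parquet format (see the caveat above).
    df.reset_index().to_parquet('with_key.parquet')          # keep the index as a column
    df.reset_index(drop=True).to_parquet('no_key.parquet')   # discard the index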

doc/source/whatsnew/v0.21.1.txt
+1

@@ -86,6 +86,7 @@ I/O

 - Bug in :func:`read_csv` for handling null values in index columns when specifying ``na_filter=False`` (:issue:`5239`)
 - Bug in :meth:`DataFrame.to_csv` when the table had ``MultiIndex`` columns, and a list of strings was passed in for ``header`` (:issue:`5539`)
 - :func:`read_parquet` now allows to specify the columns to read from a parquet file (:issue:`18154`)
+- :func:`read_parquet` now allows to specify kwargs which are passed to the respective engine (:issue:`18216`)

 Plotting
 ^^^^^^^^
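Taken together, the two read_parquet entries above allow a call like the following sketch (the file path is a placeholder, and any extra keyword is simply forwarded to whichever engine accepts it):

    import pandas as pd

    # Read only columns 'a' and 'b'; keyword arguments that read_parquet does
    # not consume itself are passed through to the selected engine.
    subset = pd.read_parquet('example.parquet', engine='pyarrow',
                             columns=['a', 'b'])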

pandas/_libs/lib.pyx
+1 -1

@@ -1484,7 +1484,7 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True):
         if len(slices) == 1:
             yield blkno, slice(slices[0][0], slices[0][1])
         else:
-            tot_len = sum([stop - start for start, stop in slices])
+            tot_len = sum(stop - start for start, stop in slices)
             result = np.empty(tot_len, dtype=np.int64)
             res_view = result
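This change, replacing a list comprehension with a generator expression inside sum()/any()/all()/max(), is the pattern repeated across most files in this commit. A small standalone sketch of the idiom:

    slices = [(0, 3), (5, 9)]

    # Old style: build an intermediate list, then sum it.
    tot_len = sum([stop - start for start, stop in slices])

    # New style: same result, but the values are consumed lazily as sum()
    # iterates, without materialising a throwaway list.
    tot_len = sum(stop - start for start, stop in slices)

For any() and all() the generator form also short-circuits as soon as the outcome is known. The one caveat is that a generator can be consumed only once, so the idiom suits a single reducing call rather than repeated iteration.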

pandas/compat/__init__.py
+2 -2

@@ -266,7 +266,7 @@ def east_asian_len(data, encoding=None, ambiguous_width=1):
         Calculate display width considering unicode East Asian Width
         """
         if isinstance(data, text_type):
-            return sum([_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data])
+            return sum(_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data)
         else:
             return len(data)

@@ -318,7 +318,7 @@ def east_asian_len(data, encoding=None, ambiguous_width=1):
             data = data.decode(encoding)
         except UnicodeError:
             pass
-        return sum([_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data])
+        return sum(_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data)
     else:
         return len(data)

pandas/core/base.py
+4 -4

@@ -486,13 +486,13 @@ def _agg(arg, func):

         def is_any_series():
             # return a boolean if we have *any* nested series
-            return any([isinstance(r, ABCSeries)
-                        for r in compat.itervalues(result)])
+            return any(isinstance(r, ABCSeries)
+                       for r in compat.itervalues(result))

         def is_any_frame():
             # return a boolean if we have *any* nested series
-            return any([isinstance(r, ABCDataFrame)
-                        for r in compat.itervalues(result)])
+            return any(isinstance(r, ABCDataFrame)
+                       for r in compat.itervalues(result))

         if isinstance(result, list):
             return concat(result, keys=keys, axis=1), True

pandas/core/config.py
+1 -1

@@ -800,7 +800,7 @@ def inner(x):
         from pandas.io.formats.printing import pprint_thing as pp
         if x not in legal_values:

-            if not any([c(x) for c in callables]):
+            if not any(c(x) for c in callables):
                 pp_values = pp("|".join(lmap(pp, legal_values)))
                 msg = "Value must be one of {pp_values}"
                 if len(callables):

pandas/core/frame.py
+15 -5

@@ -601,7 +601,7 @@ def _repr_fits_horizontal_(self, ignore_width=False):

             d.to_string(buf=buf)
             value = buf.getvalue()
-            repr_width = max([len(l) for l in value.split('\n')])
+            repr_width = max(len(l) for l in value.split('\n'))

             return repr_width < width

@@ -1798,7 +1798,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
         def _verbose_repr():
             lines.append('Data columns (total %d columns):' %
                          len(self.columns))
-            space = max([len(pprint_thing(k)) for k in self.columns]) + 4
+            space = max(len(pprint_thing(k)) for k in self.columns) + 4
             counts = None

             tmpl = "%s%s"

@@ -5805,7 +5805,12 @@ def idxmin(self, axis=0, skipna=True):
             0 or 'index' for row-wise, 1 or 'columns' for column-wise
         skipna : boolean, default True
             Exclude NA/null values. If an entire row/column is NA, the result
-            will be NA
+            will be NA.
+
+        Raises
+        ------
+        ValueError
+            * If the row/column is empty

         Returns
         -------

@@ -5836,7 +5841,12 @@ def idxmax(self, axis=0, skipna=True):
             0 or 'index' for row-wise, 1 or 'columns' for column-wise
         skipna : boolean, default True
             Exclude NA/null values. If an entire row/column is NA, the result
-            will be first index.
+            will be NA.
+
+        Raises
+        ------
+        ValueError
+            * If the row/column is empty

         Returns
         -------

@@ -6414,7 +6424,7 @@ def convert(arr):


 def _get_names_from_index(data):
-    has_some_name = any([getattr(s, 'name', None) is not None for s in data])
+    has_some_name = any(getattr(s, 'name', None) is not None for s in data)
     if not has_some_name:
         return _default_index(len(data))
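The idxmax docstring fix above replaces the incorrect claim that an all-NA column yields the "first index". A minimal sketch of the documented behaviour (data and column names are made up):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'a': [1.0, 3.0, 2.0], 'b': [np.nan] * 3})

    # With the default skipna=True, column 'a' yields the label of its maximum,
    # while the all-NA column 'b' yields NA, as the docstring now states.
    df.idxmax()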

pandas/core/generic.py
+6 -6

@@ -1006,8 +1006,8 @@ def _set_axis_name(self, name, axis=0, inplace=False):
     # Comparisons

     def _indexed_same(self, other):
-        return all([self._get_axis(a).equals(other._get_axis(a))
-                    for a in self._AXIS_ORDERS])
+        return all(self._get_axis(a).equals(other._get_axis(a))
+                   for a in self._AXIS_ORDERS)

     def __neg__(self):
         values = _values_from_object(self)

@@ -2989,8 +2989,8 @@ def reindex(self, *args, **kwargs):

         # if all axes that are requested to reindex are equal, then only copy
         # if indicated must have index names equal here as well as values
-        if all([self._get_axis(axis).identical(ax)
-                for axis, ax in axes.items() if ax is not None]):
+        if all(self._get_axis(axis).identical(ax)
+               for axis, ax in axes.items() if ax is not None):
             if copy:
                 return self.copy()
             return self

@@ -5886,8 +5886,8 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None,

         # if we are NOT aligned, raise as we cannot where index
         if (axis is None and
-                not all([other._get_axis(i).equals(ax)
-                         for i, ax in enumerate(self.axes)])):
+                not all(other._get_axis(i).equals(ax)
+                        for i, ax in enumerate(self.axes))):
             raise InvalidIndexError

         # slice me out of the other

pandas/core/groupby.py
+3 -4

@@ -3890,8 +3890,7 @@ def first_not_none(values):
             # values are not series or array-like but scalars
             else:
                 # only coerce dates if we find at least 1 datetime
-                coerce = True if any([isinstance(x, Timestamp)
-                                      for x in values]) else False
+                coerce = any(isinstance(x, Timestamp) for x in values)
                 # self._selection_name not passed through to Series as the
                 # result should not take the name of original selection
                 # of columns

@@ -4303,8 +4302,8 @@ def _reindex_output(self, result):
             return result
         elif len(groupings) == 1:
             return result
-        elif not any([isinstance(ping.grouper, (Categorical, CategoricalIndex))
-                      for ping in groupings]):
+        elif not any(isinstance(ping.grouper, (Categorical, CategoricalIndex))
+                     for ping in groupings):
             return result

         levels_list = [ping.group_index for ping in groupings]

pandas/core/indexes/base.py
+2 -2

@@ -907,7 +907,7 @@ def _extend_line(s, line, value, display_width, next_line_prefix):

         def best_len(values):
             if values:
-                return max([adj.len(x) for x in values])
+                return max(adj.len(x) for x in values)
             else:
                 return 0

@@ -4246,7 +4246,7 @@ def _trim_front(strings):
     Trims zeros and decimal points
     """
     trimmed = strings
-    while len(strings) > 0 and all([x[0] == ' ' for x in trimmed]):
+    while len(strings) > 0 and all(x[0] == ' ' for x in trimmed):
         trimmed = [x[1:] for x in trimmed]
     return trimmed

pandas/core/indexes/multi.py
+8 -8

@@ -461,7 +461,7 @@ def _is_memory_usage_qualified(self):
         """ return a boolean if we need a qualified .info display """
         def f(l):
             return 'mixed' in l or 'string' in l or 'unicode' in l
-        return any([f(l) for l in self._inferred_type_levels])
+        return any(f(l) for l in self._inferred_type_levels)

     @Appender(Index.memory_usage.__doc__)
     def memory_usage(self, deep=False):

@@ -489,9 +489,9 @@ def _nbytes(self, deep=False):
         # for implementations with no useful getsizeof (PyPy)
         objsize = 24

-        level_nbytes = sum((i.memory_usage(deep=deep) for i in self.levels))
-        label_nbytes = sum((i.nbytes for i in self.labels))
-        names_nbytes = sum((getsizeof(i, objsize) for i in self.names))
+        level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels)
+        label_nbytes = sum(i.nbytes for i in self.labels)
+        names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
         result = level_nbytes + label_nbytes + names_nbytes

         # include our engine hashtable

@@ -2214,12 +2214,12 @@ def partial_selection(key, indexer=None):
                 # here we have a completely specified key, but are
                 # using some partial string matching here
                 # GH4758
-                all_dates = [(l.is_all_dates and
+                all_dates = ((l.is_all_dates and
                               not isinstance(k, compat.string_types))
-                             for k, l in zip(key, self.levels)]
+                             for k, l in zip(key, self.levels))
                 can_index_exactly = any(all_dates)
-                if (any([l.is_all_dates
-                         for k, l in zip(key, self.levels)]) and
+                if (any(l.is_all_dates
+                        for k, l in zip(key, self.levels)) and
                         not can_index_exactly):
                     indexer = self.get_loc(key)

pandas/core/indexes/range.py
+4 -4

@@ -193,8 +193,8 @@ def nbytes(self):
         On implementations where this is undetermined (PyPy)
         assume 24 bytes for each value
         """
-        return sum([getsizeof(getattr(self, v), 24) for v in
-                    ['_start', '_stop', '_step']])
+        return sum(getsizeof(getattr(self, v), 24) for v in
+                   ['_start', '_stop', '_step'])

     def memory_usage(self, deep=False):
         """

@@ -613,8 +613,8 @@ def _evaluate_numeric_binop(self, other):
             # for compat with numpy / Int64Index
             # even if we can represent as a RangeIndex, return
             # as a Float64Index if we have float-like descriptors
-            if not all([is_integer(x) for x in
-                        [rstart, rstop, rstep]]):
+            if not all(is_integer(x) for x in
+                       [rstart, rstop, rstep]):
                 result = result.astype('float64')

             return result

pandas/core/indexing.py
+4 -4

@@ -217,8 +217,8 @@ def _should_validate_iterable(self, axis=None):
         return True

     def _is_nested_tuple_indexer(self, tup):
-        if any([isinstance(ax, MultiIndex) for ax in self.obj.axes]):
-            return any([is_nested_tuple(tup, ax) for ax in self.obj.axes])
+        if any(isinstance(ax, MultiIndex) for ax in self.obj.axes):
+            return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)
         return False

     def _convert_tuple(self, key, is_setter=False):

@@ -342,7 +342,7 @@ def _setitem_with_indexer(self, indexer, value):
                     len(_ax) for _i, _ax in enumerate(self.obj.axes)
                     if _i != i
                 ]
-                if any([not l for l in len_non_info_axes]):
+                if any(not l for l in len_non_info_axes):
                     if not is_list_like_indexer(value):
                         raise ValueError("cannot set a frame with no "
                                          "defined index and a scalar")

@@ -690,7 +690,7 @@ def _align_series(self, indexer, ser, multiindex_indexer=False):
             # we have a frame, with multiple indexers on both axes; and a
             # series, so need to broadcast (see GH5206)
             if (sum_aligners == self.ndim and
-                    all([is_sequence(_) for _ in indexer])):
+                    all(is_sequence(_) for _ in indexer)):
                 ser = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values

             # single indexer

pandas/core/internals.py
+7 -7

@@ -3539,13 +3539,13 @@ def is_mixed_type(self):
     def is_numeric_mixed_type(self):
         # Warning, consolidation needs to get checked upstairs
         self._consolidate_inplace()
-        return all([block.is_numeric for block in self.blocks])
+        return all(block.is_numeric for block in self.blocks)

     @property
     def is_datelike_mixed_type(self):
         # Warning, consolidation needs to get checked upstairs
         self._consolidate_inplace()
-        return any([block.is_datelike for block in self.blocks])
+        return any(block.is_datelike for block in self.blocks)

     @property
     def is_view(self):

@@ -4574,7 +4574,7 @@ def concat(self, to_concat, new_axis):
         if len(non_empties) > 0:
             blocks = [obj.blocks[0] for obj in non_empties]

-            if all([type(b) is type(blocks[0]) for b in blocks[1:]]):  # noqa
+            if all(type(b) is type(blocks[0]) for b in blocks[1:]):  # noqa
                 new_block = blocks[0].concat_same_type(blocks)
             else:
                 values = [x.values for x in blocks]

@@ -5211,13 +5211,13 @@ def is_uniform_join_units(join_units):
     """
     return (
         # all blocks need to have the same type
-        all([type(ju.block) is type(join_units[0].block) for ju in join_units]) and  # noqa
+        all(type(ju.block) is type(join_units[0].block) for ju in join_units) and  # noqa
         # no blocks that would get missing values (can lead to type upcasts)
-        all([not ju.is_na for ju in join_units]) and
+        all(not ju.is_na for ju in join_units) and
         # no blocks with indexers (as then the dimensions do not fit)
-        all([not ju.indexers for ju in join_units]) and
+        all(not ju.indexers for ju in join_units) and
         # disregard Panels
-        all([ju.block.ndim <= 2 for ju in join_units]) and
+        all(ju.block.ndim <= 2 for ju in join_units) and
         # only use this path when there is something to concatenate
         len(join_units) > 1)

pandas/core/series.py
+14 -2

@@ -1306,7 +1306,13 @@ def idxmin(self, axis=None, skipna=True, *args, **kwargs):
         Parameters
         ----------
         skipna : boolean, default True
-            Exclude NA/null values
+            Exclude NA/null values. If the entire Series is NA, the result
+            will be NA.
+
+        Raises
+        ------
+        ValueError
+            * If the Series is empty

         Returns
         -------

@@ -1336,7 +1342,13 @@ def idxmax(self, axis=None, skipna=True, *args, **kwargs):
         Parameters
         ----------
         skipna : boolean, default True
-            Exclude NA/null values
+            Exclude NA/null values. If the entire Series is NA, the result
+            will be NA.
+
+        Raises
+        ------
+        ValueError
+            * If the Series is empty

         Returns
         -------
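The Series docstrings gain the same "Raises" section; a short sketch of the empty-Series case they describe:

    import pandas as pd

    try:
        pd.Series([], dtype='float64').idxmin()
    except ValueError as err:
        # An empty Series has no minimum position, as documented above.
        print(err)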

pandas/core/sparse/frame.py
+2 -2

@@ -347,8 +347,8 @@ def density(self):
         Ratio of non-sparse points to total (dense) data points
         represented in the frame
         """
-        tot_nonsparse = sum([ser.sp_index.npoints
-                             for _, ser in compat.iteritems(self)])
+        tot_nonsparse = sum(ser.sp_index.npoints
+                            for _, ser in compat.iteritems(self))
         tot = len(self.index) * len(self.columns)
         return tot_nonsparse / float(tot)
