Skip to content

Commit a1dbdf2

Browse files
authored
Merge branch 'master' into master
2 parents 9333952 + d0d28fe commit a1dbdf2

18 files changed

+480
-322
lines changed

asv_bench/asv.conf.json

+6-4
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,10 @@
117117
// with results. If the commit is `null`, regression detection is
118118
// skipped for the matching benchmark.
119119
//
120-
// "regressions_first_commits": {
121-
// "some_benchmark": "352cdf", // Consider regressions only after this commit
122-
// "another_benchmark": null, // Skip regression detection altogether
123-
// }
120+
"regressions_first_commits": {
121+
"*": "v0.20.0"
122+
},
123+
"regression_thresholds": {
124+
"*": 0.05
125+
}
124126
}

doc/source/whatsnew/v0.21.0.txt

+6
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,9 @@ Other Enhancements
129129
- `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`).
130130
- :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`).
131131
- :func:`Series.clip()` and :func:`DataFrame.clip()` now treat NA values for upper and lower arguments as None instead of raising `ValueError` (:issue:`17276`).
132+
- :func:`DataFrame.items` and :func:`Series.items` are now present in both Python 2 and 3 and are lazy in all cases (:issue:`13918`, :issue:`17213`)
133+
134+
132135

133136
.. _whatsnew_0210.api_breaking:
134137

@@ -386,6 +389,9 @@ Numeric
386389
Categorical
387390
^^^^^^^^^^^
388391
- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`)
392+
- Bug in the categorical constructor with empty values and categories causing
393+
the ``.categories`` to be an empty ``Float64Index`` rather than an empty
394+
``Index`` with object dtype (:issue:`17248`)
389395

390396

391397
Other

pandas/core/categorical.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,10 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False):
290290
# On list with NaNs, int values will be converted to float. Use
291291
# "object" dtype to prevent this. In the end objects will be
292292
# cast to int/... in the category assignment step.
293-
dtype = 'object' if isna(values).any() else None
293+
if len(values) == 0 or isna(values).any():
294+
dtype = 'object'
295+
else:
296+
dtype = None
294297
values = _sanitize_array(values, None, dtype=dtype)
295298

296299
if categories is None:

pandas/core/frame.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -802,8 +802,7 @@ def itertuples(self, index=True, name="Pandas"):
802802
# fallback to regular tuples
803803
return zip(*arrays)
804804

805-
if compat.PY3: # pragma: no cover
806-
items = iteritems
805+
items = iteritems
807806

808807
def __len__(self):
809808
"""Returns length of info axis, but here we use the index """

pandas/core/generic.py

+87-6
Original file line numberDiff line numberDiff line change
@@ -2063,18 +2063,77 @@ def __delitem__(self, key):
20632063

20642064
def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs):
20652065
"""
2066-
Analogous to ndarray.take
2066+
Return the elements in the given *positional* indices along an axis.
2067+
2068+
This means that we are not indexing according to actual values in
2069+
the index attribute of the object. We are indexing according to the
2070+
actual position of the element in the object.
20672071
20682072
Parameters
20692073
----------
2070-
indices : list / array of ints
2074+
indices : array-like
2075+
An array of ints indicating which positions to take.
20712076
axis : int, default 0
2072-
convert : translate neg to pos indices (default)
2073-
is_copy : mark the returned frame as a copy
2077+
The axis on which to select elements. "0" means that we are
2078+
selecting rows, "1" means that we are selecting columns, etc.
2079+
convert : bool, default True
2080+
Whether to convert negative indices to positive ones, just as with
2081+
indexing into Python lists. For example, if `-1` was passed in,
2082+
this index would be converted to ``n - 1``.
2083+
is_copy : bool, default True
2084+
Whether to return a copy of the original object or not.
2085+
2086+
Examples
2087+
--------
2088+
>>> df = pd.DataFrame([('falcon', 'bird', 389.0),
2089+
('parrot', 'bird', 24.0),
2090+
('lion', 'mammal', 80.5),
2091+
('monkey', 'mammal', np.nan)],
2092+
columns=('name', 'class', 'max_speed'),
2093+
index=[0, 2, 3, 1])
2094+
>>> df
2095+
name class max_speed
2096+
0 falcon bird 389.0
2097+
2 parrot bird 24.0
2098+
3 lion mammal 80.5
2099+
1 monkey mammal NaN
2100+
2101+
Take elements at positions 0 and 3 along the axis 0 (default).
2102+
2103+
Note how the actual indices selected (0 and 1) do not correspond to
2104+
our selected indices 0 and 3. That's because we are selecting the 0th
2105+
and 3rd rows, not rows whose indices equal 0 and 3.
2106+
2107+
>>> df.take([0, 3])
2108+
0 falcon bird 389.0
2109+
1 monkey mammal NaN
2110+
2111+
Take elements at indices 1 and 2 along the axis 1 (column selection).
2112+
2113+
>>> df.take([1, 2], axis=1)
2114+
class max_speed
2115+
0 bird 389.0
2116+
2 bird 24.0
2117+
3 mammal 80.5
2118+
1 mammal NaN
2119+
2120+
We may take elements using negative integers for positive indices,
2121+
starting from the end of the object, just like with Python lists.
2122+
2123+
>>> df.take([-1, -2])
2124+
name class max_speed
2125+
1 monkey mammal NaN
2126+
3 lion mammal 80.5
20742127
20752128
Returns
20762129
-------
20772130
taken : type of caller
2131+
An array-like containing the elements taken from the object.
2132+
2133+
See Also
2134+
--------
2135+
numpy.ndarray.take
2136+
numpy.take
20782137
"""
20792138
nv.validate_take(tuple(), kwargs)
20802139
self._consolidate_inplace()
@@ -2978,14 +3037,36 @@ def filter(self, items=None, like=None, regex=None, axis=None):
29783037

29793038
def head(self, n=5):
29803039
"""
2981-
Returns first n rows
3040+
Return the first n rows.
3041+
3042+
Parameters
3043+
----------
3044+
n : int, default 5
3045+
Number of rows to select.
3046+
3047+
Returns
3048+
-------
3049+
obj_head : type of caller
3050+
The first n rows of the caller object.
29823051
"""
3052+
29833053
return self.iloc[:n]
29843054

29853055
def tail(self, n=5):
29863056
"""
2987-
Returns last n rows
3057+
Return the last n rows.
3058+
3059+
Parameters
3060+
----------
3061+
n : int, default 5
3062+
Number of rows to select.
3063+
3064+
Returns
3065+
-------
3066+
obj_tail : type of caller
3067+
The last n rows of the caller object.
29883068
"""
3069+
29893070
if n == 0:
29903071
return self.iloc[0:0]
29913072
return self.iloc[-n:]

pandas/core/indexing.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -1985,9 +1985,31 @@ def get_indexer(_i, _idx):
19851985

19861986

19871987
def maybe_convert_indices(indices, n):
1988-
""" if we have negative indicies, translate to postive here
1989-
if have indicies that are out-of-bounds, raise an IndexError
19901988
"""
1989+
Attempt to convert indices into valid, positive indices.
1990+
1991+
If we have negative indices, translate to positive here.
1992+
If we have indices that are out-of-bounds, raise an IndexError.
1993+
1994+
Parameters
1995+
----------
1996+
indices : array-like
1997+
The array of indices that we are to convert.
1998+
n : int
1999+
The number of elements in the array that we are indexing.
2000+
2001+
Returns
2002+
-------
2003+
valid_indices : array-like
2004+
An array-like of positive indices that correspond to the ones
2005+
that were passed in initially to this function.
2006+
2007+
Raises
2008+
------
2009+
IndexError : one of the converted indices either exceeded the number
2010+
of elements (specified by `n`) OR was still negative.
2011+
"""
2012+
19912013
if isinstance(indices, list):
19922014
indices = np.array(indices)
19932015
if len(indices) == 0:

pandas/core/series.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1110,8 +1110,7 @@ def iteritems(self):
11101110
"""
11111111
return zip(iter(self.index), iter(self))
11121112

1113-
if compat.PY3: # pragma: no cover
1114-
items = iteritems
1113+
items = iteritems
11151114

11161115
# ----------------------------------------------------------------------
11171116
# Misc public methods

pandas/io/parsers.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -487,18 +487,18 @@ def _read(filepath_or_buffer, kwds):
487487
'widths': None,
488488
}
489489

490-
_c_unsupported = set(['skipfooter'])
491-
_python_unsupported = set([
490+
_c_unsupported = {'skipfooter'}
491+
_python_unsupported = {
492492
'low_memory',
493493
'buffer_lines',
494494
'float_precision',
495-
])
496-
_deprecated_args = set([
495+
}
496+
_deprecated_args = {
497497
'as_recarray',
498498
'buffer_lines',
499499
'compact_ints',
500500
'use_unsigned',
501-
])
501+
}
502502

503503

504504
def _make_parser_function(name, sep=','):

pandas/tests/frame/test_api.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,16 @@ def test_nonzero(self):
171171
def test_iteritems(self):
172172
df = self.klass([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b'])
173173
for k, v in compat.iteritems(df):
174-
assert type(v) == self.klass._constructor_sliced
174+
assert isinstance(v, self.klass._constructor_sliced)
175+
176+
def test_items(self):
177+
# issue #17213, #13918
178+
cols = ['a', 'b', 'c']
179+
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols)
180+
for c, (k, v) in zip(cols, df.items()):
181+
assert c == k
182+
assert isinstance(v, Series)
183+
assert (df[k] == v).all()
175184

176185
def test_iter(self):
177186
assert tm.equalContents(list(self.frame), self.frame.columns)

pandas/tests/indexes/test_multi.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -776,7 +776,7 @@ def test_from_arrays_empty(self):
776776
arrays = [[]] * N
777777
names = list('ABC')[:N]
778778
result = MultiIndex.from_arrays(arrays=arrays, names=names)
779-
expected = MultiIndex(levels=[np.array([])] * N, labels=[[]] * N,
779+
expected = MultiIndex(levels=[[]] * N, labels=[[]] * N,
780780
names=names)
781781
tm.assert_index_equal(result, expected)
782782

@@ -829,7 +829,7 @@ def test_from_product_empty(self):
829829

830830
# 1 level
831831
result = MultiIndex.from_product([[]], names=['A'])
832-
expected = pd.Float64Index([], name='A')
832+
expected = pd.Index([], name='A')
833833
tm.assert_index_equal(result, expected)
834834

835835
# 2 levels
@@ -838,7 +838,7 @@ def test_from_product_empty(self):
838838
names = ['A', 'B']
839839
for first, second in zip(l1, l2):
840840
result = MultiIndex.from_product([first, second], names=names)
841-
expected = MultiIndex(levels=[np.array(first), np.array(second)],
841+
expected = MultiIndex(levels=[first, second],
842842
labels=[[], []], names=names)
843843
tm.assert_index_equal(result, expected)
844844

@@ -847,8 +847,7 @@ def test_from_product_empty(self):
847847
for N in range(4):
848848
lvl2 = lrange(N)
849849
result = MultiIndex.from_product([[], lvl2, []], names=names)
850-
expected = MultiIndex(levels=[np.array(A)
851-
for A in [[], lvl2, []]],
850+
expected = MultiIndex(levels=[[], lvl2, []],
852851
labels=[[], [], []], names=names)
853852
tm.assert_index_equal(result, expected)
854853

pandas/tests/reshape/test_concat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -680,7 +680,7 @@ def test_concat_categorical_empty(self):
680680
tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)
681681

682682
s1 = pd.Series([], dtype='category')
683-
s2 = pd.Series([])
683+
s2 = pd.Series([], dtype='object')
684684

685685
# different dtype => not-category
686686
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)

pandas/tests/reshape/test_union_categoricals.py

+3-9
Original file line numberDiff line numberDiff line change
@@ -107,17 +107,11 @@ def test_union_categoricals_empty(self):
107107
exp = Categorical([])
108108
tm.assert_categorical_equal(res, exp)
109109

110-
res = union_categoricals([pd.Categorical([]),
111-
pd.Categorical([1.0])])
112-
exp = Categorical([1.0])
110+
res = union_categoricals([Categorical([]),
111+
Categorical(['1'])])
112+
exp = Categorical(['1'])
113113
tm.assert_categorical_equal(res, exp)
114114

115-
# to make dtype equal
116-
nanc = pd.Categorical(np.array([np.nan], dtype=np.float64))
117-
res = union_categoricals([nanc,
118-
pd.Categorical([])])
119-
tm.assert_categorical_equal(res, nanc)
120-
121115
def test_union_categorical_same_category(self):
122116
# check fastpath
123117
c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])

pandas/tests/series/test_api.py

+10
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,16 @@ def test_iteritems(self):
301301
# assert is lazy (generators don't define reverse, lists do)
302302
assert not hasattr(self.series.iteritems(), 'reverse')
303303

304+
def test_items(self):
305+
for idx, val in self.series.items():
306+
assert val == self.series[idx]
307+
308+
for idx, val in self.ts.items():
309+
assert val == self.ts[idx]
310+
311+
# assert is lazy (generators don't define reverse, lists do)
312+
assert not hasattr(self.series.items(), 'reverse')
313+
304314
def test_raise_on_info(self):
305315
s = Series(np.random.randn(10))
306316
with pytest.raises(AttributeError):

pandas/tests/test_categorical.py

+10
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,16 @@ def test_setitem_listlike(self):
112112
result = c.codes[np.array([100000]).astype(np.int64)]
113113
tm.assert_numpy_array_equal(result, np.array([5], dtype='int8'))
114114

115+
def test_constructor_empty(self):
116+
# GH 17248
117+
c = Categorical([])
118+
expected = Index([])
119+
tm.assert_index_equal(c.categories, expected)
120+
121+
c = Categorical([], categories=[1, 2, 3])
122+
expected = pd.Int64Index([1, 2, 3])
123+
tm.assert_index_equal(c.categories, expected)
124+
115125
def test_constructor_unsortable(self):
116126

117127
# it works!

0 commit comments

Comments
 (0)