Skip to content

Commit d5e43ae

Browse files
committed
Merge remote-tracking branch 'refs/remotes/pydata/master' into Fix-for-#11317
2 parents 2a9a05c + bc643ec commit d5e43ae

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+1193
-624
lines changed

asv_bench/asv.conf.json

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
"numexpr": [],
4444
"pytables": [],
4545
"openpyxl": [],
46+
"xlsxwriter": [],
4647
"xlrd": [],
4748
"xlwt": []
4849
},

asv_bench/benchmarks/gil.py

+46
Original file line numberDiff line numberDiff line change
@@ -320,3 +320,49 @@ def time_nogil_kth_smallest(self):
320320
def run(arr):
321321
algos.kth_smallest(arr, self.k)
322322
run()
323+
324+
class nogil_datetime_fields(object):
325+
goal_time = 0.2
326+
327+
def setup(self):
328+
self.N = 100000000
329+
self.dti = pd.date_range('1900-01-01', periods=self.N, freq='D')
330+
self.period = self.dti.to_period('D')
331+
if (not have_real_test_parallel):
332+
raise NotImplementedError
333+
334+
def time_datetime_field_year(self):
335+
@test_parallel(num_threads=2)
336+
def run(dti):
337+
dti.year
338+
run(self.dti)
339+
340+
def time_datetime_field_day(self):
341+
@test_parallel(num_threads=2)
342+
def run(dti):
343+
dti.day
344+
run(self.dti)
345+
346+
def time_datetime_field_daysinmonth(self):
347+
@test_parallel(num_threads=2)
348+
def run(dti):
349+
dti.days_in_month
350+
run(self.dti)
351+
352+
def time_datetime_field_normalize(self):
353+
@test_parallel(num_threads=2)
354+
def run(dti):
355+
dti.normalize()
356+
run(self.dti)
357+
358+
def time_datetime_to_period(self):
359+
@test_parallel(num_threads=2)
360+
def run(dti):
361+
dti.to_period('S')
362+
run(self.dti)
363+
364+
def time_period_to_datetime(self):
365+
@test_parallel(num_threads=2)
366+
def run(period):
367+
period.to_timestamp()
368+
run(self.period)

asv_bench/benchmarks/series_methods.py

+20
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,23 @@ def setup(self):
7171
def time_series_nsmallest2(self):
7272
self.s2.nsmallest(3, take_last=True)
7373
self.s2.nsmallest(3, take_last=False)
74+
75+
76+
class series_dropna_int64(object):
77+
goal_time = 0.2
78+
79+
def setup(self):
80+
self.s = Series(np.random.randint(1, 10, 1000000))
81+
82+
def time_series_dropna_int64(self):
83+
self.s.dropna()
84+
85+
class series_dropna_datetime(object):
86+
goal_time = 0.2
87+
88+
def setup(self):
89+
self.s = Series(pd.date_range('2000-01-01', freq='S', periods=1000000))
90+
self.s[np.random.randint(1, 1000000, 100)] = pd.NaT
91+
92+
def time_series_dropna_datetime(self):
93+
self.s.dropna()

ci/install_conda.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ bash miniconda.sh -b -p $HOME/miniconda || exit 1
7373
conda config --set always_yes yes --set changeps1 no || exit 1
7474
conda update -q conda || exit 1
7575
conda config --add channels conda-forge || exit 1
76-
conda config --add channels http://conda.binstar.org/pandas || exit 1
76+
conda config --add channels http://conda.anaconda.org/pandas || exit 1
7777
conda config --set ssl_verify false || exit 1
7878

7979
# Useful for debugging any issues with conda

ci/requirements-2.7.pip

+2
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,5 @@ blosc
22
httplib2
33
google-api-python-client == 1.2
44
python-gflags == 2.0
5+
pathlib
6+
py

ci/requirements-2.7_SLOW.pip

Whitespace-only changes.

ci/requirements-3.4.build

+1
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ python-dateutil
22
pytz
33
numpy=1.8.1
44
cython
5+
libgfortran

doc/source/conf.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -299,8 +299,9 @@
299299
intersphinx_mapping = {
300300
'statsmodels': ('http://statsmodels.sourceforge.net/devel/', None),
301301
'matplotlib': ('http://matplotlib.org/', None),
302-
'python': ('http://docs.python.org/', None),
303-
'numpy': ('http://docs.scipy.org/doc/numpy', None)
302+
'python': ('http://docs.python.org/3', None),
303+
'numpy': ('http://docs.scipy.org/doc/numpy', None),
304+
'py': ('http://pylib.readthedocs.org/en/latest/', None)
304305
}
305306
import glob
306307
autosummary_generate = glob.glob("*.rst")

doc/source/io.rst

+3-2
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,10 @@ for some advanced strategies
7979

8080
They can take a number of arguments:
8181

82-
- ``filepath_or_buffer``: Either a string path to a file, URL
82+
- ``filepath_or_buffer``: Either a path to a file (a :class:`python:str`,
83+
:class:`python:pathlib.Path`, or :class:`py:py._path.local.LocalPath`), URL
8384
(including http, ftp, and S3 locations), or any object with a ``read``
84-
method (such as an open file or ``StringIO``).
85+
method (such as an open file or :class:`~python:io.StringIO`).
8586
- ``sep`` or ``delimiter``: A delimiter / separator to split fields
8687
on. With ``sep=None``, ``read_csv`` will try to infer the delimiter
8788
automatically in some cases by "sniffing".

doc/source/whatsnew/v0.17.1.txt

+19-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ Enhancements
2828
Other Enhancements
2929
^^^^^^^^^^^^^^^^^^
3030

31+
- ``pd.read_*`` functions can now also accept :class:`python:pathlib.Path` or :class:`py:py._path.local.LocalPath`
32+
objects for the ``filepath_or_buffer`` argument. (:issue:`11033`)
33+
- Improve the error message displayed in :func:`pandas.io.gbq.to_gbq` when the DataFrame does not match the schema of the destination table (:issue:`11359`)
34+
3135
.. _whatsnew_0171.api:
3236

3337
API changes
@@ -38,6 +42,7 @@ API changes
3842
- Regression from 0.16.2 for output formatting of long floats/nan, restored in (:issue:`11302`)
3943
- Prettyprinting sets (e.g. in DataFrame cells) now uses set literal syntax (``{x, y}``) instead of
4044
Legacy Python syntax (``set([x, y])``) (:issue:`11215`)
45+
- Indexing with a null key will raise a ``TypeError``, instead of a ``ValueError`` (:issue:`11356`)
4146

4247
.. _whatsnew_0171.deprecations:
4348

@@ -54,6 +59,13 @@ Performance Improvements
5459
~~~~~~~~~~~~~~~~~~~~~~~~
5560

5661
- Checking monotonic-ness before sorting on an index (:issue:`11080`)
62+
- ``Series.dropna`` performance improvement when its dtype can't contain ``NaN`` (:issue:`11159`)
63+
64+
65+
- Release the GIL on most datetime field operations (e.g. ``DatetimeIndex.year``, ``Series.dt.year``), normalization, and conversion to and from ``Period``, ``DatetimeIndex.to_period`` and ``PeriodIndex.to_timestamp`` (:issue:`11263`)
66+
67+
68+
- Improved performance of ``to_excel`` (:issue:`11352`)
5769

5870
.. _whatsnew_0171.bug_fixes:
5971

@@ -65,12 +77,18 @@ Bug Fixes
6577

6678
- Bug in ``HDFStore.select`` when comparing with a numpy scalar in a where clause (:issue:`11283`)
6779

80+
6881
- Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`)
6982
- Bug in comparisons of Series vs list-likes (:issue:`11339`)
7083

71-
- Bug in list-like indexing with a mixed-integer Index (:issue:`11320`)
7284

85+
- Bug in ``DataFrame.replace`` with a ``datetime64[ns, tz]`` and a non-compat to_replace (:issue:`11326`, :issue:`11153`)
86+
87+
88+
89+
- Bug in list-like indexing with a mixed-integer Index (:issue:`11320`)
7390

91+
- Bug in ``pivot_table`` with ``margins=True`` when indexes are of ``Categorical`` dtype (:issue:`10993`)
7492
- Bug in ``DataFrame.plot`` where hex string colors could not be used (:issue:`10299`)
7593

7694

pandas/core/common.py

+36-2
Original file line numberDiff line numberDiff line change
@@ -444,14 +444,24 @@ def mask_missing(arr, values_to_mask):
444444
mask = None
445445
for x in nonna:
446446
if mask is None:
447-
mask = arr == x
447+
448+
# numpy elementwise comparison warning
449+
if is_numeric_v_string_like(arr, x):
450+
mask = False
451+
else:
452+
mask = arr == x
448453

449454
# if x is a string and arr is not, then we get False and we must
450455
# expand the mask to size arr.shape
451456
if np.isscalar(mask):
452457
mask = np.zeros(arr.shape, dtype=bool)
453458
else:
454-
mask |= arr == x
459+
460+
# numpy elementwise comparison warning
461+
if is_numeric_v_string_like(arr, x):
462+
mask |= False
463+
else:
464+
mask |= arr == x
455465

456466
if na_mask.any():
457467
if mask is None:
@@ -2382,6 +2392,9 @@ def _maybe_make_list(obj):
23822392
is_complex = lib.is_complex
23832393

23842394

2395+
def is_string_like(obj):
2396+
return isinstance(obj, (compat.text_type, compat.string_types))
2397+
23852398
def is_iterator(obj):
23862399
# python 3 generators have __next__ instead of next
23872400
return hasattr(obj, 'next') or hasattr(obj, '__next__')
@@ -2525,6 +2538,27 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype):
25252538
return issubclass(tipo, (np.datetime64, np.timedelta64))
25262539

25272540

2541+
def is_numeric_v_string_like(a, b):
2542+
"""
2543+
numpy doesn't like to compare numeric arrays vs scalar string-likes
2544+
2545+
return a boolean result if this is the case for a,b or b,a
2546+
2547+
"""
2548+
is_a_array = isinstance(a, np.ndarray)
2549+
is_b_array = isinstance(b, np.ndarray)
2550+
2551+
is_a_numeric_array = is_a_array and is_numeric_dtype(a)
2552+
is_b_numeric_array = is_b_array and is_numeric_dtype(b)
2553+
2554+
is_a_scalar_string_like = not is_a_array and is_string_like(a)
2555+
is_b_scalar_string_like = not is_b_array and is_string_like(b)
2556+
2557+
return (
2558+
is_a_numeric_array and is_b_scalar_string_like) or (
2559+
is_b_numeric_array and is_a_scalar_string_like
2560+
)
2561+
25282562
def is_datetimelike_v_numeric(a, b):
25292563
# return if we have an i8 convertible and numeric comparison
25302564
if not hasattr(a,'dtype'):

pandas/core/format.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1708,9 +1708,9 @@ def _format_value(self, val):
17081708
if lib.checknull(val):
17091709
val = self.na_rep
17101710
elif com.is_float(val):
1711-
if np.isposinf(val):
1711+
if lib.isposinf_scalar(val):
17121712
val = self.inf_rep
1713-
elif np.isneginf(val):
1713+
elif lib.isneginf_scalar(val):
17141714
val = '-%s' % self.inf_rep
17151715
elif self.float_format is not None:
17161716
val = float(self.float_format % val)

pandas/core/generic.py

-2
Original file line numberDiff line numberDiff line change
@@ -2999,8 +2999,6 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
29992999
'{0!r}').format(type(to_replace).__name__)
30003000
raise TypeError(msg) # pragma: no cover
30013001

3002-
new_data = new_data.convert(copy=not inplace, numeric=False)
3003-
30043002
if inplace:
30053003
self._update_inplace(new_data)
30063004
else:

pandas/core/index.py

+33-6
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,10 @@ def astype(self, dtype):
627627
return Index(self.values.astype(dtype), name=self.name,
628628
dtype=dtype)
629629

630+
def _to_safe_for_reshape(self):
631+
""" convert to object if we are a categorical """
632+
return self
633+
630634
def to_datetime(self, dayfirst=False):
631635
"""
632636
For an Index containing strings or datetime.datetime objects, attempt
@@ -862,9 +866,10 @@ def to_int():
862866
return self._invalid_indexer('label', key)
863867

864868
if is_float(key):
865-
if not self.is_floating():
866-
warnings.warn("scalar indexers for index type {0} should be integers and not floating point".format(
867-
type(self).__name__), FutureWarning, stacklevel=3)
869+
if isnull(key):
870+
return self._invalid_indexer('label', key)
871+
warnings.warn("scalar indexers for index type {0} should be integers and not floating point".format(
872+
type(self).__name__), FutureWarning, stacklevel=3)
868873
return to_int()
869874

870875
return key
@@ -3189,6 +3194,10 @@ def duplicated(self, keep='first'):
31893194
from pandas.hashtable import duplicated_int64
31903195
return duplicated_int64(self.codes.astype('i8'), keep)
31913196

3197+
def _to_safe_for_reshape(self):
3198+
""" convert to object if we are a categorical """
3199+
return self.astype('object')
3200+
31923201
def get_loc(self, key, method=None):
31933202
"""
31943203
Get integer location for requested label
@@ -3721,9 +3730,23 @@ def astype(self, dtype):
37213730
return Index(self._values, name=self.name, dtype=dtype)
37223731

37233732
def _convert_scalar_indexer(self, key, kind=None):
3733+
"""
3734+
convert a scalar indexer
3735+
3736+
Parameters
3737+
----------
3738+
key : scalar label to convert
3739+
kind : optional, type of the indexing operation (loc/ix/iloc/None)
3740+
3741+
right now we are converting
3742+
floats -> ints if the index supports it
3743+
"""
3744+
37243745
if kind == 'iloc':
3725-
return super(Float64Index, self)._convert_scalar_indexer(key,
3726-
kind=kind)
3746+
if is_integer(key):
3747+
return key
3748+
return super(Float64Index, self)._convert_scalar_indexer(key, kind=kind)
3749+
37273750
return key
37283751

37293752
def _convert_slice_indexer(self, key, kind=None):
@@ -4276,7 +4299,7 @@ def _reference_duplicate_name(self, name):
42764299
Returns True if the name referred to in self.names is duplicated.
42774300
"""
42784301
# count the times name equals an element in self.names.
4279-
return np.sum(name == np.asarray(self.names)) > 1
4302+
return sum(name == n for n in self.names) > 1
42804303

42814304
def _format_native_types(self, **kwargs):
42824305
return self.values
@@ -4514,6 +4537,10 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
45144537
else:
45154538
return result_levels
45164539

4540+
def _to_safe_for_reshape(self):
4541+
""" convert to object if we are a categorical """
4542+
return self.set_levels([ i._to_safe_for_reshape() for i in self.levels ])
4543+
45174544
def to_hierarchical(self, n_repeat, n_shuffle=1):
45184545
"""
45194546
Return a MultiIndex reshaped to conform to the

pandas/core/indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1285,7 +1285,7 @@ def _has_valid_type(self, key, axis):
12851285

12861286
def error():
12871287
if isnull(key):
1288-
raise ValueError(
1288+
raise TypeError(
12891289
"cannot use label indexing with a null key")
12901290
raise KeyError("the label [%s] is not in the [%s]" %
12911291
(key, self.obj._get_axis_name(axis)))

0 commit comments

Comments
 (0)