Skip to content

Commit ed7e469

Browse files
Merge remote-tracking branch 'upstream/master' into typing
2 parents 97f3b3e + 2ec7f2f commit ed7e469

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+1088
-699
lines changed

ci/code_checks.sh

+4
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
190190
invgrep -R --include="*.rst" ".. ipython ::" doc/source
191191
RET=$(($RET + $?)) ; echo $MSG "DONE"
192192

193+
    MSG='Check for extra blank lines after the class definition' ; echo $MSG
194+
    invgrep -R --include="*.py" --include="*.pyx" -E 'class.*:\n\n( )+"""' .
195+
    RET=$(($RET + $?)) ; echo $MSG "DONE"
196+
193197
MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
194198
set -o pipefail
195199
if [[ "$AZURE" == "true" ]]; then

doc/source/user_guide/integer_na.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ up with a ``float64`` dtype Series:
6363
pd.Series([1, 2, np.nan])
6464
6565
Operations involving an integer array will behave similar to NumPy arrays.
66-
Missing values will be propagated, and and the data will be coerced to another
66+
Missing values will be propagated, and the data will be coerced to another
6767
dtype if needed.
6868

6969
.. ipython:: python

doc/source/user_guide/io.rst

-5
Original file line numberDiff line numberDiff line change
@@ -4671,7 +4671,6 @@ See the `Full Documentation <https://github.com/wesm/feather>`__.
46714671
Write to a feather file.
46724672

46734673
.. ipython:: python
4674-
:okwarning:
46754674
46764675
df.to_feather('example.feather')
46774676
@@ -4748,7 +4747,6 @@ See the documentation for `pyarrow <https://arrow.apache.org/docs/python/>`__ an
47484747
Write to a parquet file.
47494748

47504749
.. ipython:: python
4751-
:okwarning:
47524750
47534751
df.to_parquet('example_pa.parquet', engine='pyarrow')
47544752
df.to_parquet('example_fp.parquet', engine='fastparquet')
@@ -4765,7 +4763,6 @@ Read from a parquet file.
47654763
Read only certain columns of a parquet file.
47664764

47674765
.. ipython:: python
4768-
:okwarning:
47694766
47704767
result = pd.read_parquet('example_fp.parquet',
47714768
engine='fastparquet', columns=['a', 'b'])
@@ -4788,7 +4785,6 @@ Serializing a ``DataFrame`` to parquet may include the implicit index as one or
47884785
more columns in the output file. Thus, this code:
47894786

47904787
.. ipython:: python
4791-
:okwarning:
47924788
47934789
df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
47944790
df.to_parquet('test.parquet', engine='pyarrow')
@@ -4805,7 +4801,6 @@ If you want to omit a dataframe's indexes when writing, pass ``index=False`` to
48054801
:func:`~pandas.DataFrame.to_parquet`:
48064802

48074803
.. ipython:: python
4808-
:okwarning:
48094804
48104805
df.to_parquet('test.parquet', index=False)
48114806

doc/source/whatsnew/v0.20.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Check the :ref:`API Changes <whatsnew_0200.api_breaking>` and :ref:`deprecations
3333

3434
.. note::
3535

36-
This is a combined release for 0.20.0 and and 0.20.1.
36+
This is a combined release for 0.20.0 and 0.20.1.
3737
Version 0.20.1 contains one additional change for backwards-compatibility with downstream projects using pandas' ``utils`` routines. (:issue:`16250`)
3838

3939
.. contents:: What's new in v0.20.0

doc/source/whatsnew/v1.0.0.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ Datetimelike
318318

319319
Timedelta
320320
^^^^^^^^^
321-
321+
- Bug in subtracting a :class:`TimedeltaIndex` or :class:`TimedeltaArray` from a ``np.datetime64`` object (:issue:`29558`)
322322
-
323323
-
324324

@@ -400,6 +400,8 @@ I/O
400400
- Bug in :meth:`DataFrame.to_html` when using ``formatters=<list>`` and ``max_cols`` together. (:issue:`25955`)
401401
- Bug in :meth:`Styler.background_gradient` not able to work with dtype ``Int64`` (:issue:`28869`)
402402
- Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`)
403+
- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
404+
-
403405

404406
Plotting
405407
^^^^^^^^

pandas/_libs/hashtable_class_helper.pxi.in

+5-5
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ cdef class {{name}}Vector:
100100
PyMem_Free(self.data)
101101
self.data = NULL
102102

103-
def __len__(self):
103+
def __len__(self) -> int:
104104
return self.data.n
105105

106106
cpdef to_array(self):
@@ -168,7 +168,7 @@ cdef class StringVector:
168168
PyMem_Free(self.data)
169169
self.data = NULL
170170

171-
def __len__(self):
171+
def __len__(self) -> int:
172172
return self.data.n
173173

174174
def to_array(self):
@@ -212,7 +212,7 @@ cdef class ObjectVector:
212212
self.ao = np.empty(_INIT_VEC_CAP, dtype=object)
213213
self.data = <PyObject**>self.ao.data
214214

215-
def __len__(self):
215+
def __len__(self) -> int:
216216
return self.n
217217

218218
cdef inline append(self, object obj):
@@ -270,7 +270,7 @@ cdef class {{name}}HashTable(HashTable):
270270
size_hint = min(size_hint, _SIZE_HINT_LIMIT)
271271
kh_resize_{{dtype}}(self.table, size_hint)
272272

273-
def __len__(self):
273+
def __len__(self) -> int:
274274
return self.table.size
275275

276276
def __dealloc__(self):
@@ -897,7 +897,7 @@ cdef class PyObjectHashTable(HashTable):
897897
kh_destroy_pymap(self.table)
898898
self.table = NULL
899899

900-
def __len__(self):
900+
def __len__(self) -> int:
901901
return self.table.size
902902

903903
def __contains__(self, object key):

pandas/_libs/indexing.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ cdef class _NDFrameIndexerBase:
1111
self._ndim = None
1212

1313
@property
14-
def ndim(self):
14+
def ndim(self) -> int:
1515
# Delay `ndim` instantiation until required as reading it
1616
# from `obj` isn't entirely cheap.
1717
ndim = self._ndim

pandas/_libs/internals.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ cdef class BlockPlacement:
6666
def __repr__(self) -> str:
6767
return str(self)
6868

69-
def __len__(self):
69+
def __len__(self) -> int:
7070
cdef:
7171
slice s = self._ensure_has_slice()
7272
if s is not None:

pandas/_libs/lib.pyx

+48-5
Original file line numberDiff line numberDiff line change
@@ -971,6 +971,7 @@ cdef class Seen:
971971
bint nat_ # seen nat
972972
bint bool_ # seen_bool
973973
bint null_ # seen_null
974+
bint nan_ # seen_np.nan
974975
bint uint_ # seen_uint (unsigned integer)
975976
bint sint_ # seen_sint (signed integer)
976977
bint float_ # seen_float
@@ -995,6 +996,7 @@ cdef class Seen:
995996
self.nat_ = 0
996997
self.bool_ = 0
997998
self.null_ = 0
999+
self.nan_ = 0
9981000
self.uint_ = 0
9991001
self.sint_ = 0
10001002
self.float_ = 0
@@ -1953,10 +1955,37 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
19531955
@cython.wraparound(False)
19541956
def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
19551957
bint safe=0, bint convert_datetime=0,
1956-
bint convert_timedelta=0):
1958+
bint convert_timedelta=0,
1959+
bint convert_to_nullable_integer=0):
19571960
"""
19581961
Type inference function-- convert object array to proper dtype
1962+
1963+
Parameters
1964+
----------
1965+
objects : ndarray
1966+
Array of object elements to convert.
1967+
try_float : bool, default False
1968+
If an array-like object containing only float or NaN values is
1969+
encountered, whether to convert and return an array of float dtype.
1970+
safe : bool, default False
1971+
Whether to upcast numeric type (e.g. int cast to float). If set to
1972+
True, no upcasting will be performed.
1973+
convert_datetime : bool, default False
1974+
If an array-like object containing only datetime values or NaT is
1975+
encountered, whether to convert and return an array of M8[ns] dtype.
1976+
convert_timedelta : bool, default False
1977+
If an array-like object containing only timedelta values or NaT is
1978+
encountered, whether to convert and return an array of m8[ns] dtype.
1979+
convert_to_nullable_integer : bool, default False
1980+
If an array-like object containing only integer values (and NaN) is
1981+
encountered, whether to convert and return an IntegerArray.
1982+
1983+
Returns
1984+
-------
1985+
array : array of converted object values to more specific dtypes if
1986+
applicable
19591987
"""
1988+
19601989
cdef:
19611990
Py_ssize_t i, n
19621991
ndarray[float64_t] floats
@@ -1977,6 +2006,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
19772006
ints = np.empty(n, dtype='i8')
19782007
uints = np.empty(n, dtype='u8')
19792008
bools = np.empty(n, dtype=np.uint8)
2009+
mask = np.full(n, False)
19802010

19812011
if convert_datetime:
19822012
datetimes = np.empty(n, dtype='M8[ns]')
@@ -1994,6 +2024,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
19942024
if val is None:
19952025
seen.null_ = 1
19962026
floats[i] = complexes[i] = fnan
2027+
mask[i] = True
19972028
elif val is NaT:
19982029
seen.nat_ = 1
19992030
if convert_datetime:
@@ -2003,6 +2034,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
20032034
if not (convert_datetime or convert_timedelta):
20042035
seen.object_ = 1
20052036
break
2037+
elif val is np.nan:
2038+
seen.nan_ = 1
2039+
mask[i] = True
2040+
floats[i] = complexes[i] = val
20062041
elif util.is_bool_object(val):
20072042
seen.bool_ = 1
20082043
bools[i] = val
@@ -2084,11 +2119,19 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
20842119

20852120
if not seen.object_:
20862121
if not safe:
2087-
if seen.null_:
2122+
if seen.null_ or seen.nan_:
20882123
if seen.is_float_or_complex:
20892124
if seen.complex_:
20902125
return complexes
2091-
elif seen.float_ or seen.int_:
2126+
elif seen.float_:
2127+
return floats
2128+
elif seen.int_:
2129+
if convert_to_nullable_integer:
2130+
from pandas.core.arrays import IntegerArray
2131+
return IntegerArray(ints, mask)
2132+
else:
2133+
return floats
2134+
elif seen.nan_:
20922135
return floats
20932136
else:
20942137
if not seen.bool_:
@@ -2127,7 +2170,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
21272170
if seen.complex_:
21282171
if not seen.int_:
21292172
return complexes
2130-
elif seen.float_:
2173+
elif seen.float_ or seen.nan_:
21312174
if not seen.int_:
21322175
return floats
21332176
else:
@@ -2151,7 +2194,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
21512194
if seen.complex_:
21522195
if not seen.int_:
21532196
return complexes
2154-
elif seen.float_:
2197+
elif seen.float_ or seen.nan_:
21552198
if not seen.int_:
21562199
return floats
21572200
elif seen.int_:

0 commit comments

Comments
 (0)