Skip to content

Commit 4cce86d

Browse files
authored
Merge branch 'master' into doc-multiindex-get_slice_bound
2 parents de298f5 + 3577b5a commit 4cce86d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+1060
-967
lines changed

ci/azure/windows.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ jobs:
3131
- bash: |
3232
source activate pandas-dev
3333
conda list
34-
ci\\incremental\\build.cmd
34+
python setup.py build_ext -q -i
35+
python -m pip install --no-build-isolation -e .
3536
displayName: 'Build'
3637
- bash: |
3738
source activate pandas-dev

ci/code_checks.sh

+6-2
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
5252
black --version
5353

5454
MSG='Checking black formatting' ; echo $MSG
55-
black . --check
55+
black . --check
5656
RET=$(($RET + $?)) ; echo $MSG "DONE"
5757

5858
# `setup.cfg` contains the list of error codes that are being ignored in flake8
@@ -104,7 +104,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
104104
isort --version-number
105105

106106
# Imports - Check formatting using isort see setup.cfg for settings
107-
MSG='Check import format using isort ' ; echo $MSG
107+
MSG='Check import format using isort' ; echo $MSG
108108
ISORT_CMD="isort --recursive --check-only pandas asv_bench"
109109
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
110110
eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]}))
@@ -203,6 +203,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
203203
invgrep -R --include=*.{py,pyx} '\.__class__' pandas
204204
RET=$(($RET + $?)) ; echo $MSG "DONE"
205205

206+
MSG='Check for use of xrange instead of range' ; echo $MSG
207+
invgrep -R --include=*.{py,pyx} 'xrange' pandas
208+
RET=$(($RET + $?)) ; echo $MSG "DONE"
209+
206210
MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
207211
INVGREP_APPEND=" <- trailing whitespaces found"
208212
invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *

ci/incremental/build.cmd

-9
This file was deleted.

ci/run_tests.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,6 @@ sh -c "$PYTEST_CMD"
3838

3939
if [[ "$COVERAGE" && $? == 0 && "$TRAVIS_BRANCH" == "master" ]]; then
4040
echo "uploading coverage"
41-
echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME"
42-
bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME
41+
echo "bash <(curl -s https://codecov.io/bash) -Z -c -f $COVERAGE_FNAME"
42+
bash <(curl -s https://codecov.io/bash) -Z -c -f $COVERAGE_FNAME
4343
fi

doc/source/_static/favicon.ico

-3.81 KB
Binary file not shown.

doc/source/conf.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,11 @@
204204
# Theme options are theme-specific and customize the look and feel of a theme
205205
# further. For a list of options available for each theme, see the
206206
# documentation.
207-
# html_theme_options = {}
207+
html_theme_options = {
208+
"external_links": [],
209+
"github_url": "https://github.com/pandas-dev/pandas",
210+
"twitter_url": "https://twitter.com/pandas_dev",
211+
}
208212

209213
# Add any paths that contain custom themes here, relative to this directory.
210214
# html_theme_path = ["themes"]
@@ -228,7 +232,7 @@
228232
# The name of an image file (within the static path) to use as favicon of the
229233
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
230234
# pixels large.
231-
html_favicon = os.path.join(html_static_path[0], "favicon.ico")
235+
html_favicon = "../../web/pandas/static/img/favicon.ico"
232236

233237
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
234238
# using the given strftime format.

doc/source/user_guide/io.rst

+2-3
Original file line numberDiff line numberDiff line change
@@ -4763,10 +4763,10 @@ Parquet supports partitioning of data based on the values of one or more columns
47634763
.. ipython:: python
47644764
47654765
df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': [0, 1, 0, 1]})
4766-
df.to_parquet(fname='test', engine='pyarrow',
4766+
df.to_parquet(path='test', engine='pyarrow',
47674767
partition_cols=['a'], compression=None)
47684768
4769-
The `fname` specifies the parent directory to which data will be saved.
4769+
The `path` specifies the parent directory to which data will be saved.
47704770
The `partition_cols` are the column names by which the dataset will be partitioned.
47714771
Columns are partitioned in the order they are given. The partition splits are
47724772
determined by the unique values in the partition columns.
@@ -4828,7 +4828,6 @@ See also some :ref:`cookbook examples <cookbook.sql>` for some advanced strategi
48284828
The key functions are:
48294829

48304830
.. autosummary::
4831-
:toctree: ../reference/api/
48324831

48334832
read_sql_table
48344833
read_sql_query

doc/source/user_guide/text.rst

+8-1
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ These are places where the behavior of ``StringDtype`` objects differ from
7474
1. For ``StringDtype``, :ref:`string accessor methods<api.series.str>`
7575
that return **numeric** output will always return a nullable integer dtype,
7676
rather than either int or float dtype, depending on the presence of NA values.
77+
Methods returning **boolean** output will return a nullable boolean dtype.
7778

7879
.. ipython:: python
7980
@@ -89,7 +90,13 @@ 1. For ``StringDtype``, :ref:`string accessor methods<api.series.str>`
8990
s.astype(object).str.count("a")
9091
s.astype(object).dropna().str.count("a")
9192
92-
When NA values are present, the output dtype is float64.
93+
When NA values are present, the output dtype is float64. Similarly for
94+
methods returning boolean values.
95+
96+
.. ipython:: python
97+
98+
s.str.isdigit()
99+
s.str.match("a")
93100
94101
2. Some string methods, like :meth:`Series.str.decode` are not available
95102
on ``StringArray`` because ``StringArray`` only holds strings, not

doc/source/whatsnew/v1.0.0.rst

+3-2
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,8 @@ Deprecations
502502
- :func:`pandas.json_normalize` is now exposed in the top-level namespace.
503503
Usage of ``json_normalize`` as ``pandas.io.json.json_normalize`` is now deprecated and
504504
it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`).
505-
-
505+
- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`)
506+
506507

507508
.. _whatsnew_1000.prior_deprecations:
508509

@@ -711,7 +712,7 @@ Datetimelike
711712
- Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`)
712713
- Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`)
713714
- Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`)
714-
-
715+
- Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`)
715716

716717
Timedelta
717718
^^^^^^^^^

pandas/_libs/parsers.pyx

+20-1
Original file line numberDiff line numberDiff line change
@@ -1367,7 +1367,26 @@ def _ensure_encoded(list lst):
13671367
# common NA values
13681368
# no longer excluding inf representations
13691369
# '1.#INF','-1.#INF', '1.#INF000000',
1370-
_NA_VALUES = _ensure_encoded(list(icom._NA_VALUES))
1370+
STR_NA_VALUES = {
1371+
"-1.#IND",
1372+
"1.#QNAN",
1373+
"1.#IND",
1374+
"-1.#QNAN",
1375+
"#N/A N/A",
1376+
"#N/A",
1377+
"N/A",
1378+
"n/a",
1379+
"NA",
1380+
"#NA",
1381+
"NULL",
1382+
"null",
1383+
"NaN",
1384+
"-NaN",
1385+
"nan",
1386+
"-nan",
1387+
"",
1388+
}
1389+
_NA_VALUES = _ensure_encoded(list(STR_NA_VALUES))
13711390

13721391

13731392
def _maybe_upcast(arr):

pandas/_libs/testing.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ cpdef assert_almost_equal(a, b,
159159

160160
raise_assert_detail(obj, f"{obj} length are different", na, nb, r)
161161

162-
for i in xrange(len(a)):
162+
for i in range(len(a)):
163163
try:
164164
assert_almost_equal(a[i], b[i],
165165
check_less_precise=check_less_precise)

pandas/_libs/tslibs/timestamps.pyx

+18-7
Original file line numberDiff line numberDiff line change
@@ -336,11 +336,22 @@ class Timestamp(_Timestamp):
336336
"""
337337
return cls(datetime.combine(date, time))
338338

339-
def __new__(cls, object ts_input=_no_input,
340-
object freq=None, tz=None, unit=None,
341-
year=None, month=None, day=None,
342-
hour=None, minute=None, second=None, microsecond=None,
343-
nanosecond=None, tzinfo=None):
339+
def __new__(
340+
cls,
341+
object ts_input=_no_input,
342+
object freq=None,
343+
tz=None,
344+
unit=None,
345+
year=None,
346+
month=None,
347+
day=None,
348+
hour=None,
349+
minute=None,
350+
second=None,
351+
microsecond=None,
352+
nanosecond=None,
353+
tzinfo=None
354+
):
344355
# The parameter list folds together legacy parameter names (the first
345356
# four) and positional and keyword parameter names from pydatetime.
346357
#
@@ -401,8 +412,8 @@ class Timestamp(_Timestamp):
401412
freq = None
402413

403414
if getattr(ts_input, 'tzinfo', None) is not None and tz is not None:
404-
raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with the"
405-
" tz parameter. Use tz_convert instead.")
415+
raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with "
416+
"the tz parameter. Use tz_convert instead.")
406417

407418
ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0)
408419

pandas/core/arrays/datetimes.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -794,16 +794,17 @@ def _add_offset(self, offset):
794794
values = self.tz_localize(None)
795795
else:
796796
values = self
797-
result = offset.apply_index(values)
798-
if self.tz is not None:
799-
result = result.tz_localize(self.tz)
797+
result = offset.apply_index(values).tz_localize(self.tz)
800798

801799
except NotImplementedError:
802800
warnings.warn(
803801
"Non-vectorized DateOffset being applied to Series or DatetimeIndex",
804802
PerformanceWarning,
805803
)
806804
result = self.astype("O") + offset
805+
if len(self) == 0:
806+
# _from_sequence won't be able to infer self.tz
807+
return type(self)._from_sequence(result).tz_localize(self.tz)
807808

808809
return type(self)._from_sequence(result, freq="infer")
809810

pandas/core/frame.py

+30-19
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,12 @@
3737

3838
from pandas._libs import algos as libalgos, lib
3939
from pandas.compat.numpy import function as nv
40-
from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature
40+
from pandas.util._decorators import (
41+
Appender,
42+
Substitution,
43+
deprecate_kwarg,
44+
rewrite_axis_style_signature,
45+
)
4146
from pandas.util._validators import (
4247
validate_axis_style_args,
4348
validate_bool_kwarg,
@@ -1740,7 +1745,7 @@ def to_records(self, index=True, column_dtypes=None, index_dtypes=None):
17401745
rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)],
17411746
dtype=[('I', 'S2'), ('A', '<i8'), ('B', '<f8')])
17421747
1743-
>>> index_dtypes = "<S{}".format(df.index.str.len().max())
1748+
>>> index_dtypes = f"<S{df.index.str.len().max()}"
17441749
>>> df.to_records(index_dtypes=index_dtypes)
17451750
rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)],
17461751
dtype=[('I', 'S1'), ('A', '<i8'), ('B', '<f8')])
@@ -1829,9 +1834,10 @@ def _from_arrays(cls, arrays, columns, index, dtype=None):
18291834
mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype)
18301835
return cls(mgr)
18311836

1837+
@deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
18321838
def to_stata(
18331839
self,
1834-
fname,
1840+
path,
18351841
convert_dates=None,
18361842
write_index=True,
18371843
byteorder=None,
@@ -1849,11 +1855,16 @@ def to_stata(
18491855
18501856
Parameters
18511857
----------
1852-
fname : str, buffer or path object
1858+
path : str, buffer or path object
18531859
String, path object (pathlib.Path or py._path.local.LocalPath) or
18541860
object implementing a binary write() function. If using a buffer
18551861
then the buffer will not be automatically closed after the file
18561862
data has been written.
1863+
1864+
.. versionchanged:: 1.0.0
1865+
1866+
Previously this was "fname"
1867+
18571868
convert_dates : dict
18581869
Dictionary mapping columns containing datetime types to stata
18591870
internal format to use when writing the dates. Options are 'tc',
@@ -1927,7 +1938,7 @@ def to_stata(
19271938
kwargs["convert_strl"] = convert_strl
19281939

19291940
writer = statawriter(
1930-
fname,
1941+
path,
19311942
self,
19321943
convert_dates=convert_dates,
19331944
byteorder=byteorder,
@@ -1939,22 +1950,24 @@ def to_stata(
19391950
)
19401951
writer.write_file()
19411952

1942-
def to_feather(self, fname):
1953+
@deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
1954+
def to_feather(self, path):
19431955
"""
19441956
Write out the binary feather-format for DataFrames.
19451957
19461958
Parameters
19471959
----------
1948-
fname : str
1960+
path : str
19491961
String file path.
19501962
"""
19511963
from pandas.io.feather_format import to_feather
19521964

1953-
to_feather(self, fname)
1965+
to_feather(self, path)
19541966

1967+
@deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
19551968
def to_parquet(
19561969
self,
1957-
fname,
1970+
path,
19581971
engine="auto",
19591972
compression="snappy",
19601973
index=None,
@@ -1973,11 +1986,13 @@ def to_parquet(
19731986
19741987
Parameters
19751988
----------
1976-
fname : str
1989+
path : str
19771990
File path or Root Directory path. Will be used as Root Directory
19781991
path while writing a partitioned dataset.
19791992
1980-
.. versionchanged:: 0.24.0
1993+
.. versionchanged:: 1.0.0
1994+
1995+
Previously this was "fname"
19811996
19821997
engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
19831998
Parquet library to use. If 'auto', then the option
@@ -2034,7 +2049,7 @@ def to_parquet(
20342049

20352050
to_parquet(
20362051
self,
2037-
fname,
2052+
path,
20382053
engine,
20392054
compression=compression,
20402055
index=index,
@@ -2340,13 +2355,9 @@ def _sizeof_fmt(num, size_qualifier):
23402355
# returns size in human readable format
23412356
for x in ["bytes", "KB", "MB", "GB", "TB"]:
23422357
if num < 1024.0:
2343-
return "{num:3.1f}{size_q} {x}".format(
2344-
num=num, size_q=size_qualifier, x=x
2345-
)
2358+
return f"{num:3.1f}{size_qualifier} {x}"
23462359
num /= 1024.0
2347-
return "{num:3.1f}{size_q} {pb}".format(
2348-
num=num, size_q=size_qualifier, pb="PB"
2349-
)
2360+
return f"{num:3.1f}{size_qualifier} PB"
23502361

23512362
if verbose:
23522363
_verbose_repr()
@@ -2359,7 +2370,7 @@ def _sizeof_fmt(num, size_qualifier):
23592370
_verbose_repr()
23602371

23612372
counts = self._data.get_dtype_counts()
2362-
dtypes = ["{k}({kk:d})".format(k=k[0], kk=k[1]) for k in sorted(counts.items())]
2373+
dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())]
23632374
lines.append(f"dtypes: {', '.join(dtypes)}")
23642375

23652376
if memory_usage is None:

0 commit comments

Comments
 (0)