Skip to content

Commit 9cec0f3

Browse files
Merge remote-tracking branch 'upstream/master' into bisect
2 parents 537bfff + dc4eaf3 commit 9cec0f3

31 files changed

+635
-153
lines changed

.github/workflows/ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ jobs:
6464
- name: Testing docstring validation script
6565
run: |
6666
source activate pandas-dev
67-
pytest --capture=no --strict scripts
67+
pytest --capture=no --strict-markers scripts
6868
if: always()
6969

7070
- name: Running benchmarks

ci/run_tests.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ if [[ $(uname) == "Linux" && -z $DISPLAY ]]; then
2020
XVFB="xvfb-run "
2121
fi
2222

23-
PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile -s --strict --durations=30 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas"
23+
PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile -s --strict-markers --durations=30 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas"
2424

2525
if [[ $(uname) != "Linux" && $(uname) != "Darwin" ]]; then
2626
# GH#37455 windows py38 build appears to be running out of memory

doc/source/user_guide/10min.rst

+2
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,8 @@ We use the standard convention for referencing the matplotlib API:
722722
723723
plt.close("all")
724724
725+
The ``plt.close`` function is used to `close <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.close.html>`__ a figure window.
726+
725727
.. ipython:: python
726728
727729
ts = pd.Series(np.random.randn(1000), index=pd.date_range("1/1/2000", periods=1000))

doc/source/user_guide/indexing.rst

+2
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,8 @@ NA values in a boolean array propagate as ``False``:
380380

381381
.. versionchanged:: 1.0.2
382382

383+
.. ipython:: python
384+
383385
mask = pd.array([True, False, True, False, pd.NA, False], dtype="boolean")
384386
mask
385387
df1[mask]

doc/source/user_guide/io.rst

+14
Original file line numberDiff line numberDiff line change
@@ -1627,6 +1627,20 @@ functions - the following example shows reading a CSV file:
16271627
16281628
df = pd.read_csv("https://download.bls.gov/pub/time.series/cu/cu.item", sep="\t")
16291629
1630+
.. versionadded:: 1.3.0
1631+
1632+
A custom header can be sent alongside HTTP(s) requests by passing a dictionary
1633+
of header key-value mappings to the ``storage_options`` keyword argument as shown below:
1634+
1635+
.. code-block:: python
1636+
1637+
headers = {"User-Agent": "pandas"}
1638+
df = pd.read_csv(
1639+
"https://download.bls.gov/pub/time.series/cu/cu.item",
1640+
sep="\t",
1641+
storage_options=headers
1642+
)
1643+
16301644
All URLs which are not local files or HTTP(s) are handled by
16311645
`fsspec`_, if installed, and its various filesystem implementations
16321646
(including Amazon S3, Google Cloud, SSH, FTP, webHDFS...).

doc/source/whatsnew/v1.3.0.rst

+22-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,26 @@ including other versions of pandas.
1313
Enhancements
1414
~~~~~~~~~~~~
1515

16+
.. _whatsnew_130.read_csv_json_http_headers:
17+
18+
Custom HTTP(s) headers when reading csv or json files
19+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
20+
21+
When reading from a remote URL that is not handled by fsspec (i.e. HTTP and
22+
HTTPS) the dictionary passed to ``storage_options`` will be used to create the
23+
headers included in the request. This can be used to control the User-Agent
24+
header or send other custom headers (:issue:`36688`).
25+
For example:
26+
27+
.. ipython:: python
28+
29+
headers = {"User-Agent": "pandas"}
30+
df = pd.read_csv(
31+
"https://download.bls.gov/pub/time.series/cu/cu.item",
32+
sep="\t",
33+
storage_options=headers
34+
)
35+
1636
1737
.. _whatsnew_130.enhancements.other:
1838

@@ -174,7 +194,7 @@ Timezones
174194
Numeric
175195
^^^^^^^
176196
- Bug in :meth:`DataFrame.quantile`, :meth:`DataFrame.sort_values` causing incorrect subsequent indexing behavior (:issue:`38351`)
177-
-
197+
- Bug in :meth:`DataFrame.select_dtypes` with ``include=np.number`` dropping numeric ``ExtensionDtype`` columns (:issue:`35340`)
178198
-
179199

180200
Conversion
@@ -211,7 +231,7 @@ Missing
211231
MultiIndex
212232
^^^^^^^^^^
213233

214-
-
234+
- Bug in :meth:`DataFrame.drop` raising ``TypeError`` when :class:`MultiIndex` is non-unique and no level is provided (:issue:`36293`)
215235
-
216236

217237
I/O

pandas/_testing.py

+2
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@
108108
+ BYTES_DTYPES
109109
)
110110

111+
NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA]
112+
111113

112114
# set testing_mode
113115
_testing_mode_warnings = (DeprecationWarning, ResourceWarning)

pandas/conftest.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ def nselect_method(request):
266266
# ----------------------------------------------------------------
267267
# Missing values & co.
268268
# ----------------------------------------------------------------
269-
@pytest.fixture(params=[None, np.nan, pd.NaT, float("nan"), pd.NA], ids=str)
269+
@pytest.fixture(params=tm.NULL_OBJECTS, ids=str)
270270
def nulls_fixture(request):
271271
"""
272272
Fixture for each null type in pandas.

pandas/core/arrays/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ def __init__(
321321
if is_categorical_dtype(values):
322322
if dtype.categories is None:
323323
dtype = CategoricalDtype(values.categories, dtype.ordered)
324-
elif not isinstance(values, (ABCIndex, ABCSeries)):
324+
elif not isinstance(values, (ABCIndex, ABCSeries, ExtensionArray)):
325325
# sanitize_array coerces np.nan to a string under certain versions
326326
# of numpy
327327
values = maybe_infer_to_datetimelike(values, convert_dates=True)

pandas/core/arrays/sparse/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1061,7 +1061,7 @@ def astype(self, dtype=None, copy=True):
10611061
else:
10621062
return self.copy()
10631063
dtype = self.dtype.update_dtype(dtype)
1064-
subtype = dtype._subtype_with_str
1064+
subtype = pandas_dtype(dtype._subtype_with_str)
10651065
# TODO copy=False is broken for astype_nansafe with int -> float, so cannot
10661066
# passthrough copy keyword: https://github.com/pandas-dev/pandas/issues/34456
10671067
sp_values = astype_nansafe(self.sp_values, subtype, copy=True)

pandas/core/dtypes/cast.py

+9-15
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@
7070
is_timedelta64_dtype,
7171
is_timedelta64_ns_dtype,
7272
is_unsigned_integer_dtype,
73-
pandas_dtype,
7473
)
7574
from pandas.core.dtypes.dtypes import (
7675
DatetimeTZDtype,
@@ -80,11 +79,8 @@
8079
)
8180
from pandas.core.dtypes.generic import (
8281
ABCDataFrame,
83-
ABCDatetimeArray,
84-
ABCDatetimeIndex,
8582
ABCExtensionArray,
86-
ABCPeriodArray,
87-
ABCPeriodIndex,
83+
ABCIndex,
8884
ABCSeries,
8985
)
9086
from pandas.core.dtypes.inference import is_list_like
@@ -965,7 +961,7 @@ def astype_nansafe(
965961
Parameters
966962
----------
967963
arr : ndarray
968-
dtype : np.dtype
964+
dtype : np.dtype or ExtensionDtype
969965
copy : bool, default True
970966
If False, a view will be attempted but may fail, if
971967
e.g. the item sizes don't align.
@@ -978,11 +974,11 @@ def astype_nansafe(
978974
The dtype was a datetime64/timedelta64 dtype, but it had no unit.
979975
"""
980976
# dispatch on extension dtype if needed
981-
if is_extension_array_dtype(dtype):
977+
if isinstance(dtype, ExtensionDtype):
982978
return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy)
983979

984-
if not isinstance(dtype, np.dtype):
985-
dtype = pandas_dtype(dtype)
980+
elif not isinstance(dtype, np.dtype):
981+
raise ValueError("dtype must be np.dtype or ExtensionDtype")
986982

987983
if issubclass(dtype.type, str):
988984
return lib.ensure_string_array(
@@ -1252,11 +1248,9 @@ def maybe_infer_to_datetimelike(
12521248
leave inferred dtype 'date' alone
12531249
12541250
"""
1255-
# TODO: why not timedelta?
1256-
if isinstance(
1257-
value, (ABCDatetimeIndex, ABCPeriodIndex, ABCDatetimeArray, ABCPeriodArray)
1258-
):
1259-
return value
1251+
if isinstance(value, (ABCIndex, ABCExtensionArray)):
1252+
if not is_object_dtype(value.dtype):
1253+
raise ValueError("array-like value must be object-dtype")
12601254

12611255
v = value
12621256

@@ -1431,7 +1425,7 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]):
14311425
value = to_timedelta(value, errors="raise")._values
14321426
except OutOfBoundsDatetime:
14331427
raise
1434-
except (AttributeError, ValueError, TypeError):
1428+
except (ValueError, TypeError):
14351429
pass
14361430

14371431
# coerce datetimelike to object

0 commit comments

Comments
 (0)