Skip to content

Commit bda57dc

Browse files
committed
Merge remote-tracking branch 'upstream/master' into dtypedoc
2 parents 2d64a65 + b804372 commit bda57dc

File tree

118 files changed

+2714
-1674
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

118 files changed

+2714
-1674
lines changed

.travis.yml

-5
Original file line numberDiff line numberDiff line change
@@ -48,17 +48,12 @@ matrix:
4848
- mysql
4949
- postgresql
5050

51-
# In allow_failures
5251
- env:
5352
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
5453
services:
5554
- mysql
5655
- postgresql
5756

58-
allow_failures:
59-
- env:
60-
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
61-
6257
before_install:
6358
- echo "before_install"
6459
# set non-blocking IO on travis

asv_bench/asv.conf.json

+4-1
Original file line numberDiff line numberDiff line change
@@ -122,5 +122,8 @@
122122
".*": "0409521665"
123123
},
124124
"regression_thresholds": {
125-
}
125+
},
126+
"build_command":
127+
["python setup.py build -j4",
128+
"PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"],
126129
}

asv_bench/benchmarks/io/json.py

+24
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,30 @@ def peakmem_to_json_wide(self, orient, frame):
132132
df.to_json(self.fname, orient=orient)
133133

134134

135+
class ToJSONISO(BaseIO):
136+
fname = "__test__.json"
137+
params = [["split", "columns", "index", "values", "records"]]
138+
param_names = ["orient"]
139+
140+
def setup(self, orient):
141+
N = 10 ** 5
142+
index = date_range("20000101", periods=N, freq="H")
143+
timedeltas = timedelta_range(start=1, periods=N, freq="s")
144+
datetimes = date_range(start=1, periods=N, freq="s")
145+
self.df = DataFrame(
146+
{
147+
"td_1": timedeltas,
148+
"td_2": timedeltas,
149+
"ts_1": datetimes,
150+
"ts_2": datetimes,
151+
},
152+
index=index,
153+
)
154+
155+
def time_iso_format(self, orient):
156+
self.df.to_json(orient=orient, date_format="iso")
157+
158+
135159
class ToJSONLines(BaseIO):
136160

137161
fname = "__test__.json"

ci/azure/posix.yml

+21-7
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,24 @@ jobs:
1919
ENV_FILE: ci/deps/azure-36-minimum_versions.yaml
2020
CONDA_PY: "36"
2121
PATTERN: "not slow and not network"
22+
2223
py36_locale_slow_old_np:
2324
ENV_FILE: ci/deps/azure-36-locale_slow.yaml
2425
CONDA_PY: "36"
2526
PATTERN: "slow"
26-
LOCALE_OVERRIDE: "zh_CN.UTF-8"
27+
# pandas does not use the language (zh_CN), but should support different encodings (utf8)
28+
# we should test with encodings different than utf8, but doesn't seem like Ubuntu supports any
29+
LANG: "zh_CN.utf8"
30+
LC_ALL: "zh_CN.utf8"
2731
EXTRA_APT: "language-pack-zh-hans"
2832

2933
py36_locale:
3034
ENV_FILE: ci/deps/azure-36-locale.yaml
3135
CONDA_PY: "36"
3236
PATTERN: "not slow and not network"
33-
LOCALE_OVERRIDE: "it_IT.UTF-8"
37+
LANG: "it_IT.utf8"
38+
LC_ALL: "it_IT.utf8"
39+
EXTRA_APT: "language-pack-it"
3440

3541
py36_32bit:
3642
ENV_FILE: ci/deps/azure-36-32bit.yaml
@@ -42,7 +48,9 @@ jobs:
4248
ENV_FILE: ci/deps/azure-37-locale.yaml
4349
CONDA_PY: "37"
4450
PATTERN: "not slow and not network"
45-
LOCALE_OVERRIDE: "zh_CN.UTF-8"
51+
LANG: "zh_CN.utf8"
52+
LC_ALL: "zh_CN.utf8"
53+
EXTRA_APT: "language-pack-zh-hans"
4654

4755
py37_np_dev:
4856
ENV_FILE: ci/deps/azure-37-numpydev.yaml
@@ -54,10 +62,16 @@ jobs:
5462

5563
steps:
5664
- script: |
57-
if [ "$(uname)" == "Linux" ]; then sudo apt-get install -y libc6-dev-i386 $EXTRA_APT; fi
58-
echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
59-
echo "Creating Environment"
60-
ci/setup_env.sh
65+
if [ "$(uname)" == "Linux" ]; then
66+
sudo apt-get update
67+
sudo apt-get install -y libc6-dev-i386 $EXTRA_APT
68+
fi
69+
displayName: 'Install extra packages'
70+
71+
- script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
72+
displayName: 'Set conda path'
73+
74+
- script: ci/setup_env.sh
6175
displayName: 'Setup environment and build pandas'
6276

6377
- script: |

ci/azure/windows.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
3434
- bash: |
3535
source activate pandas-dev
3636
conda list
37-
python setup.py build_ext -q -i
37+
python setup.py build_ext -q -i -j 4
3838
python -m pip install --no-build-isolation -e .
3939
displayName: 'Build'
4040

ci/code_checks.sh

+8
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
100100
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
101101
RET=$(($RET + $?)) ; echo $MSG "DONE"
102102

103+
MSG='Check for use of not concatenated strings' ; echo $MSG
104+
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
105+
$BASE_DIR/scripts/validate_string_concatenation.py --format="[error]{source_path}:{line_number}:{msg}" .
106+
else
107+
$BASE_DIR/scripts/validate_string_concatenation.py .
108+
fi
109+
RET=$(($RET + $?)) ; echo $MSG "DONE"
110+
103111
echo "isort --version-number"
104112
isort --version-number
105113

ci/deps/azure-36-locale_slow.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ dependencies:
1313
- pytest-azurepipelines
1414

1515
# pandas dependencies
16-
- beautifulsoup4==4.6.0
16+
- beautifulsoup4=4.6.0
1717
- bottleneck=1.2.*
1818
- lxml
1919
- matplotlib=2.2.2

ci/run_tests.sh

-11
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,6 @@
55
# https://github.com/pytest-dev/pytest/issues/1075
66
export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))')
77

8-
if [ -n "$LOCALE_OVERRIDE" ]; then
9-
export LC_ALL="$LOCALE_OVERRIDE"
10-
export LANG="$LOCALE_OVERRIDE"
11-
PANDAS_LOCALE=`python -c 'import pandas; pandas.get_option("display.encoding")'`
12-
if [[ "$LOCALE_OVERRIDE" != "$PANDAS_LOCALE" ]]; then
13-
echo "pandas could not detect the locale. System locale: $LOCALE_OVERRIDE, pandas detected: $PANDAS_LOCALE"
14-
# TODO Not really aborting the tests until https://github.com/pandas-dev/pandas/issues/23923 is fixed
15-
# exit 1
16-
fi
17-
fi
18-
198
if [[ "not network" == *"$PATTERN"* ]]; then
209
export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4;
2110
fi

ci/setup_env.sh

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
#!/bin/bash -e
22

33
# edit the locale file if needed
4-
if [ -n "$LOCALE_OVERRIDE" ]; then
4+
if [[ "$(uname)" == "Linux" && -n "$LC_ALL" ]]; then
55
echo "Adding locale to the first line of pandas/__init__.py"
66
rm -f pandas/__init__.pyc
7-
SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LOCALE_OVERRIDE')\n"
7+
SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LC_ALL')\n"
88
sed -i "$SEDC" pandas/__init__.py
9+
910
echo "[head -4 pandas/__init__.py]"
1011
head -4 pandas/__init__.py
1112
echo
12-
sudo locale-gen "$LOCALE_OVERRIDE"
1313
fi
1414

1515
MINICONDA_DIR="$HOME/miniconda3"

doc/source/getting_started/10min.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -697,8 +697,9 @@ Plotting
697697

698698
See the :ref:`Plotting <visualization>` docs.
699699

700+
We use the standard convention for referencing the matplotlib API:
701+
700702
.. ipython:: python
701-
:suppress:
702703
703704
import matplotlib.pyplot as plt
704705
plt.close('all')

doc/source/whatsnew/v1.0.0.rst

+19-4
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ Dedicated string data type
5656
^^^^^^^^^^^^^^^^^^^^^^^^^^
5757

5858
We've added :class:`StringDtype`, an extension type dedicated to string data.
59-
Previously, strings were typically stored in object-dtype NumPy arrays.
59+
Previously, strings were typically stored in object-dtype NumPy arrays. (:issue:`29975`)
6060

6161
.. warning::
6262

@@ -216,13 +216,18 @@ Other enhancements
216216
(:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
217217
now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
218218
- The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`)
219+
- :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`)
219220
- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`)
220221
- :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue:`30270`)
221222
- DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`)
222223
- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`)
223224
- :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` added (:issue:`11052`)
224-
225225
- :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`)
226+
- Added new writer for exporting Stata dta files in version 118, ``StataWriter118``. This format supports exporting strings containing Unicode characters (:issue:`23573`)
227+
- :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`)
228+
- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30296`)
229+
230+
226231

227232
Build Changes
228233
^^^^^^^^^^^^^
@@ -781,6 +786,7 @@ Datetimelike
781786
- Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`)
782787
- Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`)
783788
- Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`)
789+
- Bug in :meth:`Series.astype`, :meth:`Index.astype`, and :meth:`DataFrame.astype` failing to handle ``NaT`` when casting to an integer dtype (:issue:`28492`)
784790
- Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`)
785791
- Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`)
786792
- Bug in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`)
@@ -796,6 +802,7 @@ Datetimelike
796802
- Bug in :meth:`DataFrame.append` would remove the timezone-awareness of new data (:issue:`30238`)
797803
- Bug in :meth:`Series.cummin` and :meth:`Series.cummax` with timezone-aware dtype incorrectly dropping its timezone (:issue:`15553`)
798804
- Bug in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` where inplace addition and subtraction did not actually operate inplace (:issue:`24115`)
805+
- Bug in :func:`pandas.to_datetime` when called with ``Series`` storing ``IntegerArray`` raising ``TypeError`` instead of returning ``Series`` (:issue:`30050`)
799806

800807
Timedelta
801808
^^^^^^^^^
@@ -824,6 +831,7 @@ Numeric
824831
- Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be casted to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`)
825832
- Bug in :meth:`Series.interpolate` when using method=`index` with an unsorted index, would previously return incorrect results. (:issue:`21037`)
826833
- Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`)
834+
- Bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` when there are duplicated indices (:issue:`30463`)
827835
- Bug in :class:`DataFrame` cumulative operations (e.g. cumsum, cummax) incorrect casting to object-dtype (:issue:`19296`)
828836

829837
Conversion
@@ -858,6 +866,7 @@ Indexing
858866
- Bug when indexing with ``.loc`` where the index was a :class:`CategoricalIndex` with non-string categories didn't work (:issue:`17569`, :issue:`30225`)
859867
- :meth:`Index.get_indexer_non_unique` could fail with `TypeError` in some cases, such as when searching for ints in a string index (:issue:`28257`)
860868
- Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`)
869+
- Bug in :meth:`Series.__setitem__` incorrectly assigning values with boolean indexer when the length of new data matches the number of ``True`` values and new data is not a ``Series`` or an ``np.array`` (:issue:`30567`)
861870

862871
Missing
863872
^^^^^^^
@@ -893,6 +902,7 @@ I/O
893902
- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
894903
- Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`)
895904
- :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. (:issue:`29857`)
905+
- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by `record_path` would raise a ``TypeError`` (:issue:`30148`)
896906

897907
Plotting
898908
^^^^^^^^
@@ -908,12 +918,13 @@ Plotting
908918
- :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`)
909919
- :meth:`DataFrame.plot` now allows a ``backend`` keyword argument to allow changing between backends in one session (:issue:`28619`).
910920
- Bug in color validation incorrectly raising for non-color styles (:issue:`29122`).
921+
- Allow :meth:`DataFrame.plot.scatter` to plot ``objects`` and ``datetime`` type data (:issue:`18755`, :issue:`30391`)
911922
- Bug in :meth:`DataFrame.hist`, ``xrot=0`` does not work with ``by`` and subplots (:issue:`30288`).
912923

913924
Groupby/resample/rolling
914925
^^^^^^^^^^^^^^^^^^^^^^^^
915926

916-
-
927+
- Bug in :meth:`DataFrame.groupby.apply` only showing output from a single group when function returns an :class:`Index` (:issue:`28652`)
917928
- Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`)
918929
- Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty series or dataframe (:issue:`28427`)
919930
- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
@@ -943,6 +954,7 @@ Reshaping
943954
- :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`)
944955
- Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`).
945956
- Better error message in :func:`get_dummies` when `columns` isn't a list-like value (:issue:`28383`)
957+
- Bug in :meth:`Index.join` that caused infinite recursion error for mismatched ``MultiIndex`` name orders. (:issue:`25760`, :issue:`28956`)
946958
- Bug in :meth:`Series.pct_change` where supplying an anchored frequency would throw a ValueError (:issue:`28664`)
947959
- Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`)
948960
- Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`)
@@ -980,7 +992,10 @@ Other
980992
- Fixed :class:`IntegerArray` returning ``inf`` rather than ``NaN`` for operations dividing by 0 (:issue:`27398`)
981993
- Fixed ``pow`` operations for :class:`IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`)
982994
- Bug in :meth:`Series.count` raises if use_inf_as_na is enabled (:issue:`29478`)
983-
- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:29069`)
995+
- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:`29069`)
996+
- Bug in :class:`DataFrame` constructor when passing a 2D ``ndarray`` and an extension dtype (:issue:`12513`)
997+
- Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`)
998+
- Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`)
984999

9851000
.. _whatsnew_1000.contributors:
9861001

pandas/__init__.py

+39-2
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@
3939
"the C extensions first."
4040
)
4141

42-
from datetime import datetime
43-
4442
from pandas._config import (
4543
get_option,
4644
set_option,
@@ -210,6 +208,19 @@ class Panel:
210208

211209
return Panel
212210

211+
elif name == "datetime":
212+
warnings.warn(
213+
"The pandas.datetime class is deprecated "
214+
"and will be removed from pandas in a future version. "
215+
"Import from datetime module instead.",
216+
FutureWarning,
217+
stacklevel=2,
218+
)
219+
220+
from datetime import datetime as dt
221+
222+
return dt
223+
213224
elif name == "np":
214225

215226
warnings.warn(
@@ -264,13 +275,39 @@ def __getattr__(self, item):
264275
FutureWarning,
265276
stacklevel=2,
266277
)
278+
267279
try:
268280
return getattr(self.np, item)
269281
except AttributeError:
270282
raise AttributeError(f"module numpy has no attribute {item}")
271283

272284
np = __numpy()
273285

286+
class __Datetime:
287+
def __init__(self):
288+
from datetime import datetime as dt
289+
290+
self.datetime = dt
291+
292+
def __getattr__(self, item):
293+
import warnings
294+
295+
warnings.warn(
296+
"The pandas.datetime class is deprecated "
297+
"and will be removed from pandas in a future version. "
298+
"Import from datetime instead.",
299+
FutureWarning,
300+
stacklevel=2,
301+
)
302+
303+
try:
304+
return getattr(self.datetime, item)
305+
except AttributeError:
306+
raise AttributeError(f"module datetime has no attribute {item}")
307+
308+
datetime = __Datetime().datetime
309+
310+
274311
# module level doc-string
275312
__doc__ = """
276313
pandas - a powerful data analysis and manipulation library for Python

0 commit comments

Comments
 (0)