Skip to content

Commit 0303791

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into cln-arith
2 parents 4a72863 + 257ad4e commit 0303791

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

84 files changed

+1387
-512
lines changed

.travis.yml

+4-4
Original file line number | Diff line number | Diff line change
@@ -41,10 +41,10 @@ matrix:
4141
- JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
4242

4343
- env:
44-
- JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network and not clipboard)"
45-
46-
- env:
47-
- JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network and not clipboard)"
44+
- JOB="3.8, slow" ENV_FILE="ci/deps/travis-38-slow.yaml" PATTERN="slow" SQL="1"
45+
services:
46+
- mysql
47+
- postgresql
4848

4949
- env:
5050
- JOB="3.7, locale" ENV_FILE="ci/deps/travis-37-locale.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"

ci/azure/posix.yml

+14-18
Original file line number | Diff line number | Diff line change
@@ -20,39 +20,35 @@ jobs:
2020
CONDA_PY: "37"
2121
PATTERN: "not slow and not network and not clipboard"
2222

23+
py37:
24+
ENV_FILE: ci/deps/azure-37.yaml
25+
CONDA_PY: "37"
26+
PATTERN: "not slow and not network and not clipboard"
27+
2328
py37_locale_slow:
2429
ENV_FILE: ci/deps/azure-37-locale_slow.yaml
2530
CONDA_PY: "37"
2631
PATTERN: "slow"
27-
# pandas does not use the language (zh_CN), but should support different encodings (utf8)
28-
# we should test with encodings different than utf8, but doesn't seem like Ubuntu supports any
29-
LANG: "zh_CN.utf8"
30-
LC_ALL: "zh_CN.utf8"
31-
EXTRA_APT: "language-pack-zh-hans"
32+
LANG: "it_IT.utf8"
33+
LC_ALL: "it_IT.utf8"
34+
EXTRA_APT: "language-pack-it xsel"
3235

3336
py37_slow:
3437
ENV_FILE: ci/deps/azure-37-slow.yaml
3538
CONDA_PY: "37"
3639
PATTERN: "slow"
3740

38-
py37_locale:
39-
ENV_FILE: ci/deps/azure-37-locale.yaml
40-
CONDA_PY: "37"
41-
PATTERN: "not slow and not network"
42-
LANG: "it_IT.utf8"
43-
LC_ALL: "it_IT.utf8"
44-
EXTRA_APT: "language-pack-it xsel"
45-
46-
# py37_32bit:
47-
# ENV_FILE: ci/deps/azure-37-32bit.yaml
48-
# CONDA_PY: "37"
49-
# PATTERN: "not slow and not network and not clipboard"
50-
# BITS32: "yes"
41+
py38:
42+
ENV_FILE: ci/deps/azure-38.yaml
43+
CONDA_PY: "38"
44+
PATTERN: "not slow and not network and not clipboard"
5145

5246
py38_locale:
5347
ENV_FILE: ci/deps/azure-38-locale.yaml
5448
CONDA_PY: "38"
5549
PATTERN: "not slow and not network"
50+
# pandas does not use the language (zh_CN), but should support different encodings (utf8)
51+
# we should test with encodings different than utf8, but doesn't seem like Ubuntu supports any
5652
LANG: "zh_CN.utf8"
5753
LC_ALL: "zh_CN.utf8"
5854
EXTRA_APT: "language-pack-zh-hans xsel"

ci/deps/azure-37-32bit.yaml

-26
This file was deleted.

ci/deps/azure-37-slow.yaml

+1
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ dependencies:
1010
- pytest>=5.0.1
1111
- pytest-xdist>=1.21
1212
- hypothesis>=3.58.0
13+
- pytest-azurepipelines
1314

1415
# pandas dependencies
1516
- beautifulsoup4

ci/deps/travis-37.yaml renamed to ci/deps/azure-37.yaml

+1
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ dependencies:
1010
- pytest>=5.0.1
1111
- pytest-xdist>=1.21
1212
- hypothesis>=3.58.0
13+
- pytest-azurepipelines
1314

1415
# pandas dependencies
1516
- botocore>=1.11

ci/deps/travis-38.yaml renamed to ci/deps/azure-38.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -10,11 +10,11 @@ dependencies:
1010
- pytest>=5.0.1
1111
- pytest-xdist>=1.21
1212
- hypothesis>=3.58.0
13+
- pytest-azurepipelines
1314

1415
# pandas dependencies
1516
- numpy
1617
- python-dateutil
1718
- nomkl
1819
- pytz
19-
- pip
2020
- tabulate==0.8.3

ci/deps/travis-37-locale.yaml

+14-8
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,12 @@ dependencies:
1111
- pytest-xdist>=1.21
1212
- hypothesis>=3.58.0
1313

14-
# pandas dependencies
14+
# required
15+
- numpy
16+
- python-dateutil
17+
- pytz
18+
19+
# optional
1520
- beautifulsoup4
1621
- blosc=1.15.0
1722
- python-blosc
@@ -20,22 +25,23 @@ dependencies:
2025
- ipython
2126
- jinja2
2227
- lxml=4.3.0
23-
- matplotlib=3.0.*
28+
- matplotlib
2429
- nomkl
2530
- numexpr
26-
- numpy
2731
- openpyxl
2832
- pandas-gbq
2933
- google-cloud-bigquery>=1.27.2 # GH 36436
3034
- pyarrow>=0.17
31-
- psycopg2=2.7
32-
- pymysql=0.7.11
3335
- pytables>=3.5.1
34-
- python-dateutil
35-
- pytz
3636
- scipy
37-
- sqlalchemy=1.3.0
3837
- xarray=0.12.0
3938
- xlrd
4039
- xlsxwriter
4140
- xlwt
41+
- moto
42+
- flask
43+
44+
# sql
45+
- psycopg2=2.7
46+
- pymysql=0.7.11
47+
- sqlalchemy=1.3.0

ci/deps/azure-37-locale.yaml renamed to ci/deps/travis-38-slow.yaml

+11-11
Original file line number | Diff line number | Diff line change
@@ -3,35 +3,35 @@ channels:
33
- defaults
44
- conda-forge
55
dependencies:
6-
- python=3.7.*
6+
- python=3.8.*
77

88
# tools
99
- cython>=0.29.21
1010
- pytest>=5.0.1
1111
- pytest-xdist>=1.21
12-
- pytest-asyncio
1312
- hypothesis>=3.58.0
14-
- pytest-azurepipelines
1513

1614
# pandas dependencies
1715
- beautifulsoup4
16+
- fsspec>=0.7.4
1817
- html5lib
19-
- ipython
20-
- jinja2
2118
- lxml
22-
- matplotlib>=3.3.0
23-
- moto
24-
- flask
25-
- nomkl
19+
- matplotlib
2620
- numexpr
27-
- numpy=1.16.*
21+
- numpy
2822
- openpyxl
23+
- patsy
24+
- psycopg2
25+
- pymysql
2926
- pytables
3027
- python-dateutil
3128
- pytz
29+
- s3fs>=0.4.0
30+
- moto>=1.3.14
3231
- scipy
33-
- xarray
32+
- sqlalchemy
3433
- xlrd
3534
- xlsxwriter
3635
- xlwt
3736
- moto
37+
- flask

doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst

+4-1
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,10 @@ aggregating statistics for given columns can be defined using the
123123
.. ipython:: python
124124
125125
titanic.agg(
126-
{"Age": ["min", "max", "median", "skew"], "Fare": ["min", "max", "median", "mean"]}
126+
{
127+
"Age": ["min", "max", "median", "skew"],
128+
"Fare": ["min", "max", "median", "mean"],
129+
}
127130
)
128131
129132
.. raw:: html

doc/source/user_guide/advanced.rst

+8-3
Original file line number | Diff line number | Diff line change
@@ -304,7 +304,8 @@ whereas a tuple of lists refer to several values within a level:
304304
.. ipython:: python
305305
306306
s = pd.Series(
307-
[1, 2, 3, 4, 5, 6], index=pd.MultiIndex.from_product([["A", "B"], ["c", "d", "e"]])
307+
[1, 2, 3, 4, 5, 6],
308+
index=pd.MultiIndex.from_product([["A", "B"], ["c", "d", "e"]]),
308309
)
309310
s.loc[[("A", "c"), ("B", "d")]] # list of tuples
310311
s.loc[(["A", "B"], ["c", "d"])] # tuple of lists
@@ -819,7 +820,9 @@ values **not** in the categories, similarly to how you can reindex **any** panda
819820

820821
.. ipython:: python
821822
822-
df3 = pd.DataFrame({"A": np.arange(3), "B": pd.Series(list("abc")).astype("category")})
823+
df3 = pd.DataFrame(
824+
{"A": np.arange(3), "B": pd.Series(list("abc")).astype("category")}
825+
)
823826
df3 = df3.set_index("B")
824827
df3
825828
@@ -934,7 +937,9 @@ example, be millisecond offsets.
934937
np.random.randn(5, 2), index=np.arange(5) * 250.0, columns=list("AB")
935938
),
936939
pd.DataFrame(
937-
np.random.randn(6, 2), index=np.arange(4, 10) * 250.1, columns=list("AB")
940+
np.random.randn(6, 2),
941+
index=np.arange(4, 10) * 250.1,
942+
columns=list("AB"),
938943
),
939944
]
940945
)

doc/source/user_guide/basics.rst

+28-8
Original file line number | Diff line number | Diff line change
@@ -464,7 +464,10 @@ which we illustrate:
464464
{"A": [1.0, np.nan, 3.0, 5.0, np.nan], "B": [np.nan, 2.0, 3.0, np.nan, 6.0]}
465465
)
466466
df2 = pd.DataFrame(
467-
{"A": [5.0, 2.0, 4.0, np.nan, 3.0, 7.0], "B": [np.nan, np.nan, 3.0, 4.0, 6.0, 8.0]}
467+
{
468+
"A": [5.0, 2.0, 4.0, np.nan, 3.0, 7.0],
469+
"B": [np.nan, np.nan, 3.0, 4.0, 6.0, 8.0],
470+
}
468471
)
469472
df1
470473
df2
@@ -712,7 +715,10 @@ Similarly, you can get the most frequently occurring value(s), i.e. the mode, of
712715
s5 = pd.Series([1, 1, 3, 3, 3, 5, 5, 7, 7, 7])
713716
s5.mode()
714717
df5 = pd.DataFrame(
715-
{"A": np.random.randint(0, 7, size=50), "B": np.random.randint(-10, 15, size=50)}
718+
{
719+
"A": np.random.randint(0, 7, size=50),
720+
"B": np.random.randint(-10, 15, size=50),
721+
}
716722
)
717723
df5.mode()
718724
@@ -1192,7 +1198,9 @@ to :ref:`merging/joining functionality <merging>`:
11921198

11931199
.. ipython:: python
11941200
1195-
s = pd.Series(["six", "seven", "six", "seven", "six"], index=["a", "b", "c", "d", "e"])
1201+
s = pd.Series(
1202+
["six", "seven", "six", "seven", "six"], index=["a", "b", "c", "d", "e"]
1203+
)
11961204
t = pd.Series({"six": 6.0, "seven": 7.0})
11971205
s
11981206
s.map(t)
@@ -1494,7 +1502,9 @@ labels).
14941502
14951503
df = pd.DataFrame(
14961504
{"x": [1, 2, 3, 4, 5, 6], "y": [10, 20, 30, 40, 50, 60]},
1497-
index=pd.MultiIndex.from_product([["a", "b", "c"], [1, 2]], names=["let", "num"]),
1505+
index=pd.MultiIndex.from_product(
1506+
[["a", "b", "c"], [1, 2]], names=["let", "num"]
1507+
),
14981508
)
14991509
df
15001510
df.rename_axis(index={"let": "abc"})
@@ -1803,7 +1813,9 @@ used to sort a pandas object by its index levels.
18031813
}
18041814
)
18051815
1806-
unsorted_df = df.reindex(index=["a", "d", "c", "b"], columns=["three", "two", "one"])
1816+
unsorted_df = df.reindex(
1817+
index=["a", "d", "c", "b"], columns=["three", "two", "one"]
1818+
)
18071819
unsorted_df
18081820
18091821
# DataFrame
@@ -1849,7 +1861,9 @@ to use to determine the sorted order.
18491861

18501862
.. ipython:: python
18511863
1852-
df1 = pd.DataFrame({"one": [2, 1, 1, 1], "two": [1, 3, 2, 4], "three": [5, 4, 3, 2]})
1864+
df1 = pd.DataFrame(
1865+
{"one": [2, 1, 1, 1], "two": [1, 3, 2, 4], "three": [5, 4, 3, 2]}
1866+
)
18531867
df1.sort_values(by="two")
18541868
18551869
The ``by`` parameter can take a list of column names, e.g.:
@@ -1994,7 +2008,9 @@ all levels to ``by``.
19942008

19952009
.. ipython:: python
19962010
1997-
df1.columns = pd.MultiIndex.from_tuples([("a", "one"), ("a", "two"), ("b", "three")])
2011+
df1.columns = pd.MultiIndex.from_tuples(
2012+
[("a", "one"), ("a", "two"), ("b", "three")]
2013+
)
19982014
df1.sort_values(by=("a", "two"))
19992015
20002016
@@ -2245,7 +2261,11 @@ to the correct type.
22452261
import datetime
22462262
22472263
df = pd.DataFrame(
2248-
[[1, 2], ["a", "b"], [datetime.datetime(2016, 3, 2), datetime.datetime(2016, 3, 2)]]
2264+
[
2265+
[1, 2],
2266+
["a", "b"],
2267+
[datetime.datetime(2016, 3, 2), datetime.datetime(2016, 3, 2)],
2268+
]
22492269
)
22502270
df = df.T
22512271
df

doc/source/user_guide/categorical.rst

+17-3
Original file line number | Diff line number | Diff line change
@@ -513,7 +513,11 @@ The ordering of the categorical is determined by the ``categories`` of that colu
513513
514514
dfs = pd.DataFrame(
515515
{
516-
"A": pd.Categorical(list("bbeebbaa"), categories=["e", "a", "b"], ordered=True),
516+
"A": pd.Categorical(
517+
list("bbeebbaa"),
518+
categories=["e", "a", "b"],
519+
ordered=True,
520+
),
517521
"B": [1, 2, 1, 2, 2, 1, 2, 1],
518522
}
519523
)
@@ -642,7 +646,13 @@ Groupby will also show "unused" categories:
642646
df.groupby("cats").mean()
643647
644648
cats2 = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"])
645-
df2 = pd.DataFrame({"cats": cats2, "B": ["c", "d", "c", "d"], "values": [1, 2, 3, 4]})
649+
df2 = pd.DataFrame(
650+
{
651+
"cats": cats2,
652+
"B": ["c", "d", "c", "d"],
653+
"values": [1, 2, 3, 4],
654+
}
655+
)
646656
df2.groupby(["cats", "B"]).mean()
647657
648658
@@ -1115,7 +1125,11 @@ You can use ``fillna`` to handle missing values before applying a function.
11151125
.. ipython:: python
11161126
11171127
df = pd.DataFrame(
1118-
{"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"], "cats": pd.Categorical([1, 2, 3, 2])}
1128+
{
1129+
"a": [1, 2, 3, 4],
1130+
"b": ["a", "b", "c", "d"],
1131+
"cats": pd.Categorical([1, 2, 3, 2]),
1132+
}
11191133
)
11201134
df.apply(lambda row: type(row["cats"]), axis=1)
11211135
df.apply(lambda col: col.dtype, axis=0)

0 commit comments

Comments
 (0)