Skip to content

Commit 6aa8ced

Browse files
author
MomIsBestFriend
committed
Changed x.__class__ to type(x)
2 parents 61cf55d + 853ec9a commit 6aa8ced

File tree

109 files changed

+2803
-1243
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

109 files changed

+2803
-1243
lines changed

asv_bench/benchmarks/categoricals.py

+27-15
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,6 @@
1414
pass
1515

1616

17-
class Concat:
18-
def setup(self):
19-
N = 10 ** 5
20-
self.s = pd.Series(list("aabbcd") * N).astype("category")
21-
22-
self.a = pd.Categorical(list("aabbcd") * N)
23-
self.b = pd.Categorical(list("bbcdjk") * N)
24-
25-
def time_concat(self):
26-
pd.concat([self.s, self.s])
27-
28-
def time_union(self):
29-
union_categoricals([self.a, self.b])
30-
31-
3217
class Constructor:
3318
def setup(self):
3419
N = 10 ** 5
@@ -77,6 +62,33 @@ def time_existing_series(self):
7762
pd.Categorical(self.series)
7863

7964

65+
class CategoricalOps:
66+
params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"]
67+
param_names = ["op"]
68+
69+
def setup(self, op):
70+
N = 10 ** 5
71+
self.cat = pd.Categorical(list("aabbcd") * N, ordered=True)
72+
73+
def time_categorical_op(self, op):
74+
getattr(self.cat, op)("b")
75+
76+
77+
class Concat:
78+
def setup(self):
79+
N = 10 ** 5
80+
self.s = pd.Series(list("aabbcd") * N).astype("category")
81+
82+
self.a = pd.Categorical(list("aabbcd") * N)
83+
self.b = pd.Categorical(list("bbcdjk") * N)
84+
85+
def time_concat(self):
86+
pd.concat([self.s, self.s])
87+
88+
def time_union(self):
89+
union_categoricals([self.a, self.b])
90+
91+
8092
class ValueCounts:
8193

8294
params = [True, False]

ci/azure/posix.yml

+7-10
Original file line numberDiff line numberDiff line change
@@ -44,16 +44,13 @@ jobs:
4444
PATTERN: "not slow and not network"
4545
LOCALE_OVERRIDE: "zh_CN.UTF-8"
4646

47-
# https://github.com/pandas-dev/pandas/issues/29432
48-
# py37_np_dev:
49-
# ENV_FILE: ci/deps/azure-37-numpydev.yaml
50-
# CONDA_PY: "37"
51-
# PATTERN: "not slow and not network"
52-
# TEST_ARGS: "-W error"
53-
# PANDAS_TESTING_MODE: "deprecate"
54-
# EXTRA_APT: "xsel"
55-
# # TODO:
56-
# continueOnError: true
47+
py37_np_dev:
48+
ENV_FILE: ci/deps/azure-37-numpydev.yaml
49+
CONDA_PY: "37"
50+
PATTERN: "not slow and not network"
51+
TEST_ARGS: "-W error"
52+
PANDAS_TESTING_MODE: "deprecate"
53+
EXTRA_APT: "xsel"
5754

5855
steps:
5956
- script: |

ci/deps/azure-macos-36.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ dependencies:
2020
- matplotlib=2.2.3
2121
- nomkl
2222
- numexpr
23-
- numpy=1.13.3
23+
- numpy=1.14
2424
- openpyxl
25-
- pyarrow
25+
- pyarrow>=0.12.0
2626
- pytables
2727
- python-dateutil==2.6.1
2828
- pytz

ci/deps/azure-windows-36.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ dependencies:
2020
- numexpr
2121
- numpy=1.15.*
2222
- openpyxl
23-
- pyarrow
23+
- pyarrow>=0.12.0
2424
- pytables
2525
- python-dateutil
2626
- pytz

doc/redirects.csv

-1
Original file line numberDiff line numberDiff line change
@@ -828,7 +828,6 @@ generated/pandas.MultiIndex.sortlevel,../reference/api/pandas.MultiIndex.sortlev
828828
generated/pandas.MultiIndex.swaplevel,../reference/api/pandas.MultiIndex.swaplevel
829829
generated/pandas.MultiIndex.to_flat_index,../reference/api/pandas.MultiIndex.to_flat_index
830830
generated/pandas.MultiIndex.to_frame,../reference/api/pandas.MultiIndex.to_frame
831-
generated/pandas.MultiIndex.to_hierarchical,../reference/api/pandas.MultiIndex.to_hierarchical
832831
generated/pandas.notna,../reference/api/pandas.notna
833832
generated/pandas.notnull,../reference/api/pandas.notnull
834833
generated/pandas.option_context,../reference/api/pandas.option_context

doc/source/development/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@ Development
1919
developer
2020
policies
2121
roadmap
22+
meeting

doc/source/development/meeting.rst

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
.. _meeting:
2+
3+
==================
4+
Developer Meetings
5+
==================
6+
7+
We hold regular developer meetings on the second Wednesday
8+
of each month at 18:00 UTC. These meetings and their minutes are open to
9+
the public. All are welcome to join.
10+
11+
Minutes
12+
-------
13+
14+
The minutes of past meetings are available in `this Google Document <https://docs.google.com/document/d/1tGbTiYORHiSPgVMXawiweGJlBw5dOkVJLY-licoBmBU/edit?usp=sharing>`__.
15+
16+
Calendar
17+
--------
18+
19+
This calendar shows all the developer meetings.
20+
21+
.. raw:: html
22+
23+
<iframe src="https://calendar.google.com/calendar/embed?src=pgbn14p6poja8a1cf2dv2jhrmg%40group.calendar.google.com" style="border: 0" width="800" height="600" frameborder="0" scrolling="no"></iframe>
24+
25+
You can subscribe to this calendar with the following links:
26+
27+
* `iCal <https://calendar.google.com/calendar/ical/pgbn14p6poja8a1cf2dv2jhrmg%40group.calendar.google.com/public/basic.ics>`__
28+
* `Google calendar <https://calendar.google.com/calendar/embed?src=pgbn14p6poja8a1cf2dv2jhrmg%40group.calendar.google.com>`__
29+
30+
Additionally, we'll sometimes have one-off meetings on specific topics.
31+
These will be published on the same calendar.
32+

doc/source/getting_started/basics.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1950,6 +1950,7 @@ sparse :class:`SparseDtype` (none) :class:`arrays.
19501950
intervals :class:`IntervalDtype` :class:`Interval` :class:`arrays.IntervalArray` :ref:`advanced.intervalindex`
19511951
nullable integer :class:`Int64Dtype`, ... (none) :class:`arrays.IntegerArray` :ref:`integer_na`
19521952
Strings :class:`StringDtype` :class:`str` :class:`arrays.StringArray` :ref:`text`
1953+
Boolean (with NA) :class:`BooleanDtype` :class:`bool` :class:`arrays.BooleanArray` :ref:`api.arrays.bool`
19531954
=================== ========================= ================== ============================= =============================
19541955

19551956
Pandas has two ways to store strings.

doc/source/getting_started/install.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ matplotlib 2.2.2 Visualization
258258
openpyxl 2.4.8 Reading / writing for xlsx files
259259
pandas-gbq 0.8.0 Google Big Query access
260260
psycopg2 PostgreSQL engine for sqlalchemy
261-
pyarrow 0.9.0 Parquet and feather reading / writing
261+
pyarrow 0.12.0 Parquet and feather reading / writing
262262
pymysql 0.7.11 MySQL engine for sqlalchemy
263263
pyreadstat SPSS files (.sav) reading
264264
pytables 3.4.2 HDF5 reading / writing

doc/source/reference/arrays.rst

+23
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Nullable Integer :class:`Int64Dtype`, ... (none) :ref:`api.array
2525
Categorical :class:`CategoricalDtype` (none) :ref:`api.arrays.categorical`
2626
Sparse :class:`SparseDtype` (none) :ref:`api.arrays.sparse`
2727
Strings :class:`StringDtype` :class:`str` :ref:`api.arrays.string`
28+
Boolean (with NA) :class:`BooleanDtype` :class:`bool` :ref:`api.arrays.bool`
2829
=================== ========================= ================== =============================
2930

3031
Pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`).
@@ -485,6 +486,28 @@ The ``Series.str`` accessor is available for ``Series`` backed by a :class:`arra
485486
See :ref:`api.series.str` for more.
486487

487488

489+
.. _api.arrays.bool:
490+
491+
Boolean data with missing values
492+
--------------------------------
493+
494+
The boolean dtype (with the alias ``"boolean"``) provides support for storing
495+
boolean data (True, False values) with missing values, which is not possible
496+
with a bool :class:`numpy.ndarray`.
497+
498+
.. autosummary::
499+
:toctree: api/
500+
:template: autosummary/class_without_autosummary.rst
501+
502+
arrays.BooleanArray
503+
504+
.. autosummary::
505+
:toctree: api/
506+
:template: autosummary/class_without_autosummary.rst
507+
508+
BooleanDtype
509+
510+
488511
.. Dtype attributes which are manually listed in their docstrings: including
489512
.. it here to make sure a docstring page is built for them
490513

doc/source/reference/indexing.rst

-1
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,6 @@ MultiIndex components
305305

306306
MultiIndex.set_levels
307307
MultiIndex.set_codes
308-
MultiIndex.to_hierarchical
309308
MultiIndex.to_flat_index
310309
MultiIndex.to_frame
311310
MultiIndex.is_lexsorted

doc/source/reference/style.rst

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ Style application
4141
Styler.set_caption
4242
Styler.set_properties
4343
Styler.set_uuid
44+
Styler.set_na_rep
4445
Styler.clear
4546
Styler.pipe
4647

doc/source/user_guide/scale.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,9 @@ Use efficient datatypes
9393
-----------------------
9494

9595
The default pandas data types are not the most memory efficient. This is
96-
especially true for high-cardinality text data (columns with relatively few
97-
unique values). By using more efficient data types you can store larger datasets
98-
in memory.
96+
especially true for text data columns with relatively few unique values (commonly
97+
referred to as "low-cardinality" data). By using more efficient data types you
98+
can store larger datasets in memory.
9999

100100
.. ipython:: python
101101

doc/source/user_guide/style.ipynb

+60
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
"df = pd.DataFrame({'A': np.linspace(1, 10, 10)})\n",
6868
"df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],\n",
6969
" axis=1)\n",
70+
"df.iloc[3, 3] = np.nan\n",
7071
"df.iloc[0, 2] = np.nan"
7172
]
7273
},
@@ -402,6 +403,38 @@
402403
"df.style.format({\"B\": lambda x: \"±{:.2f}\".format(abs(x))})"
403404
]
404405
},
406+
{
407+
"cell_type": "markdown",
408+
"metadata": {},
409+
"source": [
410+
"You can format the text displayed for missing values using the `na_rep` argument."
411+
]
412+
},
413+
{
414+
"cell_type": "code",
415+
"execution_count": null,
416+
"metadata": {},
417+
"outputs": [],
418+
"source": [
419+
"df.style.format(\"{:.2%}\", na_rep=\"-\")"
420+
]
421+
},
422+
{
423+
"cell_type": "markdown",
424+
"metadata": {},
425+
"source": [
426+
"These formatting techniques can be used in combination with styling."
427+
]
428+
},
429+
{
430+
"cell_type": "code",
431+
"execution_count": null,
432+
"metadata": {},
433+
"outputs": [],
434+
"source": [
435+
"df.style.highlight_max().format(None, na_rep=\"-\")"
436+
]
437+
},
405438
{
406439
"cell_type": "markdown",
407440
"metadata": {},
@@ -659,6 +692,7 @@
659692
"- precision\n",
660693
"- captions\n",
661694
"- table-wide styles\n",
695+
"- missing values representation\n",
662696
"- hiding the index or columns\n",
663697
"\n",
664698
"Each of these can be specified in two ways:\n",
@@ -800,6 +834,32 @@
800834
"We hope to collect some useful ones either in pandas, or preferably in a new package that [builds on top](#Extensibility) of the tools here."
801835
]
802836
},
837+
{
838+
"cell_type": "markdown",
839+
"metadata": {},
840+
"source": [
841+
"### Missing values"
842+
]
843+
},
844+
{
845+
"cell_type": "markdown",
846+
"metadata": {},
847+
"source": [
848+
"You can control the default missing values representation for the entire table through the `set_na_rep` method."
849+
]
850+
},
851+
{
852+
"cell_type": "code",
853+
"execution_count": null,
854+
"metadata": {},
855+
"outputs": [],
856+
"source": [
857+
"(df.style\n",
858+
" .set_na_rep(\"FAIL\")\n",
859+
" .format(None, na_rep=\"PASS\", subset=[\"D\"])\n",
860+
" .highlight_null(\"yellow\"))"
861+
]
862+
},
803863
{
804864
"cell_type": "markdown",
805865
"metadata": {},

0 commit comments

Comments
 (0)