Skip to content

Commit f297ee8

Browse files
committed
ENH: Provide dict object for to_dict() pandas-dev#16122
2 parents d6c0deb + 4bdbcb6 commit f297ee8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+909
-504
lines changed

.travis.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ after_success:
123123

124124
after_script:
125125
- echo "after_script start"
126-
- source activate pandas && python -c "import pandas; pandas.show_versions();"
126+
- source activate pandas && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
127127
- if [ -e /tmp/single.xml ]; then
128128
ci/print_skipped.py /tmp/single.xml;
129129
fi

asv_bench/benchmarks/indexing.py

+19-4
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ def time_getitem_list_like(self):
1919
def time_getitem_array(self):
2020
self.s[np.arange(10000)]
2121

22+
def time_getitem_lists(self):
23+
self.s[np.arange(10000).tolist()]
24+
2225
def time_iloc_array(self):
2326
self.s.iloc[np.arange(10000)]
2427

@@ -190,9 +193,15 @@ def setup(self):
190193
np.arange(1000)], names=['one', 'two'])
191194

192195
import string
193-
self.mistring = MultiIndex.from_product(
194-
[np.arange(1000),
195-
np.arange(20), list(string.ascii_letters)],
196+
197+
self.mi_large = MultiIndex.from_product(
198+
[np.arange(1000), np.arange(20), list(string.ascii_letters)],
199+
names=['one', 'two', 'three'])
200+
self.mi_med = MultiIndex.from_product(
201+
[np.arange(1000), np.arange(10), list('A')],
202+
names=['one', 'two', 'three'])
203+
self.mi_small = MultiIndex.from_product(
204+
[np.arange(100), list('A'), list('A')],
196205
names=['one', 'two', 'three'])
197206

198207
def time_series_xs_mi_ix(self):
@@ -215,8 +224,14 @@ def time_multiindex_get_indexer(self):
215224
(0, 16), (0, 17), (0, 18),
216225
(0, 19)], dtype=object))
217226

227+
def time_multiindex_large_get_loc(self):
228+
self.mi_large.get_loc((999, 19, 'Z'))
229+
230+
def time_multiindex_med_get_loc(self):
231+
self.mi_med.get_loc((999, 9, 'A'))
232+
218233
def time_multiindex_string_get_loc(self):
219-
self.mistring.get_loc((999, 19, 'Z'))
234+
self.mi_small.get_loc((99, 'A', 'A'))
220235

221236
def time_is_monotonic(self):
222237
self.miint.is_monotonic

ci/install_travis.sh

+16-11
Original file line numberDiff line numberDiff line change
@@ -119,15 +119,7 @@ if [ "$COVERAGE" ]; then
119119
fi
120120

121121
echo
122-
if [ "$BUILD_TEST" ]; then
123-
124-
# build & install testing
125-
echo ["Starting installation test."]
126-
bash ci/install_release_build.sh
127-
conda uninstall -y cython
128-
time pip install dist/*tar.gz || exit 1
129-
130-
else
122+
if [ -z "$BUILD_TEST" ]; then
131123

132124
# build but don't install
133125
echo "[build em]"
@@ -163,9 +155,22 @@ fi
163155
# w/o removing anything else
164156
echo
165157
echo "[removing installed pandas]"
166-
conda remove pandas --force
158+
conda remove pandas -y --force
167159

168-
if [ -z "$BUILD_TEST" ]; then
160+
if [ "$BUILD_TEST" ]; then
161+
162+
# remove any installation
163+
pip uninstall -y pandas
164+
conda list pandas
165+
pip list --format columns |grep pandas
166+
167+
# build & install testing
168+
echo ["building release"]
169+
bash scripts/build_dist_for_release.sh
170+
conda uninstall -y cython
171+
time pip install dist/*tar.gz || exit 1
172+
173+
else
169174

170175
# install our pandas
171176
echo

ci/script_multi.sh

+13-7
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,26 @@ export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 429496
1919
echo PYTHONHASHSEED=$PYTHONHASHSEED
2020

2121
if [ "$BUILD_TEST" ]; then
22-
echo "build-test"
22+
echo "[build-test]"
23+
24+
echo "[env]"
25+
pip list --format columns |grep pandas
26+
27+
echo "[running]"
2328
cd /tmp
24-
pwd
25-
conda list pandas
26-
echo "running"
27-
python -c "import pandas; pandas.test(['-n 2'])"
29+
unset PYTHONPATH
30+
python -c 'import pandas; pandas.test(["-n 2", "--skip-slow", "--skip-network", "-r xX", "-m not single"])'
31+
2832
elif [ "$DOC" ]; then
2933
echo "We are not running pytest as this is a doc-build"
34+
3035
elif [ "$COVERAGE" ]; then
3136
echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
3237
pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
38+
3339
else
34-
echo pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
35-
pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest
40+
echo pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
41+
pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest
3642
fi
3743

3844
RET="$?"

ci/script_single.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ elif [ "$COVERAGE" ]; then
2020
echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
2121
pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
2222
else
23-
echo pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas
24-
pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest
23+
echo pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas
24+
pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest
2525
fi
2626

2727
RET="$?"

doc/make.py

+18-5
Original file line numberDiff line numberDiff line change
@@ -115,15 +115,28 @@ def maybe_exclude_notebooks():
115115
notebooks = [os.path.join(base, 'source', nb)
116116
for nb in ['style.ipynb']]
117117
contents = {}
118-
try:
119-
import nbconvert
120-
nbconvert.utils.pandoc.get_pandoc_version()
121-
except (ImportError, nbconvert.utils.pandoc.PandocMissing):
122-
print("Warning: Pandoc is not installed. Skipping Notebooks.")
118+
119+
def _remove_notebooks():
123120
for nb in notebooks:
124121
with open(nb, 'rt') as f:
125122
contents[nb] = f.read()
126123
os.remove(nb)
124+
125+
# Skip notebook conversion if
126+
# 1. nbconvert isn't installed, or
127+
# 2. nbconvert is installed, but pandoc isn't
128+
try:
129+
import nbconvert
130+
except ImportError:
131+
print("Warning: nbconvert not installed. Skipping notebooks.")
132+
_remove_notebooks()
133+
else:
134+
try:
135+
nbconvert.utils.pandoc.get_pandoc_version()
136+
except nbconvert.utils.pandoc.PandocMissing:
137+
print("Warning: Pandoc is not installed. Skipping notebooks.")
138+
_remove_notebooks()
139+
127140
yield
128141
for nb, content in contents.items():
129142
with open(nb, 'wt') as f:

doc/source/install.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ installed), make sure you have `pytest
202202
Dependencies
203203
------------
204204

205-
* `setuptools <http://pythonhosted.org/setuptools>`__
205+
* `setuptools <https://setuptools.readthedocs.io/en/latest/>`__
206206
* `NumPy <http://www.numpy.org>`__: 1.7.1 or higher
207207
* `python-dateutil <http://labix.org/python-dateutil>`__: 1.5 or higher
208208
* `pytz <http://pytz.sourceforge.net/>`__: Needed for time zone support

doc/source/style.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
"\n",
1313
"<span style=\"color: red\">*Provisional: This is a new feature and still under development. We'll be adding features and possibly making breaking changes in future releases. We'd love to hear your feedback.*</span>\n",
1414
"\n",
15-
"This document is written as a Jupyter Notebook, and can be viewed or downloaded [here](http://nbviewer.ipython.org/github/pandas-dev/pandas/blob/master/doc/source/html-styling.ipynb).\n",
15+
"This document is written as a Jupyter Notebook, and can be viewed or downloaded [here](http://nbviewer.ipython.org/github/pandas-dev/pandas/blob/master/doc/source/style.ipynb).\n",
1616
"\n",
1717
"You can apply **conditional formatting**, the visual styling of a DataFrame\n",
1818
"depending on the data within, by using the ``DataFrame.style`` property.\n",

doc/source/whatsnew.rst

+4
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ What's New
1818

1919
These are new features and improvements of note in each release.
2020

21+
.. include:: whatsnew/v0.21.0.txt
22+
23+
.. include:: whatsnew/v0.20.2.txt
24+
2125
.. include:: whatsnew/v0.20.0.txt
2226

2327
.. include:: whatsnew/v0.19.2.txt

doc/source/whatsnew/v0.20.2.txt

+18-14
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
.. _whatsnew_0201:
1+
.. _whatsnew_0202:
22

3-
v0.20.1 (???)
3+
v0.20.2 (???)
44
-------------
55

66
This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes,
@@ -9,51 +9,56 @@ We recommend that all users upgrade to this version.
99

1010
Highlights include:
1111

12-
.. contents:: What's new in v0.20.1
12+
.. contents:: What's new in v0.20.2
1313
:local:
1414
:backlinks: none
1515

1616

17-
.. _whatsnew_0201.enhancements:
17+
.. _whatsnew_0202.enhancements:
1818

1919
Enhancements
2020
~~~~~~~~~~~~
2121

22+
- Unblocked access to additional compression types supported in pytables: 'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`)
2223

23-
24-
.. _whatsnew_0201.performance:
24+
.. _whatsnew_0202.performance:
2525

2626
Performance Improvements
2727
~~~~~~~~~~~~~~~~~~~~~~~~
2828

29+
- Performance regression fix when indexing with a list-like (:issue:`16285`)
30+
- Performance regression fix for small MultiIndexes (:issue:`16319`)
2931

30-
31-
.. _whatsnew_0201.bug_fixes:
32+
.. _whatsnew_0202.bug_fixes:
3233

3334
Bug Fixes
3435
~~~~~~~~~
3536

37+
- Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`)
38+
3639
Conversion
3740
^^^^^^^^^^
3841

39-
42+
- Bug in ``pd.to_numeric()`` in which empty data inputs were causing Python to crash (:issue:`16302`)
4043

4144

4245
Indexing
4346
^^^^^^^^
4447

45-
48+
- Bug in ``DataFrame.reset_index(level=)`` with single level index (:issue:`16263`)
4649

4750

4851
I/O
4952
^^^
5053

51-
54+
- Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`)
5255

5356

5457
Plotting
5558
^^^^^^^^
5659

60+
- Bug in ``DataFrame.plot`` with a single column and a list-like ``color`` (:issue:`3486`)
61+
5762

5863

5964

@@ -66,13 +71,12 @@ Groupby/Resample/Rolling
6671
Sparse
6772
^^^^^^
6873

69-
70-
74+
- Bug in construction of SparseDataFrame from ``scipy.sparse.dok_matrix`` (:issue:`16179`)
7175

7276
Reshaping
7377
^^^^^^^^^
7478

75-
79+
- Bug in ``DataFrame.stack`` with unsorted levels in MultiIndex columns (:issue:`16323`)
7680

7781

7882
Numeric

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ New features
2727
Other Enhancements
2828
^^^^^^^^^^^^^^^^^^
2929
- ``Series.to_dict()`` and ``DataFrame.to_dict()`` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`)
30+
- ``RangeIndex.append`` now returns a ``RangeIndex`` object when possible (:issue:`16212`)
3031

3132

3233

pandas/_libs/hashtable.pxd

+1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ cdef struct Int64VectorData:
5252
cdef class Int64Vector:
5353
cdef Int64VectorData *data
5454
cdef ndarray ao
55+
cdef bint external_view_exists
5556

5657
cdef resize(self)
5758
cpdef to_array(self)

pandas/_libs/hashtable.pyx

+13
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ cdef class Factorizer:
6464
>>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20)
6565
array([ 0, 1, 20])
6666
"""
67+
if self.uniques.external_view_exists:
68+
uniques = ObjectVector()
69+
uniques.extend(self.uniques.to_array())
70+
self.uniques = uniques
6771
labels = self.table.get_labels(values, self.uniques,
6872
self.count, na_sentinel, check_null)
6973
mask = (labels == na_sentinel)
@@ -99,6 +103,15 @@ cdef class Int64Factorizer:
99103

100104
def factorize(self, int64_t[:] values, sort=False,
101105
na_sentinel=-1, check_null=True):
106+
"""
107+
Factorize values with nans replaced by na_sentinel
108+
>>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20)
109+
array([ 0, 1, 20])
110+
"""
111+
if self.uniques.external_view_exists:
112+
uniques = Int64Vector()
113+
uniques.extend(self.uniques.to_array())
114+
self.uniques = uniques
102115
labels = self.table.get_labels(values, self.uniques,
103116
self.count, na_sentinel,
104117
check_null)

0 commit comments

Comments
 (0)