Skip to content

Commit a94b8ad

Browse files
author
Artemy Kolchinsky
committed
Merge branch 'master' of github.com:pydata/pandas
2 parents 03f7a8d + 85069eb commit a94b8ad

34 files changed

+130
-66
lines changed

doc/make.py

+27-20
Original file line numberDiff line numberDiff line change
@@ -31,48 +31,48 @@
3131
SPHINX_BUILD = 'sphinxbuild'
3232

3333

34-
def upload_dev():
34+
def upload_dev(user='pandas'):
3535
'push a copy to the pydata dev directory'
36-
if os.system('cd build/html; rsync -avz . pandas@pandas.pydata.org'
37-
':/usr/share/nginx/pandas/pandas-docs/dev/ -essh'):
36+
if os.system('cd build/html; rsync -avz . {0}@pandas.pydata.org'
37+
':/usr/share/nginx/pandas/pandas-docs/dev/ -essh'.format(user)):
3838
raise SystemExit('Upload to Pydata Dev failed')
3939

4040

41-
def upload_dev_pdf():
41+
def upload_dev_pdf(user='pandas'):
4242
'push a copy to the pydata dev directory'
43-
if os.system('cd build/latex; scp pandas.pdf pandas@pandas.pydata.org'
44-
':/usr/share/nginx/pandas/pandas-docs/dev/'):
43+
if os.system('cd build/latex; scp pandas.pdf {0}@pandas.pydata.org'
44+
':/usr/share/nginx/pandas/pandas-docs/dev/'.format(user)):
4545
raise SystemExit('PDF upload to Pydata Dev failed')
4646

4747

48-
def upload_stable():
48+
def upload_stable(user='pandas'):
4949
'push a copy to the pydata stable directory'
50-
if os.system('cd build/html; rsync -avz . pandas@pandas.pydata.org'
51-
':/usr/share/nginx/pandas/pandas-docs/stable/ -essh'):
50+
if os.system('cd build/html; rsync -avz . {0}@pandas.pydata.org'
51+
':/usr/share/nginx/pandas/pandas-docs/stable/ -essh'.format(user)):
5252
raise SystemExit('Upload to stable failed')
5353

5454

55-
def upload_stable_pdf():
55+
def upload_stable_pdf(user='pandas'):
5656
'push a copy to the pydata stable directory'
57-
if os.system('cd build/latex; scp pandas.pdf pandas@pandas.pydata.org'
58-
':/usr/share/nginx/pandas/pandas-docs/stable/'):
57+
if os.system('cd build/latex; scp pandas.pdf {0}@pandas.pydata.org'
58+
':/usr/share/nginx/pandas/pandas-docs/stable/'.format(user)):
5959
raise SystemExit('PDF upload to stable failed')
6060

6161

62-
def upload_prev(ver, doc_root='./'):
62+
def upload_prev(ver, doc_root='./', user='pandas'):
6363
'push a copy of older release to appropriate version directory'
6464
local_dir = doc_root + 'build/html'
6565
remote_dir = '/usr/share/nginx/pandas/pandas-docs/version/%s/' % ver
66-
cmd = 'cd %s; rsync -avz . pandas@pandas.pydata.org:%s -essh'
67-
cmd = cmd % (local_dir, remote_dir)
66+
cmd = 'cd %s; rsync -avz . %s@pandas.pydata.org:%s -essh'
67+
cmd = cmd % (local_dir, user, remote_dir)
6868
print(cmd)
6969
if os.system(cmd):
7070
raise SystemExit(
7171
'Upload to %s from %s failed' % (remote_dir, local_dir))
7272

7373
local_dir = doc_root + 'build/latex'
74-
pdf_cmd = 'cd %s; scp pandas.pdf pandas@pandas.pydata.org:%s'
75-
pdf_cmd = pdf_cmd % (local_dir, remote_dir)
74+
pdf_cmd = 'cd %s; scp pandas.pdf %s@pandas.pydata.org:%s'
75+
pdf_cmd = pdf_cmd % (local_dir, user, remote_dir)
7676
if os.system(pdf_cmd):
7777
raise SystemExit('Upload PDF to %s from %s failed' % (ver, doc_root))
7878

@@ -337,6 +337,10 @@ def generate_index(api=True, single=False, **kwds):
337337
type=str,
338338
default=False,
339339
help='filename of section to compile, e.g. "indexing"')
340+
argparser.add_argument('--user',
341+
type=str,
342+
default=False,
343+
help='Username to connect to the pydata server')
340344

341345
def main():
342346
args, unknown = argparser.parse_known_args()
@@ -354,16 +358,19 @@ def main():
354358
ver = sys.argv[2]
355359

356360
if ftype == 'build_previous':
357-
build_prev(ver)
361+
build_prev(ver, user=args.user)
358362
if ftype == 'upload_previous':
359-
upload_prev(ver)
363+
upload_prev(ver, user=args.user)
360364
elif len(sys.argv) == 2:
361365
for arg in sys.argv[1:]:
362366
func = funcd.get(arg)
363367
if func is None:
364368
raise SystemExit('Do not know how to handle %s; valid args are %s' % (
365369
arg, list(funcd.keys())))
366-
func()
370+
if args.user:
371+
func(user=args.user)
372+
else:
373+
func()
367374
else:
368375
small_docs = False
369376
all()

doc/source/faq.rst

+4
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ when calling ``df.info()``:
5050
5151
df.info()
5252
53+
The ``+`` symbol indicates that the true memory usage could be higher, because
54+
pandas does not count the memory used by values in columns with
55+
``dtype=object``.
56+
5357
By default the display option is set to ``True`` but can be explicitly
5458
overridden by passing the ``memory_usage`` argument when invoking ``df.info()``.
5559

doc/source/io.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -3411,7 +3411,7 @@ Of course, you can specify a more "complex" query.
34113411
34123412
pd.read_sql_query("SELECT id, Col_1, Col_2 FROM data WHERE id = 42;", engine)
34133413
3414-
The func:`~pandas.read_sql_query` function supports a ``chunksize`` argument.
3414+
The :func:`~pandas.read_sql_query` function supports a ``chunksize`` argument.
34153415
Specifying this will return an iterator through chunks of the query result:
34163416

34173417
.. ipython:: python

doc/source/visualization.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1186,7 +1186,7 @@ with "(right)" in the legend. To turn off the automatic marking, use the
11861186
Suppressing Tick Resolution Adjustment
11871187
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
11881188

1189-
pandas includes automatically tick resolution adjustment for regular frequency
1189+
pandas includes automatic tick resolution adjustment for regular frequency
11901190
time-series data. For limited cases where pandas cannot infer the frequency
11911191
information (e.g., in an externally created ``twinx``), you can choose to
11921192
suppress this behavior for alignment purposes.

doc/source/whatsnew.rst

+22-22
Original file line numberDiff line numberDiff line change
@@ -18,46 +18,46 @@ What's New
1818

1919
These are new features and improvements of note in each release.
2020

21-
.. include:: v0.15.1.txt
21+
.. include:: whatsnew/v0.15.1.txt
2222

23-
.. include:: v0.15.0.txt
23+
.. include:: whatsnew/v0.15.0.txt
2424

25-
.. include:: v0.14.1.txt
25+
.. include:: whatsnew/v0.14.1.txt
2626

27-
.. include:: v0.14.0.txt
27+
.. include:: whatsnew/v0.14.0.txt
2828

29-
.. include:: v0.13.1.txt
29+
.. include:: whatsnew/v0.13.1.txt
3030

31-
.. include:: v0.13.0.txt
31+
.. include:: whatsnew/v0.13.0.txt
3232

33-
.. include:: v0.12.0.txt
33+
.. include:: whatsnew/v0.12.0.txt
3434

35-
.. include:: v0.11.0.txt
35+
.. include:: whatsnew/v0.11.0.txt
3636

37-
.. include:: v0.10.1.txt
37+
.. include:: whatsnew/v0.10.1.txt
3838

39-
.. include:: v0.10.0.txt
39+
.. include:: whatsnew/v0.10.0.txt
4040

41-
.. include:: v0.9.1.txt
41+
.. include:: whatsnew/v0.9.1.txt
4242

43-
.. include:: v0.9.0.txt
43+
.. include:: whatsnew/v0.9.0.txt
4444

45-
.. include:: v0.8.1.txt
45+
.. include:: whatsnew/v0.8.1.txt
4646

47-
.. include:: v0.8.0.txt
47+
.. include:: whatsnew/v0.8.0.txt
4848

49-
.. include:: v0.7.3.txt
49+
.. include:: whatsnew/v0.7.3.txt
5050

51-
.. include:: v0.7.2.txt
51+
.. include:: whatsnew/v0.7.2.txt
5252

53-
.. include:: v0.7.1.txt
53+
.. include:: whatsnew/v0.7.1.txt
5454

55-
.. include:: v0.7.0.txt
55+
.. include:: whatsnew/v0.7.0.txt
5656

57-
.. include:: v0.6.1.txt
57+
.. include:: whatsnew/v0.6.1.txt
5858

59-
.. include:: v0.6.0.txt
59+
.. include:: whatsnew/v0.6.0.txt
6060

61-
.. include:: v0.5.0.txt
61+
.. include:: whatsnew/v0.5.0.txt
6262

63-
.. include:: v0.4.x.txt
63+
.. include:: whatsnew/v0.4.x.txt
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

doc/source/v0.15.1.txt renamed to doc/source/whatsnew/v0.15.1.txt

+6
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ Enhancements
2828

2929
- Added option to select columns when importing Stata files (:issue:`7935`)
3030

31+
- Qualify memory usage in ``DataFrame.info()`` by adding ``+`` if it is a lower bound (:issue:`8578`)
32+
3133

3234
.. _whatsnew_0151.performance:
3335

@@ -45,3 +47,7 @@ Experimental
4547

4648
Bug Fixes
4749
~~~~~~~~~
50+
51+
- Bug in ``cut``/``qcut`` when using ``Series`` and ``retbins=True`` (:issue:`8589`)
52+
53+
- Fix ``shape`` attribute for ``MultiIndex`` (:issue:`8609`)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

pandas/core/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ def transpose(self):
297297
@property
298298
def shape(self):
299299
""" return a tuple of the shape of the underlying data """
300-
return self._data.shape
300+
return self.values.shape
301301

302302
@property
303303
def ndim(self):

pandas/core/frame.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -1478,13 +1478,13 @@ def _verbose_repr():
14781478
def _non_verbose_repr():
14791479
lines.append(self.columns.summary(name='Columns'))
14801480

1481-
def _sizeof_fmt(num):
1481+
def _sizeof_fmt(num, size_qualifier):
14821482
# returns size in human readable format
14831483
for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
14841484
if num < 1024.0:
1485-
return "%3.1f %s" % (num, x)
1485+
return "%3.1f%s %s" % (num, size_qualifier, x)
14861486
num /= 1024.0
1487-
return "%3.1f %s" % (num, 'PB')
1487+
return "%3.1f%s %s" % (num, size_qualifier, 'PB')
14881488

14891489
if verbose:
14901490
_verbose_repr()
@@ -1502,8 +1502,14 @@ def _sizeof_fmt(num):
15021502
if memory_usage is None:
15031503
memory_usage = get_option('display.memory_usage')
15041504
if memory_usage: # append memory usage of df to display
1505+
# size_qualifier is just a best effort; not guaranteed to catch all
1506+
# cases (e.g., it misses categorical data even with object
1507+
# categories)
1508+
size_qualifier = ('+' if 'object' in counts
1509+
or self.index.dtype.kind == 'O' else '')
1510+
mem_usage = self.memory_usage(index=True).sum()
15051511
lines.append("memory usage: %s\n" %
1506-
_sizeof_fmt(self.memory_usage(index=True).sum()))
1512+
_sizeof_fmt(mem_usage, size_qualifier))
15071513
_put_lines(buf, lines)
15081514

15091515
def memory_usage(self, index=False):

pandas/io/parsers.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,9 @@ class ParserWarning(Warning):
3333
3434
Parameters
3535
----------
36-
filepath_or_buffer : string or file handle / StringIO. The string could be
37-
a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a
36+
filepath_or_buffer : string or file handle / StringIO
37+
The string could be a URL. Valid URL schemes include
38+
http, ftp, s3, and file. For file URLs, a
3839
host is expected. For instance, a local file could be
3940
file://localhost/path/to/table.csv
4041
%s
@@ -59,7 +60,8 @@ class ParserWarning(Warning):
5960
dialect : string or csv.Dialect instance, default None
6061
If None defaults to Excel dialect. Ignored if sep longer than 1 char
6162
See csv.Dialect documentation for more details
62-
header : int row number(s) to use as the column names, and the start of the
63+
header : int, list of ints
64+
Row number(s) to use as the column names, and the start of the
6365
data. Defaults to 0 if no ``names`` passed, otherwise ``None``. Explicitly
6466
pass ``header=0`` to be able to replace existing names. The header can be
6567
a list of integers that specify row locations for a multi-index on the
@@ -78,7 +80,7 @@ class ParserWarning(Warning):
7880
names : array-like
7981
List of column names to use. If file contains no header row, then you
8082
should explicitly pass header=None
81-
prefix : string or None (default)
83+
prefix : string, default None
8284
Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
8385
na_values : list-like or dict, default None
8486
Additional strings to recognize as NA/NaN. If dict passed, specific
@@ -113,7 +115,7 @@ class ParserWarning(Warning):
113115
must be a single character. Like empty lines (as long as ``skip_blank_lines=True``),
114116
fully commented lines are ignored by the parameter `header`
115117
but not by `skiprows`. For example, if comment='#', parsing
116-
'#empty\n1,2,3\na,b,c' with `header=0` will result in '1,2,3' being
118+
'#empty\\na,b,c\\n1,2,3' with `header=0` will result in 'a,b,c' being
117119
treated as the header.
118120
decimal : str, default '.'
119121
Character to recognize as decimal point. E.g. use ',' for European data

pandas/tests/test_frame.py

+15
Original file line numberDiff line numberDiff line change
@@ -6732,6 +6732,21 @@ def test_info_memory_usage(self):
67326732
res = buf.getvalue().splitlines()
67336733
self.assertTrue("memory usage: " not in res[-1])
67346734

6735+
df.info(buf=buf, memory_usage=True)
6736+
res = buf.getvalue().splitlines()
6737+
# memory usage is a lower bound, so print it as XYZ+ MB
6738+
self.assertTrue(re.match(r"memory usage: [^+]+\+", res[-1]))
6739+
6740+
df.iloc[:, :5].info(buf=buf, memory_usage=True)
6741+
res = buf.getvalue().splitlines()
6742+
# excluded column with object dtype, so estimate is accurate
6743+
self.assertFalse(re.match(r"memory usage: [^+]+\+", res[-1]))
6744+
6745+
df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo'])
6746+
df_with_object_index.info(buf=buf, memory_usage=True)
6747+
res = buf.getvalue().splitlines()
6748+
self.assertTrue(re.match(r"memory usage: [^+]+\+", res[-1]))
6749+
67356750
# Test a DataFrame with duplicate columns
67366751
dtypes = ['int64', 'int64', 'int64', 'float64']
67376752
data = {}

pandas/tests/test_index.py

+11
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,17 @@ def f():
8383
pass
8484
tm.assertRaisesRegexp(ValueError,'The truth value of a',f)
8585

86+
def test_ndarray_compat_properties(self):
87+
88+
idx = self.create_index()
89+
self.assertTrue(idx.T.equals(idx))
90+
self.assertTrue(idx.transpose().equals(idx))
91+
92+
values = idx.values
93+
for prop in ['shape', 'ndim', 'size', 'itemsize', 'nbytes']:
94+
self.assertEqual(getattr(idx, prop), getattr(values, prop))
95+
96+
8697
class TestIndex(Base, tm.TestCase):
8798
_holder = Index
8899
_multiprocess_can_split_ = True

pandas/tools/tests/test_tile.py

+10
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,16 @@ def test_qcut_return_categorical(self):
248248
ordered=True))
249249
tm.assert_series_equal(res, exp)
250250

251+
def test_series_retbins(self):
252+
# GH 8589
253+
s = Series(np.arange(4))
254+
result, bins = cut(s, 2, retbins=True)
255+
assert_equal(result.cat.codes.values, [0, 0, 1, 1])
256+
assert_almost_equal(bins, [-0.003, 1.5, 3])
257+
258+
result, bins = qcut(s, 2, retbins=True)
259+
assert_equal(result.cat.codes.values, [0, 0, 1, 1])
260+
assert_almost_equal(bins, [0, 1.5, 3])
251261

252262

253263
def curpath():

0 commit comments

Comments
 (0)