Skip to content

Commit c5a87d8

Browse files
Ajay SaxenaAjay Saxena
Ajay Saxena
authored and
Ajay Saxena
committed
Merge branch 'test_branch' of https://github.com/aileronajay/pandas into test_branch
pull from github
2 parents f708c2e + ed21736 commit c5a87d8

17 files changed

+34
-20
lines changed

doc/source/indexing.rst

+4
Original file line numberDiff line numberDiff line change
@@ -1467,6 +1467,10 @@ with duplicates dropped.
14671467
idx1.symmetric_difference(idx2)
14681468
idx1 ^ idx2
14691469
1470+
.. note::
1471+
1472+
The resulting index from a set operation will be sorted in ascending order.
1473+
14701474
Missing values
14711475
~~~~~~~~~~~~~~
14721476

doc/source/whatsnew/v0.20.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ New features
3030
Other enhancements
3131
^^^^^^^^^^^^^^^^^^
3232

33-
33+
- ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`)
3434

3535

3636
.. _whatsnew_0200.api_breaking:

pandas/io/clipboard.py

+12-8
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,16 @@
33
from pandas.compat import StringIO, PY2
44

55

6-
def read_clipboard(**kwargs): # pragma: no cover
7-
"""
6+
def read_clipboard(sep='\s+', **kwargs): # pragma: no cover
7+
r"""
88
Read text from clipboard and pass to read_table. See read_table for the
99
full argument list
1010
11-
If unspecified, `sep` defaults to '\s+'
11+
Parameters
12+
----------
13+
sep : str, default '\s+'
14+
A string or regex delimiter. The default of '\s+' denotes
15+
one or more whitespace characters.
1216
1317
Returns
1418
-------
@@ -29,7 +33,7 @@ def read_clipboard(**kwargs): # pragma: no cover
2933
except:
3034
pass
3135

32-
# Excel copies into clipboard with \t seperation
36+
# Excel copies into clipboard with \t separation
3337
# inspect no more than the first 10 lines, if they
3438
# all contain an equal number (>0) of tabs, infer
3539
# that this came from excel and set 'sep' accordingly
@@ -43,12 +47,12 @@ def read_clipboard(**kwargs): # pragma: no cover
4347

4448
counts = set([x.lstrip().count('\t') for x in lines])
4549
if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
46-
kwargs['sep'] = '\t'
50+
sep = '\t'
4751

48-
if kwargs.get('sep') is None and kwargs.get('delim_whitespace') is None:
49-
kwargs['sep'] = '\s+'
52+
if sep is None and kwargs.get('delim_whitespace') is None:
53+
sep = '\s+'
5054

51-
return read_table(StringIO(text), **kwargs)
55+
return read_table(StringIO(text), sep=sep, **kwargs)
5256

5357

5458
def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover

pandas/io/excel.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from pandas.tseries.period import Period
2222
from pandas import json
2323
from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass,
24-
string_types)
24+
string_types, OrderedDict)
2525
from pandas.core import config
2626
from pandas.formats.printing import pprint_thing
2727
import pandas.compat as compat
@@ -418,9 +418,9 @@ def _parse_cell(cell_contents, cell_typ):
418418
sheets = [sheetname]
419419

420420
# handle same-type duplicates.
421-
sheets = list(set(sheets))
421+
sheets = list(OrderedDict.fromkeys(sheets).keys())
422422

423-
output = {}
423+
output = OrderedDict()
424424

425425
for asheetname in sheets:
426426
if verbose:
-5 KB
Binary file not shown.
296 Bytes
Binary file not shown.
315 Bytes
Binary file not shown.

pandas/io/tests/parser/common.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -630,18 +630,18 @@ def test_read_csv_parse_simple_list(self):
630630
def test_url(self):
631631
# HTTP(S)
632632
url = ('https://raw.github.com/pandas-dev/pandas/master/'
633-
'pandas/io/tests/parser/data/salary.table.csv')
633+
'pandas/io/tests/parser/data/salaries.csv')
634634
url_table = self.read_table(url)
635635
dirpath = tm.get_data_path()
636-
localtable = os.path.join(dirpath, 'salary.table.csv')
636+
localtable = os.path.join(dirpath, 'salaries.csv')
637637
local_table = self.read_table(localtable)
638638
tm.assert_frame_equal(url_table, local_table)
639639
# TODO: ftp testing
640640

641641
@tm.slow
642642
def test_file(self):
643643
dirpath = tm.get_data_path()
644-
localtable = os.path.join(dirpath, 'salary.table.csv')
644+
localtable = os.path.join(dirpath, 'salaries.csv')
645645
local_table = self.read_table(localtable)
646646

647647
try:
283 Bytes
Binary file not shown.
336 Bytes
Binary file not shown.
445 Bytes
Binary file not shown.

pandas/io/tests/parser/test_network.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,13 @@ class TestUrlGz(tm.TestCase):
1818

1919
def setUp(self):
2020
dirpath = tm.get_data_path()
21-
localtable = os.path.join(dirpath, 'salary.table.csv')
21+
localtable = os.path.join(dirpath, 'salaries.csv')
2222
self.local_table = read_table(localtable)
2323

2424
@tm.network
2525
def test_url_gz(self):
2626
url = ('https://raw.github.com/pandas-dev/pandas/'
27-
'master/pandas/io/tests/parser/data/salary.table.gz')
27+
'master/pandas/io/tests/parser/data/salaries.csv.gz')
2828
url_table = read_table(url, compression="gzip", engine="python")
2929
tm.assert_frame_equal(url_table, self.local_table)
3030

pandas/io/tests/test_clipboard.py

+2
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ def check_round_trip_frame(self, data_type, excel=None, sep=None):
7474
def test_round_trip_frame_sep(self):
7575
for dt in self.data_types:
7676
self.check_round_trip_frame(dt, sep=',')
77+
self.check_round_trip_frame(dt, sep='\s+')
78+
self.check_round_trip_frame(dt, sep='|')
7779

7880
def test_round_trip_frame_string(self):
7981
for dt in self.data_types:

pandas/io/tests/test_excel.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -379,8 +379,12 @@ def test_reading_all_sheets(self):
379379
# See PR #9450
380380
basename = 'test_multisheet'
381381
dfs = self.get_exceldf(basename, sheetname=None)
382-
expected_keys = ['Alpha', 'Beta', 'Charlie']
382+
# ensure this is not alphabetical to test order preservation
383+
expected_keys = ['Charlie', 'Alpha', 'Beta']
383384
tm.assert_contains_all(expected_keys, dfs.keys())
385+
# Issue 9930
386+
# Ensure sheet order is preserved
387+
tm.assert_equal(expected_keys, list(dfs.keys()))
384388

385389
def test_reading_multiple_specific_sheets(self):
386390
# Test reading specific sheetnames by specifying a mixed list

pandas/tools/merge.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -816,8 +816,8 @@ def _validate_specification(self):
816816
self.left_on = self.right_on = common_cols
817817
elif self.on is not None:
818818
if self.left_on is not None or self.right_on is not None:
819-
raise MergeError('Can only pass on OR left_on and '
820-
'right_on')
819+
raise MergeError('Can only pass argument "on" OR "left_on" '
820+
'and "right_on", not a combination of both.')
821821
self.left_on = self.right_on = self.on
822822
elif self.left_on is not None:
823823
n = len(self.left_on)

0 commit comments

Comments
 (0)