Skip to content

Commit e1d5a27

Browse files
authored
TST: split out some sparse tests (pandas-dev#18968)
1 parent dbec3c9 commit e1d5a27

17 files changed

+302
-240
lines changed

ci/install_travis.sh

+3-3
Original file line number | Diff line number | Diff line change
@@ -178,15 +178,15 @@ if [ "$PIP_BUILD_TEST" ]; then
178178

179179
# build & install testing
180180
echo "[building release]"
181-
bash scripts/build_dist_for_release.sh
181+
time bash scripts/build_dist_for_release.sh || exit 1
182182
conda uninstall -y cython
183-
time pip install dist/*tar.gz --quiet || exit 1
183+
time pip install dist/*tar.gz || exit 1
184184

185185
elif [ "$CONDA_BUILD_TEST" ]; then
186186

187187
# build & install testing
188188
echo "[building conda recipe]"
189-
conda build ./conda.recipe --numpy 1.13 --python 3.5 -q --no-test
189+
time conda build ./conda.recipe --numpy 1.13 --python 3.5 -q --no-test
190190

191191
echo "[installing]"
192192
conda install pandas --use-local

pandas/core/dtypes/dtypes.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ class DatetimeTZDtype(ExtensionDtype):
402402
num = 101
403403
base = np.dtype('M8[ns]')
404404
_metadata = ['unit', 'tz']
405-
_match = re.compile("(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
405+
_match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
406406
_cache = {}
407407

408408
def __new__(cls, unit=None, tz=None):
@@ -514,7 +514,7 @@ class PeriodDtype(ExtensionDtype):
514514
base = np.dtype('O')
515515
num = 102
516516
_metadata = ['freq']
517-
_match = re.compile("(P|p)eriod\[(?P<freq>.+)\]")
517+
_match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]")
518518
_cache = {}
519519

520520
def __new__(cls, freq=None):
@@ -632,7 +632,7 @@ class IntervalDtype(ExtensionDtype):
632632
base = np.dtype('O')
633633
num = 103
634634
_metadata = ['subtype']
635-
_match = re.compile("(I|i)nterval\[(?P<subtype>.+)\]")
635+
_match = re.compile(r"(I|i)nterval\[(?P<subtype>.+)\]")
636636
_cache = {}
637637

638638
def __new__(cls, subtype=None):

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2635,7 +2635,7 @@ def insert(self, loc, column, value, allow_duplicates=False):
26352635
allow_duplicates=allow_duplicates)
26362636

26372637
def assign(self, **kwargs):
2638-
"""
2638+
r"""
26392639
Assign new columns to a DataFrame, returning a new object
26402640
(a copy) with all the original columns in addition to the new ones.
26412641

pandas/core/strings.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ def str_endswith(arr, pat, na=np.nan):
306306

307307

308308
def str_replace(arr, pat, repl, n=-1, case=None, flags=0):
309-
"""
309+
r"""
310310
Replace occurrences of pattern/regex in the Series/Index with
311311
some other string. Equivalent to :meth:`str.replace` or
312312
:func:`re.sub`.
@@ -598,7 +598,7 @@ def _str_extract_frame(arr, pat, flags=0):
598598

599599

600600
def str_extract(arr, pat, flags=0, expand=None):
601-
"""
601+
r"""
602602
For each subject string in the Series, extract groups from the
603603
first match of regular expression pat.
604604
@@ -635,23 +635,23 @@ def str_extract(arr, pat, flags=0, expand=None):
635635
Non-matches will be NaN.
636636
637637
>>> s = Series(['a1', 'b2', 'c3'])
638-
>>> s.str.extract('([ab])(\d)')
638+
>>> s.str.extract(r'([ab])(\d)')
639639
0 1
640640
0 a 1
641641
1 b 2
642642
2 NaN NaN
643643
644644
A pattern may contain optional groups.
645645
646-
>>> s.str.extract('([ab])?(\d)')
646+
>>> s.str.extract(r'([ab])?(\d)')
647647
0 1
648648
0 a 1
649649
1 b 2
650650
2 NaN 3
651651
652652
Named groups will become column names in the result.
653653
654-
>>> s.str.extract('(?P<letter>[ab])(?P<digit>\d)')
654+
>>> s.str.extract(r'(?P<letter>[ab])(?P<digit>\d)')
655655
letter digit
656656
0 a 1
657657
1 b 2
@@ -660,15 +660,15 @@ def str_extract(arr, pat, flags=0, expand=None):
660660
A pattern with one group will return a DataFrame with one column
661661
if expand=True.
662662
663-
>>> s.str.extract('[ab](\d)', expand=True)
663+
>>> s.str.extract(r'[ab](\d)', expand=True)
664664
0
665665
0 1
666666
1 2
667667
2 NaN
668668
669669
A pattern with one group will return a Series if expand=False.
670670
671-
>>> s.str.extract('[ab](\d)', expand=False)
671+
>>> s.str.extract(r'[ab](\d)', expand=False)
672672
0 1
673673
1 2
674674
2 NaN
@@ -694,7 +694,7 @@ def str_extract(arr, pat, flags=0, expand=None):
694694

695695

696696
def str_extractall(arr, pat, flags=0):
697-
"""
697+
r"""
698698
For each subject string in the Series, extract groups from all
699699
matches of regular expression pat. When each subject string in the
700700
Series has exactly one match, extractall(pat).xs(0, level='match')
@@ -728,7 +728,7 @@ def str_extractall(arr, pat, flags=0):
728728
Indices with no matches will not appear in the result.
729729
730730
>>> s = Series(["a1a2", "b1", "c1"], index=["A", "B", "C"])
731-
>>> s.str.extractall("[ab](\d)")
731+
>>> s.str.extractall(r"[ab](\d)")
732732
0
733733
match
734734
A 0 1
@@ -737,7 +737,7 @@ def str_extractall(arr, pat, flags=0):
737737
738738
Capture group names are used for column names of the result.
739739
740-
>>> s.str.extractall("[ab](?P<digit>\d)")
740+
>>> s.str.extractall(r"[ab](?P<digit>\d)")
741741
digit
742742
match
743743
A 0 1
@@ -746,7 +746,7 @@ def str_extractall(arr, pat, flags=0):
746746
747747
A pattern with two groups will return a DataFrame with two columns.
748748
749-
>>> s.str.extractall("(?P<letter>[ab])(?P<digit>\d)")
749+
>>> s.str.extractall(r"(?P<letter>[ab])(?P<digit>\d)")
750750
letter digit
751751
match
752752
A 0 a 1
@@ -755,7 +755,7 @@ def str_extractall(arr, pat, flags=0):
755755
756756
Optional groups that do not match are NaN in the result.
757757
758-
>>> s.str.extractall("(?P<letter>[ab])?(?P<digit>\d)")
758+
>>> s.str.extractall(r"(?P<letter>[ab])?(?P<digit>\d)")
759759
letter digit
760760
match
761761
A 0 a 1

pandas/io/clipboards.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from pandas.compat import StringIO, PY2
44

55

6-
def read_clipboard(sep='\s+', **kwargs): # pragma: no cover
6+
def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover
77
r"""
88
Read text from clipboard and pass to read_table. See read_table for the
99
full argument list
@@ -55,10 +55,10 @@ def read_clipboard(sep='\s+', **kwargs): # pragma: no cover
5555

5656
counts = {x.lstrip().count('\t') for x in lines}
5757
if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
58-
sep = '\t'
58+
sep = r'\t'
5959

6060
if sep is None and kwargs.get('delim_whitespace') is None:
61-
sep = '\s+'
61+
sep = r'\s+'
6262

6363
return read_table(StringIO(text), sep=sep, **kwargs)
6464

@@ -99,7 +99,7 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover
9999
if excel:
100100
try:
101101
if sep is None:
102-
sep = '\t'
102+
sep = r'\t'
103103
buf = StringIO()
104104
# clipboard_set (pyperclip) expects unicode
105105
obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs)

pandas/io/formats/format.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1002,7 +1002,7 @@ def get_col_type(dtype):
10021002
buf.write('\\end{longtable}\n')
10031003

10041004
def _format_multicolumn(self, row, ilevels):
1005-
"""
1005+
r"""
10061006
Combine columns belonging to a group to a single multicolumn entry
10071007
according to self.multicolumn_format
10081008
@@ -1040,7 +1040,7 @@ def append_col():
10401040
return row2
10411041

10421042
def _format_multirow(self, row, ilevels, i, rows):
1043-
"""
1043+
r"""
10441044
Check following rows, whether row should be a multirow
10451045
10461046
e.g.: becomes:
@@ -1071,7 +1071,7 @@ def _print_cline(self, buf, i, icol):
10711071
"""
10721072
for cl in self.clinebuf:
10731073
if cl[0] == i:
1074-
buf.write('\cline{{{cl:d}-{icol:d}}}\n'
1074+
buf.write('\\cline{{{cl:d}-{icol:d}}}\n'
10751075
.format(cl=cl[1], icol=icol))
10761076
# remove entries that have been written to buffer
10771077
self.clinebuf = [x for x in self.clinebuf if x[0] != i]

pandas/io/parsers.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
# so we need to remove it if we see it.
5252
_BOM = u('\ufeff')
5353

54-
_parser_params = """Also supports optionally iterating or breaking of the file
54+
_parser_params = r"""Also supports optionally iterating or breaking of the file
5555
into chunks.
5656
5757
Additional help can be found in the `online docs for IO Tools
@@ -842,19 +842,19 @@ def _clean_options(self, options, engine):
842842
" sep=None with delim_whitespace=False"
843843
engine = 'python'
844844
elif sep is not None and len(sep) > 1:
845-
if engine == 'c' and sep == '\s+':
845+
if engine == 'c' and sep == r'\s+':
846846
result['delim_whitespace'] = True
847847
del result['delimiter']
848848
elif engine not in ('python', 'python-fwf'):
849849
# wait until regex engine integrated
850850
fallback_reason = "the 'c' engine does not support"\
851851
" regex separators (separators > 1 char and"\
852-
" different from '\s+' are"\
852+
r" different from '\s+' are"\
853853
" interpreted as regex)"
854854
engine = 'python'
855855
elif delim_whitespace:
856856
if 'python' in engine:
857-
result['delimiter'] = '\s+'
857+
result['delimiter'] = r'\s+'
858858
elif sep is not None:
859859
encodeable = True
860860
try:

pandas/io/pytables.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1792,7 +1792,7 @@ def create_for_block(
17921792
# name values_0
17931793
try:
17941794
if version[0] == 0 and version[1] <= 10 and version[2] == 0:
1795-
m = re.search("values_block_(\d+)", name)
1795+
m = re.search(r"values_block_(\d+)", name)
17961796
if m:
17971797
name = "values_%s" % m.groups()[0]
17981798
except:
@@ -4297,7 +4297,7 @@ class AppendableMultiFrameTable(AppendableFrameTable):
42974297
table_type = u('appendable_multiframe')
42984298
obj_type = DataFrame
42994299
ndim = 2
4300-
_re_levels = re.compile("^level_\d+$")
4300+
_re_levels = re.compile(r"^level_\d+$")
43014301

43024302
@property
43034303
def table_type_short(self):

pandas/io/sql.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1306,7 +1306,7 @@ def _create_table_setup(self):
13061306
column_names_and_types = \
13071307
self._get_column_names_and_types(self._sql_type_name)
13081308

1309-
pat = re.compile('\s+')
1309+
pat = re.compile(r'\s+')
13101310
column_names = [col_name for col_name, _, _ in column_names_and_types]
13111311
if any(map(pat.search, column_names)):
13121312
warnings.warn(_SAFE_NAMES_WARNING, stacklevel=6)

pandas/tests/indexes/interval/test_interval.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -599,7 +599,7 @@ def test_monotonic(self, closed):
599599
assert idx.is_monotonic_decreasing
600600
assert idx._is_strictly_monotonic_decreasing
601601

602-
@pytest.mark.xfail(reason='not a valid repr as we use interval notation')
602+
@pytest.mark.skip(reason='not a valid repr as we use interval notation')
603603
def test_repr(self):
604604
i = IntervalIndex.from_tuples([(0, 1), (1, 2)], closed='right')
605605
expected = ("IntervalIndex(left=[0, 1],"
@@ -619,11 +619,11 @@ def test_repr(self):
619619
"\n dtype='interval[datetime64[ns]]')")
620620
assert repr(i) == expected
621621

622-
@pytest.mark.xfail(reason='not a valid repr as we use interval notation')
622+
@pytest.mark.skip(reason='not a valid repr as we use interval notation')
623623
def test_repr_max_seq_item_setting(self):
624624
super(TestIntervalIndex, self).test_repr_max_seq_item_setting()
625625

626-
@pytest.mark.xfail(reason='not a valid repr as we use interval notation')
626+
@pytest.mark.skip(reason='not a valid repr as we use interval notation')
627627
def test_repr_roundtrip(self):
628628
super(TestIntervalIndex, self).test_repr_roundtrip()
629629

pandas/tests/sparse/frame/__init__.py

Whitespace-only changes.

NOTE(review): the diff below adds a new 40-line test file, but its filename was
lost during page extraction — the contents are clearly not part of
`__init__.py` (presumably a new test module under `pandas/tests/sparse/frame/`;
verify against the original commit).
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,40 @@
1+
import pytest
2+
import numpy as np
3+
from pandas import SparseDataFrame, DataFrame, SparseSeries
4+
from pandas.util import testing as tm
5+
6+
7+
@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)')
def test_quantile():
    # GH 17386: a single-level quantile on a SparseDataFrame should match
    # the quantile of the equivalent dense DataFrame.
    values = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    level = 0.1

    result = SparseDataFrame(values).quantile(level)

    # Expected values come from the dense computation, checked both as a
    # plain Series and re-wrapped as a SparseSeries.
    dense_expected = DataFrame(values).quantile(level)
    sparse_expected = SparseSeries(dense_expected)

    tm.assert_series_equal(result, dense_expected)
    tm.assert_sp_series_equal(result, sparse_expected)
23+
24+
25+
@pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)')
def test_quantile_multi():
    # GH 17386: a list of quantile levels on a SparseDataFrame should match
    # the quantile of the equivalent dense DataFrame.
    values = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    levels = [0.1, 0.5]

    result = SparseDataFrame(values).quantile(levels)

    # Expected values come from the dense computation, checked both as a
    # plain DataFrame and re-wrapped as a SparseDataFrame.
    dense_expected = DataFrame(values).quantile(levels)
    sparse_expected = SparseDataFrame(dense_expected)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)

0 commit comments

Comments
 (0)