TST: split out some sparse tests #18968

Merged 7 commits on Dec 28, 2017
6 changes: 3 additions & 3 deletions ci/install_travis.sh
@@ -178,15 +178,15 @@ if [ "$PIP_BUILD_TEST" ]; then

# build & install testing
echo "[building release]"
-bash scripts/build_dist_for_release.sh
+time bash scripts/build_dist_for_release.sh || exit 1
conda uninstall -y cython
-time pip install dist/*tar.gz --quiet || exit 1
+time pip install dist/*tar.gz || exit 1

elif [ "$CONDA_BUILD_TEST" ]; then

# build & install testing
echo "[building conda recipe]"
-conda build ./conda.recipe --numpy 1.13 --python 3.5 -q --no-test
+time conda build ./conda.recipe --numpy 1.13 --python 3.5 -q --no-test

echo "[installing]"
conda install pandas --use-local
6 changes: 3 additions & 3 deletions pandas/core/dtypes/dtypes.py
@@ -402,7 +402,7 @@ class DatetimeTZDtype(ExtensionDtype):
num = 101
base = np.dtype('M8[ns]')
_metadata = ['unit', 'tz']
_match = re.compile("(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
_match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
_cache = {}

def __new__(cls, unit=None, tz=None):
@@ -514,7 +514,7 @@ class PeriodDtype(ExtensionDtype):
base = np.dtype('O')
num = 102
_metadata = ['freq']
_match = re.compile("(P|p)eriod\[(?P<freq>.+)\]")
_match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]")
_cache = {}

def __new__(cls, freq=None):
@@ -632,7 +632,7 @@ class IntervalDtype(ExtensionDtype):
base = np.dtype('O')
num = 103
_metadata = ['subtype']
_match = re.compile("(I|i)nterval\[(?P<subtype>.+)\]")
_match = re.compile(r"(I|i)nterval\[(?P<subtype>.+)\]")
_cache = {}

def __new__(cls, subtype=None):
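A note on why these patterns gain an r prefix: \[ and \d are not recognized Python string escapes, so the old and new literals are identical strings, but Python 3.6+ emits a DeprecationWarning for invalid escape sequences in ordinary literals. A minimal sketch of the equivalence, reusing the PeriodDtype pattern above (plain re, no pandas internals):

import re

# Identical strings -- \[ is not a recognized escape -- but only the raw
# string avoids the Python 3.6+ DeprecationWarning at compile time.
assert "(P|p)eriod\\[(?P<freq>.+)\\]" == r"(P|p)eriod\[(?P<freq>.+)\]"

pat = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]")
assert pat.match("period[D]").group("freq") == "D"
assert pat.match("Period[3M]").group("freq") == "3M"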
2 changes: 1 addition & 1 deletion pandas/core/frame.py
@@ -2635,7 +2635,7 @@ def insert(self, loc, column, value, allow_duplicates=False):
allow_duplicates=allow_duplicates)

def assign(self, **kwargs):
"""
r"""
Assign new columns to a DataFrame, returning a new object
(a copy) with all the original columns in addition to the new ones.

24 changes: 12 additions & 12 deletions pandas/core/strings.py
@@ -306,7 +306,7 @@ def str_endswith(arr, pat, na=np.nan):


def str_replace(arr, pat, repl, n=-1, case=None, flags=0):
"""
r"""
Replace occurrences of pattern/regex in the Series/Index with
some other string. Equivalent to :meth:`str.replace` or
:func:`re.sub`.
@@ -598,7 +598,7 @@ def _str_extract_frame(arr, pat, flags=0):


def str_extract(arr, pat, flags=0, expand=None):
"""
r"""
For each subject string in the Series, extract groups from the
first match of regular expression pat.

@@ -635,23 +635,23 @@ def str_extract(arr, pat, flags=0, expand=None):
Non-matches will be NaN.

>>> s = Series(['a1', 'b2', 'c3'])
>>> s.str.extract('([ab])(\d)')
>>> s.str.extract(r'([ab])(\d)')
0 1
0 a 1
1 b 2
2 NaN NaN

A pattern may contain optional groups.

>>> s.str.extract('([ab])?(\d)')
>>> s.str.extract(r'([ab])?(\d)')
0 1
0 a 1
1 b 2
2 NaN 3

Named groups will become column names in the result.

>>> s.str.extract('(?P<letter>[ab])(?P<digit>\d)')
>>> s.str.extract(r'(?P<letter>[ab])(?P<digit>\d)')
letter digit
0 a 1
1 b 2
@@ -660,15 +660,15 @@ def str_extract(arr, pat, flags=0, expand=None):
A pattern with one group will return a DataFrame with one column
if expand=True.

>>> s.str.extract('[ab](\d)', expand=True)
>>> s.str.extract(r'[ab](\d)', expand=True)
0
0 1
1 2
2 NaN

A pattern with one group will return a Series if expand=False.

>>> s.str.extract('[ab](\d)', expand=False)
>>> s.str.extract(r'[ab](\d)', expand=False)
0 1
1 2
2 NaN
@@ -694,7 +694,7 @@ def str_extract(arr, pat, flags=0, expand=None):


def str_extractall(arr, pat, flags=0):
"""
r"""
For each subject string in the Series, extract groups from all
matches of regular expression pat. When each subject string in the
Series has exactly one match, extractall(pat).xs(0, level='match')
@@ -728,7 +728,7 @@ def str_extractall(arr, pat, flags=0):
Indices with no matches will not appear in the result.

>>> s = Series(["a1a2", "b1", "c1"], index=["A", "B", "C"])
>>> s.str.extractall("[ab](\d)")
>>> s.str.extractall(r"[ab](\d)")
0
match
A 0 1
@@ -737,7 +737,7 @@ def str_extractall(arr, pat, flags=0):

Capture group names are used for column names of the result.

>>> s.str.extractall("[ab](?P<digit>\d)")
>>> s.str.extractall(r"[ab](?P<digit>\d)")
digit
match
A 0 1
@@ -746,7 +746,7 @@ def str_extractall(arr, pat, flags=0):

A pattern with two groups will return a DataFrame with two columns.

>>> s.str.extractall("(?P<letter>[ab])(?P<digit>\d)")
>>> s.str.extractall(r"(?P<letter>[ab])(?P<digit>\d)")
letter digit
match
A 0 a 1
@@ -755,7 +755,7 @@ def str_extractall(arr, pat, flags=0):

Optional groups that do not match are NaN in the result.

>>> s.str.extractall("(?P<letter>[ab])?(?P<digit>\d)")
>>> s.str.extractall(r"(?P<letter>[ab])?(?P<digit>\d)")
letter digit
match
A 0 a 1
8 changes: 4 additions & 4 deletions pandas/io/clipboards.py
@@ -3,7 +3,7 @@
from pandas.compat import StringIO, PY2


-def read_clipboard(sep='\s+', **kwargs): # pragma: no cover
+def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover
r"""
Read text from clipboard and pass to read_table. See read_table for the
full argument list
@@ -55,10 +55,10 @@ def read_clipboard(sep='\s+', **kwargs): # pragma: no cover

counts = {x.lstrip().count('\t') for x in lines}
if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
-sep = '\t'
+sep = r'\t'

if sep is None and kwargs.get('delim_whitespace') is None:
-sep = '\s+'
+sep = r'\s+'

return read_table(StringIO(text), sep=sep, **kwargs)

@@ -99,7 +99,7 @@ def to_clipboard(obj, excel=None, sep=None, **kwargs): # pragma: no cover
if excel:
try:
if sep is None:
-sep = '\t'
+sep = r'\t'
buf = StringIO()
# clipboard_set (pyperclip) expects unicode
obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs)
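One subtlety in the clipboards hunks: unlike \s or \d, \t is a recognized string escape, so here the raw-string spelling actually changes the value -- '\t' is a single tab character, while r'\t' is two characters (a backslash, then t) that the parser layer treats as a regex matching a tab. A quick check of the difference:

# '\t' is one character (a tab); r'\t' is two (backslash + 't').
assert '\t' != r'\t'
assert len('\t') == 1 and len(r'\t') == 2

# By contrast, \s is not a recognized escape, so these are the same string:
assert '\\s+' == r'\s+'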
6 changes: 3 additions & 3 deletions pandas/io/formats/format.py
@@ -1002,7 +1002,7 @@ def get_col_type(dtype):
buf.write('\\end{longtable}\n')

def _format_multicolumn(self, row, ilevels):
"""
r"""
Combine columns belonging to a group to a single multicolumn entry
according to self.multicolumn_format

@@ -1040,7 +1040,7 @@ def append_col():
return row2

def _format_multirow(self, row, ilevels, i, rows):
"""
r"""
Check following rows, whether row should be a multirow

e.g.: becomes:
@@ -1071,7 +1071,7 @@ def _print_cline(self, buf, i, icol):
"""
for cl in self.clinebuf:
if cl[0] == i:
-buf.write('\cline{{{cl:d}-{icol:d}}}\n'
+buf.write('\\cline{{{cl:d}-{icol:d}}}\n'
.format(cl=cl[1], icol=icol))
# remove entries that have been written to buffer
self.clinebuf = [x for x in self.clinebuf if x[0] != i]
8 changes: 4 additions & 4 deletions pandas/io/parsers.py
@@ -51,7 +51,7 @@
# so we need to remove it if we see it.
_BOM = u('\ufeff')

_parser_params = """Also supports optionally iterating or breaking of the file
_parser_params = r"""Also supports optionally iterating or breaking of the file
into chunks.

Additional help can be found in the `online docs for IO Tools
@@ -842,19 +842,19 @@ def _clean_options(self, options, engine):
" sep=None with delim_whitespace=False"
engine = 'python'
elif sep is not None and len(sep) > 1:
-if engine == 'c' and sep == '\s+':
+if engine == 'c' and sep == r'\s+':
result['delim_whitespace'] = True
del result['delimiter']
elif engine not in ('python', 'python-fwf'):
# wait until regex engine integrated
fallback_reason = "the 'c' engine does not support"\
" regex separators (separators > 1 char and"\
" different from '\s+' are"\
r" different from '\s+' are"\
" interpreted as regex)"
engine = 'python'
elif delim_whitespace:
if 'python' in engine:
-result['delimiter'] = '\s+'
+result['delimiter'] = r'\s+'
elif sep is not None:
encodeable = True
try:
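For context on the first parsers.py hunk: sep == r'\s+' is special-cased so the fast C engine handles it via delim_whitespace=True instead of falling back to the python engine, which otherwise services multi-character regex separators. A small sketch of that equivalence (hypothetical data; pandas of this era):

import pandas as pd
from pandas.compat import StringIO  # io.StringIO on Python 3

data = "a   b\n1   2\n3   4\n"

# Both calls take the same C-engine whitespace-delimited fast path.
df1 = pd.read_csv(StringIO(data), sep=r'\s+')
df2 = pd.read_csv(StringIO(data), delim_whitespace=True)
assert df1.equals(df2)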
4 changes: 2 additions & 2 deletions pandas/io/pytables.py
@@ -1792,7 +1792,7 @@ def create_for_block(
# name values_0
try:
if version[0] == 0 and version[1] <= 10 and version[2] == 0:
m = re.search("values_block_(\d+)", name)
m = re.search(r"values_block_(\d+)", name)
if m:
name = "values_%s" % m.groups()[0]
except:
@@ -4297,7 +4297,7 @@ class AppendableMultiFrameTable(AppendableFrameTable):
table_type = u('appendable_multiframe')
obj_type = DataFrame
ndim = 2
_re_levels = re.compile("^level_\d+$")
_re_levels = re.compile(r"^level_\d+$")

@property
def table_type_short(self):
2 changes: 1 addition & 1 deletion pandas/io/sql.py
@@ -1306,7 +1306,7 @@ def _create_table_setup(self):
column_names_and_types = \
self._get_column_names_and_types(self._sql_type_name)

-pat = re.compile('\s+')
+pat = re.compile(r'\s+')
column_names = [col_name for col_name, _, _ in column_names_and_types]
if any(map(pat.search, column_names)):
warnings.warn(_SAFE_NAMES_WARNING, stacklevel=6)
6 changes: 3 additions & 3 deletions pandas/tests/indexes/interval/test_interval.py
@@ -599,7 +599,7 @@ def test_monotonic(self, closed):
assert idx.is_monotonic_decreasing
assert idx._is_strictly_monotonic_decreasing

-@pytest.mark.xfail(reason='not a valid repr as we use interval notation')
+@pytest.mark.skip(reason='not a valid repr as we use interval notation')
def test_repr(self):
i = IntervalIndex.from_tuples([(0, 1), (1, 2)], closed='right')
expected = ("IntervalIndex(left=[0, 1],"
@@ -619,11 +619,11 @@ def test_repr(self):
"\n dtype='interval[datetime64[ns]]')")
assert repr(i) == expected

-@pytest.mark.xfail(reason='not a valid repr as we use interval notation')
+@pytest.mark.skip(reason='not a valid repr as we use interval notation')
def test_repr_max_seq_item_setting(self):
super(TestIntervalIndex, self).test_repr_max_seq_item_setting()

-@pytest.mark.xfail(reason='not a valid repr as we use interval notation')
+@pytest.mark.skip(reason='not a valid repr as we use interval notation')
def test_repr_roundtrip(self):
super(TestIntervalIndex, self).test_repr_roundtrip()

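On the xfail -> skip changes above: pytest.mark.xfail still runs the test body (reporting an unexpected pass as XPASS), while pytest.mark.skip never executes it, which suits reprs that are permanently different by design. A minimal illustration (hypothetical test names and reasons):

import pytest

@pytest.mark.skip(reason='never valid here')
def test_skipped():
    assert False  # never executed; reported as 's' (skipped)

@pytest.mark.xfail(reason='known failure')
def test_xfailed():
    assert False  # executed; the failure is reported as 'x' (xfailed)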
Empty file.
40 changes: 40 additions & 0 deletions pandas/tests/sparse/frame/test_analytics.py
@@ -0,0 +1,40 @@
import pytest
import numpy as np
from pandas import SparseDataFrame, DataFrame, SparseSeries
from pandas.util import testing as tm


@pytest.mark.xfail(reason='Wrong SparseBlock initialization '
'(GH 17386)')
def test_quantile():
# GH 17386
data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
q = 0.1

sparse_df = SparseDataFrame(data)
result = sparse_df.quantile(q)

dense_df = DataFrame(data)
dense_expected = dense_df.quantile(q)
sparse_expected = SparseSeries(dense_expected)

tm.assert_series_equal(result, dense_expected)
tm.assert_sp_series_equal(result, sparse_expected)


@pytest.mark.xfail(reason='Wrong SparseBlock initialization '
'(GH 17386)')
def test_quantile_multi():
# GH 17386
data = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
q = [0.1, 0.5]

sparse_df = SparseDataFrame(data)
result = sparse_df.quantile(q)

dense_df = DataFrame(data)
dense_expected = dense_df.quantile(q)
sparse_expected = SparseDataFrame(dense_expected)

tm.assert_frame_equal(result, dense_expected)
tm.assert_sp_frame_equal(result, sparse_expected)