|
9 | 9 | import numpy as np
|
10 | 10 | from pandas import (DataFrame, Series, date_range, Timedelta, Timestamp,
|
11 | 11 | Categorical, compat, concat, option_context)
|
12 |
| -from pandas.compat import u |
| 12 | +from pandas.compat import u, PY2 |
13 | 13 | from pandas import _np_version_under1p14
|
14 | 14 |
|
15 | 15 | from pandas.core.dtypes.dtypes import DatetimeTZDtype, CategoricalDtype
|
|
21 | 21 | import pandas as pd
|
22 | 22 |
|
23 | 23 |
|
@pytest.fixture(params=[str] if compat.text_type is str
                else [str, compat.text_type])
def text_dtype(request):
    """
    Text dtype to cast with.

    Yields ``str`` and, only when it is a different type from ``str``,
    ``compat.text_type`` as well.
    """
    # Listing both types unconditionally would parametrize the same type
    # twice whenever compat.text_type is str, so every test using this
    # fixture would run twice with identical input.
    return request.param
| 27 | + |
| 28 | + |
24 | 29 | class TestDataFrameDataTypes(TestData):
|
25 | 30 |
|
26 | 31 | def test_concat_empty_dataframe_dtypes(self):
|
@@ -351,27 +356,24 @@ def test_select_dtypes_datetime_with_tz(self):
|
351 | 356 | expected = df3.reindex(columns=[])
|
352 | 357 | assert_frame_equal(result, expected)
|
353 | 358 |
|
@pytest.mark.parametrize(
    "dtype", [str, "str", np.string_, "S1",
              "unicode", np.unicode_, "U1"] +
    # A list (not a set) keeps the order of the generated tests stable
    # from run to run; the text type is only added on Python 2.
    ([compat.text_type] if PY2 else []))
@pytest.mark.parametrize("arg", ["include", "exclude"])
def test_select_dtypes_str_raises(self, dtype, arg):
    # Passing any string-like dtype to select_dtypes, through either
    # ``include`` or ``exclude``, must raise a TypeError.
    df = DataFrame({"a": list("abc"),
                    "g": list(u("abc")),
                    "b": list(range(1, 4)),
                    "c": np.arange(3, 6).astype("u1"),
                    "d": np.arange(4.0, 7.0, dtype="float64"),
                    "e": [True, False, True],
                    "f": pd.date_range("now", periods=3).values})
    msg = "string dtypes are not allowed"

    # ``arg`` selects which keyword receives the offending dtype.
    kwargs = {arg: [dtype]}

    with tm.assert_raises_regex(TypeError, msg):
        df.select_dtypes(**kwargs)
375 | 377 |
|
376 | 378 | def test_select_dtypes_bad_arg_raises(self):
|
377 | 379 | df = DataFrame({'a': list('abc'),
|
@@ -502,61 +504,59 @@ def test_astype_with_view(self):
|
502 | 504 | tf = self.frame.astype(np.float64)
|
503 | 505 | casted = tf.astype(np.int64, copy=False) # noqa
|
504 | 506 |
|
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
@pytest.mark.parametrize("val", [np.nan, np.inf])
def test_astype_cast_nan_inf_int(self, val, dtype):
    # see gh-14265
    #
    # A frame holding NaN or inf cannot be cast to an integer dtype;
    # astype has to raise a ValueError instead.
    frame = DataFrame([val])
    msg = "Cannot convert non-finite values \\(NA or inf\\) to integer"

    with tm.assert_raises_regex(ValueError, msg):
        frame.astype(dtype)
516 | 518 |
|
def test_astype_str(self, text_dtype):
    # see gh-9757
    #
    # Casting a frame with datetime-like and numeric columns to a text
    # dtype should stringify each column element by element.
    naive = Series(date_range("2010-01-04", periods=5))
    aware = Series(date_range("3/6/2012 00:00", periods=5,
                              tz="US/Eastern"))
    deltas = Series([Timedelta(x, unit="d") for x in range(5)])
    ints = Series(range(5))
    floats = Series([0.0, 0.2, 0.4, 0.6, 0.8])

    df = DataFrame({"a": naive, "b": aware, "c": deltas,
                    "d": ints, "e": floats})
    result = df.astype(text_dtype)

    # Build the expected strings per column from the scalar reprs.
    expected = DataFrame({
        "a": [text_dtype(Timestamp(x)._date_repr)
              for x in naive._values],
        "b": [text_dtype(Timestamp(x)) for x in aware._values],
        "c": [text_dtype(Timedelta(x)._repr_base(format="all"))
              for x in deltas._values],
        "d": [text_dtype(x) for x in ints._values],
        "e": [text_dtype(x) for x in floats._values],
    })

    assert_frame_equal(result, expected)
| 545 | + |
def test_astype_str_float(self, text_dtype):
    # see gh-11302
    #
    # Casting floats to a text dtype: NaN becomes "nan", and a float
    # with many digits becomes the repr produced by the installed NumPy.
    result = DataFrame([np.nan]).astype(text_dtype)
    expected = DataFrame(["nan"])
    assert_frame_equal(result, expected)

    result = DataFrame([1.12345678901234567890]).astype(text_dtype)

    # NumPy < 1.14 truncates the repr; >= 1.14 preserves it in full.
    if _np_version_under1p14:
        val = "1.12345678901"
    else:
        val = "1.1234567890123457"

    expected = DataFrame([val])
    assert_frame_equal(result, expected)
560 | 560 |
|
561 | 561 | @pytest.mark.parametrize("dtype_class", [dict, Series])
|
562 | 562 | def test_astype_dict_like(self, dtype_class):
|
|
0 commit comments