|
12 | 12 |
|
13 | 13 | from numpy import nan
|
14 | 14 | import numpy as np
|
| 15 | +from pandas.io.common import DtypeWarning |
15 | 16 |
|
16 | 17 | from pandas import DataFrame, Series, Index, MultiIndex, DatetimeIndex
|
17 | 18 | from pandas.compat import(
|
@@ -1889,6 +1890,24 @@ def test_usecols_index_col_conflict(self):
|
1889 | 1890 | df = pd.read_csv(StringIO(data), usecols=['Price', 'P2', 'P3'], parse_dates=True, index_col=['Price', 'P2'])
|
1890 | 1891 | tm.assert_frame_equal(expected, df)
|
1891 | 1892 |
|
def test_chunks_have_consistent_numerical_type(self):
    """Check that a column mixing integer and float text parses as float64.

    The input is a single column ``a`` holding 499999 integer rows, then
    "1.0" and "2.0", then the same 499999 integers again. Reading it is
    expected to emit no warning and to yield a float64 column.
    """
    integers = [str(i) for i in range(499999)]
    data = "a\n" + "\n".join(integers + ["1.0", "2.0"] + integers)

    # No warning is expected while reading this input.
    with tm.assert_produces_warning(False):
        df = self.read_csv(StringIO(data))
    # The first value comes from the integer-only part of the input, so a
    # float64 scalar here shows that the integers were coerced.
    self.assertTrue(isinstance(df.a[0], np.float64))
    # np.float64 rather than the np.float alias, which newer NumPy removed.
    self.assertEqual(df.a.dtype, np.float64)
| 1901 | + |
def test_warn_if_chunks_have_mismatched_type(self):
    """Check that integer rows mixed with non-numeric text parse as object.

    The input is a single column ``a`` holding 499999 integer rows, then
    'a' and 'b', then the same 499999 integers again. In this shared
    version of the test, reading it is expected to emit no warning.
    """
    # See test in TestCParserLowMemory.
    integers = [str(i) for i in range(499999)]
    data = "a\n" + "\n".join(integers + ['a', 'b'] + integers)

    with tm.assert_produces_warning(False):
        df = self.read_csv(StringIO(data))
    # np.object_ rather than the np.object alias, which newer NumPy removed.
    self.assertEqual(df.a.dtype, np.object_)
| 1910 | + |
1892 | 1911 |
|
1893 | 1912 | class TestPythonParser(ParserTests, unittest.TestCase):
|
1894 | 1913 | def test_negative_skipfooter_raises(self):
|
@@ -2352,7 +2371,6 @@ def test_usecols_dtypes(self):
|
2352 | 2371 | self.assertTrue((result.dtypes == [object, np.int, np.float]).all())
|
2353 | 2372 | self.assertTrue((result2.dtypes == [object, np.float]).all())
|
2354 | 2373 |
|
2355 |
| - |
2356 | 2374 | def test_usecols_implicit_index_col(self):
|
2357 | 2375 | # #2654
|
2358 | 2376 | data = 'a,b,c\n4,apple,bat,5.7\n8,orange,cow,10'
|
@@ -2579,16 +2597,22 @@ def test_tokenize_CR_with_quoting(self):
|
2579 | 2597 |
|
def test_raise_on_no_columns(self):
    """Check that read_csv raises ValueError for newline-only input."""
    newline_only_inputs = (
        "\n",      # single newline
        "\n\n\n",  # more than a single newline
    )
    for text in newline_only_inputs:
        self.assertRaises(ValueError, self.read_csv, StringIO(text))
2588 | 2606 |
|
def test_warn_if_chunks_have_mismatched_type(self):
    """Check that mixed integer/text input warns and parses as object.

    The input is a single column ``a`` holding 499999 integer rows, then
    'a' and 'b', then the same 499999 integers again. Reading it is
    expected to emit a DtypeWarning and to yield an object column.
    """
    # Issue #3866 If chunks are different types and can't
    # be coerced using numerical types, then issue warning.
    integers = [str(i) for i in range(499999)]
    data = "a\n" + "\n".join(integers + ['a', 'b'] + integers)

    with tm.assert_produces_warning(DtypeWarning):
        df = self.read_csv(StringIO(data))
    # np.object_ rather than the np.object alias, which newer NumPy removed.
    self.assertEqual(df.a.dtype, np.object_)
2592 | 2616 |
|
2593 | 2617 |
|
2594 | 2618 | class TestParseSQL(unittest.TestCase):
|
|
0 commit comments