|
8 | 8 | import re
|
9 | 9 | import unittest
|
10 | 10 | import nose
|
| 11 | +import warnings |
11 | 12 |
|
12 | 13 | from numpy import nan
|
13 | 14 | import numpy as np
|
@@ -2301,7 +2302,6 @@ def test_usecols_dtypes(self):
|
2301 | 2302 | self.assertTrue((result.dtypes == [object, np.int, np.float]).all())
|
2302 | 2303 | self.assertTrue((result2.dtypes == [object, np.float]).all())
|
2303 | 2304 |
|
2304 |
| - |
2305 | 2305 | def test_usecols_implicit_index_col(self):
|
2306 | 2306 | # #2654
|
2307 | 2307 | data = 'a,b,c\n4,apple,bat,5.7\n8,orange,cow,10'
|
@@ -2528,17 +2528,36 @@ def test_tokenize_CR_with_quoting(self):
|
2528 | 2528 |
|
def test_raise_on_no_columns(self):
    # Input made up only of newlines has no header row, so there are no
    # columns to parse; the reader is expected to raise ValueError.
    # Checked first for a single newline, then for several in a row.
    for blank_input in ("\n", "\n\n\n"):
        self.assertRaises(ValueError, self.read_csv, StringIO(blank_input))
|
2541 | 2537 |
|
def test_chunks_have_consistent_numerical_type(self):
    # Issue #3866: if chunks are parsed as different types and *can*
    # be coerced using numerical types, then do so without warning.
    #
    # Two float literals are placed between two long runs of integer
    # literals (presumably long enough that the low-memory reader sees
    # chunks of differing type -- the exact chunk size is not visible
    # from this test).
    integers = [str(i) for i in range(499999)]
    data = "a\n" + "\n".join(integers + ["1.0", "2.0"] + integers)

    with warnings.catch_warnings(record=True) as w:
        # Without resetting the filters, a warning that is ignored by
        # default or was already emitted once from the same location
        # would not be recorded, and the check below would pass
        # vacuously.
        warnings.simplefilter("always")
        df = self.read_csv(StringIO(data), low_memory=True)

    if w:
        self.fail("Unexpected warning raised.")

    # Assert that types were coerced: every value, including those that
    # were written as integers, must come back as a float64.
    self.assertTrue(type(df.a[0]) is np.float64)
    # np.float64 rather than the ``np.float`` alias, which newer NumPy
    # releases no longer provide; the comparison result is the same.
    self.assertEqual(df.a.dtype, np.float64)
| 2550 | + |
def test_warn_if_chunks_have_mismatched_type(self):
    # Issue #3866: if chunks are parsed as different types and can't
    # be coerced using numerical types, then issue a warning.
    #
    # Two non-numeric strings are placed between two long runs of
    # integer literals, so the column cannot be given a numerical type.
    integers = [str(i) for i in range(499999)]
    data = "a\n" + "\n".join(integers + ['a', 'b'] + integers)

    # assert_produces_warning is a context manager: the call that is
    # expected to warn has to run inside the ``with`` block.  Calling it
    # as a bare statement after the read only creates and discards the
    # manager and checks nothing.
    with tm.assert_produces_warning():
        df = self.read_csv(StringIO(data), low_memory=True)

    # np.object_ rather than the ``np.object`` alias, which newer NumPy
    # releases no longer provide; the comparison result is the same.
    self.assertEqual(df.a.dtype, np.object_)
| 2560 | + |
2542 | 2561 |
|
2543 | 2562 | class TestParseSQL(unittest.TestCase):
|
2544 | 2563 |
|
|
0 commit comments