|
2 | 2 | import nose
|
3 | 3 | import itertools
|
4 | 4 | import warnings
|
| 5 | +from datetime import datetime |
5 | 6 |
|
6 | 7 | from pandas.compat import range, lrange, lzip, StringIO, lmap, map
|
| 8 | +from pandas.tslib import NaT |
7 | 9 | from numpy import nan
|
8 | 10 | from numpy.random import randn
|
9 | 11 | import numpy as np
|
|
14 | 16 | from pandas.core.api import (DataFrame, Index, Series, Panel, isnull,
|
15 | 17 | MultiIndex, Float64Index, Timestamp)
|
16 | 18 | from pandas.util.testing import (assert_almost_equal, assert_series_equal,
|
17 |
| - assert_frame_equal, assert_panel_equal) |
| 19 | + assert_frame_equal, assert_panel_equal, |
| 20 | + assert_attr_equal) |
18 | 21 | from pandas import concat
|
19 | 22 |
|
20 | 23 | import pandas.util.testing as tm
|
@@ -3816,6 +3819,139 @@ def test_float_index_non_scalar_assignment(self):
|
3816 | 3819 | tm.assert_frame_equal(df,df2)
|
3817 | 3820 |
|
3818 | 3821 |
|
class TestSeriesNoneCoercion(tm.TestCase):
    """Check how assigning ``None`` into a Series is coerced per dtype.

    Each entry of ``EXPECTED_RESULTS`` is a ``(start_data, expected_result)``
    pair: the values a Series is built from, and the values it is expected
    to hold once its first element has been overwritten with ``None``.
    """

    EXPECTED_RESULTS = [
        # For numeric series, we should coerce to NaN.
        ([1, 2, 3], [np.nan, 2, 3]),
        ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]),

        # For datetime series, we should coerce to NaT.
        ([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
         [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]),

        # For objects, we should preserve the None value.
        (["foo", "bar", "baz"], [None, "bar", "baz"]),
    ]

    def _check_coercion(self, assign_none):
        """Apply ``assign_none`` to every start Series and verify the result.

        Parameters
        ----------
        assign_none : callable
            Receives a freshly built Series and must overwrite its first
            element with ``None`` in place, using the indexing style under
            test.
        """
        for start_data, expected_result in self.EXPECTED_RESULTS:
            start_series = Series(start_data)
            assign_none(start_series)

            expected_series = Series(expected_result)

            # Compare dtype and raw values separately so a dtype mismatch
            # is reported on its own.
            # NOTE(review): strict_nan=True presumably keeps None/NaN/NaT
            # from being treated as interchangeable -- confirm against
            # assert_numpy_array_equivalent.
            assert_attr_equal('dtype', start_series, expected_series)
            self.assert_numpy_array_equivalent(
                start_series.values,
                expected_series.values, strict_nan=True)

    def test_coercion_with_setitem(self):
        # Scalar key through Series.__setitem__.
        def assign_none(series):
            series[0] = None

        self._check_coercion(assign_none)

    def test_coercion_with_loc_setitem(self):
        # Scalar label through the .loc indexer.
        def assign_none(series):
            series.loc[0] = None

        self._check_coercion(assign_none)

    def test_coercion_with_setitem_and_series(self):
        # Boolean Series mask through Series.__setitem__; every value in the
        # table is unique, so the mask selects only the first element.
        def assign_none(series):
            series[series == series[0]] = None

        self._check_coercion(assign_none)

    def test_coercion_with_loc_and_series(self):
        # Boolean Series mask through the .loc indexer.
        def assign_none(series):
            series.loc[series == series[0]] = None

        self._check_coercion(assign_none)
| 3883 | + |
| 3884 | + |
class TestDataframeNoneCoercion(tm.TestCase):
    """Check how assigning ``None`` into a DataFrame is coerced per dtype.

    Each entry of ``EXPECTED_SINGLE_ROW_RESULTS`` is a
    ``(start_data, expected_result)`` pair: the values of a single column
    named ``'foo'``, and the values that column is expected to hold once its
    first row has been overwritten with ``None``.
    """

    EXPECTED_SINGLE_ROW_RESULTS = [
        # For numeric series, we should coerce to NaN.
        ([1, 2, 3], [np.nan, 2, 3]),
        ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]),

        # For datetime series, we should coerce to NaT.
        ([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
         [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]),

        # For objects, we should preserve the None value.
        (["foo", "bar", "baz"], [None, "bar", "baz"]),
    ]

    def _assert_column_equal(self, result, expected, column):
        """Assert ``result[column]`` matches ``expected[column]``.

        The dtype and the underlying values are checked separately.
        """
        # NOTE(review): strict_nan=True presumably keeps None/NaN/NaT from
        # being treated as interchangeable -- confirm against
        # assert_numpy_array_equivalent.
        assert_attr_equal('dtype', result[column], expected[column])
        self.assert_numpy_array_equivalent(
            result[column].values,
            expected[column].values, strict_nan=True)

    def _check_single_column(self, assign_none):
        """Apply ``assign_none`` to every start frame and verify column 'foo'.

        Parameters
        ----------
        assign_none : callable
            Receives a freshly built one-column DataFrame and must overwrite
            the first row of ``'foo'`` with ``None`` in place, using the
            indexing style under test.
        """
        for start_data, expected_result in self.EXPECTED_SINGLE_ROW_RESULTS:
            start_dataframe = DataFrame({'foo': start_data})
            assign_none(start_dataframe)

            expected_dataframe = DataFrame({'foo': expected_result})

            self._assert_column_equal(
                start_dataframe, expected_dataframe, 'foo')

    def test_coercion_with_loc(self):
        # Row label plus a list of column labels through .loc.
        def assign_none(frame):
            frame.loc[0, ['foo']] = None

        self._check_single_column(assign_none)

    def test_coercion_with_setitem_and_dataframe(self):
        # Boolean row mask through DataFrame.__setitem__; every value in the
        # table is unique, so the mask selects only the first row.
        def assign_none(frame):
            frame[frame['foo'] == frame['foo'][0]] = None

        self._check_single_column(assign_none)

    def test_none_coercion_loc_and_dataframe(self):
        # Boolean row mask through the .loc indexer.
        def assign_none(frame):
            frame.loc[frame['foo'] == frame['foo'][0]] = None

        self._check_single_column(assign_none)

    def test_none_coercion_mixed_dtypes(self):
        # Overwrite a whole row that spans int, float, datetime and object
        # columns; each column is expected to pick its own missing marker.
        start_dataframe = DataFrame({
            'a': [1, 2, 3],
            'b': [1.0, 2.0, 3.0],
            'c': [datetime(2000, 1, 1), datetime(2000, 1, 2),
                  datetime(2000, 1, 3)],
            'd': ['a', 'b', 'c']})
        start_dataframe.iloc[0] = None

        expected_dataframe = DataFrame({
            'a': [np.nan, 2, 3],
            'b': [np.nan, 2.0, 3.0],
            'c': [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)],
            'd': [None, 'b', 'c']})

        for column in expected_dataframe.columns:
            self._assert_column_equal(
                start_dataframe, expected_dataframe, column)
| 3954 | + |
3819 | 3955 |
|
3820 | 3956 | if __name__ == '__main__':
|
3821 | 3957 | nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
|
|
0 commit comments