|
4 | 4 |
|
5 | 5 | import numpy as np
|
6 | 6 |
|
7 |
| -from pandas import (date_range, period_range, |
8 |
| - Series, Index, DatetimeIndex, |
9 |
| - TimedeltaIndex, PeriodIndex) |
| 7 | +from pandas import (DatetimeIndex, Float64Index, Index, Int64Index, |
| 8 | + NaT, Period, PeriodIndex, Series, Timedelta, |
| 9 | + TimedeltaIndex, date_range, period_range, |
| 10 | + timedelta_range) |
10 | 11 |
|
11 | 12 | import pandas.util.testing as tm
|
12 | 13 |
|
@@ -849,3 +850,203 @@ def test_fillna_timedelta(self):
|
849 | 850 | exp = pd.Index(
|
850 | 851 | [pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object)
|
851 | 852 | self.assert_index_equal(idx.fillna('x'), exp)
|
| 853 | + |
| 854 | + |
class TestAstype(tm.TestCase):
    """Exercise ``astype`` on DatetimeIndex, TimedeltaIndex and PeriodIndex.

    Most cases build a four-element index whose last three entries are
    missing values spelled three different ways (``'NaT'``, ``NaT`` and
    ``np.nan``), then check either the converted result or that the
    conversion raises ``ValueError``.
    """

    # Minimum int64 value; the integer each missing entry is expected to
    # become after ``astype(int)`` in the tests below.
    _NAT_AS_INT = -9223372036854775808

    def test_DatetimeIndex_astype(self):
        """DatetimeIndex converts to object and to int64."""
        # GH 13149, GH 13209
        idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.nan])

        result = idx.astype(object)
        expected = Index([pd.Timestamp('2016-05-16')] + [NaT] * 3,
                         dtype=object)
        tm.assert_index_equal(result, expected)

        result = idx.astype(int)
        expected = Int64Index([1463356800000000000] +
                              [self._NAT_AS_INT] * 3, dtype=np.int64)
        tm.assert_index_equal(result, expected)

    def test_DatetimeIndex_astype_str(self):
        """DatetimeIndex converts to str, with missing entries as 'NaT'."""
        # GH 13149, GH 13209
        # Also: Previously, Python2 returned a unicode representation u'NaT',
        # instead of a string, due to a default parameter na_rep=u('NaT') in
        # DatetimeIndex._format_native_types(). Consequently, 'result' had
        # a mixed inferred type and failed tm.assert_index_equal().

        idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.nan])
        result = idx.astype(str)
        expected = Index(['2016-05-16', 'NaT', 'NaT', 'NaT'], dtype=object)
        tm.assert_index_equal(result, expected)

    def test_DatetimeIndex_astype_datetime64(self):
        """Conversion to 'datetime64[ns]' honours ``copy`` and drops tz."""
        # GH 13149, GH 13209
        idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.nan])

        # Default call: equal contents, but a distinct object.
        result = idx.astype('datetime64[ns]')
        tm.assert_index_equal(result, idx)
        self.assertIsNot(result, idx)

        # copy=False: the very same object is handed back.
        result = idx.astype('datetime64[ns]', copy=False)
        tm.assert_index_equal(result, idx)
        self.assertIs(result, idx)

        # tz-aware input: the expected naive value is five hours ahead of
        # the EST wall time given at construction.
        idx_tz = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.nan], tz='EST')
        result = idx_tz.astype('datetime64[ns]')
        expected = DatetimeIndex(['2016-05-16 05:00:00', 'NaT', 'NaT', 'NaT'],
                                 dtype='datetime64[ns]')
        tm.assert_index_equal(result, expected)

    def test_DatetimeIndex_astype_raises(self):
        """Unsupported target dtypes raise ValueError for DatetimeIndex."""
        # GH 13149, GH 13209
        idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.nan])

        self.assertRaises(ValueError, idx.astype, float)
        self.assertRaises(ValueError, idx.astype, 'timedelta64')
        self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]')
        self.assertRaises(ValueError, idx.astype, 'datetime64')
        self.assertRaises(ValueError, idx.astype, 'datetime64[D]')

    def test_date_range(self):
        """``date_range`` results convert to i8, datetime64[ns] and str."""
        rng = date_range('1/1/2000', periods=10)

        result = rng.astype('i8')
        self.assert_numpy_array_equal(result, rng.asi8)

        # with tz
        rng = date_range('1/1/2000', periods=10, tz='US/Eastern')
        result = rng.astype('datetime64[ns]')
        expected = (date_range('1/1/2000', periods=10,
                               tz='US/Eastern')
                    .tz_convert('UTC').tz_localize(None))
        tm.assert_index_equal(result, expected)

        # BUG#10442 : testing astype(str) is correct for Series/DatetimeIndex
        result = Series(date_range('2012-01-01', periods=3)).astype(str)
        expected = Series(
            ['2012-01-01', '2012-01-02', '2012-01-03'], dtype=object)
        tm.assert_series_equal(result, expected)

        result = Series(date_range('2012-01-01', periods=3,
                                   tz='US/Eastern')).astype(str)
        expected = Series(['2012-01-01 00:00:00-05:00',
                           '2012-01-02 00:00:00-05:00',
                           '2012-01-03 00:00:00-05:00'],
                          dtype=object)
        tm.assert_series_equal(result, expected)

    def test_DatetimeIndexOps_astype_str(self):
        """``astype(str)`` keeps the index name across tz/freq variants."""
        # test astype string - #10442
        result = date_range('2012-01-01', periods=4,
                            name='test_name').astype(str)
        expected = Index(['2012-01-01', '2012-01-02', '2012-01-03',
                          '2012-01-04'], name='test_name', dtype=object)
        tm.assert_index_equal(result, expected)

        # test astype string with tz and name
        result = date_range('2012-01-01', periods=3, name='test_name',
                            tz='US/Eastern').astype(str)
        expected = Index(['2012-01-01 00:00:00-05:00',
                          '2012-01-02 00:00:00-05:00',
                          '2012-01-03 00:00:00-05:00'],
                         name='test_name', dtype=object)
        tm.assert_index_equal(result, expected)

        # test astype string with freqH and name
        result = date_range('1/1/2011', periods=3, freq='H',
                            name='test_name').astype(str)
        expected = Index(['2011-01-01 00:00:00', '2011-01-01 01:00:00',
                          '2011-01-01 02:00:00'],
                         name='test_name', dtype=object)
        tm.assert_index_equal(result, expected)

        # test astype string with freqH and timezone
        result = date_range('3/6/2012 00:00', periods=2, freq='H',
                            tz='Europe/London', name='test_name').astype(str)
        expected = Index(['2012-03-06 00:00:00+00:00',
                          '2012-03-06 01:00:00+00:00'],
                         dtype=object, name='test_name')
        tm.assert_index_equal(result, expected)

    def test_TimedeltaIndex_astype(self):
        """TimedeltaIndex converts to object and to int64."""
        # GH 13149, GH 13209
        idx = TimedeltaIndex([1e14, 'NaT', NaT, np.nan])

        result = idx.astype(object)
        expected = Index([Timedelta('1 days 03:46:40')] + [NaT] * 3,
                         dtype=object)
        tm.assert_index_equal(result, expected)

        result = idx.astype(int)
        expected = Int64Index([100000000000000] + [self._NAT_AS_INT] * 3,
                              dtype=np.int64)
        tm.assert_index_equal(result, expected)

    def test_TimedeltaIndex_astype_timedelta64(self):
        """Check 'timedelta64' and 'timedelta64[ns]' targets, incl. copy."""
        # GH 13149, GH 13209
        idx = TimedeltaIndex([1e14, 'NaT', NaT, np.nan])

        # The unit-less target is expected to yield floats, with the
        # missing entries as NaN.
        result = idx.astype('timedelta64')
        expected = Float64Index([1e+14] + [np.nan] * 3, dtype='float64')
        tm.assert_index_equal(result, expected)

        # Default call: equal contents, but a distinct object.
        result = idx.astype('timedelta64[ns]')
        tm.assert_index_equal(result, idx)
        self.assertIsNot(result, idx)

        # copy=False: the very same object is handed back.
        result = idx.astype('timedelta64[ns]', copy=False)
        tm.assert_index_equal(result, idx)
        self.assertIs(result, idx)

    def test_TimedeltaIndex_astype_raises(self):
        """Unsupported target dtypes raise ValueError for TimedeltaIndex."""
        # GH 13149, GH 13209
        idx = TimedeltaIndex([1e14, 'NaT', NaT, np.nan])

        self.assertRaises(ValueError, idx.astype, float)
        self.assertRaises(ValueError, idx.astype, str)
        self.assertRaises(ValueError, idx.astype, 'datetime64')
        self.assertRaises(ValueError, idx.astype, 'datetime64[ns]')

    def test_timedelta_range(self):
        """``timedelta_range`` result converted to 'i8' matches ``asi8``."""
        rng = timedelta_range('1 days', periods=10)

        result = rng.astype('i8')
        self.assert_numpy_array_equal(result, rng.asi8)

    def test_PeriodIndex(self):
        """PeriodIndex converts to object and to int64."""
        # GH 13149, GH 13209
        idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.nan], freq='D')

        result = idx.astype(object)
        expected = Index([Period('2016-05-16', freq='D')] +
                         [Period(NaT, freq='D')] * 3, dtype='object')
        # Hack because of lack of support for Period null checking (GH12759)
        # Compare the one valid element directly, then compare every
        # element through its integer ordinal.
        tm.assert_index_equal(result[:1], expected[:1])
        result_arr = np.asarray([p.ordinal for p in result], dtype=np.int64)
        expected_arr = np.asarray([p.ordinal for p in expected],
                                  dtype=np.int64)
        tm.assert_numpy_array_equal(result_arr, expected_arr)
        # TODO: When GH12759 is resolved, change the above hack to:
        # tm.assert_index_equal(result, expected)  # now, it raises.

        result = idx.astype(int)
        expected = Int64Index([16937] + [self._NAT_AS_INT] * 3,
                              dtype=np.int64)
        tm.assert_index_equal(result, expected)

    def test_PeriodIndex_raises(self):
        """Unsupported target dtypes raise ValueError for PeriodIndex."""
        # GH 13149, GH 13209
        idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.nan], freq='D')

        self.assertRaises(ValueError, idx.astype, str)
        self.assertRaises(ValueError, idx.astype, float)
        self.assertRaises(ValueError, idx.astype, 'timedelta64')
        self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]')
        self.assertRaises(ValueError, idx.astype, 'datetime64')
        self.assertRaises(ValueError, idx.astype, 'datetime64[ns]')

    def test_period_range(self):
        """``period_range`` result converted to 'i8' matches ``values``."""
        idx = period_range('1990', '2009', freq='A')

        result = idx.astype('i8')
        self.assert_numpy_array_equal(result, idx.values)
0 commit comments