|
18 | 18 | from pandas.core.daterange import DateRange
|
19 | 19 | import pandas.core.datetools as datetools
|
20 | 20 | import pandas.tseries.offsets as offsets
|
| 21 | +import pandas.tseries.tools as tools |
21 | 22 | import pandas.tseries.frequencies as fmod
|
22 | 23 | import pandas as pd
|
23 | 24 |
|
@@ -49,6 +50,11 @@ def _skip_if_no_pytz():
|
49 | 50 | except ImportError:
|
50 | 51 | raise nose.SkipTest("pytz not installed")
|
51 | 52 |
|
def _skip_if_has_locale():
    """Skip the calling test when ``locale.getlocale()`` reports a language.

    Raises
    ------
    nose.SkipTest
        If the language code returned by ``locale.getlocale()`` is not None.
    """
    import locale

    # getlocale() returns a (language code, encoding) pair; only the
    # language code decides whether to skip.
    current_lang = locale.getlocale()[0]
    if current_lang is None:
        return

    raise nose.SkipTest("Specific locale is set {0}".format(current_lang))
52 | 58 |
|
53 | 59 | class TestTimeSeriesDuplicates(tm.TestCase):
|
54 | 60 | _multiprocess_can_split_ = True
|
@@ -909,12 +915,8 @@ def test_to_datetime_on_datetime64_series(self):
|
909 | 915 | self.assertEquals(result[0], s[0])
|
910 | 916 |
|
911 | 917 | def test_to_datetime_with_apply(self):
|
912 |
| - |
913 | 918 | # this is only locale tested with US/None locales
|
914 |
| - import locale |
915 |
| - (lang,encoding) = locale.getlocale() |
916 |
| - if lang is not None: |
917 |
| - raise nose.SkipTest("format codes cannot work with a locale of {0}".format(lang)) |
| 919 | + _skip_if_has_locale() |
918 | 920 |
|
919 | 921 | # GH 5195
|
920 | 922 | # with a format and coerce a single item to_datetime fails
|
@@ -3124,6 +3126,177 @@ def test_date_range_fy5252(self):
|
3124 | 3126 | self.assertEqual(dr[1], Timestamp('2014-01-30'))
|
3125 | 3127 |
|
3126 | 3128 |
|
class TestToDatetimeInferFormat(tm.TestCase):
    """Check that ``infer_datetime_format`` does not change the values
    returned by ``pd.to_datetime``."""

    def _assert_infer_matches_default(self, test_series):
        """Assert that parsing ``test_series`` with and without
        ``infer_datetime_format`` yields element-wise equal results."""
        self.assertTrue(np.array_equal(
            pd.to_datetime(test_series, infer_datetime_format=False),
            pd.to_datetime(test_series, infer_datetime_format=True)
        ))

    def test_to_datetime_infer_datetime_format_consistent_format(self):
        time_series = pd.Series(
            pd.date_range('20000101', periods=50, freq='H')
        )

        test_formats = [
            '%m-%d-%Y',
            '%m/%d/%Y %H:%M:%S.%f',
            '%Y-%m-%dT%H:%M:%S.%f',
        ]

        for test_format in test_formats:
            # Render every timestamp with the same format so the whole
            # series is consistently formatted.
            s_as_dt_strings = time_series.apply(
                lambda x: x.strftime(test_format)
            )

            with_format = pd.to_datetime(s_as_dt_strings, format=test_format)
            no_infer = pd.to_datetime(
                s_as_dt_strings, infer_datetime_format=False
            )
            yes_infer = pd.to_datetime(
                s_as_dt_strings, infer_datetime_format=True
            )

            # Whether the format is explicitly passed, it is inferred, or
            # it is not inferred, the results should all be the same
            self.assertTrue(np.array_equal(with_format, no_infer))
            self.assertTrue(np.array_equal(no_infer, yes_infer))

    def test_to_datetime_infer_datetime_format_inconsistent_format(self):
        test_series = pd.Series(
            np.array([
                '01/01/2011 00:00:00',
                '01-02-2011 00:00:00',
                '2011-01-03T00:00:00',
            ]))

        # When the format is inconsistent, infer_datetime_format should just
        # fall back to the default parsing
        self._assert_infer_matches_default(test_series)

        test_series = pd.Series(
            np.array([
                'Jan/01/2011',
                'Feb/01/2011',
                'Mar/01/2011',
            ]))

        self._assert_infer_matches_default(test_series)

    def test_to_datetime_infer_datetime_format_series_with_nans(self):
        # NOTE(review): np.array() without dtype=object converts np.nan to
        # the string 'nan' when mixed with str values -- confirm that this
        # is the intended input rather than real NaN entries.
        test_series = pd.Series(
            np.array([
                '01/01/2011 00:00:00',
                np.nan,
                '01/03/2011 00:00:00',
                np.nan,
            ]))

        self._assert_infer_matches_default(test_series)

    def test_to_datetime_infer_datetime_format_series_starting_with_nans(self):
        # NOTE(review): same np.nan -> 'nan' string coercion as in the
        # test above -- confirm intended.
        test_series = pd.Series(
            np.array([
                np.nan,
                np.nan,
                '01/01/2011 00:00:00',
                '01/02/2011 00:00:00',
                '01/03/2011 00:00:00',
            ]))

        self._assert_infer_matches_default(test_series)
| 3214 | + |
| 3215 | + |
class TestGuessDatetimeFormat(tm.TestCase):
    """Tests for ``tools._guess_datetime_format`` and
    ``tools._guess_datetime_format_for_array``."""

    def test_guess_datetime_format_with_parseable_formats(self):
        # (input string, strftime format expected to be guessed)
        dt_string_to_format = (
            ('20111230', '%Y%m%d'),
            ('2011-12-30', '%Y-%m-%d'),
            ('30-12-2011', '%d-%m-%Y'),
            ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'),
            ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'),
            ('2011-12-30 00:00:00.000000', '%Y-%m-%d %H:%M:%S.%f'),
        )

        for dt_string, dt_format in dt_string_to_format:
            self.assertEqual(
                tools._guess_datetime_format(dt_string),
                dt_format
            )

    def test_guess_datetime_format_with_dayfirst(self):
        # Day and month are both '01', so only ``dayfirst`` decides which
        # directive comes first in the guessed format.
        ambiguous_string = '01/01/2011'
        self.assertEqual(
            tools._guess_datetime_format(ambiguous_string, dayfirst=True),
            '%d/%m/%Y'
        )
        self.assertEqual(
            tools._guess_datetime_format(ambiguous_string, dayfirst=False),
            '%m/%d/%Y'
        )

    def test_guess_datetime_format_with_locale_specific_formats(self):
        # The month names will vary depending on the locale, in which
        # case these won't be parsed properly (dateutil can't parse them)
        _skip_if_has_locale()

        dt_string_to_format = (
            ('30/Dec/2011', '%d/%b/%Y'),
            ('30/December/2011', '%d/%B/%Y'),
            ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'),
        )

        for dt_string, dt_format in dt_string_to_format:
            self.assertEqual(
                tools._guess_datetime_format(dt_string),
                dt_format
            )

    def test_guess_datetime_format_invalid_inputs(self):
        # A datetime string must include a year, month and a day for it
        # to be guessable, in addition to being a string that looks like
        # a datetime
        invalid_dts = [
            '2013',
            '01/2013',
            '12:00:00',
            '1/1/1/1',
            'this_is_not_a_datetime',
            '51a',
            9,
            datetime(2011, 1, 1),
        ]

        for invalid_dt in invalid_dts:
            self.assertIsNone(tools._guess_datetime_format(invalid_dt))

    def test_guess_datetime_format_for_array(self):
        expected_format = '%Y-%m-%d %H:%M:%S.%f'
        dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format)

        # Each array holds at least one string in the expected format,
        # alongside repeats, leading NaNs or a trailing unparseable string.
        test_arrays = [
            np.array([dt_string, dt_string, dt_string], dtype='O'),
            np.array([np.nan, np.nan, dt_string], dtype='O'),
            np.array([dt_string, 'random_string'], dtype='O'),
        ]

        for test_array in test_arrays:
            self.assertEqual(
                tools._guess_datetime_format_for_array(test_array),
                expected_format
            )

        # An array with no strings at all has nothing to guess from.
        format_for_string_of_nans = tools._guess_datetime_format_for_array(
            np.array([np.nan, np.nan, np.nan], dtype='O')
        )
        self.assertIsNone(format_for_string_of_nans)
| 3299 | + |
if __name__ == '__main__':
    # Run this module's tests through nose when executed as a script;
    # exit=False is passed so nose does not terminate the interpreter.
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)
|
0 commit comments