From 3b0b53ccafdee7c6bdb63a3534913195a61c23b5 Mon Sep 17 00:00:00 2001 From: immerrr Date: Mon, 3 Mar 2014 16:28:24 +0400 Subject: [PATCH 1/9] BUG: fix _ref_locs corruption when slice indexing across columns axis --- doc/source/release.rst | 1 + pandas/core/internals.py | 6 ++++++ pandas/tests/test_frame.py | 13 +++++++++++++ 3 files changed, 20 insertions(+) diff --git a/doc/source/release.rst b/doc/source/release.rst index 08bfcbe42ad5b..49955ec79e9f3 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -194,6 +194,7 @@ Bug Fixes - Bug in ``read_html`` tests where redirected invalid URLs would make one test fail (:issue:`6445`). - Bug in multi-axis indexing using ``.loc`` on non-unique indices (:issue:`6504`) +- Bug that caused _ref_locs corruption when slice indexing across columns axis of a DataFrame (:issue:`6525`) pandas 0.13.1 ------------- diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 74a8ce0118d88..6d2b2933eb597 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -133,6 +133,12 @@ def take_ref_locs(self, indexer): tindexer[indexer] = False tindexer = tindexer.astype(int).cumsum()[indexer] ref_locs = ref_locs[indexer] + + # Make sure the result is a copy, or otherwise self._ref_locs will be + # updated. 
+ if ref_locs.base is not None: + ref_locs = ref_locs.copy() + ref_locs -= tindexer return ref_locs diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 3c39d610c1b88..a36b3c5b15384 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -12267,6 +12267,19 @@ def test_empty_frame_dtypes_ftypes(self): ('b', 'bool:dense'), ('c', 'float64:dense')]))) + def test_dtypes_are_correct_after_column_slice(self): + # GH6525 + df = pd.DataFrame(index=range(5), columns=list("abc"), dtype=np.float_) + odict = OrderedDict + assert_series_equal(df.dtypes, + pd.Series(odict([('a', np.float_), ('b', np.float_), + ('c', np.float_),]))) + assert_series_equal(df.iloc[:,2:].dtypes, + pd.Series(odict([('c', np.float_)]))) + assert_series_equal(df.dtypes, + pd.Series(odict([('a', np.float_), ('b', np.float_), + ('c', np.float_),]))) + def skip_if_no_ne(engine='numexpr'): if engine == 'numexpr': From cd3ba5753aa2bf3c420bf523be967ba6b07f52da Mon Sep 17 00:00:00 2001 From: jreback Date: Mon, 3 Mar 2014 11:51:34 -0500 Subject: [PATCH 2/9] BUG: Regression from 0.13 in the treatmenet of numpy datetime64 non-ns dtypes in Series creation (GH6529) --- doc/source/release.rst | 1 + pandas/core/common.py | 9 +++------ pandas/tests/test_series.py | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 49955ec79e9f3..ae31f1e7fc495 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -195,6 +195,7 @@ Bug Fixes fail (:issue:`6445`). 
- Bug in multi-axis indexing using ``.loc`` on non-unique indices (:issue:`6504`) - Bug that caused _ref_locs corruption when slice indexing across columns axis of a DataFrame (:issue:`6525`) +- Regression from 0.13 in the treatmenet of numpy ``datetime64`` non-ns dtypes in Series creation (:issue:`6529`) pandas 0.13.1 ------------- diff --git a/pandas/core/common.py b/pandas/core/common.py index 69addea1c4188..eb3c159ae916d 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -124,7 +124,7 @@ def isnull(obj): See also -------- - pandas.notnull: boolean inverse of pandas.isnull + pandas.notnull: boolean inverse of pandas.isnull """ return _isnull(obj) @@ -272,7 +272,7 @@ def notnull(obj): isnulled : array-like of bool or bool Array or bool indicating whether an object is *not* null or if an array is given which of the element is *not* null. - + See also -------- pandas.isnull : boolean inverse of pandas.notnull @@ -1727,10 +1727,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False): dtype = value.dtype if dtype.kind == 'M' and dtype != _NS_DTYPE: - try: - value = tslib.array_to_datetime(value) - except: - raise + value = value.astype(_NS_DTYPE) elif dtype.kind == 'm' and dtype != _TD_DTYPE: from pandas.tseries.timedeltas import \ diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index bae4036a68b37..faf5341276ae5 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -600,6 +600,25 @@ def test_constructor_dtype_datetime64(self): self.assertEqual(result['a'], Timestamp('20130101')) self.assertEqual(result['b'], 1) + # GH6529 + # coerce datetime64 non-ns properly + dates = date_range('01-Jan-2015', '01-Dec-2015', freq='M') + values2 = dates.view(np.ndarray).astype('datetime64[ns]') + expected = Series(values2, dates) + + # numpy < 1.7 is very odd about astyping + if not _np_version_under1p7: + for dtype in ['s','D','ms','us','ns']: + values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(dtype)) + 
result = Series(values1, dates) + assert_series_equal(result,expected) + + # leave datetime.date alone + dates2 = np.array([ d.date() for d in dates.to_pydatetime() ],dtype=object) + series1 = Series(dates2, dates) + self.assert_numpy_array_equal(series1.values,dates2) + self.assertEqual(series1.dtype,object) + def test_constructor_dict(self): d = {'a': 0., 'b': 1., 'c': 2.} result = Series(d, index=['b', 'c', 'd', 'a']) From bc155aaa9d343da72ce0744aa14b746270d6d2a7 Mon Sep 17 00:00:00 2001 From: Christopher Whelan Date: Sun, 23 Feb 2014 19:23:07 -0800 Subject: [PATCH 3/9] ENH: Preserve .names in df.set_index(df.index) Preserve .names in df.set_index(df.index) Check that df.set_index(df.index) doesn't convert a MultiIndex to an Index Handle general case of df.set_index([df.index,...]) Cleanup Add to release notes Add equality checks Fix issue on 2.6 Add example to whatsnew --- doc/source/release.rst | 2 ++ doc/source/v0.14.0.txt | 43 ++++++++++++++++++++++++++++++++++++++ pandas/core/frame.py | 10 ++++++++- pandas/tests/test_frame.py | 27 ++++++++++++++++++++++++ 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index ae31f1e7fc495..8fddee8954689 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -107,6 +107,7 @@ API Changes or numbering columns as needed (:issue:`2385`) - Slicing and advanced/boolean indexing operations on ``Index`` classes will no longer change type of the resulting index (:issue:`6440`). +- ``set_index`` no longer converts MultiIndexes to an Index of tuples (:issue:`6459`). 
Experimental Features ~~~~~~~~~~~~~~~~~~~~~ @@ -196,6 +197,7 @@ Bug Fixes - Bug in multi-axis indexing using ``.loc`` on non-unique indices (:issue:`6504`) - Bug that caused _ref_locs corruption when slice indexing across columns axis of a DataFrame (:issue:`6525`) - Regression from 0.13 in the treatmenet of numpy ``datetime64`` non-ns dtypes in Series creation (:issue:`6529`) +- ``.names`` attribute of MultiIndexes passed to ``set_index`` are now preserved (:issue:`6459`). pandas 0.13.1 ------------- diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 4432e9e891e7d..7bcd30301e4e6 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -92,6 +92,49 @@ These are out-of-bounds selections .. ipython:: python i[[0,1,2]].astype(np.int_) +- ``set_index`` no longer converts MultiIndexes to an Index of tuples. For example, + the old behavior returned an Index in this case (:issue:`6459`): + + .. ipython:: python + :suppress: + + from itertools import product + tuples = list(product(('a', 'b'), ('c', 'd'))) + mi = MultiIndex.from_tuples(tuples) + df_multi = DataFrame(np.random.randn(4, 2), index=mi) + tuple_ind = pd.Index(tuples) + + .. ipython:: python + + df_multi.index + + @suppress + df_multi.index = tuple_ind + + # Old behavior, casted MultiIndex to an Index + df_multi.set_index(df_multi.index) + + @suppress + df_multi.index = mi + + # New behavior + df_multi.set_index(df_multi.index) + + This also applies when passing multiple indices to ``set_index``: + + .. 
ipython:: python + + @suppress + df_multi.index = tuple_ind + + # Old output, 2-level MultiIndex of tuples + df_multi.set_index([df_multi.index, df_multi.index]) + + @suppress + df_multi.index = mi + + # New output, 4-level MultiIndex + df_multi.set_index([df_multi.index, df_multi.index]) MultiIndexing Using Slicers diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6c1037f018e02..05f7785a401f8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2240,7 +2240,15 @@ def set_index(self, keys, drop=True, append=False, inplace=False, to_remove = [] for col in keys: - if isinstance(col, Series): + if isinstance(col, MultiIndex): + # append all but the last column so we don't have to modify + # the end of this loop + for n in range(col.nlevels - 1): + arrays.append(col.get_level_values(n)) + + level = col.get_level_values(col.nlevels - 1) + names.extend(col.names) + elif isinstance(col, (Series, Index)): level = col.values names.append(col.name) elif isinstance(col, (list, np.ndarray)): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index a36b3c5b15384..1cc357ce2a260 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -12280,6 +12280,33 @@ def test_dtypes_are_correct_after_column_slice(self): pd.Series(odict([('a', np.float_), ('b', np.float_), ('c', np.float_),]))) + def test_set_index_names(self): + df = pd.util.testing.makeDataFrame() + df.index.name = 'name' + + self.assertEquals(df.set_index(df.index).index.names, ['name']) + + mi = MultiIndex.from_arrays(df[['A', 'B']].T.values, names=['A', 'B']) + mi2 = MultiIndex.from_arrays(df[['A', 'B', 'A', 'B']].T.values, + names=['A', 'B', 'A', 'B']) + + df = df.set_index(['A', 'B']) + + self.assertEquals(df.set_index(df.index).index.names, ['A', 'B']) + + # Check that set_index isn't converting a MultiIndex into an Index + self.assertTrue(isinstance(df.set_index(df.index).index, MultiIndex)) + + # Check actual equality + 
tm.assert_index_equal(df.set_index(df.index).index, mi) + + # Check that [MultiIndex, MultiIndex] yields a MultiIndex rather + # than a pair of tuples + self.assertTrue(isinstance(df.set_index([df.index, df.index]).index, MultiIndex)) + + # Check equality + tm.assert_index_equal(df.set_index([df.index, df.index]).index, mi2) + def skip_if_no_ne(engine='numexpr'): if engine == 'numexpr': From 5f81319e2e8d339294bd20e7a03e16d6860dcc50 Mon Sep 17 00:00:00 2001 From: Clark Fitzgerald Date: Mon, 3 Mar 2014 19:35:42 -0800 Subject: [PATCH 4/9] informative error message --- doc/source/release.rst | 1 + pandas/core/internals.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 8fddee8954689..29fd5bad16986 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -140,6 +140,7 @@ Improvements to existing features Bug Fixes ~~~~~~~~~ +- Bug in Series ValueError when index doesn't match data (:issue:`6532`) - Bug in ``pd.DataFrame.sort_index`` where mergesort wasn't stable when ``ascending=False`` (:issue:`6399`) - Bug in ``pd.tseries.frequencies.to_offset`` when argument has leading zeroes (:issue:`6391`) - Bug in version string gen. 
for dev versions with shallow clones / install from tarball (:issue:`6127`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 6d2b2933eb597..e3deed52f4b3f 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -61,8 +61,8 @@ def __init__(self, values, items, ref_items, ndim=None, fastpath=False, raise ValueError('Wrong number of dimensions') if len(items) != len(values): - raise ValueError('Wrong number of items passed %d, indices imply ' - '%d' % (len(items), len(values))) + raise ValueError('Wrong number of items passed %d, index implies ' + '%d' % (len(values), len(items))) self.set_ref_locs(placement) self.values = values From 653cc43ad2071cae6ac437a7e7af7b1537c51370 Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 4 Mar 2014 13:32:23 -0500 Subject: [PATCH 5/9] BUG: Bug in setitem with a duplicate index and an alignable rhs (GH6541) --- doc/source/release.rst | 1 + pandas/core/index.py | 9 +++++++-- pandas/core/indexing.py | 4 +++- pandas/tests/test_indexing.py | 23 +++++++++++++++++++++++ 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 29fd5bad16986..f5e2c80289c5c 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -199,6 +199,7 @@ Bug Fixes - Bug that caused _ref_locs corruption when slice indexing across columns axis of a DataFrame (:issue:`6525`) - Regression from 0.13 in the treatmenet of numpy ``datetime64`` non-ns dtypes in Series creation (:issue:`6529`) - ``.names`` attribute of MultiIndexes passed to ``set_index`` are now preserved (:issue:`6459`). 
+- Bug in setitem with a duplicate index and an alignable rhs (:issue:`6541`) pandas 0.13.1 ------------- diff --git a/pandas/core/index.py b/pandas/core/index.py index c16e2eff06904..f67270530c3f8 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -987,8 +987,13 @@ def intersection(self, other): except TypeError: pass - indexer = self.get_indexer(other.values) - indexer = indexer.take((indexer != -1).nonzero()[0]) + try: + indexer = self.get_indexer(other.values) + indexer = indexer.take((indexer != -1).nonzero()[0]) + except: + # duplicates + indexer = self.get_indexer_non_unique(other.values)[0].unique() + return self.take(indexer) def diff(self, other): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6691db5f35bb4..288934dbd27f4 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -441,7 +441,9 @@ def can_do_equal_len(): # align to if item in value: v = value[item] - v = v.reindex(self.obj[item].index & v.index) + i = self.obj[item].index + v = v.reindex(i & v.index) + setter(item, v.values) else: setter(item, np.nan) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index d373f2f43ad3e..f466ea302ee1c 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -564,6 +564,29 @@ def test_loc_setitem(self): expected = DataFrame({'a' : [0.5,-0.5,-1.5], 'b' : [0,1,2] }) assert_frame_equal(df,expected) + def test_loc_setitem_dups(self): + + # GH 6541 + df_orig = DataFrame({'me' : list('rttti'), + 'foo': list('aaade'), + 'bar': np.arange(5,dtype='float64')*1.34+2, + 'bar2': np.arange(5,dtype='float64')*-.34+2}).set_index('me') + + indexer = tuple(['r',['bar','bar2']]) + df = df_orig.copy() + df.loc[indexer]*=2.0 + assert_series_equal(df.loc[indexer],2.0*df_orig.loc[indexer]) + + indexer = tuple(['r','bar']) + df = df_orig.copy() + df.loc[indexer]*=2.0 + self.assertEqual(df.loc[indexer],2.0*df_orig.loc[indexer]) + + indexer = tuple(['t',['bar','bar2']]) + df = 
df_orig.copy() + df.loc[indexer]*=2.0 + assert_frame_equal(df.loc[indexer],2.0*df_orig.loc[indexer]) + def test_chained_getitem_with_lists(self): # GH6394 From f467a9162e9451b5d4244006ca1a5180ba2ea198 Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 4 Mar 2014 19:24:52 -0500 Subject: [PATCH 6/9] BUG: Bug in setitem with loc on mixed integer Indexes (GH6546) --- doc/source/release.rst | 1 + pandas/core/index.py | 23 +++++++++++++++++++++++ pandas/core/indexing.py | 26 +++++++------------------- pandas/tests/test_indexing.py | 13 +++++++++++++ 4 files changed, 44 insertions(+), 19 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index f5e2c80289c5c..d84afc66bf9ac 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -200,6 +200,7 @@ Bug Fixes - Regression from 0.13 in the treatmenet of numpy ``datetime64`` non-ns dtypes in Series creation (:issue:`6529`) - ``.names`` attribute of MultiIndexes passed to ``set_index`` are now preserved (:issue:`6459`). - Bug in setitem with a duplicate index and an alignable rhs (:issue:`6541`) +- Bug in setitem with loc on mixed integer Indexes (:issue:`6546`) pandas 0.13.1 ------------- diff --git a/pandas/core/index.py b/pandas/core/index.py index f67270530c3f8..30e18d239d950 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -555,6 +555,29 @@ def _convert_list_indexer(self, key, typ=None): """ convert a list indexer. these should be locations """ return key + def _convert_list_indexer_for_mixed(self, keyarr, typ=None): + """ passed a key that is tuplesafe that is integer based + and we have a mixed index (e.g. number/labels). figure out + the indexer. 
return None if we can't help + """ + if com.is_integer_dtype(keyarr) and not self.is_floating(): + if self.inferred_type != 'integer': + keyarr = np.where(keyarr < 0, + len(self) + keyarr, keyarr) + + if self.inferred_type == 'mixed-integer': + indexer = self.get_indexer(keyarr) + if (indexer >= 0).all(): + return indexer + + from pandas.core.indexing import _maybe_convert_indices + return _maybe_convert_indices(indexer, len(self)) + + elif not self.inferred_type == 'integer': + return keyarr + + return None + def _convert_indexer_error(self, key, msg=None): if msg is None: msg = 'label' diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 288934dbd27f4..c7970309a6558 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -911,20 +911,10 @@ def _reindex(keys, level=None): # asarray can be unsafe, NumPy strings are weird keyarr = _asarray_tuplesafe(key) - if is_integer_dtype(keyarr) and not labels.is_floating(): - if labels.inferred_type != 'integer': - keyarr = np.where(keyarr < 0, - len(labels) + keyarr, keyarr) - - if labels.inferred_type == 'mixed-integer': - indexer = labels.get_indexer(keyarr) - if (indexer >= 0).all(): - self.obj.take(indexer, axis=axis, convert=True) - else: - return self.obj.take(keyarr, axis=axis) - elif not labels.inferred_type == 'integer': - - return self.obj.take(keyarr, axis=axis) + # handle a mixed integer scenario + indexer = labels._convert_list_indexer_for_mixed(keyarr, typ=self.name) + if indexer is not None: + return self.obj.take(indexer, axis=axis) # this is not the most robust, but... 
if (isinstance(labels, MultiIndex) and @@ -1064,11 +1054,9 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): objarr = _asarray_tuplesafe(obj) # If have integer labels, defer to label-based indexing - if is_integer_dtype(objarr) and not is_int_index: - if labels.inferred_type != 'integer': - objarr = np.where(objarr < 0, - len(labels) + objarr, objarr) - return objarr + indexer = labels._convert_list_indexer_for_mixed(objarr, typ=self.name) + if indexer is not None: + return indexer # this is not the most robust, but... if (isinstance(labels, MultiIndex) and diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index f466ea302ee1c..1d033782a0175 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -835,6 +835,19 @@ def test_loc_setitem_frame(self): expected = DataFrame(dict(A = Series(val1,index=keys1), B = Series(val2,index=keys2))).reindex(index=index) assert_frame_equal(df, expected) + # GH 6546 + # setting with mixed labels + df = DataFrame({1:[1,2],2:[3,4],'a':['a','b']}) + + result = df.loc[0,[1,2]] + expected = Series([1,3],index=[1,2],dtype=object) + assert_series_equal(result,expected) + + expected = DataFrame({1:[5,2],2:[6,4],'a':['a','b']}) + df.loc[0,[1,2]] = [5,6] + assert_frame_equal(df, expected) + + def test_loc_setitem_frame_multiples(self): # multiple setting From b257684300e61d1d96bb330cc6dc213e210ea39b Mon Sep 17 00:00:00 2001 From: bashtage Date: Wed, 12 Feb 2014 19:15:00 +0000 Subject: [PATCH 7/9] BUG/API: Fix stata io to deal with wrong data types and missing values (GH6335) BUG: Changes types used in packing structs Corrected incorrect data type conversion between pandas and Stata Remove unnecessary, potentially precision degrading cast to Series when writing data Added function to cast columns from NumPy data types to Stata data types Corrected tests for correct Stata datatypes Fixed formatting in comparison after casting Added docstring for new function and warning class BUG: 
Fixes and tests for extreme values in all data types The extreme values of float and double (Stata, pandas equiv.: float32 and float64) were not correct. This resulted in incorrect truncation. The handling of missing values has been improved and code to convert missing values in any format has been added. The improvement differentiated between valid ranges for data and missing values. Additional issues were found when handling missing Dates, where missing Dates (NaT) were converted to non-missing dates when written. A test has been added for extreme numeric values as well as missing values. Fixed legacy date issue with format 114 files Added test for 114 files Added format 114 (Stata 9/10/11) data file Add test for Stata data with file format 114 Added additional data files for testing alternative Stata file formats Added expected result to test Renamed Stata data files to include file format Types used for integer conversion were always half the size they should be. This produced a bug when exporting data tables with long integer data (np.int64).
Added test for integer conversion bug Added test for incorrect integer conversion from int16, int32 and int64 Added additional data files for testing alternative Stata file formats Added expected result to test Renamed Stata data files to include file format Disabled the big endian skips --- doc/source/release.rst | 4 + doc/source/v0.14.0.txt | 3 + pandas/io/stata.py | 151 ++++++++++++--- .../tests/data/{stata1.dta => stata1_114.dta} | Bin .../data/{stata1_v13.dta => stata1_117.dta} | Bin pandas/io/tests/data/stata2_113.dta | Bin 0 -> 1490 bytes .../tests/data/{stata2.dta => stata2_114.dta} | Bin pandas/io/tests/data/stata2_115.dta | Bin 0 -> 1786 bytes ...for testing alternative Stata file formats | Bin 0 -> 1786 bytes .../data/{stata2_v13.dta => stata2_117.dta} | Bin pandas/io/tests/data/stata3_113.dta | Bin 0 -> 12737 bytes .../tests/data/{stata3.dta => stata3_114.dta} | Bin pandas/io/tests/data/stata3_115.dta | Bin 0 -> 13255 bytes ...for testing alternative Stata file formats | Bin 0 -> 13255 bytes .../data/{stata3_v13.dta => stata3_117.dta} | Bin pandas/io/tests/data/stata4_113.dta | Bin 0 -> 1528 bytes .../tests/data/{stata4.dta => stata4_114.dta} | Bin pandas/io/tests/data/stata4_115.dta | Bin 0 -> 1713 bytes ...for testing alternative Stata file formats | Bin 0 -> 1713 bytes .../data/{stata4_v13.dta => stata4_117.dta} | Bin pandas/io/tests/data/stata5.csv | 19 ++ pandas/io/tests/data/stata5_113.dta | Bin 0 -> 4628 bytes pandas/io/tests/data/stata5_114.dta | Bin 0 -> 4924 bytes pandas/io/tests/data/stata5_115.dta | Bin 0 -> 4924 bytes pandas/io/tests/data/stata6.csv | 6 + pandas/io/tests/data/stata6_113.dta | Bin 0 -> 2752 bytes pandas/io/tests/data/stata6_114.dta | Bin 0 -> 3048 bytes pandas/io/tests/data/stata6_115.dta | Bin 0 -> 3048 bytes pandas/io/tests/test_stata.py | 173 ++++++++++++++---- 29 files changed, 293 insertions(+), 63 deletions(-) rename pandas/io/tests/data/{stata1.dta => stata1_114.dta} (100%) rename pandas/io/tests/data/{stata1_v13.dta 
=> stata1_117.dta} (100%) create mode 100644 pandas/io/tests/data/stata2_113.dta rename pandas/io/tests/data/{stata2.dta => stata2_114.dta} (100%) create mode 100644 pandas/io/tests/data/stata2_115.dta create mode 100644 pandas/io/tests/data/stata2_115.dta~1dc157c... Added additional data files for testing alternative Stata file formats rename pandas/io/tests/data/{stata2_v13.dta => stata2_117.dta} (100%) create mode 100644 pandas/io/tests/data/stata3_113.dta rename pandas/io/tests/data/{stata3.dta => stata3_114.dta} (100%) create mode 100644 pandas/io/tests/data/stata3_115.dta create mode 100644 pandas/io/tests/data/stata3_115.dta~1dc157c... Added additional data files for testing alternative Stata file formats rename pandas/io/tests/data/{stata3_v13.dta => stata3_117.dta} (100%) create mode 100644 pandas/io/tests/data/stata4_113.dta rename pandas/io/tests/data/{stata4.dta => stata4_114.dta} (100%) create mode 100644 pandas/io/tests/data/stata4_115.dta create mode 100644 pandas/io/tests/data/stata4_115.dta~1dc157c... Added additional data files for testing alternative Stata file formats rename pandas/io/tests/data/{stata4_v13.dta => stata4_117.dta} (100%) create mode 100644 pandas/io/tests/data/stata5.csv create mode 100644 pandas/io/tests/data/stata5_113.dta create mode 100644 pandas/io/tests/data/stata5_114.dta create mode 100644 pandas/io/tests/data/stata5_115.dta create mode 100644 pandas/io/tests/data/stata6.csv create mode 100644 pandas/io/tests/data/stata6_113.dta create mode 100644 pandas/io/tests/data/stata6_114.dta create mode 100644 pandas/io/tests/data/stata6_115.dta diff --git a/doc/source/release.rst b/doc/source/release.rst index d84afc66bf9ac..f4f376026225e 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -201,6 +201,10 @@ Bug Fixes - ``.names`` attribute of MultiIndexes passed to ``set_index`` are now preserved (:issue:`6459`). 
- Bug in setitem with a duplicate index and an alignable rhs (:issue:`6541`) - Bug in setitem with loc on mixed integer Indexes (:issue:`6546`) +- Bug in ``pd.read_stata`` which would use the wrong data types and missing values (:issue:`6327`) +- Bug in ``DataFrame.to_stata`` that lead to data loss in certain cases, and could exported using the + wrong data types and missing values (:issue:`6335`) + pandas 0.13.1 ------------- diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 7bcd30301e4e6..310047545d84e 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -291,6 +291,9 @@ Enhancements using ``DataFrame.to_csv`` (:issue:`5414`, :issue:`4528`) - Added a ``to_julian_date`` function to ``TimeStamp`` and ``DatetimeIndex`` to convert to the Julian Date used primarily in astronomy. (:issue:`4041`) +- ``DataFrame.to_stata`` will now check data for compatibility with Stata data types + and will upcast when needed. When it isn't possibly to losslessly upcast, a warning + is raised (:issue:`6327`) Performance ~~~~~~~~~~~ diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 55bcbd76c2248..2ecdb22a5cc7b 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -23,7 +23,7 @@ from pandas.compat import long, lrange, lmap, lzip from pandas import isnull from pandas.io.common import get_filepath_or_buffer - +from pandas.tslib import NaT def read_stata(filepath_or_buffer, convert_dates=True, convert_categoricals=True, encoding=None, index=None): @@ -48,7 +48,7 @@ def read_stata(filepath_or_buffer, convert_dates=True, return reader.data(convert_dates, convert_categoricals, index) -_date_formats = ["%tc", "%tC", "%td", "%tw", "%tm", "%tq", "%th", "%ty"] +_date_formats = ["%tc", "%tC", "%td", "%d", "%tw", "%tm", "%tq", "%th", "%ty"] def _stata_elapsed_date_to_datetime(date, fmt): @@ -97,6 +97,7 @@ def _stata_elapsed_date_to_datetime(date, fmt): # numpy types and numpy datetime isn't mature enough / we can't rely on # pandas version > 0.7.1 
#TODO: IIRC relative delta doesn't play well with np.datetime? + #TODO: When pandas supports more than datetime64[ns], this should be improved to use correct range, e.g. datetime[Y] for yearly if np.isnan(date): return np.datetime64('nat') @@ -109,7 +110,7 @@ def _stata_elapsed_date_to_datetime(date, fmt): from warnings import warn warn("Encountered %tC format. Leaving in Stata Internal Format.") return date - elif fmt in ["%td", "td"]: + elif fmt in ["%td", "td", "%d", "d"]: return stata_epoch + datetime.timedelta(int(date)) elif fmt in ["%tw", "tw"]: # does not count leap days - 7 days is a week year = datetime.datetime(stata_epoch.year + date // 52, 1, 1) @@ -150,6 +151,11 @@ def _datetime_to_stata_elapsed(date, fmt): if not isinstance(date, datetime.datetime): raise ValueError("date should be datetime.datetime format") stata_epoch = datetime.datetime(1960, 1, 1) + # Handle NaTs + if date is NaT: + # Missing value for dates ('.'), assumed always double + # TODO: Should be moved so a const somewhere, and consolidated + return struct.unpack(' 100 or data[col].min() < -127: + data[col] = data[col].astype(np.int16) + elif dtype == np.int16: + if data[col].max() > 32740 or data[col].min() < -32767: + data[col] = data[col].astype(np.int32) + elif dtype == np.int64: + if data[col].max() <= 2147483620 and data[col].min() >= -2147483647: + data[col] = data[col].astype(np.int32) + else: + data[col] = data[col].astype(np.float64) + if data[col].max() <= 2 * 53 or data[col].min() >= -2 ** 53: + ws = precision_loss_doc % ('int64', 'float64') + + if ws: + import warnings + + warnings.warn(ws, PossiblePrecisionLoss) + + return data + + class StataMissingValue(StringMixin): """ An observation's missing value. @@ -193,14 +255,23 @@ class StataMissingValue(StringMixin): ----- More information: """ - + # TODO: Needs test def __init__(self, offset, value): self._value = value - if type(value) is int or type(value) is long: - self._str = value - offset is 1 and \ - '.' or ('.' 
+ chr(value - offset + 96)) + value_type = type(value) + if value_type in int: + loc = value - offset + elif value_type in (float, np.float32, np.float64): + if value <= np.finfo(np.float32).max: # float32 + conv_str, byte_loc, scale = ' nmax: if self._missing_values: return StataMissingValue(nmax, d) @@ -855,11 +942,12 @@ def _dtype_to_stata_type(dtype): See TYPE_MAP and comments for an explanation. This is also explained in the dta spec. 1 - 244 are strings of this length - 251 - chr(251) - for int8 and int16, byte - 252 - chr(252) - for int32, int - 253 - chr(253) - for int64, long - 254 - chr(254) - for float32, float - 255 - chr(255) - double, double + Pandas Stata + 251 - chr(251) - for int8 byte + 252 - chr(252) - for int16 int + 253 - chr(253) - for int32 long + 254 - chr(254) - for float32 float + 255 - chr(255) - for double double If there are dates to convert, then dtype will already have the correct type inserted. @@ -878,8 +966,10 @@ def _dtype_to_stata_type(dtype): elif dtype == np.int64: return chr(253) elif dtype == np.int32: + return chr(253) + elif dtype == np.int16: return chr(252) - elif dtype == np.int8 or dtype == np.int16: + elif dtype == np.int8: return chr(251) else: # pragma : no cover raise ValueError("Data type %s not currently understood. 
" @@ -970,7 +1060,7 @@ def __init__(self, fname, data, convert_dates=None, write_index=True, self._file = _open_file_binary_write( fname, self._encoding or self._default_encoding ) - self.type_converters = {253: np.long, 252: int} + self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8} def _write(self, to_write): """ @@ -990,11 +1080,14 @@ def __init__(self, data): self.data = data def __iter__(self): - for i, row in data.iterrows(): - yield row + for row in data.itertuples(): + # First element is index, so remove + yield row[1:] if self._write_index: data = data.reset_index() + # Check columns for compatbaility with stata + data = _cast_to_stata_types(data) self.datarows = DataFrameRowIter(data) self.nobs, self.nvar = data.shape self.data = data @@ -1181,7 +1274,7 @@ def _write_data_dates(self): self._write(var) else: if isnull(var): # this only matters for floats - var = MISSING_VALUES[typ] + var = MISSING_VALUES[TYPE_MAP[typ]] self._file.write(struct.pack(byteorder+TYPE_MAP[typ], var)) def _null_terminate(self, s, as_string=False): diff --git a/pandas/io/tests/data/stata1.dta b/pandas/io/tests/data/stata1_114.dta similarity index 100% rename from pandas/io/tests/data/stata1.dta rename to pandas/io/tests/data/stata1_114.dta diff --git a/pandas/io/tests/data/stata1_v13.dta b/pandas/io/tests/data/stata1_117.dta similarity index 100% rename from pandas/io/tests/data/stata1_v13.dta rename to pandas/io/tests/data/stata1_117.dta diff --git a/pandas/io/tests/data/stata2_113.dta b/pandas/io/tests/data/stata2_113.dta new file mode 100644 index 0000000000000000000000000000000000000000..09c90dca943d1cdf84bb15b884958ed941705b58 GIT binary patch literal 1490 zcmXS9Vr1Z8U}k`T316L{EFh&|sNkDeq+n!VX!4K|A}j=z-FrN=9K=j;Hgp0>9q{gU z0MPMPq+h|%*viO+;s5_XfByXb{U;@{B()?nH#I&PtQwPmD@e*r$EpaV z08<;1NO@{%c1~qHZgsi&c_kV6R1}sb7L}wH;Z~B7n3EP?nVN`G2|hU_ry?^|4Ndh7 z(jftg!cJj8h?kTzz}1xGBC!jR*cnLdN;n(Yt`W`dh~o$QzhU94H|?cPS?-!#5lmJo 
zx7b0VGeolwg6L(F%?^rvU;g*V|99V*KmtJUpdLm)tp~x>)TdyPQ3FTAkc?ylNlY3y M-vSdM162|u07r08-T(jq literal 0 HcmV?d00001 diff --git a/pandas/io/tests/data/stata2.dta b/pandas/io/tests/data/stata2_114.dta similarity index 100% rename from pandas/io/tests/data/stata2.dta rename to pandas/io/tests/data/stata2_114.dta diff --git a/pandas/io/tests/data/stata2_115.dta b/pandas/io/tests/data/stata2_115.dta new file mode 100644 index 0000000000000000000000000000000000000000..ad7dda3fdc4b38fe4a34a2615414ec13bae8389e GIT binary patch literal 1786 zcmXSBVr1Z8U}iuDMnDb|5QAt4a5jd}6N*2xfLI_|WB}5SP|d)g29h+gP;g64QZOjQuN@9CM%+UJ-LA3mu6 zq+-;+Tjd>^XfM?7`7eBD5AVVKIi}&=`~Uh2pFVkk_Tj(#PuSwg3l>fPhtDjSK6%cJ zX`z3IXBhPZQ|HfH__v=>KQMFNlD~al`-U0wmn{0*PpIE8Ep+DJzRyhkr#DsDT+hD; zYws+YGIP#?$&04{?Z+0+n?Cm+vcNE*^Z)T}hM74}n)A0`ufFkTqW(S&{|IK7?t}Vx z%=qgw|I2Id|N8g;2VVal!ua>?|GzTu=Co0DcupmGwjf?OZS_b{ZagA3Rrk$Yb`#l*fm3_Z0qTT>Dsk`9XIo9lb_g%>71Km-bYU>Yosfdk9EV=HD;;H;4byMr)RT zJLP{D2Q@AGSY5(&7pnYUlN=$pBjq`647fMl`VvA5rl0&@0s3Da0%+mcZ!My#7J89q0)eAviZ<4pU&#~zVoC^;Ll{%jlDv+<9&e%eL-qR)tcp` zCXmb!UXs%I3%dS-O;t<2c2uG%oz?sF@T5wITV!#|?E=Tu2;AwHM3QRF9A_(ZigMK; zB?tP$v1Yy;*A-R_6GJ_z?0u6k9@~U9ho0o)P>daD&t>9zRMsv({otL1Lx$zMW znFZ;rvD&3Cm7;Hw3%6Gb8wb`2T*XT=I=9X&J2piuEf9VwA3kh=;D1{-As*9yR_EQ{ zgGx(ok}57r2z>llVC-L$+sjpmo#2q!29WFgi$4ayN?AdspC`Yc)jO=>s9bu7eAOvl zXmELox84z#peJz5PP2lIm%o=aP^j;fZ*mFx7*-59(g4ccy-Vzzz7ZN8)(gte4;VO4 zV8tGFanP)Ej~;}t3NQu5p8~|_?=+Zl3m*`2;#(oh@=jnn>uK$%r^r}yL6UkPW9Rr6 zKHeHcl4?vDtgd>Y4^?V^BUxScpbH zpOCH*>B8>s8xdxc74@-sj!F0@L?ukt=pm>P3y!(MF{8fJOu4Sl@$I{X;I~#p-rtCJ zih6EV0vwR!ftr;uhM{nJxCxf{1FUY^C_<%kFUas42ZXHSdcvG%sZEt?rIjEjc_U+a ze8je)fU~CBQEdUy_bd^?u~#JQ^GxAkw2sKl8^g*=SZRY;%*fbLWXuN?_26$W-{yOO zH3UWtr*imPV%IcB=$U6tq@6aDVd}x;wnFWw@tR|6KY2Ylm8dXjXE4OZ?BZc%uZdyo0Wk)y z980+hD;n|Ql#rOl5J_x9!+hQ%mdnjNV0GP$ku#wm01r3Rr6*J9mL&A7I4={>P#&Yx#y4K_id@dR~ggx6j9Ox93-C6qGP9O%FakA! 
zfa=Hbhgn_KqeWC1)0*C6YJ@b`&P0mt4J$@i>4Q#Or&-DGFGBj-A;~qS!>s<^>qS(a z+?I~)cSDG(b|OlxE38;qWXFCWOqd1{kBWfLfWY?gJbSCm< zcZA=_A}4u)@EbMA&X4`TvFW^z%{W*wMqOG)x%Ta7@vQs8TRS**74t^2M#faROV*Nn z{h}YN?7*-X$EV%-4or%)l3mj`|VHgY4GEIZ{3?GJ6Y(~a0DF7C>Hir3EXW3h8}1VS)`n_;wa9 zTLi~WAeISy{HbA(j!lrh1E%j-ostqsrHkF^eOHEz9odmcM==Sw=79Kv@TWA$K$qdj zUL{1yq3}CapJ|Aq%8eehF5tc3^Fw2E^EAa1@nwF0{CJy&&hcAyTFf zGS&u!8HthfLW7LI{t-A<#%KENG9AGVV}z9^;MI~kp)%J2qT~y;YzN0iBIIj;MDyuV zfN=Wdd2PNNVfBF)Kq`9E-Cj?Gh4%J@d2NQ3_DFIdNK%PdlK8-;BVZ*GR-E}Gtf9q% zXey6#qt*e>g^f>|5-EWI;)Gbn!2dI_lExn|9RVHK!{>I4f@4NUE5vf$mA>Zg2`Rms z68U!lkWQ%nIDG6ISjpo9mX3rz?@~Edkj?6xv({rI^`%u2Iude-CCU{)(6Tce3xY(< z1jq9ERQr!X9tNzgFlDp)-AiJqRNz6|ue2hyc~*c72c!!e3qkma8f4)9k3pWDsEY+w z46*SWsMMn$P32iK-o=V=Cq9B>U6JIekcb3utcV{yJ`mv##p_$JVsv`80aMJ2hFKX% zy|oVH9|4YaLsv?u{!$H6{97Ozd9d1UrW{t+bKXYEtRG0f7-ma`$LWaF=VR2R2l{0; zXgO62zkJFlP5NxAx&orVbbb?+8xNtGHywz$6Z6G+6bRE3(l-yhT8bne$IPNH0_NP{PKA?42=3(~o%DCGb+wi@hZF#nTyIeZMr<7dw8 zxDi&2(s@{^r_^&(FCwF%+5dG@%Bq~27bX_ASlHU5IAUB^3E7T&T zMWZVlH1enX^eHSlnf-6SEj-E^TmllQoJXnN)q&_gsz)r7QT+gm)MGsqMvwWQ%`aX! 
z7IBTo`fw0djJZd4Q00n+CTwj**0n}mib7yzl!be~9<}%dj#cnu>&Bu(oy|TrenrPv zUC>Y4(f5RQ{ia^%JL07v{W1ww#(rLjo2>KYH7y zPIx%>xuA@h1;}`de0?K6b_B85ncKV>hwz_3`d$&n{ZbIiXj+-lmN=4E0(Tdy12YY!+=h>(BmC!lz4b(p|AKn3f)&G7`&}3oW9ayY zZHeucdVy;ahFE5xUv{ASyEH35r%u%3s_&Hyi1Gc9-LNu_hQDznoxEP5lncP*StzB9 zjBNqO-tZOfpFsx>!^-T!JXYt}F%^SvB0U)AOnRE%2+ZpRa4Zy7cB1#UB4bba_J==1 zzu1DwQAK&I-gfdnD$o3kHfr35)Q|r|Q1TZeV+&Er6b!pvh~+Nd*GHAX{g`Hp3-VY) z@T3eXul}6Aj~qzCl=p%%Cmb1Df+X*OW5H5 z_yOUU@<;wqAgHP4u5IPA?wli1lVE{3f6n~F?nMoaM z1Bh)u119oXSc$<7lMRm9qpkA!u%9O($wo7j-*kf2B{n-sIm2u$@Bt)dvVm|PB4K4C z>XM5VYpp??Lqjl<%;^2zMJI3;8*-E~w`NhFSpg(okNILB2`gK`v3v|Z2OQ&$@(y;B zQKL@*Sp$e6bHgzz^_fe*n(t4pUo;ZwM{FBXf<=-FK*gS#6+2}zWY8C}WEGxZjrC1( zapIdxhvyJte;$xKQLvH>9V^%7=V) z-c+=k6%?kbAfGinjm)9Us#$bh$8IERiw%)aM#1WSXmT00qwWa5o-dv?4gJzt)o0&) z)|gp+j4~;6sl&zoWVCe?qO4jENG3|Dz_B@qrIx?>WE!eJSk*E9Bw{&q0#j@rtvD-^ zjrW@p#RGc)cLWMkiGG=+L0o4{NBE7A@yv_d2_i z;!`b&axfN-6r+%6$SMdeNI?j%z~{vJn|OV`2CQsJIT0j^=xp zzd%_#KnHT*m{GqBjukASQ777yw2JnGJG~LHoCcHYK>qjWeOL4R(KF%LsKVM&w&p@s z=Tel1lSMeS@pC3Ok2w+f_SfkBvw+lVS;7Wxep)gUj?LoLjl7W6zopsmg;S zJ7DLpjR&LzwEP21ZUT}xn-fRQfUa z`F$JS7(FO+tvFqK=j28 zWt5u~O)I?Skt1F`2=`8gmCF{{n@2HB zzqH2%-mg&pTl|+ej|)9)&q#-n+nISBmB-J>QT7{v2PoS zSmKH}uCMtNVi|XX%28XW*NQnLq;d#hI;6p|Y74iuCm_X|W9ez3py&{k(jQg~^3q?Z z+-f^@ahOj0f430nZ5piHu*iAtfb7?-bQi_aTSppgd)7bgX-=a#(EZgANGC=eWZg;74Y6q^>=8(-N1;Y5JftGhcOK;TWdu_Z{C=1Xp%g`_P z05L4>b`N(-Ni@nZmn?oRK>7D0{2Gg5>jTHmY2(#>!9vW+Xei8kK#cD852=hTHh0BJ za`$JEDCq|Pd7wdRN9Dr*e7@_ug(&6c*!f%VXK+R)zfo>#G7WbOBTJW@a?hant)PeOSMQn-9XINw1 z;om8@CxsSNMG%)W$k?c{St$9bRK_})m6NBPNl)SC|9_K z#1xwdHyeYHtH(lO!Bw;k6ef+|n!5z#FX3^01Bl-F{7cF`-$fkqDsmhnl7y*vtCeKcjO?Jw-|f`8GDUV?&o_pU#1=3 zuv-FR+)7_ld2TA*S@I<@*2BtRY#Yo6Y~!d=*$dR=AfJ=I41~FYeQdSq9IK1{`7M=> z@1+rBB}sVXN0da!dJTi83d4xHimFz|jtoa#Vi8Lae~#6c-grkPF^$@cTS@Nq@+aIS zY#Xcef)qU*vD9mj3(m`-W8Z4zKZj=))$b_tAdSu%6HY2a;LKBX+prS2=K&-r{x_}q ziFOeP-xf!n) z`qJTGavn-)EifVDNT%O+h~+x(wc;xbi;p!Xm#~JBZivNtAFU3bMqF1S7Te>1G{&Li 
zGxF!n)eSyq{t8Vl)H~P`oRNDqV6>*uM&5IYXQG900v4128;sVe;8-LayUUjjSP2~q z!0Uyk^Q^9}_B~~;?5EG3%^(v~0clf!j5R^VW~2JcP|8|7vsj6pc{mn$FF^F}<%}p- zX3*flOUc<#3t{34Q5Rc;9|{={!8bqW=Py`=-uDN`a(&OUh6UaAqLO=%rk)Qc_tFVb zB8!mZrm(Ucz3;0*yy{lL(Fq{T?Sk{H(LT~3DpxY8<=QIZc!oeOPJ@yM6vGAa&{9_Sd$B>3V-C|XH`kJkJapx>B0!qK${Nhd!J5Fn&sh!W>jzP?@-1cc zCzFh#GUz)x>p>)OvKdOGDW?&Oy})c*hq{=xjHPy7gHpcMp7NBkhWPI*BlR3Vi7*CmGb9pYtYDM48E=>OIc&fW~?Zm%%SN&#*%f7h7cv|9O}{n z=a`Ln@Grnh1;6{{8pQI_gw8bm!0HZ7vKE!1qtvE)GnqfSKT&=?i|V(;g~ZqB{a_7p zHgGM{(E)^61BiZrkB!K5$fZG_$s{>|!*W%MQaT9C<#_Z8}uD7LYntSH;0zoyR^DzU zDo@W*ckw&2^ROF{T3<&gI|+=(I`qqbGzgv{VgENnU>i&d`o+49DF1$*9-sL=i3sXV zI2JoiO(#4ZjRYxlidk(@CK}O79=qW za*+aOQuzEue?rs^R$&cU{o0F+heCJHJwtLXyWyK}qAt#GEDEJmA3!AYc8}G|qCT4R zDLB7O?I0@aFVXx>CFK5AEQ$elkg+a;5)h4rM0G5IcMgmJWG5cjE#oh;y6m5wL^-LP z&TUgpoci@7a@c)jtc$>{jn=HF_ZJV-VxVJheNVsK1c<)FuuiCcC4HE1k(6A(23v&H zf$IuQj>g>o4;+i*bL}=@yKsUiIRj$oG1XaAtglgV%@yJm+?z0iusYNLQlut19^r4| zr>xolD?gdkN1hj1Uo* z{3TX*EVa8Rt@)XjnC_6JHK@zhr?Ao;mn$)l@i>$+hCl8d3y3#1r2^9>R^RVw50U$D zi*8`A-$Q4$YtN9!5#0D`w@Kzcla5Y z+)I#4W1wS+NOCma;1~xckHy-P11mc=Ad9As=2pHLBpi5!ZDDNjt7 z@$;yMeMM1yGNK3xPsxU>XzWZ+VLed_?Vh^ksMtFFN@*mG> z@a}tL<9`MaW{VMy;Rj?zn=vcaM$sOHjhs* zY{HFV34W{4%=bsukn8CsGAZxq`^$GoivyVZR~m^N=Y<_60kK@vtawk`gp+u0m2a?O z3>e}iDv=FTzwZvY`-dNqdSG@Y4G`oBNwAWnS-JcDCT!zWeK1s5!Rq{8d5O}Gf6%q5 zcSx))7R9|RtPF&}CWA1Wv4iFF^YxqI*gg|7SWv<0D`pK8xi*YAw*D?wxKV^T(ij;7 zq#*@^Z!sJ@$2(2jjEu#qzajv{aP6zXqI63qT87pVv1SyJzik4J4Z?ic4TT9csf%KG z-e%OL2gYk995Yhap(2y07wz7>AYSu6A@WRHBzdsF346dXNsA?b-J)qSe&SGch1IQC z;Uh{{4PpdePa?XFB{GZIDGh;+rD;~wy7-UU0xPXy<@>@btX{106_q%n=pW8dm!NTk znPVr)j`(fDrhS+%G91h1Gxu%56q|wDo_4Te=;F(Z++i#6NLM{gZTlIKCt(nl;qhkY z3|P60j2+|`TB|qM)4{hluwrz6XvU_*it~Ro((clDB7fc-5EFhElYyPz7EC_KUk~02 z!dR&*oKGdITXanjnW5HVX(~(YMohrdH`R(6j|lgp_ucT#`}w*1wql2g#1KDTSjpn4g)w2j#Ja3eY} zDHsb$b5!3Ca&Z9CHyOR3!pBe929C8b+f-#2RkFsietsfz$3|?kwh2wUJCP`ZTOi4P z0=M-b$iE7)B=ecaw!uo7FY3f!#oWI%3_4~j#>6(IIh`jH?v(=^^GA{oV(@JOdlMk4 z+qLTB0fgx)tG^yI96L-?@x>=i=@G9iCy 
zi;9h%=svLtb!H|J=51?mY#7#_14weR2DyJ>J7W2vP(7MnWsS#L1c=g-X5!*WEUiw0 zmFI26XInlJxaKQD3STLQ)^V^BGT}B~8eS~?DJIOn+D3)Ql8dr3U)h(%pmDVjp_da@D z-p$4@RUE}3A3lX6hcJ;#z~p4Uv}Om6qWwY3U4_?Jz5TbNuqU?=w+8-!(&5bF<|Mi# zPedbUX=#R{*pdSk*c zk=fQvJkshYeyO&SFiV_7`($O7+dB%^XGpy|N8fH|Mk2EuXA28TJxX&{r7*p&8)v^VWGZmGO7Q}YSv%<{J-D# zT@8F!&#ogUFX%eJ!*gI)5AUG^y_mOe-u*BCMNXf*;NO7%_;1AG$qN=u|C`S&m_B*V zjA`Nj5v=~v{DG18z zkJVjV>Q3cXKas}NO@jZf=Yo>(h$t1m8JSf!5Vl1Sw*9N%051|!GW2u?AsDx~bdKseUYm*cv_if&@KCzZW#5XNJhux{)_L4Ng|FifLSspAJJiKqwbC1pE0-$9txLd*kC9%nxOc-&C% z&B9ksapALAU1^Y!Dus_o_oz%^_ji_r*MWmh9lwq2{@&%Qqd8$2ve=(`x?|Fo2?#XwbPn?M7icVnsld7;No>e-qnUS z^?3UjTKKN$|O z8vW@{s8V4?6SrIu&M@z=2X#R8-#{+9p^>9_r6QGI{hblLzsF!E%pq1AQXC4ou%^Qj zuL!P7-w8}+M_RMzEy&*;+O-(5B=MU^n$SDl%yGerE|E;7OpXn$9`cJ2AA*e4ccC2j z2dwn^mz4y5*n1PKOhvz#U`6j;@)=dq?Pw>%4WWp&BC>NgIQAY^T;P8^>av!98z-pb z@xOK4kj-k&ESZP8w5G$Ho(Ru+HYZX>cbeZtV^S(y@y%Pou}Hq?y@2X}1A+a%D4W&3 zHO{9@pElGx@1amsV@tT^J&+DS0{fy9x53InJ|vN#E|z%RozG@p721p zn`Tdx&b_E4=}a8w3c1^-Lf#FefT!}Oo@e@Iv-;8J7C@BR(v`~_g!8xB5>D3#gwdI# zI#;ka4N0EE&nl!)n9itv0)Lp*)I3;3l`-w;U8YXRaP3N@*uJo$hn0Tl#C58b!hs^B zuOpINXE@Ai@4j3_<;flBr~%i7m|91oG`PZwg-Lc80K!D55b>x8_zVbaKi|WwZt%k; zRCaNo-qE)OQ&?9bZ+1ucmL@sH3xwaOLUw-W2aZkWeXPg9iazGzGRk%8NK0qk6JFcG zu`8H2k`*#$mb+9n$=5FU!O9K{iy^)_tj6o{7Z6xS>aeIm@VwfMaCa~XQkucaP!MLX z8ot5HU!66HC#wxPsEgBTstoK($LN{_|2SvDTpb8Va}%@57az+3lf(GJJb$n{pLYpK zEXrYZ-L-2dGo(8$w`>$bdUqwtKre8t1#YlRP|FMz;uA0eso0E+^@0`s_$`3E>`seT zJ{B_jIbnhgLin~OE>{G{P9T;EeB#Lwkd94|z5|ADS&fnwO{EJx={;A5j2+d5NJlXV zxK@DpgYYL+$Y7TMWUm^cWMBL(tIgKMQ000r+8Fpw@cE$=k-i)PNE<*#fQlDYNYJ4G zWb81wwgy&ombEdIJJ6e2x7U(k_W&6+9FVpkOaMZ@3I8QNXygY-_(3TDB|!A;hsID@ z<3daOHVJY;dm?4~AY<)8m{Ax>&s50xYaf7P6@0eeF2fP*FnU;N4qh#36sq&>Axgeb z%Z_kt6hgiVNGzWz1q!F0pHt_{5mp;y0;H-h-R<>ASZHTQn3qOa>4YQ)fh5(4C4~=a zF%njyVa1t0!s=Qth^2C{8?_32Dr|hzf=EdO5J$u^2L7Lhl??uP`AF!%9zMTI3>?!t zSRj^buJk2$S4ivIg2=xMfH2E=3WP|IS3pT@_`{s5?)V$qdvRr06(^f4?t8vSp*E=zBuDf7K-PANgF6ewhR-V?fId_~slybjHw$;}Cup8hNts 
zF;*M*J_(DWAH8MWDAbRADkx)S0W#hsU)zX}9YHL%#`dqqA^b;>zGub9SY6tN6e=YJ z(BlpoqMZ$nxy(Sne1htK2^~9!>RTDbr17XrmAdmEgN`M9MY$7!v}cB%*tC8vFso)F zmPu%_O<->(s;@VW?l1w3e2GVA7U#2?QEk70CFFAcGbrqyX3hy!^caCfjeFw;=VZTMIw!hgy)Sxp4_&zlcc zu%g>)w+q8!3>{zJf!J(m61e6Oh-C))We2LiOSSTI`b0IZraq~F=-&<74J+elA?hN(%bk-U|ueOW8tu} z6TQC`8GFokI{XRx#Rg1{DJfvJHk0>JdFCh7vROaUH2x1kDO`+(kH02+DW3dFJ;kbRK8vnt0NmwyV1-Zm4a_&BTac>qYCDb#e+i+J|= zL*R6)kg=7Z<$lP;4+y`UKk|ne@dCNg&i)G}%{!e0v=I|Nax zQPuL)_E0$4#po3CLCJAeCr!ykBhR3XA^xP$MoX0SYY=`cS}aE$d}d7!qm$rdM`O(% z1`vH&>38Utnbe^nkk|~+VIr@El{oA$x!{-`+NzL`_<0hNtT#gW4JTMla?7KX)6K>L zA4uXR>j?Kg8df%nuh8RHR~Av zJz_a@0#j@rtvVx;jrUp*#RGc)cLWMkjeeP=LR@D|NBGT<eH+59HA4y^NhGLS?{;-3r6=1E5kwHxSE*uwr9$+B*ZWIHO;B8H-qLvQ<9iqQY@W z@LH>8>eOKfB z(KF##aB)Mhjj@>3xRey&WD!ZN{hY~-V~#|=^(A`$3?NNvmau^vAD7L9W3zbkMqbQn zpEZ1slf?>pvBra>+GFQ$NCcz|wEP21ZVr+-8j_mx7`9rIT~4m`98H)SC}MG zMj3?nJ*-p_I)LixwHH=Pxdui@LM|d$xQr%owNj0DN0MVAtx{Tvn0$qD`Hi>igB=X)}uu_AJwIS)@ch%kxTx#Akd#j}^ zWp(`vYp`7;(lDQyB&N+^K+<648kE01Y3lPUW@R1U*m*uO=7W=X46NwKPr63AvP4?; z$pVt48BDkjF*_@2(J!5Ff%hwv|0X|X&-{PYh2c-K8rvncRC>LM23(IIjMX3_`KH6M zYY4w9iFr_|LhL$(BbI~`j_YqciCD&6r*g~|>a}7H39BAPn9dn+tk%SB?F~q&>R4t* zI4C*{rSyjtoxJoHD!1KEUF@e5|KCkSdYu6)*G+PPJ0Sa2D?P;sbZB=F#sd)jjSV*_ z_hvioG=DmI@|%G$_WKaaFKFaJ1Xr8L*fl=yTm%N+2DDg$;S_fMb2q6{vYlo{%pn(B z8VT1a6IO1SBLvTL)FXX$wS%^}8ik-hTe;Q|G@*Cx*rqW1< z2(omEktmlAA(r1vxI@9caz2uLly8Vwgo(BmgsC)~#t!565I4PF)B2_*B!7j0NDB_b z%0pCNz+LP`IF`eQSuDnU*@)dz4=cLNRgF|Ry_4E*Tt>26`Vyuh2T6Wp;;IDn%RLqH z*|&?;bD8-ldYaWY9{!zjd(vo8O%!oCjf_n@0>_@1lsN<(GanQ7^W(jjpkLy!j?UxH zu$ro8PpLF?7v+lAkhoF<;bvnHa!puBOt^}+hQegQ}FCdSkRUsS4Eydv5gi?M6h_2^}7gR~xLo=n7WY#l^>yA9c@*0DW zAY(64%Kd!rR?F1m8+J=T^jqmmD$h-)JIg*N`X*Qzifw~=k8K<^s(6OF9OUydmw_;s zv5&1boMkofKfk8Z@x3&PtRzVf{D_haS+8U8RAB@$ucBrvV@E}zF7b#Zgg?t_%dfwo zl9)lQ$E_rH`}h;?BDRfHT0x2pKrBrvl~`+I3bi!gkA1d&YnRCZti84WAP)qY7${~VBHgZUz_ z3_E|9*HoUKL2Vy~lKwOROfEnvtpp})9Le_k7O`C8y;gjIVez5LEw4 zDmWGm$L{dugH}Sv0`YpG;T)@JYMR;MbSXI#ZX!%VG3sK2@WUbFVff~!{QL#0(EI-2SibK$R=1$1 zR#fs2()4qYb>tgOMT9I6WJ 
zyS&wqz5x&=3*T~9`+bUDREB&@XWfq`j@HA8H02axu@jh0>rfY?nz8h*Yf#FU>QkO_ zRyRM;LX>;v(uJ3z$vWGiL{?5g`dZ9!Qj5 z&!GBka3S#}dOuW!oC#WsbaVz`)&Qa%*L&9l(!eqrfoY&(~u#AEB*nHws@qL2##%6HF;gHXb{E?uNnRa zoc9mfh|;VR)IMV;X>{WV6Rsd*?F6ME5zqR}hxc55cTBWOOY>un^Q@+oMGH~deS+RH zrIDX64?JtnfOG$sSV*!|Na%4h zq#Xp-7Z82N)h$J3UQLMEX1_o}gsX%h>9^}}l&U%_fhKerRP z7p2tU*EG^}$sN_NLhpA5a}$X9yl);B{pZHQiiAft`wSJV*6VF+QR#D+c5d!1Z@)xd$1`;M1(vgD`1$Vzq=9F@D zM4s$Un1k10rK=!`F_4QiIFrT~F8UK<-e470mouP~$apAp_uSJY?~)t7`3CCZ498+n zO7jDVRNnTX`Ld{=Dt!vhFH<{<%KD46a8nt%w-t+G;B92Ao1g^7Vj(d*mc%;;#R0Mt z5A2rl7g$a1&yJ#;Qc35wuOyBGdJ{R~9x~QV;MT^fR?PPo^%-%{vDdz*o^Jv~+j)c& zs$Wg(lP-|5^VncZusU$vp~|lPx?WSAMaAkW71vxQUZH&n zGX$$c9UvvBL`THOJC>iqe{&X_?_RSz3p>Y<&zXJ#o1b2N_R5DdYI#-tmBV zV^bG&T-|*{niN?TuFz#y-0@tmraZx`dfXRIXxjYU!mW(9F@^2jyz~r%5d-7mKuXk`0<@!3>bMzw+23`3@ z6Lic4kDwEvFd68PwS42}X2>AG+wqrJjeCL{8tF>70~qRceNp{*grA3u zt>Tla5-^d&@i66);SzoxRo`Efb~ez)ptr=*WdPv}?+}Zdz}$?((p!L7B6)w$jW{F6 z0&)=$-LjS*BKN3~CWpQxjd_SAk`bjkcQyP7pOdPU(a9S@eor|05ggOIck~n)o8PH* z)Jx)3=T4+7OtqB$f;1=&u^d&ca8Ea?8?1fyJwqj{IUMaFO4=vXaqI(fKE;zTyEURD zc?e2KyjmAkWW_H5xvxG*tz@;DL$K2Dgeu#9BR6Jv5k)jZJOydVmsl01sMUAhZN8AS z!?4%`E4q;$o}&E6QyRMaF4_3aAi`|X!!i7TtYkB0Wj{EU!s{&)k>onxY`<1~C97Y1 z305w>pf=J~GV$^d!dU81T2K5IZ5xKgFL*=?QTj9ynmikjQp1m|CUP7nN?9-I(ySXK zai|Zjp)5rukV7fAq53z`???H!`x8N!>9`(zQ2Zly{tpI6@B0^FHwnpOSSuMlRJO-5vdnuXUZT!o{$17DXNt_-)+J+PR$R7 zimOGaTDzKn9DdSZ&p;!6Mh5 z5yv*&!3q~lm?O=QF+kp?Vel=6V`q8CiJOtJc=N9a0MT9jVyGzH)QG0x4MeO9Ci2(K z!LcEjFT0^I;Rf@f7+J6xb?JriS`Eka)OEPXBx^<6SI>yoypM=H(*{W%DsaLca7% zYkWl|K`;78GSnqx9AW0zin0TKo3Lpg=8Fu+a{26iTQJ3D;I^kDtmwM=@*;QGLOjx4 zOVc}iLgYypgcW$a*)I8|>-e+bdYnJJ%brDY4@GAN91SG@i(xwgSX} z-^FBM=eGfq5AxSSw}LPh<`vGTn$;}2Du~Q*E3rJCrM4p{;OU#$iV=?p_oMgS@Xh=A zx%;+ahl$1zKVDqTYI|f8QI2mWelggBR<4;qm~9xhxhB<0%^n`jy9W>J&5#DP*fv8o ztNYqb6s1^evHxL9Iyfa13rQPT2;IwKiJUC_`H#$$kR2^&rT<3bCZ}*~hlQN`)`##9zVOzc>OqW+TSMx1f1lCll_4 zJsk5#k`H3=Z326fAgbHd>f-@~;R>t078igWriJ+IqZagt*CfKxc7Oy3Qm+G8NMsda zGi5t;Yz0y=0T5l*jsQ`ywiVqcHmAmaB0k 
zS6R)HT3Bh{8Gh19QOWWIC2ORxeQEB<;&}K;3zr}wA@vEmDSpP9gID> zwYW9t50nmP7B@%HC3PYiIY(7XGn1=)k^!+YX8T=(6`eF7NK`J^iSAck5^`rL;R+nY zq&wp=U-Hd+G6c4SPu!i1x&)M%FIKMNK6%_okvr5%-S<^|3cN>Erg`EN9 zxsUlVNA0U*^HH=kB+=G*jn$+-A0aBgwiGX3JV`=ZM-thglUR0Oj37IdnVF0qO=*mJ zF~vM94LFMxUt_iUq!A*st)+OR?NR(vZ6#rrIEr@5#-jQc5Ze&^{;iF%F*OC!Hx6|P xEV;(&K79pRrrC*w6$PYn%^F$O7+dB%^XGpy|N8fH|Mk2EuXA28TJxX&{r7*p&8)v^VWGZmGO7Q}YSv%<{J-D# zT@8F!&#ogUFX%eJ!*gI)5AUG^y_mOe-u*BCMNXf*;NO7%_;1AG$qN=u|C`S&m_B*V zjA`Nj5v=~v{DG18z zkJVjV>Q3cXKas}NO@jZf=Yo>(h$t1m8JSf!5Vl1Sw*9N%051|!GW2u?AsDx~bdKseUYm*cv_if&@KCzZW#5XNJhux{)_L4Ng|FifLSspAJJiKqwbC1pE0-$9txLd*kC9%nxOc-&C% z&B9ksapALAU1^Y!Dus_o_oz%^_ji_r*MWmh9lwq2{@&%Qqd8$2ve=(`x?|Fo2?#XwbPn?M7icVnsld7;No>e-qnUS z^?3UjTKKN$|O z8vW@{s8V4?6SrIu&M@z=2X#R8-#{+9p^>9_r6QGI{hblLzsF!E%pq1AQXC4ou%^Qj zuL!P7-w8}+M_RMzEy&*;+O-(5B=MU^n$SDl%yGerE|E;7OpXn$9`cJ2AA*e4ccC2j z2dwn^mz4y5*n1PKOhvz#U`6j;@)=dq?Pw>%4WWp&BC>NgIQAY^T;P8^>av!98z-pb z@xOK4kj-k&ESZP8w5G$Ho(Ru+HYZX>cbeZtV^S(y@y%Pou}Hq?y@2X}1A+a%D4W&3 zHO{9@pElGx@1amsV@tT^J&+DS0{fy9x53InJ|vN#E|z%RozG@p721p zn`Tdx&b_E4=}a8w3c1^-Lf#FefT!}Oo@e@Iv-;8J7C@BR(v`~_g!8xB5>D3#gwdI# zI#;ka4N0EE&nl!)n9itv0)Lp*)I3;3l`-w;U8YXRaP3N@*uJo$hn0Tl#C58b!hs^B zuOpINXE@Ai@4j3_<;flBr~%i7m|91oG`PZwg-Lc80K!D55b>x8_zVbaKi|WwZt%k; zRCaNo-qE)OQ&?9bZ+1ucmL@sH3xwaOLUw-W2aZkWeXPg9iazGzGRk%8NK0qk6JFcG zu`8H2k`*#$mb+9n$=5FU!O9K{iy^)_tj6o{7Z6xS>aeIm@VwfMaCa~XQkucaP!MLX z8ot5HU!66HC#wxPsEgBTstoK($LN{_|2SvDTpb8Va}%@57az+3lf(GJJb$n{pLYpK zEXrYZ-L-2dGo(8$w`>$bdUqwtKre8t1#YlRP|FMz;uA0eso0E+^@0`s_$`3E>`seT zJ{B_jIbnhgLin~OE>{G{P9T;EeB#Lwkd94|z5|ADS&fnwO{EJx={;A5j2+d5NJlXV zxK@DpgYYL+$Y7TMWUm^cWMBL(tIgKMQ000r+8Fpw@cE$=k-i)PNE<*#fQlDYNYJ4G zWb81wwgy&ombEdIJJ6e2x7U(k_W&6+9FVpkOaMZ@3I8QNXygY-_(3TDB|!A;hsID@ z<3daOHVJY;dm?4~AY<)8m{Ax>&s50xYaf7P6@0eeF2fP*FnU;N4qh#36sq&>Axgeb z%Z_kt6hgiVNGzWz1q!F0pHt_{5mp;y0;H-h-R<>ASZHTQn3qOa>4YQ)fh5(4C4~=a zF%njyVa1t0!s=Qth^2C{8?_32Dr|hzf=EdO5J$u^2L7Lhl??uP`AF!%9zMTI3>?!t 
zSRj^buJk2$S4ivIg2=xMfH2E=3WP|IS3pT@_`{s5?)V$qdvRr06(^f4?t8vSp*E=zBuDf7K-PANgF6ewhR-V?fId_~slybjHw$;}Cup8hNts zF;*M*J_(DWAH8MWDAbRADkx)S0W#hsU)zX}9YHL%#`dqqA^b;>zGub9SY6tN6e=YJ z(BlpoqMZ$nxy(Sne1htK2^~9!>RTDbr17XrmAdmEgN`M9MY$7!v}cB%*tC8vFso)F zmPu%_O<->(s;@VW?l1w3e2GVA7U#2?QEk70CFFAcGbrqyX3hy!^caCfjeFw;=VZTMIw!hgy)Sxp4_&zlcc zu%g>)w+q8!3>{zJf!J(m61e6Oh-C))We2LiOSSTI`b0IZraq~F=-&<74J+elA?hN(%bk-U|ueOW8tu} z6TQC`8GFokI{XRx#Rg1{DJfvJHk0>JdFCh7vROaUH2x1kDO`+(kH02+DW3dFJ;kbRK8vnt0NmwyV1-Zm4a_&BTac>qYCDb#e+i+J|= zL*R6)kg=7Z<$lP;4+y`UKk|ne@dCNg&i)G}%{!e0v=I|Nax zQPuL)_E0$4#po3CLCJAeCr!ykBhR3XA^xP$MoX0SYY=`cS}aE$d}d7!qm$rdM`O(% z1`vH&>38Utnbe^nkk|~+VIr@El{oA$x!{-`+NzL`_<0hNtT#gW4JTMla?7KX)6K>L zA4uXR>j?Kg8df%nuh8RHR~Av zJz_a@0#j@rtvVx;jrUp*#RGc)cLWMkjeeP=LR@D|NBGT<eH+59HA4y^NhGLS?{;-3r6=1E5kwHxSE*uwr9$+B*ZWIHO;B8H-qLvQ<9iqQY@W z@LH>8>eOKfB z(KF##aB)Mhjj@>3xRey&WD!ZN{hY~-V~#|=^(A`$3?NNvmau^vAD7L9W3zbkMqbQn zpEZ1slf?>pvBra>+GFQ$NCcz|wEP21ZVr+-8j_mx7`9rIT~4m`98H)SC}MG zMj3?nJ*-p_I)LixwHH=Pxdui@LM|d$xQr%owNj0DN0MVAtx{Tvn0$qD`Hi>igB=X)}uu_AJwIS)@ch%kxTx#Akd#j}^ zWp(`vYp`7;(lDQyB&N+^K+<648kE01Y3lPUW@R1U*m*uO=7W=X46NwKPr63AvP4?; z$pVt48BDkjF*_@2(J!5Ff%hwv|0X|X&-{PYh2c-K8rvncRC>LM23(IIjMX3_`KH6M zYY4w9iFr_|LhL$(BbI~`j_YqciCD&6r*g~|>a}7H39BAPn9dn+tk%SB?F~q&>R4t* zI4C*{rSyjtoxJoHD!1KEUF@e5|KCkSdYu6)*G+PPJ0Sa2D?P;sbZB=F#sd)jjSV*_ z_hvioG=DmI@|%G$_WKaaFKFaJ1Xr8L*fl=yTm%N+2DDg$;S_fMb2q6{vYlo{%pn(B z8VT1a6IO1SBLvTL)FXX$wS%^}8ik-hTe;Q|G@*Cx*rqW1< z2(omEktmlAA(r1vxI@9caz2uLly8Vwgo(BmgsC)~#t!565I4PF)B2_*B!7j0NDB_b z%0pCNz+LP`IF`eQSuDnU*@)dz4=cLNRgF|Ry_4E*Tt>26`Vyuh2T6Wp;;IDn%RLqH z*|&?;bD8-ldYaWY9{!zjd(vo8O%!oCjf_n@0>_@1lsN<(GanQ7^W(jjpkLy!j?UxH zu$ro8PpLF?7v+lAkhoF<;bvnHa!puBOt^}+hQegQ}FCdSkRUsS4Eydv5gi?M6h_2^}7gR~xLo=n7WY#l^>yA9c@*0DW zAY(64%Kd!rR?F1m8+J=T^jqmmD$h-)JIg*N`X*Qzifw~=k8K<^s(6OF9OUydmw_;s zv5&1boMkofKfk8Z@x3&PtRzVf{D_haS+8U8RAB@$ucBrvV@E}zF7b#Zgg?t_%dfwo zl9)lQ$E_rH`}h;?BDRfHT0x2pKrBrvl~`+I3bi!gkA1d&YnRCZti84WAP)qY7${~VBHgZUz_ 
z3_E|9*HoUKL2Vy~lKwOROfEnvtpp})9Le_k7O`C8y;gjIVez5LEw4 zDmWGm$L{dugH}Sv0`YpG;T)@JYMR;MbSXI#ZX!%VG3sK2@WUbFVff~!{QL#0(EI-2SibK$R=1$1 zR#fs2()4qYb>tgOMT9I6WJ zyS&wqz5x&=3*T~9`+bUDREB&@XWfq`j@HA8H02axu@jh0>rfY?nz8h*Yf#FU>QkO_ zRyRM;LX>;v(uJ3z$vWGiL{?5g`dZ9!Qj5 z&!GBka3S#}dOuW!oC#WsbaVz`)&Qa%*L&9l(!eqrfoY&(~u#AEB*nHws@qL2##%6HF;gHXb{E?uNnRa zoc9mfh|;VR)IMV;X>{WV6Rsd*?F6ME5zqR}hxc55cTBWOOY>un^Q@+oMGH~deS+RH zrIDX64?JtnfOG$sSV*!|Na%4h zq#Xp-7Z82N)h$J3UQLMEX1_o}gsX%h>9^}}l&U%_fhKerRP z7p2tU*EG^}$sN_NLhpA5a}$X9yl);B{pZHQiiAft`wSJV*6VF+QR#D+c5d!1Z@)xd$1`;M1(vgD`1$Vzq=9F@D zM4s$Un1k10rK=!`F_4QiIFrT~F8UK<-e470mouP~$apAp_uSJY?~)t7`3CCZ498+n zO7jDVRNnTX`Ld{=Dt!vhFH<{<%KD46a8nt%w-t+G;B92Ao1g^7Vj(d*mc%;;#R0Mt z5A2rl7g$a1&yJ#;Qc35wuOyBGdJ{R~9x~QV;MT^fR?PPo^%-%{vDdz*o^Jv~+j)c& zs$Wg(lP-|5^VncZusU$vp~|lPx?WSAMaAkW71vxQUZH&n zGX$$c9UvvBL`THOJC>iqe{&X_?_RSz3p>Y<&zXJ#o1b2N_R5DdYI#-tmBV zV^bG&T-|*{niN?TuFz#y-0@tmraZx`dfXRIXxjYU!mW(9F@^2jyz~r%5d-7mKuXk`0<@!3>bMzw+23`3@ z6Lic4kDwEvFd68PwS42}X2>AG+wqrJjeCL{8tF>70~qRceNp{*grA3u zt>Tla5-^d&@i66);SzoxRo`Efb~ez)ptr=*WdPv}?+}Zdz}$?((p!L7B6)w$jW{F6 z0&)=$-LjS*BKN3~CWpQxjd_SAk`bjkcQyP7pOdPU(a9S@eor|05ggOIck~n)o8PH* z)Jx)3=T4+7OtqB$f;1=&u^d&ca8Ea?8?1fyJwqj{IUMaFO4=vXaqI(fKE;zTyEURD zc?e2KyjmAkWW_H5xvxG*tz@;DL$K2Dgeu#9BR6Jv5k)jZJOydVmsl01sMUAhZN8AS z!?4%`E4q;$o}&E6QyRMaF4_3aAi`|X!!i7TtYkB0Wj{EU!s{&)k>onxY`<1~C97Y1 z305w>pf=J~GV$^d!dU81T2K5IZ5xKgFL*=?QTj9ynmikjQp1m|CUP7nN?9-I(ySXK zai|Zjp)5rukV7fAq53z`???H!`x8N!>9`(zQ2Zly{tpI6@B0^FHwnpOSSuMlRJO-5vdnuXUZT!o{$17DXNt_-)+J+PR$R7 zimOGaTDzKn9DdSZ&p;!6Mh5 z5yv*&!3q~lm?O=QF+kp?Vel=6V`q8CiJOtJc=N9a0MT9jVyGzH)QG0x4MeO9Ci2(K z!LcEjFT0^I;Rf@f7+J6xb?JriS`Eka)OEPXBx^<6SI>yoypM=H(*{W%DsaLca7% zYkWl|K`;78GSnqx9AW0zin0TKo3Lpg=8Fu+a{26iTQJ3D;I^kDtmwM=@*;QGLOjx4 zOVc}iLgYypgcW$a*)I8|>-e+bdYnJJ%brDY4@GAN91SG@i(xwgSX} z-^FBM=eGfq5AxSSw}LPh<`vGTn$;}2Du~Q*E3rJCrM4p{;OU#$iV=?p_oMgS@Xh=A zx%;+ahl$1zKVDqTYI|f8QI2mWelggBR<4;qm~9xhxhB<0%^n`jy9W>J&5#DP*fv8o 
ztNYqb6s1^evHxL9Iyfa13rQPT2;IwKiJUC_`H#$$kR2^&rT<3bCZ}*~hlQN`)`##9zVOzc>OqW+TSMx1f1lCll_4 zJsk5#k`H3=Z326fAgbHd>f-@~;R>t078igWriJ+IqZagt*CfKxc7Oy3Qm+G8NMsda zGi5t;Yz0y=0T5l*jsQ`ywiVqcHmAmaB0k zS6R)HT3Bh{8Gh19QOWWIC2ORxeQEB<;&}K;3zr}wA@vEmDSpP9gID> zwYW9t50nmP7B@%HC3PYiIY(7XGn1=)k^!+YX8T=(6`eF7NK`J^iSAck5^`rL;R+nY zq&wp=U-Hd+G6c4SPu!i1x&)M%FIKMNK6%_okvr5%-S<^|3cN>Erg`EN9 zxsUlVNA0U*^HH=kB+=G*jn$+-A0aBgwiGX3JV`=ZM-thglUR0Oj37IdnVF0qO=*mJ zF~vM94LFMxUt_iUq!A*st)+OR?NR(vZ6#rrIEr@5#-jQc5Ze&^{;iF%F*OC!Hx6|P xEV;(&K79pRrrC*w6$PYn%^F1gh5S}Ci5|B_78j2Jvq)2J7Y|904le_eJ26HB3d!_YVxI5b*A>lS9;stmS zBo{nD9w0@U^eLzyi8J5cIxP4HnTnNWc6N8>o7tJ!eY?6s>-3n4G5Y%Xa7DyAe;AOe zsG}%V)pDgaUULd=y88CX>d|;S#ujWa&dzt9SOGYmy(;h5Yc=|Ldvkr;>KXICVWK7* zJ|;I=BgcMIs8XTlE9m%5(mgSpvj^lWIPk_m=bc7ZXPJ)MnXk|?Nt72cAc)0d_f@If zb}GC3o?NNkFXrjsE)CN(#FkGx`m4gP%g#Y?_nUJn9`9XDdoOlEWmB$P+EhPEb<{Lm z!f}AohhU%}ctCUkMxbG^PBa3e%a24~f`^ucaguj9T>!f*;8bjCAP!h%NtcVWOhvMG z$tckM<|{!ySxZCP+DPjN#Q2o!Jxbq%J(olz#oC73QtAyo9b%&$K(T$6QbAO z5A6(#ij|6P0sd2=XF|_~c7$FC?FqdU0tjw~Mnmn;R?<*}Fky>;fSJTh`5>XZlQL5) z>7~@teWpymqm26;6YF*-r&vQ#mUS`u;~mWm7*4l^Fd$C?_5+&^03MJM{~^kYpXuSh GNy#01=}<+U7>Z_rgCGopx>R|U1PdC_^nq07k;j4 z>Q|hpdbQS2)uZD^gZ@q)|4rI`Yp^S?%M~o^H4za6p!FxBF$4i4y-suwK@T@X zfAnWBq9o0`oGox$*5O=iYA749a%qpJYfi!Skwd<$OZg=5quMT7wids2o3~?0vr-2L!p6> zvXX)#gb7;&1fOZbln+zNyBRaJ(|$&6GhlM&k{k~>AvT?EPKg0g)MGJP`i_KyO}`2- W!hplT<^#YExRRw$SvVi&N}d1$bO#*( literal 0 HcmV?d00001 diff --git a/pandas/io/tests/data/stata4_115.dta~1dc157c... Added additional data files for testing alternative Stata file formats b/pandas/io/tests/data/stata4_115.dta~1dc157c... 
Added additional data files for testing alternative Stata file formats new file mode 100644 index 0000000000000000000000000000000000000000..2c68cfb393b9ec5defdbf78959bb9ea2b174a942 GIT binary patch literal 1713 zcmd5+%}yIJ5FVhEpF@S(13hrCDj{(pN}?PJ7rYP9S0LKJB(db|qV*;LaUfR?eTPcC z2#=BrPtg)FpR))@DTx*d2}|FsXZ_81>=}<+U7>Z_rgCGopx>R|U1PdC_^nq07k;j4 z>Q|hpdbQS2)uZD^gZ@q)|4rI`Yp^S?%M~o^H4za6p!FxBF$4i4y-suwK@T@X zfAnWBq9o0`oGox$*5O=iYA749a%qpJYfi!Skwd<$OZg=5quMT7wids2o3~?0vr-2L!p6> zvXX)#gb7;&1fOZbln+zNyBRaJ(|$&6GhlM&k{k~>AvT?EPKg0g)MGJP`i_KyO}`2- W!hplT<^#YExRRw$SvVi&N}d1$bO#*( literal 0 HcmV?d00001 diff --git a/pandas/io/tests/data/stata4_v13.dta b/pandas/io/tests/data/stata4_117.dta similarity index 100% rename from pandas/io/tests/data/stata4_v13.dta rename to pandas/io/tests/data/stata4_117.dta diff --git a/pandas/io/tests/data/stata5.csv b/pandas/io/tests/data/stata5.csv new file mode 100644 index 0000000000000..8eb0c2854a740 --- /dev/null +++ b/pandas/io/tests/data/stata5.csv @@ -0,0 +1,19 @@ +byte_,int_,long_,float_,double_,date_td,string_,string_1 +0,0,0,0,0,,"a","a" +1,1,1,1,1,,"ab","b" +-1,-1,-1,-1,-1,,"abc","c" +100,32740,-2147483647,-1.70100000027769e+38,-2.0000000000000e+307,1970-01-01,"abcdefghijklmnop","d" +-127,-32767,2147483620,1.70100000027769e+38,8.0000000000000e+307,1970-01-02,"abcdefghijklmnopqrstuvwxyz","e" +,0,,,,2014-01-01,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","f" +0,,,,,2114-01-01,"1234567890","1" +,,0,,,2014-12-31,"This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted by Stata. 
This string has 244 characters, so that ir is the maximum length permitted","2" +.a,.a,.a,.a,.a,2012-02-29,"!","A" +100,32740,-2.15e+09,-1.70e+38,-2.0e+307,01jan1970,"abcdefghijklmnop","d" +-127,-32767,2.15e+09,1.70e+38,8.0e+307,02jan1970,"abcdefghijklmnopqrstuvwxyz","e" +,0,,,,01jan2014,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","f" +0,,,,,01jan2114,"1234567890","1" +,,0,,,31dec2014,"This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted","2" +.a,.a,.a,.a,.a,29feb2012,"!","A" +.z,.z,.z,.z,.z,,"&","Z" +,,,0,,,"1.23","!" +,,,,0,,"10jan1970","." diff --git a/pandas/io/tests/data/stata5_113.dta b/pandas/io/tests/data/stata5_113.dta new file mode 100644 index 0000000000000000000000000000000000000000..3615928d558388f77e427bd7444798eeb1654701 GIT binary patch literal 4628 zcmeHJJxmlq6n-lvl4z_f)R-`YNHj5*-Qy6C5QRey{6SH`A1rk5cJFrK{Mt!f&1WyM!s!U4R=z8+;ba2$t*=p1VGA9ieH9hxU+ zK9c>lzH0b6`)kgb`!0nBv3?$8g+Z75mpMn0Hi3 z0{0)818yiwTU!kQ=#i*lcK5{MiDYW&Jh7+z!u)@^2ocp#1IWDOT+o(q5iU9|!rG)w zbdjmZI&)R{X8Ax9V4d%+IFw2=EQM}}%4@=QiyLrtq;tf#F# z$Vwt?s2G`PM1a`PQCuA`<9%@y(-To_pp>4Dn;7dF3Js!mtYX!J{_&*V$h198YowHO zA(0yScsp3mOJTRTn7RK*S`yT$_CKx#I?A~V|(tQD8?jgoP8Z*7z8e_i5`=r~&XUck5UV%saKf~8Y7@iI0d<4GkhnrvQW7lIB zPyGhlvB{!rc+a`mPPiavMZiyF2 zj6aE7{7F3cm*_=L#z;JCLc-Al2NHp+L`_5l)|uHESXx>%CPpZev~S*f-+Mdr?Rz`B zN(l!_!8Txyg_px%4*fi@hKV6*FI)0q1NNtXT;t{fV=1ezv(jn9zR~Az#?4F!3whWg zDA4WVz0l=5;H?Hf=NA@@#cy1oUz08X7tf$MG7{I2zW^Ybh=&Vi2t^YD%8`GFiBuqp zR*`pz0$QXd=Jf+rQzCd3c?SjL-zESRB`c#HKBp^8O}{m_fE*%Nj&*yz>QWvRusHyh zfR&fdr~QL~0wk}xDu-ovs{*o(4&O2X*}hHvj~UPf6v7n>GYkW}w}0)Z#kh&tAwQOn z?bw<-HWTJ@0ya5Zt855qe^EP4W^o=ICB-=iK5W4mIjXZ8FhO?f#^Q-& zb{yMNZey-rE&`}g@Bkq5j&gw|;v!gdTm;2Q9o0p~K4UC;v==pHSKb<+Rct_9h{aYH z;sBSKGz=YaanFHja#h+ep1P!<#+s7Z>t52^{M$$|zW3PeCnmS9XsM`EcM zj7ssaCc~tp#3Gs|i7?O)+cZHFobW$S>JClo)3`@UDHj~-k&ky8%V{g@Wd}3WpKw_Q 
zYHy~}Jf8tb&ijdFc2}H8b|~MpjgWI|>-23R>+jk7FrYRqllPHZpRbemgX9h~?qT2& z(OKSS-6lR$)vTg$i_*aunmjpK6!O=lr(n_-fk3J6toQ`*e)9|AWA_fhK>a5z^}#MOZL(aAv%K8Ye1A- zBMO^(8sYvm0Le@`u_i-2nUOJvcTq9}(Ij4lXHjIlNThi70}~lKS%qg|0Pk%AP{WmE zB-J6mnD8WArDIbJz$&kA(}7^nTty5|S9Qo--wd8irPkq_+E%<}OU6ETz867k+a|u9 z-@EC#Fq5AM6Z}*a*dlbq+S1U`q2TKsHU+Q(;~#`M61yWmb z6aY1z(DdHEWGbD>t{lhrRGgR_SBohAok#&7_m*ivYOF=9vlfnvT-AcTREu6U zRE)l{L5I?UwNOeeE|f7LH*4D#*5an4#b*m#FV>>pFwwx^(D29&mMt`vR=d15ZjPub z0CQLik{8Y{XDqPiVWcVCe7q&n+IHgPsnhLe&UTzT-`UlD;o_ysJ)2jHIGD4Fw0M1` zRSMJw>kic)4vC;-w2;&}77wntSa=-i(sUC#|6iacn-D*^VN8<^IfhijJP6GUM4F7C zZa@q~nhI0$u$~%7!K9i_APr_!Bc&sxDlj?%JCTfJKm5;01Mzulp5%y+X(8Yo`Sc)C z$x`6}znGc*Tv*{?{x(+gH$ZkVOg;I%X&ik_0!RIRaC@;rDHqZE-35utkt`EmI{B-#FKK8$G@WgJiYzr^S z0w0+c`>+=H?5JTprKCj<&JlKAu-!XaNd7=Aag>r4m4CJ1=LV6$lEhv)ErseuVz~;B NEtiWnH$#s2{{X{R*%$x- literal 0 HcmV?d00001 diff --git a/pandas/io/tests/data/stata6.csv b/pandas/io/tests/data/stata6.csv new file mode 100644 index 0000000000000..27a1dc64f530b --- /dev/null +++ b/pandas/io/tests/data/stata6.csv @@ -0,0 +1,6 @@ +byte_,int_,long_,float_,double_,date_td,string_,string_1 +0,0,0,0,0,1960-01-01,"a","a" +1,1,1,1,1,3014-12-31,"ab","b" +-1,-1,-1,-1,-1,2014-12-31,"abc","c" +100,32740,-2147483647,-1.7010000002777e+38,-2.000000000000e+307,1970-01-01,"This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted by Stata. 
This string has 244 characters, so that ir is the maximum length permitted","d" +-127,-32767,2147483620,1.7010000002777e+38,8.000000000000e+307,1970-01-02,"abcdefghijklmnopqrstuvwxyz","e" diff --git a/pandas/io/tests/data/stata6_113.dta b/pandas/io/tests/data/stata6_113.dta new file mode 100644 index 0000000000000000000000000000000000000000..2e4795b167f266cf86afd73033bc1a43ae9afc5e GIT binary patch literal 2752 zcmXS9Vr1Z8U}b=S1A8YsF#?%ZsWs&c3Wf^4iA4%V28Jez%uoeT>cf$VV08@t|NjRG zf%JgDf~Bh5UB>f5omR2Sf48Q;U{rCUh7sjN@lGJzx14FPW2~Zx7f}H%k zbi5{{<>V&<&A<&(@=KF)fUdv^QWAlVEJ?vB&rn=alnHVLP5@Pa+nWpwYD}oQRW0=l z(xE~mvr`xldW}p>is9xIGr-uWwvAxk*QG32e4hvugQaCG(hRWl&i6Ug0i^E1k4|tt zSgOo0Y9KVgM#B&@PP?<<#M!}c3Y^v%p?p{(egNe&pG*g< z{{)kVx&S8M@WTo2CcK#gs%r1!La=!pbMwK~28@Okb&W{o!3~Bo5*Zm8Aa*p^Lxeuq zJE-mPNK9g2NKH%6$jr*l$<50zK-UNJ6J`hx-LOakrWz1nU})G64U7E@ALh6vCMA!! zut-jMQV&cQ4ay(>|1&OGVj<5ELH$kdkV=JiA9OYC8KPJJT!HB#1v!rt78RG2mX%jjRzY(MI4vSl}3&VdPD7`tI;j1s#T!}4{8B2iT%}=H?lmoHk zWTVysf7Y_?{ERX=Kz$eaw=e_%_%szPpJ zMP_bku0l>~UV2G}LP2U#Ze~eIYKlTqr9yB?Vo9Q&!XP&^rIE4W$^ZZLz_6%;gvFhD ihJ+MXV7f>_&Lf3I#U-U>Zp6cc4dZ z9C!c_XOQ{^Jwz%G5S1T++QzKyH8vFvqNh@r%lwaLKh5r#bt%vXy%15qu9Zuw*$z)W|04R8&iJkx7(@COOUY1LLk4bdgCiAp4jA!y&v$g`wg| zPEXXkrT-#9pnShWj}{8-Zfuf>p(4BA7n0ORE}d=n+=2iRw%~opB;WS~-@wUy_>MF$ zHm8F4?Ll}dC_#I|Py&%+*<+Im`cwRSAZMcC9RC^)X9JGHSy!YEvYV#VyASm+{zvSW zjl_QMB3XtVFNe(A|Gpa5>-*w#2d0?EE1{mMCsVE&Q51;2;Y~!w*2LuS-Yii8v?bjz zm)~1<*;%15E_;s5L2eL^ri&tw8yebV&9o~vT3T7>2CitXZE~(r zRI5102`3}?j~AtN#XI)>n!w_7gvGZSEYfK)7c_lGR$ay`A3m;C*X1P%XX&=(^>FWq Fb_Muvm<#{_ literal 0 HcmV?d00001 diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index 1640bee7a9929..ac4b9662fc57e 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -27,22 +27,46 @@ def setUp(self): # Unit test datasets for dta7 - dta9 (old stata formats 104, 105 and 107) can be downloaded from: # http://stata-press.com/data/glmext.html self.dirpath = tm.get_data_path() - self.dta1 = 
os.path.join(self.dirpath, 'stata1.dta') - self.dta2 = os.path.join(self.dirpath, 'stata2.dta') - self.dta3 = os.path.join(self.dirpath, 'stata3.dta') + self.dta1_114 = os.path.join(self.dirpath, 'stata1_114.dta') + self.dta1_117 = os.path.join(self.dirpath, 'stata1_117.dta') + + self.dta2_113 = os.path.join(self.dirpath, 'stata2_113.dta') + self.dta2_114 = os.path.join(self.dirpath, 'stata2_114.dta') + self.dta2_115 = os.path.join(self.dirpath, 'stata2_115.dta') + self.dta2_117 = os.path.join(self.dirpath, 'stata2_117.dta') + + self.dta3_113 = os.path.join(self.dirpath, 'stata3_113.dta') + self.dta3_114 = os.path.join(self.dirpath, 'stata3_114.dta') + self.dta3_115 = os.path.join(self.dirpath, 'stata3_115.dta') + self.dta3_117 = os.path.join(self.dirpath, 'stata3_117.dta') self.csv3 = os.path.join(self.dirpath, 'stata3.csv') - self.dta4 = os.path.join(self.dirpath, 'stata4.dta') + + self.dta4_113 = os.path.join(self.dirpath, 'stata4_113.dta') + self.dta4_114 = os.path.join(self.dirpath, 'stata4_114.dta') + self.dta4_115 = os.path.join(self.dirpath, 'stata4_115.dta') + self.dta4_117 = os.path.join(self.dirpath, 'stata4_117.dta') + self.dta7 = os.path.join(self.dirpath, 'cancer.dta') self.csv7 = os.path.join(self.dirpath, 'cancer.csv') + self.dta8 = os.path.join(self.dirpath, 'tbl19-3.dta') + self.csv8 = os.path.join(self.dirpath, 'tbl19-3.csv') + self.dta9 = os.path.join(self.dirpath, 'lbw.dta') self.csv9 = os.path.join(self.dirpath, 'lbw.csv') + self.dta_encoding = os.path.join(self.dirpath, 'stata1_encoding.dta') - self.dta1_13 = os.path.join(self.dirpath, 'stata1_v13.dta') - self.dta2_13 = os.path.join(self.dirpath, 'stata2_v13.dta') - self.dta3_13 = os.path.join(self.dirpath, 'stata3_v13.dta') - self.dta4_13 = os.path.join(self.dirpath, 'stata4_v13.dta') + + self.csv14 = os.path.join(self.dirpath, 'stata5.csv') + self.dta14_113 = os.path.join(self.dirpath, 'stata5_113.dta') + self.dta14_114 = os.path.join(self.dirpath, 'stata5_114.dta') + self.dta14_115 = 
os.path.join(self.dirpath, 'stata5_115.dta') + + self.csv15 = os.path.join(self.dirpath, 'stata6.csv') + self.dta15_113 = os.path.join(self.dirpath, 'stata6_113.dta') + self.dta15_114 = os.path.join(self.dirpath, 'stata6_114.dta') + self.dta15_115 = os.path.join(self.dirpath, 'stata6_115.dta') def read_dta(self, file): return read_stata(file, convert_dates=True) @@ -51,10 +75,10 @@ def read_csv(self, file): return read_csv(file, parse_dates=True) def test_read_dta1(self): - reader = StataReader(self.dta1) - parsed = reader.data() - reader_13 = StataReader(self.dta1_13) - parsed_13 = reader_13.data() + reader_114 = StataReader(self.dta1_114) + parsed_114 = reader_114.data() + reader_117 = StataReader(self.dta1_117) + parsed_117 = reader_117.data() # Pandas uses np.nan as missing value. # Thus, all columns will be of type float, regardless of their name. expected = DataFrame([(np.nan, np.nan, np.nan, np.nan, np.nan)], @@ -65,8 +89,8 @@ def test_read_dta1(self): # the casting doesn't fail so need to match stata here expected['float_miss'] = expected['float_miss'].astype(np.float32) - tm.assert_frame_equal(parsed, expected) - tm.assert_frame_equal(parsed_13, expected) + tm.assert_frame_equal(parsed_114, expected) + tm.assert_frame_equal(parsed_117, expected) def test_read_dta2(self): if LooseVersion(sys.version) < '2.7': @@ -109,34 +133,48 @@ def test_read_dta2(self): 'monthly_date', 'quarterly_date', 'half_yearly_date', 'yearly_date'] ) + expected['yearly_date'] = expected['yearly_date'].astype('O') with warnings.catch_warnings(record=True) as w: - parsed = self.read_dta(self.dta2) - parsed_13 = self.read_dta(self.dta2_13) + parsed_114 = self.read_dta(self.dta2_114) + parsed_115 = self.read_dta(self.dta2_115) + parsed_117 = self.read_dta(self.dta2_117) + # 113 is buggy due ot limits date format support in Stata + # parsed_113 = self.read_dta(self.dta2_113) + np.testing.assert_equal( len(w), 1) # should get a warning for that format. 
# buggy test because of the NaT comparison on certain platforms - # - #tm.assert_frame_equal(parsed, expected) - #tm.assert_frame_equal(parsed_13, expected) + # Format 113 test fails since it does not support tc and tC formats + # tm.assert_frame_equal(parsed_113, expected) + tm.assert_frame_equal(parsed_114, expected) + tm.assert_frame_equal(parsed_115, expected) + tm.assert_frame_equal(parsed_117, expected) def test_read_dta3(self): - parsed = self.read_dta(self.dta3) - parsed_13 = self.read_dta(self.dta3_13) + parsed_113 = self.read_dta(self.dta3_113) + parsed_114 = self.read_dta(self.dta3_114) + parsed_115 = self.read_dta(self.dta3_115) + parsed_117 = self.read_dta(self.dta3_117) # match stata here expected = self.read_csv(self.csv3) expected = expected.astype(np.float32) - expected['year'] = expected['year'].astype(np.int32) - expected['quarter'] = expected['quarter'].astype(np.int16) + expected['year'] = expected['year'].astype(np.int16) + expected['quarter'] = expected['quarter'].astype(np.int8) - tm.assert_frame_equal(parsed, expected) - tm.assert_frame_equal(parsed_13, expected) + tm.assert_frame_equal(parsed_113, expected) + tm.assert_frame_equal(parsed_114, expected) + tm.assert_frame_equal(parsed_115, expected) + tm.assert_frame_equal(parsed_117, expected) def test_read_dta4(self): - parsed = self.read_dta(self.dta4) - parsed_13 = self.read_dta(self.dta4_13) + parsed_113 = self.read_dta(self.dta4_113) + parsed_114 = self.read_dta(self.dta4_114) + parsed_115 = self.read_dta(self.dta4_115) + parsed_117 = self.read_dta(self.dta4_117) + expected = DataFrame.from_records( [ ["one", "ten", "one", "one", "one"], @@ -153,11 +191,13 @@ def test_read_dta4(self): columns=['fully_labeled', 'fully_labeled2', 'incompletely_labeled', 'labeled_with_missings', 'float_labelled']) - tm.assert_frame_equal(parsed, expected) - tm.assert_frame_equal(parsed_13, expected) + tm.assert_frame_equal(parsed_113, expected) + tm.assert_frame_equal(parsed_114, expected) + 
tm.assert_frame_equal(parsed_115, expected) + tm.assert_frame_equal(parsed_117, expected) def test_read_write_dta5(self): - skip_if_not_little_endian() + # skip_if_not_little_endian() original = DataFrame([(np.nan, np.nan, np.nan, np.nan, np.nan)], columns=['float_miss', 'double_miss', 'byte_miss', @@ -171,10 +211,13 @@ def test_read_write_dta5(self): original) def test_write_dta6(self): - skip_if_not_little_endian() + # skip_if_not_little_endian() original = self.read_csv(self.csv3) original.index.name = 'index' + original.index = original.index.astype(np.int32) + original['year'] = original['year'].astype(np.int32) + original['quarter'] = original['quarter'].astype(np.int32) with tm.ensure_clean() as path: original.to_stata(path, None, False) @@ -201,7 +244,7 @@ def test_read_dta9(self): tm.assert_frame_equal(parsed, expected) def test_read_write_dta10(self): - skip_if_not_little_endian() + # skip_if_not_little_endian() original = DataFrame(data=[["string", "object", 1, 1.1, np.datetime64('2003-12-25')]], @@ -209,6 +252,8 @@ def test_read_write_dta10(self): 'datetime']) original["object"] = Series(original["object"], dtype=object) original.index.name = 'index' + original.index = original.index.astype(np.int32) + original['integer'] = original['integer'].astype(np.int32) with tm.ensure_clean() as path: original.to_stata(path, {'datetime': 'tc'}, False) @@ -238,13 +283,14 @@ def test_encoding(self): self.assert_(isinstance(result, unicode)) def test_read_write_dta11(self): - skip_if_not_little_endian() + # skip_if_not_little_endian() original = DataFrame([(1, 2, 3, 4)], columns=['good', compat.u('b\u00E4d'), '8number', 'astringwithmorethan32characters______']) formatted = DataFrame([(1, 2, 3, 4)], columns=['good', 'b_d', '_8number', 'astringwithmorethan32characters_']) formatted.index.name = 'index' + formatted = formatted.astype(np.int32) with tm.ensure_clean() as path: with warnings.catch_warnings(record=True) as w: @@ -256,13 +302,14 @@ def 
test_read_write_dta11(self): tm.assert_frame_equal(written_and_read_again.set_index('index'), formatted) def test_read_write_dta12(self): - skip_if_not_little_endian() + # skip_if_not_little_endian() original = DataFrame([(1, 2, 3, 4)], columns=['astringwithmorethan32characters_1', 'astringwithmorethan32characters_2', '+', '-']) formatted = DataFrame([(1, 2, 3, 4)], columns=['astringwithmorethan32characters_', '_0astringwithmorethan32character', '_', '_1_']) formatted.index.name = 'index' + formatted = formatted.astype(np.int32) with tm.ensure_clean() as path: with warnings.catch_warnings(record=True) as w: @@ -272,6 +319,64 @@ def test_read_write_dta12(self): written_and_read_again = self.read_dta(path) tm.assert_frame_equal(written_and_read_again.set_index('index'), formatted) + + def test_read_write_dta13(self): + s1 = Series(2**9,dtype=np.int16) + s2 = Series(2**17,dtype=np.int32) + s3 = Series(2**33,dtype=np.int64) + original = DataFrame({'int16':s1,'int32':s2,'int64':s3}) + original.index.name = 'index' + + formatted = original + formatted['int64'] = formatted['int64'].astype(np.float64) + + with tm.ensure_clean() as path: + original.to_stata(path) + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index('index'), + formatted) + + def test_read_write_reread_dta14(self): + expected = self.read_csv(self.csv14) + cols = ['byte_', 'int_', 'long_', 'float_', 'double_'] + for col in cols: + expected[col] = expected[col].convert_objects(convert_numeric=True) + expected['float_'] = expected['float_'].astype(np.float32) + expected['date_td'] = pd.to_datetime(expected['date_td'], coerce=True) + + parsed_113 = self.read_dta(self.dta14_113) + parsed_113.index.name = 'index' + parsed_114 = self.read_dta(self.dta14_114) + parsed_114.index.name = 'index' + parsed_115 = self.read_dta(self.dta14_115) + parsed_115.index.name = 'index' + + tm.assert_frame_equal(parsed_114, parsed_113) + tm.assert_frame_equal(parsed_114, parsed_115) 
+ + with tm.ensure_clean() as path: + parsed_114.to_stata(path, {'date_td': 'td'}, write_index=False) + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index('index'), parsed_114) + + def test_read_write_reread_dta15(self): + expected = self.read_csv(self.csv15) + expected['byte_'] = expected['byte_'].astype(np.int8) + expected['int_'] = expected['int_'].astype(np.int16) + expected['long_'] = expected['long_'].astype(np.int32) + expected['float_'] = expected['float_'].astype(np.float32) + expected['double_'] = expected['double_'].astype(np.float64) + expected['date_td'] = expected['date_td'].apply(datetime.strptime, args=('%Y-%m-%d',)) + + parsed_113 = self.read_dta(self.dta15_113) + parsed_114 = self.read_dta(self.dta15_114) + parsed_115 = self.read_dta(self.dta15_115) + + tm.assert_frame_equal(expected, parsed_114) + tm.assert_frame_equal(parsed_113, parsed_114) + tm.assert_frame_equal(parsed_114, parsed_115) + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], From ef4228bdccc595e8491e354e38c564fb2e2fa5e0 Mon Sep 17 00:00:00 2001 From: immerrr Date: Mon, 3 Mar 2014 20:29:57 +0400 Subject: [PATCH 8/9] BUG/TST: fix several issues with slice bound checking code BUG/TST: fix handling of slice.stop < -len, obj.iloc[:-len(obj)] should be empty BUG/TST: fix exceptions raised by Series.iloc when slice.start > len CLN: remove unused _check_slice_bound function and raise_on_error params --- doc/source/release.rst | 3 +++ pandas/core/frame.py | 5 ---- pandas/core/generic.py | 10 ++++++++ pandas/core/indexing.py | 43 +++-------------------------------- pandas/core/internals.py | 12 +++------- pandas/core/panel.py | 6 ----- pandas/core/series.py | 6 ++--- pandas/sparse/frame.py | 8 ++----- pandas/sparse/panel.py | 2 +- pandas/tests/test_indexing.py | 38 ++++++++++++++++++++++++++++--- 10 files changed, 59 insertions(+), 74 deletions(-) diff --git a/doc/source/release.rst 
b/doc/source/release.rst index f4f376026225e..1819272c59243 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -108,6 +108,9 @@ API Changes - Slicing and advanced/boolean indexing operations on ``Index`` classes will no longer change type of the resulting index (:issue:`6440`). - ``set_index`` no longer converts MultiIndexes to an Index of tuples (:issue:`6459`). +- Slicing with negative start, stop & step values handles corner cases better (:issue:`6531`): + - ``df.iloc[:-len(df)]`` is now empty + - ``df.iloc[len(df)::-1]`` now enumerates all elements in reverse Experimental Features ~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 05f7785a401f8..4c02c8abab353 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1867,11 +1867,6 @@ def eval(self, expr, **kwargs): kwargs['resolvers'] = kwargs.get('resolvers', ()) + resolvers return _eval(expr, **kwargs) - def _slice(self, slobj, axis=0, raise_on_error=False, typ=None): - axis = self._get_block_manager_axis(axis) - new_data = self._data.get_slice( - slobj, axis=axis, raise_on_error=raise_on_error) - return self._constructor(new_data) def _box_item_values(self, key, values): items = self.columns[self.columns.get_loc(key)] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8ca397eda17e9..120e03e9962d8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1079,6 +1079,16 @@ def _clear_item_cache(self, i=None): else: self._item_cache.clear() + def _slice(self, slobj, axis=0, typ=None): + """ + Construct a slice of this container. + + typ parameter is maintained for compatibility with Series slicing. 
+ + """ + axis = self._get_block_manager_axis(axis) + return self._constructor(self._data.get_slice(slobj, axis=axis)) + def _set_item(self, key, value): self._data.set(key, value) self._clear_item_cache() diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c7970309a6558..e3cbddebb6643 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -91,32 +91,8 @@ def _get_label(self, label, axis=0): def _get_loc(self, key, axis=0): return self.obj._ixs(key, axis=axis) - def _slice(self, obj, axis=0, raise_on_error=False, typ=None): - - # make out-of-bounds into bounds of the object - if typ == 'iloc': - ax = self.obj._get_axis(axis) - l = len(ax) - start = obj.start - stop = obj.stop - step = obj.step - if start is not None: - # degenerate to return nothing - if start >= l: - return self._getitem_axis(tuple(),axis=axis) - - # equiv to a null slice - elif start <= -l: - start = None - if stop is not None: - if stop > l: - stop = None - elif stop <= -l: - stop = None - obj = slice(start,stop,step) - - return self.obj._slice(obj, axis=axis, raise_on_error=raise_on_error, - typ=typ) + def _slice(self, obj, axis=0, typ=None): + return self.obj._slice(obj, axis=axis, typ=typ) def __setitem__(self, key, value): @@ -1343,8 +1319,7 @@ def _get_slice_axis(self, slice_obj, axis=0): return obj if isinstance(slice_obj, slice): - return self._slice(slice_obj, axis=axis, raise_on_error=True, - typ='iloc') + return self._slice(slice_obj, axis=axis, typ='iloc') else: return self.obj.take(slice_obj, axis=axis, convert=False) @@ -1647,18 +1622,6 @@ def _need_slice(obj): (obj.step is not None and obj.step != 1)) -def _check_slice_bounds(slobj, values): - l = len(values) - start = slobj.start - if start is not None: - if start < -l or start > l - 1: - raise IndexError("out-of-bounds on slice (start)") - stop = slobj.stop - if stop is not None: - if stop < -l - 1 or stop > l: - raise IndexError("out-of-bounds on slice (end)") - - def _maybe_droplevels(index, 
key): # drop levels original_index = index diff --git a/pandas/core/internals.py b/pandas/core/internals.py index e3deed52f4b3f..39eb03eebdb8c 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -14,8 +14,7 @@ _values_from_object, _is_null_datelike_scalar) from pandas.core.index import (Index, MultiIndex, _ensure_index, _handle_legacy_indexes) -from pandas.core.indexing import (_check_slice_bounds, _maybe_convert_indices, - _length_of_indexer) +from pandas.core.indexing import (_maybe_convert_indices, _length_of_indexer) import pandas.core.common as com from pandas.sparse.array import _maybe_to_sparse, SparseArray import pandas.lib as lib @@ -2669,12 +2668,9 @@ def combine(self, blocks): new_axes[0] = new_items return self.__class__(new_blocks, new_axes, do_integrity_check=False) - def get_slice(self, slobj, axis=0, raise_on_error=False): + def get_slice(self, slobj, axis=0): new_axes = list(self.axes) - if raise_on_error: - _check_slice_bounds(slobj, new_axes[axis]) - new_axes[axis] = new_axes[axis][slobj] if axis == 0: @@ -3739,9 +3735,7 @@ def _delete_from_block(self, i, item): ) self._values = self._block.values - def get_slice(self, slobj, raise_on_error=False): - if raise_on_error: - _check_slice_bounds(slobj, self.index) + def get_slice(self, slobj): return self.__class__(self._block._slice(slobj), self.index[slobj], fastpath=True) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index eba526f574375..2bf50bb1bf142 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -539,12 +539,6 @@ def _box_item_values(self, key, values): d = self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:]) return self._constructor_sliced(values, **d) - def _slice(self, slobj, axis=0, raise_on_error=False, typ=None): - new_data = self._data.get_slice(slobj, - axis=axis, - raise_on_error=raise_on_error) - return self._constructor(new_data) - def __setitem__(self, key, value): shape = tuple(self.shape) if isinstance(value, 
self._constructor_sliced): diff --git a/pandas/core/series.py b/pandas/core/series.py index 9e6c0bd9305ab..4fc7ced6e8900 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -28,7 +28,7 @@ from pandas.core.index import (Index, MultiIndex, InvalidIndexError, _ensure_index, _handle_legacy_indexes) from pandas.core.indexing import ( - _check_bool_indexer, _check_slice_bounds, + _check_bool_indexer, _is_index_slice, _maybe_convert_indices) from pandas.core import generic, base from pandas.core.internals import SingleBlockManager @@ -469,9 +469,7 @@ def _ixs(self, i, axis=0): def _is_mixed_type(self): return False - def _slice(self, slobj, axis=0, raise_on_error=False, typ=None): - if raise_on_error: - _check_slice_bounds(slobj, self.values) + def _slice(self, slobj, axis=0, typ=None): slobj = self.index._convert_slice_indexer(slobj, typ=typ or 'getitem') return self._get_values(slobj) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 6e76155619c09..a69c07494af8a 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -13,7 +13,7 @@ from pandas.core.common import (isnull, notnull, _pickle_array, _unpickle_array, _try_sort) from pandas.core.index import Index, MultiIndex, _ensure_index -from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices +from pandas.core.indexing import _maybe_convert_indices from pandas.core.series import Series from pandas.core.frame import (DataFrame, extract_index, _prep_ndarray, _default_index) @@ -379,15 +379,11 @@ def set_value(self, index, col, value, takeable=False): return dense.to_sparse(kind=self._default_kind, fill_value=self._default_fill_value) - def _slice(self, slobj, axis=0, raise_on_error=False, typ=None): + def _slice(self, slobj, axis=0, typ=None): if axis == 0: - if raise_on_error: - _check_slice_bounds(slobj, self.index) new_index = self.index[slobj] new_columns = self.columns else: - if raise_on_error: - _check_slice_bounds(slobj, self.columns) new_index = 
self.index new_columns = self.columns[slobj] diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index 86dcf97c8bd3d..20bbc58cc908f 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -187,7 +187,7 @@ def _ixs(self, i, axis=0): return self.xs(key, axis=axis) - def _slice(self, slobj, axis=0, raise_on_error=False, typ=None): + def _slice(self, slobj, axis=0, typ=None): """ for compat as we don't support Block Manager here """ diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 1d033782a0175..325d770fb62c9 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -393,12 +393,36 @@ def test_iloc_exceeds_bounds(self): self.assertRaises(IndexError, lambda : df.iloc[-30]) # slices are ok - result = df.iloc[:,4:10] + result = df.iloc[:,4:10] # 0 < start < len < stop expected = df.iloc[:,4:] assert_frame_equal(result,expected) - result = df.iloc[:,-4:-10] - expected = df.iloc[:,-4:] + result = df.iloc[:,-4:-10] # stop < 0 < start < len + expected = df.iloc[:,:0] + assert_frame_equal(result,expected) + + result = df.iloc[:,10:4:-1] # 0 < stop < len < start (down) + expected = df.iloc[:,:4:-1] + assert_frame_equal(result,expected) + + result = df.iloc[:,4:-10:-1] # stop < 0 < start < len (down) + expected = df.iloc[:,4::-1] + assert_frame_equal(result,expected) + + result = df.iloc[:,-10:4] # start < 0 < stop < len + expected = df.iloc[:,:4] + assert_frame_equal(result,expected) + + result = df.iloc[:,10:4] # 0 < stop < len < start + expected = df.iloc[:,:0] + assert_frame_equal(result,expected) + + result = df.iloc[:,-10:-11:-1] # stop < start < 0 < len (down) + expected = df.iloc[:,:0] + assert_frame_equal(result,expected) + + result = df.iloc[:,10:11] # 0 < len < start < stop + expected = df.iloc[:,:0] assert_frame_equal(result,expected) # slice bounds exceeding is ok @@ -406,6 +430,14 @@ def test_iloc_exceeds_bounds(self): expected = s.iloc[18:] assert_series_equal(result,expected) + result = 
s.iloc[30:] + expected = s.iloc[:0] + assert_series_equal(result,expected) + + result = s.iloc[30::-1] + expected = s.iloc[::-1] + assert_series_equal(result,expected) + # doc example def check(result,expected): str(result) From 48c3969b902700db3f8300024f0e7a170a2e8279 Mon Sep 17 00:00:00 2001 From: Andrew Rosenfeld Date: Tue, 4 Mar 2014 14:04:27 -0500 Subject: [PATCH 9/9] BUG: Fix irregular Timestamp arithmetic types #6543 --- doc/source/release.rst | 3 ++- pandas/tseries/tests/test_tslib.py | 29 ++++++++++++++++++++++++++--- pandas/tslib.pyx | 10 +++++----- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 1819272c59243..f5997e2c35e72 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -185,7 +185,7 @@ Bug Fixes - Bug in :meth:`DataFrame.replace` where nested dicts were erroneously depending on the order of dictionary keys and values (:issue:`5338`). - Perf issue in concatting with empty objects (:issue:`3259`) -- Clarify sorting of ``sym_diff`` on ``Index``es with ``NaN``s (:isssue:`6444`) +- Clarify sorting of ``sym_diff`` on ``Index``es with ``NaN``s (:issue:`6444`) - Regression in ``MultiIndex.from_product`` with a ``DatetimeIndex`` as input (:issue:`6439`) - Bug in ``str.extract`` when passed a non-default index (:issue:`6348`) - Bug in ``str.split`` when passed ``pat=None`` and ``n=1`` (:issue:`6466`) @@ -207,6 +207,7 @@ Bug Fixes - Bug in ``pd.read_stata`` which would use the wrong data types and missing values (:issue:`6327`) - Bug in ``DataFrame.to_stata`` that lead to data loss in certain cases, and could exported using the wrong data types and missing values (:issue:`6335`) +- Inconsistent types in Timestamp addition/subtraction (:issue:`6543`) pandas 0.13.1 diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index bc5b8dcfbd49a..a24f545901ccd 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -7,6 
+7,7 @@ from pandas.core.api import Timestamp from pandas.tslib import period_asfreq, period_ordinal +from pandas.tseries.index import date_range from pandas.tseries.frequencies import get_freq from pandas import _np_version_under1p7 import pandas.util.testing as tm @@ -302,10 +303,32 @@ def test_period_ordinal_business_day(self): # Tuesday self.assertEqual(11418, period_ordinal(2013, 10, 8, 0, 0, 0, 0, 0, get_freq('B'))) -class TestTomeStampOps(tm.TestCase): +class TestTimestampOps(tm.TestCase): def test_timestamp_and_datetime(self): - self.assertEqual((Timestamp(datetime.datetime(2013, 10,13)) - datetime.datetime(2013, 10,12)).days, 1) - self.assertEqual((datetime.datetime(2013, 10, 12) - Timestamp(datetime.datetime(2013, 10,13))).days, -1) + self.assertEqual((Timestamp(datetime.datetime(2013, 10, 13)) - datetime.datetime(2013, 10, 12)).days, 1) + self.assertEqual((datetime.datetime(2013, 10, 12) - Timestamp(datetime.datetime(2013, 10, 13))).days, -1) + + def test_addition_subtraction_types(self): + # Assert on the types resulting from Timestamp +/- various date/time objects + datetime_instance = datetime.datetime(2014, 3, 4) + timedelta_instance = datetime.timedelta(seconds=1) + # build a timestamp with a frequency, since then it supports addition/subtraction of integers + timestamp_instance = date_range(datetime_instance, periods=1, freq='D')[0] + + self.assertEqual(type(timestamp_instance + 1), Timestamp) + self.assertEqual(type(timestamp_instance - 1), Timestamp) + + # Timestamp + datetime not supported, though subtraction is supported and yields timedelta + self.assertEqual(type(timestamp_instance - datetime_instance), datetime.timedelta) + + self.assertEqual(type(timestamp_instance + timedelta_instance), Timestamp) + self.assertEqual(type(timestamp_instance - timedelta_instance), Timestamp) + + if not _np_version_under1p7: + # Timestamp +/- datetime64 not supported, so not tested (could possibly assert error raised?) 
+ timedelta64_instance = np.timedelta64(1, 'D') + self.assertEqual(type(timestamp_instance + timedelta64_instance), Timestamp) + self.assertEqual(type(timestamp_instance - timedelta64_instance), Timestamp) if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index f065ea90473c6..9ff73e7c92fdb 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -700,11 +700,11 @@ cdef class _Timestamp(datetime): return result def __sub__(self, other): - if is_integer_object(other): - neg_other = -other - return self + neg_other - # This calling convention is required - return datetime.__sub__(self, other) + if isinstance(other, datetime): + return datetime.__sub__(self, other) + + neg_other = -other + return self + neg_other cpdef _get_field(self, field): out = get_date_field(np.array([self.value], dtype=np.int64), field)