Skip to content

Segfault with pd.tslib.get_period_field FIX #4520

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions doc/source/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -724,16 +724,16 @@ We illustrate these fill methods on a simple TimeSeries:
ts2.reindex(ts.index, method='ffill')
ts2.reindex(ts.index, method='bfill')

Note these methods require that the indexes are **order increasing**.

Note the same result could have been achieved using :ref:`fillna
<missing_data.fillna>`:

.. ipython:: python

ts2.reindex(ts.index).fillna(method='ffill')

Note these methods generally assume that the indexes are **sorted**. They may
be modified in the future to be a bit more flexible but as time series data is
ordered most of the time anyway, this has not been a major priority.
Note that this method does not check the order of the index.

.. _basics.drop:

Expand Down
3 changes: 3 additions & 0 deletions doc/source/missing_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,9 @@ To remind you, these are the available filling methods:
With time series data, using pad/ffill is extremely common so that the "last
known value" is available at every time point.

The ``ffill()`` function is equivalent to ``fillna(method='ffill')``
and ``bfill()`` is equivalent to ``fillna(method='bfill')``

.. _missing_data.dropna:

Dropping axis labels with missing data: dropna
Expand Down
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ pandas 0.13

**API Changes**

- ``DataFrame.reindex()`` and forward/backward filling now raises ValueError
if either index is not monotonic (:issue: `4484`).
- ``pandas`` now is Python 2/3 compatible without the need for 2to3 thanks to
@jtratner. As a result, pandas now uses iterators more extensively. This
also led to the introduction of substantive parts of the Benjamin
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,12 +811,12 @@ def get_indexer(self, target, method=None, limit=None):
'objects')

if method == 'pad':
if not self.is_monotonic:
raise AssertionError('Must be monotonic for forward fill')
if not self.is_monotonic or not target.is_monotonic:
raise ValueError('Must be monotonic for forward fill')
indexer = self._engine.get_pad_indexer(target.values, limit)
elif method == 'backfill':
if not self.is_monotonic:
raise AssertionError('Must be monotonic for backward fill')
if not self.is_monotonic or not target.is_monotonic:
raise ValueError('Must be monotonic for backward fill')
indexer = self._engine.get_backfill_indexer(target.values, limit)
elif method is None:
indexer = self._engine.get_indexer(target.values)
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1629,6 +1629,29 @@ def test_nested_exception(self):
repr(df)
except Exception as e:
self.assertNotEqual(type(e), UnboundLocalError)

def test_reverse_reindex_ffill_raises(self):
dr = pd.date_range('2013-08-01', periods=6, freq='B')
data = np.random.randn(6,1)
df = pd.DataFrame(data, index=dr, columns=list('A'))
df['A'][3] = np.nan
df_rev = pd.DataFrame(data, index=dr[::-1], columns=list('A'))
# Reverse index is not 'monotonic'
self.assertRaises(ValueError, df_rev.reindex, df.index, method='pad')
self.assertRaises(ValueError, df_rev.reindex, df.index, method='ffill')
self.assertRaises(ValueError, df_rev.reindex, df.index, method='bfill')

def test_reversed_reindex_ffill_raises(self):
dr = pd.date_range('2013-08-01', periods=6, freq='B')
data = np.random.randn(6,1)
df = pd.DataFrame(data, index=dr, columns=list('A'))
df['A'][3] = np.nan
df = pd.DataFrame(data, index=dr, columns=list('A'))
# Reversed reindex is not 'monotonic'
self.assertRaises(ValueError, df.reindex, dr[::-1], method='pad')
self.assertRaises(ValueError, df.reindex, dr[::-1], method='ffill')
self.assertRaises(ValueError, df.reindex, dr[::-1], method='bfill')


_seriesd = tm.getSeriesData()
_tsd = tm.getTimeSeriesData()
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/test_tslib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import unittest

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't put this in a separate file, stick in tests/test_tseries.py

import numpy as np
import pandas as pd

class TestPeriodField(unittest.TestCase):
_multiprocess_can_split_ = True

def test_get_period_field_raises_on_out_of_range(self):
self.assertRaises(ValueError, pd.tslib.get_period_field, -1, 0, 0)

def test_get_period_field_array_raises_on_out_of_range(self):
self.assertRaises(ValueError, pd.tslib.get_period_field_arr, -1, np.empty(1), 0)

if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)
2 changes: 1 addition & 1 deletion pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ def test_pad_require_monotonicity(self):

rng2 = rng[::2][::-1]

self.assertRaises(AssertionError, rng2.get_indexer, rng,
self.assertRaises(ValueError, rng2.get_indexer, rng,
method='pad')

def test_frame_ctor_datetime64_column(self):
Expand Down
7 changes: 5 additions & 2 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2417,6 +2417,8 @@ ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN

def get_period_field(int code, int64_t value, int freq):
cdef accessor f = _get_accessor_func(code)
if <long>f == <long>NULL:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you should be able to say if not f: and in fact i'm not sure what happens when you try to cast a function pointer to long

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like is might be the way to check for NULL in Cython

http://docs.cython.org/src/tutorial/clibraries.html#testing-the-result

so maybe change to if f is NULL

also can you change the error message to say something about periods? something simple like "Unrecognizable period code: {the_code}"

raise ValueError('Unrecognized code: %s' % code)
return f(value, freq)

def get_period_field_arr(int code, ndarray[int64_t] arr, int freq):
Expand All @@ -2426,6 +2428,8 @@ def get_period_field_arr(int code, ndarray[int64_t] arr, int freq):
accessor f

f = _get_accessor_func(code)
if <long>f == <long>NULL:
raise ValueError('Unrecognized code: %s' % code)

sz = len(arr)
out = np.empty(sz, dtype=np.int64)
Expand Down Expand Up @@ -2460,8 +2464,7 @@ cdef accessor _get_accessor_func(int code):
return &pday_of_year
elif code == 10:
return &pweekday
else:
raise ValueError('Unrecognized code: %s' % code)
return NULL


def extract_ordinals(ndarray[object] values, freq):
Expand Down