-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Don't overflow PeriodIndex in to_csv #15984
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1820,7 +1820,26 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs): | |
return header + result | ||
|
||
def to_native_types(self, slicer=None, **kwargs): | ||
""" slice and dice then format """ | ||
""" | ||
Format specified values of `self` and return them. | ||
Parameters | ||
---------- | ||
slicer : int, array-like | ||
An indexer into `self` that specifies which values | ||
are used in the formatting process. | ||
kwargs : dict | ||
Options for specifying how the values should be formatted. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it would be OK to actually list these options (with their defaults) in the signature itself. I don't recall why I didn't do this originally. Follow-up PR for this though. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Derived classes change the signature with different defaults. That's why |
||
These options include the following: | ||
1) na_rep : str | ||
The value that serves as a placeholder for NULL values | ||
2) quoting : bool or None | ||
Whether or not there are quoted values in `self` | ||
3) date_format : str | ||
The format used to represent date-like values | ||
""" | ||
|
||
values = self | ||
if slicer is not None: | ||
values = values[slicer] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1143,3 +1143,31 @@ def test_to_csv_quoting(self): | |
df = df.set_index(['a', 'b']) | ||
expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n' | ||
self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected) | ||
|
||
def test_period_index_date_overflow(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you also test NaT? I think we have very few tests for outputting a PI. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, done. |
||
# see gh-15982 | ||
|
||
dates = ["1990-01-01", "2000-01-01", "3005-01-01"] | ||
index = pd.PeriodIndex(dates, freq="D") | ||
|
||
df = pd.DataFrame([4, 5, 6], index=index) | ||
result = df.to_csv() | ||
|
||
expected = ',0\n1990-01-01,4\n2000-01-01,5\n3005-01-01,6\n' | ||
assert result == expected | ||
|
||
date_format = "%m-%d-%Y" | ||
result = df.to_csv(date_format=date_format) | ||
|
||
expected = ',0\n01-01-1990,4\n01-01-2000,5\n01-01-3005,6\n' | ||
assert result == expected | ||
|
||
# Overflow with pd.NaT | ||
dates = ["1990-01-01", pd.NaT, "3005-01-01"] | ||
index = pd.PeriodIndex(dates, freq="D") | ||
|
||
df = pd.DataFrame([4, 5, 6], index=index) | ||
result = df.to_csv() | ||
|
||
expected = ',0\n1990-01-01,4\n,5\n3005-01-01,6\n' | ||
assert result == expected |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
from pandas import DatetimeIndex | ||
|
||
import numpy as np | ||
|
||
import pandas.util.testing as tm | ||
import pandas as pd | ||
|
||
|
||
def test_to_native_types():
    """Check ``DatetimeIndex.to_native_types`` output.

    Covers the default call, an ``na_rep`` that has nothing to replace,
    positional slicing, a custom ``date_format``, and ``NaT`` handling
    with and without ``na_rep``.
    """
    # Build the daily range with ``pd.date_range``: the
    # ``DatetimeIndex(start=..., periods=..., freq=...)`` constructor form
    # was deprecated and later removed from pandas.
    index = pd.date_range(start='2017-01-01', periods=3, freq='D')

    # First, with no arguments.
    expected = np.array(['2017-01-01', '2017-01-02',
                         '2017-01-03'], dtype=object)

    result = index.to_native_types()
    tm.assert_numpy_array_equal(result, expected)

    # No NaN values, so na_rep has no effect
    result = index.to_native_types(na_rep='pandas')
    tm.assert_numpy_array_equal(result, expected)

    # Make sure slicing works
    expected = np.array(['2017-01-01', '2017-01-03'], dtype=object)

    result = index.to_native_types([0, 2])
    tm.assert_numpy_array_equal(result, expected)

    # Make sure date formatting works
    expected = np.array(['01-2017-01', '01-2017-02',
                         '01-2017-03'], dtype=object)

    result = index.to_native_types(date_format='%m-%Y-%d')
    tm.assert_numpy_array_equal(result, expected)

    # NULL object handling should work
    index = DatetimeIndex(['2017-01-01', pd.NaT, '2017-01-03'])
    expected = np.array(['2017-01-01', 'NaT', '2017-01-03'], dtype=object)

    result = index.to_native_types()
    tm.assert_numpy_array_equal(result, expected)

    # An explicit na_rep replaces the default 'NaT' placeholder.
    expected = np.array(['2017-01-01', 'pandas',
                         '2017-01-03'], dtype=object)

    result = index.to_native_types(na_rep='pandas')
    tm.assert_numpy_array_equal(result, expected)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
from pandas import PeriodIndex | ||
|
||
import numpy as np | ||
|
||
import pandas.util.testing as tm | ||
import pandas as pd | ||
|
||
|
||
def test_to_native_types():
    """Check ``PeriodIndex.to_native_types`` output.

    Covers the default call, an ``na_rep`` that has nothing to replace,
    positional slicing, a custom ``date_format``, and ``NaT`` handling
    with and without ``na_rep``.
    """
    index = PeriodIndex(['2017-01-01', '2017-01-02',
                         '2017-01-03'], freq='D')

    # Expected string arrays use native byte order ('=') rather than a
    # hard-coded little-endian '<', so the strict dtype comparison in
    # ``assert_numpy_array_equal`` also holds on big-endian platforms.

    # First, with no arguments.
    expected = np.array(['2017-01-01', '2017-01-02',
                         '2017-01-03'], dtype='=U10')

    result = index.to_native_types()
    tm.assert_numpy_array_equal(result, expected)

    # No NaN values, so na_rep has no effect
    result = index.to_native_types(na_rep='pandas')
    tm.assert_numpy_array_equal(result, expected)

    # Make sure slicing works
    expected = np.array(['2017-01-01', '2017-01-03'], dtype='=U10')

    result = index.to_native_types([0, 2])
    tm.assert_numpy_array_equal(result, expected)

    # Make sure date formatting works
    expected = np.array(['01-2017-01', '01-2017-02',
                         '01-2017-03'], dtype='=U10')

    result = index.to_native_types(date_format='%m-%Y-%d')
    tm.assert_numpy_array_equal(result, expected)

    # NULL object handling should work
    index = PeriodIndex(['2017-01-01', pd.NaT, '2017-01-03'], freq='D')
    expected = np.array(['2017-01-01', 'NaT', '2017-01-03'], dtype=object)

    result = index.to_native_types()
    tm.assert_numpy_array_equal(result, expected)

    # An explicit na_rep replaces the default 'NaT' placeholder.
    expected = np.array(['2017-01-01', 'pandas',
                         '2017-01-03'], dtype=object)

    result = index.to_native_types(na_rep='pandas')
    tm.assert_numpy_array_equal(result, expected)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`timestamp` -> `Period`, I think.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I used `timestamp` because that seemed clearer (to the everyday user) than `PeriodIndex`. What do you think?