Skip to content

Commit b698227

Browse files
jreback authored and wesm committed
ARROW-622 [Python] deprecate timestamps_to_ms in .from_pandas()
xref pandas-dev/pandas#17438; this was not fully resolved in apache#944.

Author: Jeff Reback <[email protected]>

Closes apache#1046 from jreback/warn and squashes the following commits:

382592f [Jeff Reback] deprecate timestamps_to_ms in .from_pandas()
1 parent 3f2fa03 commit b698227

File tree

3 files changed

+19
-48
lines changed

3 files changed

+19
-48
lines changed

python/pyarrow/array.pxi

+2
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ cdef class Array:
121121
compatibility with other functionality like Parquet I/O which
122122
only supports milliseconds.
123123
124+
.. deprecated:: 0.7.0
125+
124126
memory_pool: MemoryPool, optional
125127
Specific memory pool to use to allocate the resulting Arrow array.
126128

python/pyarrow/table.pxi

+3
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,9 @@ cdef class Table:
723723
Convert datetime columns to ms resolution. This is needed for
724724
compatibility with other functionality like Parquet I/O which
725725
only supports milliseconds.
726+
727+
.. deprecated:: 0.7.0
728+
726729
schema : pyarrow.Schema, optional
727730
The expected schema of the Arrow Table. This can be used to
728731
indicate the type of columns if we cannot infer it automatically.

python/pyarrow/tests/test_convert_pandas.py

+14-48
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,10 @@ def tearDown(self):
6969
pass
7070

7171
def _check_pandas_roundtrip(self, df, expected=None, nthreads=1,
72-
timestamps_to_ms=False, expected_schema=None,
72+
expected_schema=None,
7373
check_dtype=True, schema=None,
7474
check_index=False):
75-
table = pa.Table.from_pandas(df, timestamps_to_ms=timestamps_to_ms,
75+
table = pa.Table.from_pandas(df,
7676
schema=schema, preserve_index=check_index)
7777
result = table.to_pandas(nthreads=nthreads)
7878
if expected_schema:
@@ -92,9 +92,8 @@ def _check_series_roundtrip(self, s, type_=None):
9292
tm.assert_series_equal(s, result)
9393

9494
def _check_array_roundtrip(self, values, expected=None, mask=None,
95-
timestamps_to_ms=False, type=None):
96-
arr = pa.Array.from_pandas(values, timestamps_to_ms=timestamps_to_ms,
97-
mask=mask, type=type)
95+
type=None):
96+
arr = pa.Array.from_pandas(values, mask=mask, type=type)
9897
result = arr.to_pandas()
9998

10099
values_nulls = pd.isnull(values)
@@ -332,21 +331,6 @@ def test_fixed_size_bytes_does_not_accept_varying_lengths(self):
332331
pa.Table.from_pandas(df, schema=schema)
333332

334333
def test_timestamps_notimezone_no_nulls(self):
335-
df = pd.DataFrame({
336-
'datetime64': np.array([
337-
'2007-07-13T01:23:34.123',
338-
'2006-01-13T12:34:56.432',
339-
'2010-08-13T05:46:57.437'],
340-
dtype='datetime64[ms]')
341-
})
342-
field = pa.field('datetime64', pa.timestamp('ms'))
343-
schema = pa.schema([field])
344-
self._check_pandas_roundtrip(
345-
df,
346-
timestamps_to_ms=True,
347-
expected_schema=schema,
348-
)
349-
350334
df = pd.DataFrame({
351335
'datetime64': np.array([
352336
'2007-07-13T01:23:34.123456789',
@@ -357,36 +341,24 @@ def test_timestamps_notimezone_no_nulls(self):
357341
field = pa.field('datetime64', pa.timestamp('ns'))
358342
schema = pa.schema([field])
359343
self._check_pandas_roundtrip(
360-
df, expected_schema=schema,
344+
df,
345+
expected_schema=schema,
361346
)
362347

363348
def test_timestamps_to_ms_explicit_schema(self):
364349
# ARROW-1328
365350
df = pd.DataFrame({'datetime': [datetime(2017, 1, 1)]})
366351
pa_type = pa.from_numpy_dtype(df['datetime'].dtype)
367352

368-
arr = pa.Array.from_pandas(df['datetime'], type=pa_type,
369-
timestamps_to_ms=True)
353+
with tm.assert_produces_warning(FutureWarning,
354+
check_stacklevel=False):
355+
arr = pa.Array.from_pandas(df['datetime'], type=pa_type,
356+
timestamps_to_ms=True)
370357

371358
tm.assert_almost_equal(df['datetime'].values.astype('M8[ms]'),
372359
arr.to_pandas())
373360

374361
def test_timestamps_notimezone_nulls(self):
375-
df = pd.DataFrame({
376-
'datetime64': np.array([
377-
'2007-07-13T01:23:34.123',
378-
None,
379-
'2010-08-13T05:46:57.437'],
380-
dtype='datetime64[ms]')
381-
})
382-
field = pa.field('datetime64', pa.timestamp('ms'))
383-
schema = pa.schema([field])
384-
self._check_pandas_roundtrip(
385-
df,
386-
timestamps_to_ms=True,
387-
expected_schema=schema,
388-
)
389-
390362
df = pd.DataFrame({
391363
'datetime64': np.array([
392364
'2007-07-13T01:23:34.123456789',
@@ -397,7 +369,8 @@ def test_timestamps_notimezone_nulls(self):
397369
field = pa.field('datetime64', pa.timestamp('ns'))
398370
schema = pa.schema([field])
399371
self._check_pandas_roundtrip(
400-
df, expected_schema=schema,
372+
df,
373+
expected_schema=schema,
401374
)
402375

403376
def test_timestamps_with_timezone(self):
@@ -410,7 +383,7 @@ def test_timestamps_with_timezone(self):
410383
})
411384
df['datetime64'] = (df['datetime64'].dt.tz_localize('US/Eastern')
412385
.to_frame())
413-
self._check_pandas_roundtrip(df, timestamps_to_ms=True)
386+
self._check_pandas_roundtrip(df)
414387

415388
self._check_series_roundtrip(df['datetime64'])
416389

@@ -425,15 +398,8 @@ def test_timestamps_with_timezone(self):
425398
})
426399
df['datetime64'] = (df['datetime64'].dt.tz_localize('US/Eastern')
427400
.to_frame())
428-
self._check_pandas_roundtrip(df)
429-
430-
def test_timestamp_with_tz_to_pandas_type(self):
431-
from pyarrow.compat import DatetimeTZDtype
432401

433-
tz = 'America/Los_Angeles'
434-
t = pa.timestamp('ns', tz=tz)
435-
436-
assert t.to_pandas_dtype() == DatetimeTZDtype('ns', tz=tz)
402+
self._check_pandas_roundtrip(df)
437403

438404
def test_date_infer(self):
439405
df = pd.DataFrame({

0 commit comments

Comments
 (0)