|
1 | 1 | import re
|
2 | 2 |
|
3 | 3 | import numpy as np
|
4 |
| -import pyarrow as pa |
5 | 4 | import pytest
|
6 | 5 | import pytz
|
7 | 6 |
|
|
33 | 32 | CategoricalIndex,
|
34 | 33 | DatetimeIndex,
|
35 | 34 | IntervalIndex,
|
36 |
| - NaT, |
37 | 35 | Series,
|
38 | 36 | SparseDtype,
|
39 |
| - Timedelta, |
40 |
| - Timestamp, |
41 |
| - concat, |
42 | 37 | date_range,
|
43 |
| - timedelta_range, |
44 | 38 | )
|
45 | 39 | import pandas._testing as tm
|
46 | 40 | from pandas.core.arrays.sparse import SparseArray
|
@@ -1199,275 +1193,3 @@ def test_multi_column_dtype_assignment():
|
1199 | 1193 |
|
1200 | 1194 | df["b"] = 0
|
1201 | 1195 | tm.assert_frame_equal(df, expected)
|
1202 |
| - |
1203 |
| - |
1204 |
| -@pytest.mark.parametrize( |
1205 |
| - "unit", |
1206 |
| - [ |
1207 |
| - "s", |
1208 |
| - "ms", |
1209 |
| - "us", |
1210 |
| - "ns", |
1211 |
| - ], |
1212 |
| -) |
1213 |
| -def test_convert_dtypes_timestamp(unit): |
1214 |
| - series = Series(date_range("2020-01-01", "2020-01-02", freq="1min")) |
1215 |
| - expected = series.astype(f"timestamp[{unit}][pyarrow]") |
1216 |
| - |
1217 |
| - converted = expected.convert_dtypes(dtype_backend="pyarrow") |
1218 |
| - |
1219 |
| - tm.assert_series_equal(expected, converted) |
1220 |
| - |
1221 |
| - |
1222 |
| -@pytest.mark.parametrize( |
1223 |
| - "unit", |
1224 |
| - [ |
1225 |
| - "s", |
1226 |
| - "ms", |
1227 |
| - "us", |
1228 |
| - "ns", |
1229 |
| - ], |
1230 |
| -) |
1231 |
| -def test_convert_dtypes_duration(unit): |
1232 |
| - series = Series(timedelta_range("1s", "10s", freq="1s")) |
1233 |
| - expected = series.astype(f"duration[{unit}][pyarrow]") |
1234 |
| - |
1235 |
| - converted = expected.convert_dtypes(dtype_backend="pyarrow") |
1236 |
| - |
1237 |
| - tm.assert_series_equal(expected, converted) |
1238 |
| - |
1239 |
| - |
1240 |
| -@pytest.mark.parametrize( |
1241 |
| - "timestamp_unit, duration_unit", |
1242 |
| - [ |
1243 |
| - ("s", "s"), |
1244 |
| - ("s", "ms"), |
1245 |
| - ("s", "us"), |
1246 |
| - ("s", "ns"), |
1247 |
| - ("ms", "s"), |
1248 |
| - ("ms", "ms"), |
1249 |
| - ("ms", "us"), |
1250 |
| - ("ms", "ns"), |
1251 |
| - ("us", "s"), |
1252 |
| - ("us", "ms"), |
1253 |
| - ("us", "us"), |
1254 |
| - ("us", "ns"), |
1255 |
| - ("ns", "s"), |
1256 |
| - ("ns", "ms"), |
1257 |
| - ("ns", "us"), |
1258 |
| - ("ns", "ns"), |
1259 |
| - ], |
1260 |
| -) |
1261 |
| -def test_convert_dtypes_timestamp_and_duration(timestamp_unit, duration_unit): |
1262 |
| - timestamp_series = Series( |
1263 |
| - date_range("2020-01-01", "2020-01-02", freq="1min") |
1264 |
| - ).astype(f"timestamp[{timestamp_unit}][pyarrow]") |
1265 |
| - duration_series = Series(timedelta_range("1s", "10s", freq="1s")).astype( |
1266 |
| - f"duration[{duration_unit}][pyarrow]" |
1267 |
| - ) |
1268 |
| - |
1269 |
| - df = concat([timestamp_series, duration_series], axis=1) |
1270 |
| - converted = df.convert_dtypes(dtype_backend="pyarrow") |
1271 |
| - |
1272 |
| - tm.assert_frame_equal(df, converted) |
1273 |
| - |
1274 |
| - |
1275 |
| -@pytest.mark.parametrize( |
1276 |
| - "unit", |
1277 |
| - [ |
1278 |
| - "s", |
1279 |
| - "ms", |
1280 |
| - "us", |
1281 |
| - "ns", |
1282 |
| - ], |
1283 |
| -) |
1284 |
| -def test_convert_dtypes_datetime(unit): |
1285 |
| - series = Series(date_range("2020-01-01", "2020-01-02", freq="1min")).astype( |
1286 |
| - f"datetime64[{unit}]" |
1287 |
| - ) |
1288 |
| - |
1289 |
| - expected = series.astype(f"timestamp[{unit}][pyarrow]") |
1290 |
| - converted = series.convert_dtypes(dtype_backend="pyarrow") |
1291 |
| - |
1292 |
| - tm.assert_series_equal(expected, converted) |
1293 |
| - |
1294 |
| - |
1295 |
| -@pytest.mark.parametrize( |
1296 |
| - "unit", |
1297 |
| - [ |
1298 |
| - "s", |
1299 |
| - "ms", |
1300 |
| - "us", |
1301 |
| - "ns", |
1302 |
| - ], |
1303 |
| -) |
1304 |
| -def test_convert_dtypes_timedelta(unit): |
1305 |
| - series = Series(timedelta_range("1s", "10s", freq="1s")).astype( |
1306 |
| - f"timedelta64[{unit}]" |
1307 |
| - ) |
1308 |
| - |
1309 |
| - expected = series.astype(f"duration[{unit}][pyarrow]") |
1310 |
| - converted = series.convert_dtypes(dtype_backend="pyarrow") |
1311 |
| - |
1312 |
| - tm.assert_series_equal(expected, converted) |
1313 |
| - |
1314 |
| - |
1315 |
| -@pytest.mark.parametrize( |
1316 |
| - "unit", |
1317 |
| - [ |
1318 |
| - "s", |
1319 |
| - "ms", |
1320 |
| - "us", |
1321 |
| - "ns", |
1322 |
| - ], |
1323 |
| -) |
1324 |
| -def test_pa_table_to_pandas_datetime(unit): |
1325 |
| - df = pd.DataFrame(date_range("2020-01-01", "2020-01-02", freq="1min")).astype( |
1326 |
| - f"datetime64[{unit}]" |
1327 |
| - ) |
1328 |
| - df_converted_to_pa = pa.table(df) |
1329 |
| - df_back_to_pd = df_converted_to_pa.to_pandas() |
1330 |
| - |
1331 |
| - tm.assert_frame_equal(df, df_back_to_pd) |
1332 |
| - |
1333 |
| - |
1334 |
| -@pytest.mark.parametrize( |
1335 |
| - "unit", |
1336 |
| - [ |
1337 |
| - "s", |
1338 |
| - "ms", |
1339 |
| - "us", |
1340 |
| - "ns", |
1341 |
| - ], |
1342 |
| -) |
1343 |
| -def test_pa_table_to_pandas_timedelta(unit): |
1344 |
| - df = pd.DataFrame(timedelta_range("1s", "10s", freq="1s")).astype( |
1345 |
| - f"timedelta64[{unit}]" |
1346 |
| - ) |
1347 |
| - df_converted_to_pa = pa.table(df) |
1348 |
| - df_back_to_pd = df_converted_to_pa.to_pandas() |
1349 |
| - |
1350 |
| - tm.assert_frame_equal(df, df_back_to_pd) |
1351 |
| - |
1352 |
| - |
1353 |
| -@pytest.mark.parametrize( |
1354 |
| - "datetime_unit, timedelta_unit", |
1355 |
| - [ |
1356 |
| - ("s", "s"), |
1357 |
| - ("s", "ms"), |
1358 |
| - ("s", "us"), |
1359 |
| - ("s", "ns"), |
1360 |
| - ("ms", "s"), |
1361 |
| - ("ms", "ms"), |
1362 |
| - ("ms", "us"), |
1363 |
| - ("ms", "ns"), |
1364 |
| - ("us", "s"), |
1365 |
| - ("us", "ms"), |
1366 |
| - ("us", "us"), |
1367 |
| - ("us", "ns"), |
1368 |
| - ("ns", "s"), |
1369 |
| - ("ns", "ms"), |
1370 |
| - ("ns", "us"), |
1371 |
| - ("ns", "ns"), |
1372 |
| - ], |
1373 |
| -) |
1374 |
| -def test_pa_table_and_to_pandas_datetime_and_timedelta(datetime_unit, timedelta_unit): |
1375 |
| - timestamp_series = Series( |
1376 |
| - date_range("2020-01-01", "2020-01-02", freq="1min") |
1377 |
| - ).astype(f"datetime64[{datetime_unit}]") |
1378 |
| - duration_series = Series(timedelta_range("1s", "10s", freq="1s")).astype( |
1379 |
| - f"timedelta64[{timedelta_unit}]" |
1380 |
| - ) |
1381 |
| - |
1382 |
| - df = concat([timestamp_series, duration_series], axis=1) |
1383 |
| - df_converted_to_pa = pa.table(df) |
1384 |
| - df_back_to_pd = df_converted_to_pa.to_pandas() |
1385 |
| - |
1386 |
| - tm.assert_frame_equal(df, df_back_to_pd) |
1387 |
| - |
1388 |
| - |
1389 |
| -@pytest.mark.parametrize( |
1390 |
| - "unit", |
1391 |
| - [ |
1392 |
| - "s", |
1393 |
| - "ms", |
1394 |
| - "us", |
1395 |
| - "ns", |
1396 |
| - ], |
1397 |
| -) |
1398 |
| -def test_pa_table_to_pandas_timestamp(unit): |
1399 |
| - df = pd.DataFrame(date_range("2020-01-01", "2020-01-02", freq="1min")).astype( |
1400 |
| - f"timestamp[{unit}][pyarrow]" |
1401 |
| - ) |
1402 |
| - df_converted_to_pa = pa.table(df) |
1403 |
| - df_back_to_pd = df_converted_to_pa.to_pandas() |
1404 |
| - |
1405 |
| - tm.assert_frame_equal(df, df_back_to_pd) |
1406 |
| - |
1407 |
| - |
1408 |
| -@pytest.mark.parametrize( |
1409 |
| - "unit", |
1410 |
| - [ |
1411 |
| - "s", |
1412 |
| - "ms", |
1413 |
| - "us", |
1414 |
| - "ns", |
1415 |
| - ], |
1416 |
| -) |
1417 |
| -def test_pa_table_to_pandas_duration(unit): |
1418 |
| - df = pd.DataFrame(timedelta_range("1s", "10s", freq="1s")).astype( |
1419 |
| - f"duration[{unit}][pyarrow]" |
1420 |
| - ) |
1421 |
| - df_converted_to_pa = pa.table(df) |
1422 |
| - df_back_to_pd = df_converted_to_pa.to_pandas() |
1423 |
| - |
1424 |
| - tm.assert_frame_equal(df, df_back_to_pd) |
1425 |
| - |
1426 |
| - |
1427 |
| -@pytest.mark.parametrize( |
1428 |
| - "timestamp_unit, duration_unit", |
1429 |
| - [ |
1430 |
| - ("s", "s"), |
1431 |
| - ("s", "ms"), |
1432 |
| - ("s", "us"), |
1433 |
| - ("s", "ns"), |
1434 |
| - ("ms", "s"), |
1435 |
| - ("ms", "ms"), |
1436 |
| - ("ms", "us"), |
1437 |
| - ("ms", "ns"), |
1438 |
| - ("us", "s"), |
1439 |
| - ("us", "ms"), |
1440 |
| - ("us", "us"), |
1441 |
| - ("us", "ns"), |
1442 |
| - ("ns", "s"), |
1443 |
| - ("ns", "ms"), |
1444 |
| - ("ns", "us"), |
1445 |
| - ("ns", "ns"), |
1446 |
| - ], |
1447 |
| -) |
1448 |
| -def test_pa_table_and_to_pandas_timestamp_and_duration(timestamp_unit, duration_unit): |
1449 |
| - timestamp_series = Series( |
1450 |
| - date_range("2020-01-01", "2020-01-02", freq="1min") |
1451 |
| - ).astype(f"timestamp[{timestamp_unit}][pyarrow]") |
1452 |
| - duration_series = Series(timedelta_range("1s", "10s", freq="1s")).astype( |
1453 |
| - f"duration[{duration_unit}][pyarrow]" |
1454 |
| - ) |
1455 |
| - |
1456 |
| - df = concat([timestamp_series, duration_series], axis=1) |
1457 |
| - df_converted_to_pa = pa.table(df) |
1458 |
| - df_back_to_pd = df_converted_to_pa.to_pandas() |
1459 |
| - |
1460 |
| - tm.assert_frame_equal(df, df_back_to_pd) |
1461 |
| - |
1462 |
| - |
1463 |
| -def test_conversion_with_missing_values(): |
1464 |
| - df = pd.DataFrame( |
1465 |
| - { |
1466 |
| - "timestamp_col": [Timestamp("2020-01-01"), NaT], |
1467 |
| - "duration_col": [Timedelta("1s"), NaT], |
1468 |
| - } |
1469 |
| - ) |
1470 |
| - df_coverted_to_pa = pa.table(df) |
1471 |
| - df_back_to_pd = df_coverted_to_pa.to_pandas() |
1472 |
| - |
1473 |
| - tm.assert_frame_equal(df, df_back_to_pd) |
0 commit comments