From e83f0e245d94a5d0260e690f0b6f76e5550b0c95 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 22 Oct 2023 19:53:31 -0700 Subject: [PATCH 1/4] Remove unnecessary read_csv usage --- pandas/tests/frame/test_stack_unstack.py | 31 +- pandas/tests/groupby/test_apply.py | 86 +- pandas/tests/groupby/test_reductions.py | 20 +- pandas/tests/groupby/test_timegrouper.py | 27 +- .../tests/groupby/transform/test_transform.py | 40 +- pandas/tests/indexes/test_base.py | 19 +- pandas/tests/indexing/multiindex/test_loc.py | 15 +- pandas/tests/resample/test_datetime_index.py | 51 +- pandas/tests/reshape/merge/test_merge_asof.py | 1927 ++++++++++++++++- 9 files changed, 2083 insertions(+), 133 deletions(-) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index e041eff697718..60da510fa26f4 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1,5 +1,4 @@ from datetime import datetime -from io import StringIO import itertools import re @@ -1771,21 +1770,21 @@ def test_stack_duplicate_index(self, idx, columns, exp_idx, future_stack): "ignore:The previous implementation of stack is deprecated" ) def test_unstack_odd_failure(self, future_stack): - data = """day,time,smoker,sum,len -Fri,Dinner,No,8.25,3. -Fri,Dinner,Yes,27.03,9 -Fri,Lunch,No,3.0,1 -Fri,Lunch,Yes,13.68,6 -Sat,Dinner,No,139.63,45 -Sat,Dinner,Yes,120.77,42 -Sun,Dinner,No,180.57,57 -Sun,Dinner,Yes,66.82,19 -Thu,Dinner,No,3.0,1 -Thu,Lunch,No,117.32,44 -Thu,Lunch,Yes,51.51,17""" - - df = pd.read_csv(StringIO(data)).set_index(["day", "time", "smoker"]) - + mi = MultiIndex.from_arrays( + [ + ["Fri"] * 4 + ["Sat"] * 2 + ["Sun"] * 2 + ["Thu"] * 3, + ["Dinner"] * 2 + ["Lunch"] * 2 + ["Dinner"] * 5 + ["Lunch"] * 2, + ["No", "Yes"] * 4 + ["No", "No", "Yes"], + ], + names=["day", "time", "smoker"], + ) + df = DataFrame( + { + "sum": np.arange(11, dtype="float64"), + "len": np.arange(11, dtype="float64"), + }, + index=mi, + ) # it works, #2100 result = df.unstack(2) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 5331b2e2c5d81..e365bb127864f 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -2,7 +2,6 @@ date, datetime, ) -from io import StringIO import numpy as np import pytest @@ -38,39 +37,76 @@ def store(group): tm.assert_frame_equal(groups[0], expected_value) -def test_apply_issues(): +def test_apply_index_date(): # GH 5788 - - s = """2011.05.16,00:00,1.40893 -2011.05.16,01:00,1.40760 -2011.05.16,02:00,1.40750 -2011.05.16,03:00,1.40649 -2011.05.17,02:00,1.40893 -2011.05.17,03:00,1.40760 -2011.05.17,04:00,1.40750 -2011.05.17,05:00,1.40649 -2011.05.18,02:00,1.40893 -2011.05.18,03:00,1.40760 -2011.05.18,04:00,1.40750 -2011.05.18,05:00,1.40649""" - - df = pd.read_csv( - StringIO(s), - header=None, - names=["date", "time", "value"], - parse_dates=[["date", "time"]], + ts = """2011-05-16 00:00 +2011-05-16 01:00 +2011-05-16 02:00 +2011-05-16 03:00 +2011-05-17 02:00 +2011-05-17 03:00 +2011-05-17 04:00 +2011-05-17 05:00 +2011-05-18 02:00 +2011-05-18 03:00 +2011-05-18 04:00 +2011-05-18 05:00""" + df = DataFrame( + { + "value": [ + 1.40893, + 1.40760, + 1.40750, + 1.40649, + 1.40893, + 1.40760, + 1.40750, + 1.40649, + 1.40893, + 1.40760, + 1.40750, + 1.40649, + ], + }, + index=Index(pd.to_datetime(ts.split("\n")), name="date_time"), ) - df = df.set_index("date_time") - expected = df.groupby(df.index.date).idxmax() result = df.groupby(df.index.date).apply(lambda x: x.idxmax()) tm.assert_frame_equal(result, expected) + +def test_apply_index_date_object(): # GH 5789 # don't auto coerce dates - df = pd.read_csv(StringIO(s), header=None, names=["date", "time", "value"]) + ts = """2011-05-16 00:00 +2011-05-16 01:00 +2011-05-16 02:00 +2011-05-16 03:00 +2011-05-17 02:00 +2011-05-17 03:00 +2011-05-17 04:00 +2011-05-17 05:00 +2011-05-18 02:00 +2011-05-18 03:00 +2011-05-18 04:00 +2011-05-18 05:00""" + df = DataFrame([row.split() for row in ts.split("\n")], columns=["date", "time"]) + df["value"] = [ + 1.40893, + 1.40760, + 1.40750, + 1.40649, + 1.40893, + 1.40760, + 1.40750, + 1.40649, + 1.40893, + 1.40760, + 1.40750, + 1.40649, + ] exp_idx = Index( - ["2011.05.16", "2011.05.17", "2011.05.18"], dtype=object, name="date" + ["2011-05-16", "2011-05-17", "2011-05-18"], dtype=object, name="date" ) expected = Series(["00:00", "02:00", "02:00"], index=exp_idx) msg = "DataFrameGroupBy.apply operated on the grouping columns" diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index fdfb211ac2269..31575dbad7d09 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -1,6 +1,5 @@ import builtins import datetime as dt -from io import StringIO from string import ascii_lowercase import numpy as np @@ -576,13 +575,18 @@ def test_groupby_min_max_categorical(func): def test_max_nan_bug(): - raw = """,Date,app,File --04-23,2013-04-23 00:00:00,,log080001.log --05-06,2013-05-06 00:00:00,,log.log --05-07,2013-05-07 00:00:00,OE,xlsx""" - - with tm.assert_produces_warning(UserWarning, match="Could not infer format"): - df = pd.read_csv(StringIO(raw), parse_dates=[0]) + df = DataFrame( + { + "Unnamed: 0": ["-04-23", "-05-06", "-05-07"], + "Date": [ + "2013-04-23 00:00:00", + "2013-05-06 00:00:00", + "2013-05-07 00:00:00", + ], + "app": Series([np.nan, np.nan, "OE"]), + "File": ["log080001.log", "log.log", "xlsx"], + } + ) gb = df.groupby("Date") r = gb[["File"]].max() e = gb["File"].max().to_frame() diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 31629ba697e33..8815d200bb3be 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -5,7 +5,6 @@ datetime, timedelta, ) -from io import StringIO import numpy as np import pytest @@ -607,14 +606,26 @@ def test_frame_datetime64_handling_groupby(self): def test_groupby_multi_timezone(self): # combining multiple / different timezones yields UTC + df = DataFrame( + { + "value": range(5), + "date": [ + "2000-01-28 16:47:00", + "2000-01-29 16:48:00", + "2000-01-30 16:49:00", + "2000-01-31 16:50:00", + "2000-01-01 16:50:00", + ], + "tz": [ + "America/Chicago", + "America/Chicago", + "America/Los_Angeles", + "America/Chicago", + "America/New_York", + ], + } + ) - data = """0,2000-01-28 16:47:00,America/Chicago -1,2000-01-29 16:48:00,America/Chicago -2,2000-01-30 16:49:00,America/Los_Angeles -3,2000-01-31 16:50:00,America/Chicago -4,2000-01-01 16:50:00,America/New_York""" - - df = pd.read_csv(StringIO(data), header=None, names=["value", "date", "tz"]) result = df.groupby("tz", group_keys=False).date.apply( lambda x: pd.to_datetime(x).dt.tz_localize(x.name) ) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index add3c94dcd36a..bd60efd16ccef 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1,6 +1,4 @@ """ test with the .transform """ -from io import StringIO - import numpy as np import pytest @@ -337,22 +335,28 @@ def test_transform_datetime_to_numeric(): def test_transform_casting(): # 13046 - data = """ - idx A ID3 DATETIME - 0 B-028 b76cd912ff "2014-10-08 13:43:27" - 1 B-054 4a57ed0b02 "2014-10-08 14:26:19" - 2 B-076 1a682034f8 "2014-10-08 14:29:01" - 3 B-023 b76cd912ff "2014-10-08 18:39:34" - 4 B-023 f88g8d7sds "2014-10-08 18:40:18" - 5 B-033 b76cd912ff "2014-10-08 18:44:30" - 6 B-032 b76cd912ff "2014-10-08 18:46:00" - 7 B-037 b76cd912ff "2014-10-08 18:52:15" - 8 B-046 db959faf02 "2014-10-08 18:59:59" - 9 B-053 b76cd912ff "2014-10-08 19:17:48" - 10 B-065 b76cd912ff "2014-10-08 19:21:38" - """ - df = pd.read_csv( - StringIO(data), sep=r"\s+", index_col=[0], parse_dates=["DATETIME"] + times = [ + "13:43:27", + "14:26:19", + "14:29:01", + "18:39:34", + "18:40:18", + "18:44:30", + "18:46:00", + "18:52:15", + "18:59:59", + "19:17:48", + "19:21:38", + ] + df = DataFrame( + { + "A": [f"B-{i}" for i in range(11)], + "ID3": np.take( + ["a", "b", "c", "d", "e"], [0, 1, 2, 1, 3, 1, 1, 1, 4, 1, 1] + ), + "DATETIME": pd.to_datetime([f"2014-10-08 {time}" for time in times]), + }, + index=pd.RangeIndex(11, name="idx"), ) result = df.groupby("ID3")["DATETIME"].transform(lambda x: x.diff()) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 04ab2020b4c7a..17ab9d7823c83 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1,6 +1,5 @@ from collections import defaultdict from datetime import datetime -from io import StringIO import math import operator import re @@ -1174,13 +1173,21 @@ def test_groupby(self): def test_equals_op_multiindex(self, mi, expected): # GH9785 # test comparisons of multiindex - df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1]) + df = DataFrame( + [3, 6], + columns=["c"], + index=MultiIndex.from_arrays([[1, 4], [2, 5]], names=["a", "b"]), + ) result = df.index == mi tm.assert_numpy_array_equal(result, expected) def test_equals_op_multiindex_identify(self): - df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1]) + df = DataFrame( + [3, 6], + columns=["c"], + index=MultiIndex.from_arrays([[1, 4], [2, 5]], names=["a", "b"]), + ) result = df.index == df.index expected = np.array([True, True]) @@ -1194,7 +1201,11 @@ def test_equals_op_multiindex_identify(self): ], ) def test_equals_op_mismatched_multiindex_raises(self, index): - df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1]) + df = DataFrame( + [3, 6], + columns=["c"], + index=MultiIndex.from_arrays([[1, 4], [2, 5]], names=["a", "b"]), + ) with pytest.raises(ValueError, match="Lengths must match"): df.index == index diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index c8b10f72c9ad9..873c4e3e60f4c 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -698,10 +698,19 @@ def test_loc_mi_with_level1_named_0(): tm.assert_series_equal(result, expected) -def test_getitem_str_slice(datapath): +def test_getitem_str_slice(): # GH#15928 - path = datapath("reshape", "merge", "data", "quotes2.csv") - df = pd.read_csv(path, parse_dates=["time"]) + df = DataFrame( + [ + ["20160525 13:30:00.023", "MSFT", "51.95", "51.95"], + ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"], + ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"], + ["20160525 13:30:00.131", "AAPL", "98.61", "98.62"], + ["20160525 13:30:00.135", "MSFT", "51.92", "51.95"], + ["20160525 13:30:00.135", "AAPL", "98.61", "98.62"], + ], + columns="time,ticker,bid,ask".split(","), + ) df2 = df.set_index(["ticker", "time"]).sort_index() res = df2.loc[("AAPL", slice("2016-05-25 13:30:00")), :].droplevel(0) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index e0ba7902a8a6c..f0a0add769430 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1,6 +1,5 @@ from datetime import datetime from functools import partial -from io import StringIO import numpy as np import pytest @@ -271,34 +270,30 @@ def test_resample_rounding(unit): # GH 8371 # odd results when rounding is needed - data = """date,time,value -11-08-2014,00:00:01.093,1 -11-08-2014,00:00:02.159,1 -11-08-2014,00:00:02.667,1 -11-08-2014,00:00:03.175,1 -11-08-2014,00:00:07.058,1 -11-08-2014,00:00:07.362,1 -11-08-2014,00:00:08.324,1 -11-08-2014,00:00:08.830,1 -11-08-2014,00:00:08.982,1 -11-08-2014,00:00:09.815,1 -11-08-2014,00:00:10.540,1 -11-08-2014,00:00:11.061,1 -11-08-2014,00:00:11.617,1 -11-08-2014,00:00:13.607,1 -11-08-2014,00:00:14.535,1 -11-08-2014,00:00:15.525,1 -11-08-2014,00:00:17.960,1 -11-08-2014,00:00:20.674,1 -11-08-2014,00:00:21.191,1""" - - df = pd.read_csv( - StringIO(data), - parse_dates={"timestamp": ["date", "time"]}, - index_col="timestamp", - ) + ts = [ + "2014-11-08 00:00:01", + "2014-11-08 00:00:02", + "2014-11-08 00:00:02", + "2014-11-08 00:00:03", + "2014-11-08 00:00:07", + "2014-11-08 00:00:07", + "2014-11-08 00:00:08", + "2014-11-08 00:00:08", + "2014-11-08 00:00:08", + "2014-11-08 00:00:09", + "2014-11-08 00:00:10", + "2014-11-08 00:00:11", + "2014-11-08 00:00:11", + "2014-11-08 00:00:13", + "2014-11-08 00:00:14", + "2014-11-08 00:00:15", + "2014-11-08 00:00:17", + "2014-11-08 00:00:20", + "2014-11-08 00:00:21", + ] + df = DataFrame({"value": [1] * 19}, index=pd.to_datetime(ts)) df.index = df.index.as_unit(unit) - df.index.name = None + result = df.resample("6s").sum() expected = DataFrame( {"value": [4, 9, 4, 2]}, diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 8ada42898f947..0faff89e23070 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -11,7 +11,6 @@ Index, Timedelta, merge_asof, - read_csv, to_datetime, ) import pandas._testing as tm @@ -27,39 +26,1070 @@ def unit(request): class TestAsOfMerge: - def read_data(self, datapath, name, dedupe=False): - path = datapath("reshape", "merge", "data", name) - x = read_csv(path) + def prep_data(self, df, dedupe=False): if dedupe: - x = x.drop_duplicates(["time", "ticker"], keep="last").reset_index( + df = df.drop_duplicates(["time", "ticker"], keep="last").reset_index( drop=True ) - x.time = to_datetime(x.time) - return x + df.time = to_datetime(df.time) + return df @pytest.fixture - def trades(self, datapath): - return self.read_data(datapath, "trades.csv") + def trades(self): + df = pd.DataFrame( + [ + ["20160525 13:30:00.023", "MSFT", "51.9500", "75", "NASDAQ"], + ["20160525 13:30:00.038", "MSFT", "51.9500", "155", "NASDAQ"], + ["20160525 13:30:00.048", "GOOG", "720.7700", "100", "NASDAQ"], + ["20160525 13:30:00.048", "GOOG", "720.9200", "100", "NASDAQ"], + ["20160525 13:30:00.048", "GOOG", "720.9300", "200", "NASDAQ"], + ["20160525 13:30:00.048", "GOOG", "720.9300", "300", "NASDAQ"], + ["20160525 13:30:00.048", "GOOG", "720.9300", "600", "NASDAQ"], + ["20160525 13:30:00.048", "GOOG", "720.9300", "44", "NASDAQ"], + ["20160525 13:30:00.074", "AAPL", "98.6700", "478343", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.6700", "478343", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.6600", "6", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.6500", "30", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.6500", "75", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.6500", "20", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.6500", "35", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.6500", "10", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.5500", "6", "ARCA"], + ["20160525 13:30:00.075", "AAPL", "98.5500", "6", "ARCA"], + ["20160525 13:30:00.076", "AAPL", "98.5600", "1000", "ARCA"], + ["20160525 13:30:00.076", "AAPL", "98.5600", "200", "ARCA"], + ["20160525 13:30:00.076", "AAPL", "98.5600", "300", "ARCA"], + ["20160525 13:30:00.076", "AAPL", "98.5600", "400", "ARCA"], + ["20160525 13:30:00.076", "AAPL", "98.5600", "600", "ARCA"], + ["20160525 13:30:00.076", "AAPL", "98.5600", "200", "ARCA"], + ["20160525 13:30:00.078", "MSFT", "51.9500", "783", "NASDAQ"], + ["20160525 13:30:00.078", "MSFT", "51.9500", "100", "NASDAQ"], + ["20160525 13:30:00.078", "MSFT", "51.9500", "100", "NASDAQ"], + ], + columns="time,ticker,price,quantity,marketCenter".split(","), + ) + df["price"] = df["price"].astype("float64") + df["quantity"] = df["quantity"].astype("int64") + return self.prep_data(df) @pytest.fixture - def quotes(self, datapath): - return self.read_data(datapath, "quotes.csv", dedupe=True) + def quotes(self): + df = pd.DataFrame( + [ + ["20160525 13:30:00.023", "GOOG", "720.50", "720.93"], + ["20160525 13:30:00.023", "MSFT", "51.95", "51.95"], + ["20160525 13:30:00.041", "MSFT", "51.95", "51.95"], + ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"], + ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"], + ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"], + ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"], + ["20160525 13:30:00.072", "GOOG", "720.50", "720.88"], + ["20160525 13:30:00.075", "AAPL", "98.55", "98.56"], + ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"], + ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"], + ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"], + ["20160525 13:30:00.078", "MSFT", "51.95", "51.95"], + ["20160525 13:30:00.078", "MSFT", "51.95", "51.95"], + ["20160525 13:30:00.078", "MSFT", "51.95", "51.95"], + ["20160525 13:30:00.078", "MSFT", "51.92", "51.95"], + ], + columns="time,ticker,bid,ask".split(","), + ) + df["bid"] = df["bid"].astype("float64") + df["ask"] = df["ask"].astype("float64") + return self.prep_data(df, dedupe=True) @pytest.fixture - def asof(self, datapath): - return self.read_data(datapath, "asof.csv") + def asof(self): + df = pd.DataFrame( + [ + [ + "20160525 13:30:00.023", + "MSFT", + "51.95", + "75", + "NASDAQ", + "51.95", + "51.95", + ], + [ + "20160525 13:30:00.038", + "MSFT", + "51.95", + "155", + "NASDAQ", + "51.95", + "51.95", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.77", + "100", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.92", + "100", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "200", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "300", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "600", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "44", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.074", + "AAPL", + "98.67", + "478343", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.67", + "478343", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.66", + "6", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "30", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "75", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "20", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "35", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "10", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.55", + "6", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.55", + "6", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "1000", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "200", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "300", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "400", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "600", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "200", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.078", + "MSFT", + "51.95", + "783", + "NASDAQ", + "51.92", + "51.95", + ], + [ + "20160525 13:30:00.078", + "MSFT", + "51.95", + "100", + "NASDAQ", + "51.92", + "51.95", + ], + [ + "20160525 13:30:00.078", + "MSFT", + "51.95", + "100", + "NASDAQ", + "51.92", + "51.95", + ], + ], + columns="time,ticker,price,quantity,marketCenter,bid,ask".split(","), + ) + df["price"] = df["price"].astype("float64") + df["quantity"] = df["quantity"].astype("int64") + df["bid"] = df["bid"].astype("float64") + df["ask"] = df["ask"].astype("float64") + return self.prep_data(df) @pytest.fixture - def tolerance(self, datapath): - return self.read_data(datapath, "tolerance.csv") + def tolerance(self): + df = pd.DataFrame( + [ + [ + "20160525 13:30:00.023", + "MSFT", + "51.95", + "75", + "NASDAQ", + "51.95", + "51.95", + ], + [ + "20160525 13:30:00.038", + "MSFT", + "51.95", + "155", + "NASDAQ", + "51.95", + "51.95", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.77", + "100", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.92", + "100", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "200", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "300", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "600", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "44", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.074", + "AAPL", + "98.67", + "478343", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.67", + "478343", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.66", + "6", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "30", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "75", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "20", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "35", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "10", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.55", + "6", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.55", + "6", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "1000", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "200", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "300", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "400", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "600", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "200", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.078", + "MSFT", + "51.95", + "783", + "NASDAQ", + "51.92", + "51.95", + ], + [ + "20160525 13:30:00.078", + "MSFT", + "51.95", + "100", + "NASDAQ", + "51.92", + "51.95", + ], + [ + "20160525 13:30:00.078", + "MSFT", + "51.95", + "100", + "NASDAQ", + "51.92", + "51.95", + ], + ], + columns="time,ticker,price,quantity,marketCenter,bid,ask".split(","), + ) + df["price"] = df["price"].astype("float64") + df["quantity"] = df["quantity"].astype("int64") + df["bid"] = df["bid"].astype("float64") + df["ask"] = df["ask"].astype("float64") + return self.prep_data(df) @pytest.fixture def allow_exact_matches(self, datapath): - return self.read_data(datapath, "allow_exact_matches.csv") + df = pd.DataFrame( + [ + [ + "20160525 13:30:00.023", + "MSFT", + "51.95", + "75", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.038", + "MSFT", + "51.95", + "155", + "NASDAQ", + "51.95", + "51.95", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.77", + "100", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.92", + "100", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "200", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "300", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "600", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "44", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.074", + "AAPL", + "98.67", + "478343", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.67", + "478343", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.66", + "6", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "30", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "75", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "20", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "35", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "10", + "NASDAQ", + np.nan, + np.nan, + ], + ["20160525 13:30:00.075", "AAPL", "98.55", "6", "ARCA", np.nan, np.nan], + ["20160525 13:30:00.075", "AAPL", "98.55", "6", "ARCA", np.nan, np.nan], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "1000", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "200", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "300", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "400", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "600", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "200", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.078", + "MSFT", + "51.95", + "783", + "NASDAQ", + "51.95", + "51.95", + ], + [ + "20160525 13:30:00.078", + "MSFT", + "51.95", + "100", + "NASDAQ", + "51.95", + "51.95", + ], + [ + "20160525 13:30:00.078", + "MSFT", + "51.95", + "100", + "NASDAQ", + "51.95", + "51.95", + ], + ], + columns="time,ticker,price,quantity,marketCenter,bid,ask".split(","), + ) + df["price"] = df["price"].astype("float64") + df["quantity"] = df["quantity"].astype("int64") + df["bid"] = df["bid"].astype("float64") + df["ask"] = df["ask"].astype("float64") + return self.prep_data(df) @pytest.fixture - def allow_exact_matches_and_tolerance(self, datapath): - return self.read_data(datapath, "allow_exact_matches_and_tolerance.csv") + def allow_exact_matches_and_tolerance(self): + df = pd.DataFrame( + [ + [ + "20160525 13:30:00.023", + "MSFT", + "51.95", + "75", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.038", + "MSFT", + "51.95", + "155", + "NASDAQ", + "51.95", + "51.95", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.77", + "100", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.92", + "100", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "200", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "300", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "600", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "44", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.074", + "AAPL", + "98.67", + "478343", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.67", + "478343", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.66", + "6", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "30", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "75", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "20", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "35", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "10", + "NASDAQ", + np.nan, + np.nan, + ], + ["20160525 13:30:00.075", "AAPL", "98.55", "6", "ARCA", np.nan, np.nan], + ["20160525 13:30:00.075", "AAPL", "98.55", "6", "ARCA", np.nan, np.nan], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "1000", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "200", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "300", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "400", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "600", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "200", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.078", + "MSFT", + "51.95", + "783", + "NASDAQ", + "51.95", + "51.95", + ], + [ + "20160525 13:30:00.078", + "MSFT", + "51.95", + "100", + "NASDAQ", + "51.95", + "51.95", + ], + [ + "20160525 13:30:00.078", + "MSFT", + "51.95", + "100", + "NASDAQ", + "51.95", + "51.95", + ], + ], + columns="time,ticker,price,quantity,marketCenter,bid,ask".split(","), + ) + df["price"] = df["price"].astype("float64") + df["quantity"] = df["quantity"].astype("int64") + df["bid"] = df["bid"].astype("float64") + df["ask"] = df["ask"].astype("float64") + return self.prep_data(df) def test_examples1(self): """doc-string examples""" @@ -501,9 +1531,860 @@ def test_multiby_indexed(self): ) def test_basic2(self, datapath): - expected = self.read_data(datapath, "asof2.csv") - trades = self.read_data(datapath, "trades2.csv") - quotes = self.read_data(datapath, "quotes2.csv", dedupe=True) + expected = pd.DataFrame( + [ + [ + "20160525 13:30:00.023", + "MSFT", + "51.95", + "75", + "NASDAQ", + "51.95", + "51.95", + ], + [ + "20160525 13:30:00.038", + "MSFT", + "51.95", + "155", + "NASDAQ", + "51.95", + "51.95", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.77", + "100", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.92", + "100", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "200", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "300", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "600", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.048", + "GOOG", + "720.93", + "44", + "NASDAQ", + "720.5", + "720.93", + ], + [ + "20160525 13:30:00.074", + "AAPL", + "98.67", + "478343", + "NASDAQ", + np.nan, + np.nan, + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.67", + "478343", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.66", + "6", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "30", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "75", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "20", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "35", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.65", + "10", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.55", + "6", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.075", + "AAPL", + "98.55", + "6", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "1000", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "200", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "300", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "400", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "600", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.076", + "AAPL", + "98.56", + "200", + "ARCA", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.078", + "MSFT", + "51.95", + "783", + "NASDAQ", + "51.92", + "51.95", + ], + [ + "20160525 13:30:00.078", + "MSFT", + "51.95", + "100", + "NASDAQ", + "51.92", + "51.95", + ], + [ + "20160525 13:30:00.078", + "MSFT", + "51.95", + "100", + "NASDAQ", + "51.92", + "51.95", + ], + [ + "20160525 13:30:00.084", + "AAPL", + "98.64", + "40", + "NASDAQ", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.084", + "AAPL", + "98.55", + "149", + "EDGX", + "98.55", + "98.56", + ], + [ + "20160525 13:30:00.086", + "AAPL", + "98.56", + "500", + "ARCA", + "98.55", + "98.63", + ], + [ + "20160525 13:30:00.104", + "AAPL", + "98.63", + "647", + "EDGX", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.104", + "AAPL", + "98.63", + "300", + "EDGX", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.104", + "AAPL", + "98.63", + "50", + "NASDAQ", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.104", + "AAPL", + "98.63", + "50", + "NASDAQ", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.104", + "AAPL", + "98.63", + "70", + "NASDAQ", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.104", + "AAPL", + "98.63", + "70", + "NASDAQ", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.104", + "AAPL", + "98.63", + "1", + "NASDAQ", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.104", + "AAPL", + "98.63", + "62", + "NASDAQ", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.104", + "AAPL", + "98.63", + "10", + "NASDAQ", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.104", + "AAPL", + "98.63", + "100", + "ARCA", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.105", + "AAPL", + "98.63", + "100", + "ARCA", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.105", + "AAPL", + "98.63", + "700", + "ARCA", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.106", + "AAPL", + "98.63", + "61", + "EDGX", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.107", + "AAPL", + "98.63", + "100", + "ARCA", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.107", + "AAPL", + "98.63", + "53", + "ARCA", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.108", + "AAPL", + "98.63", + "100", + "ARCA", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.108", + "AAPL", + "98.63", + "839", + "ARCA", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.115", + "AAPL", + "98.63", + "5", + "EDGX", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.118", + "AAPL", + "98.63", + "295", + "EDGX", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.118", + "AAPL", + "98.63", + "5", + "EDGX", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.128", + "AAPL", + "98.63", + "100", + "NASDAQ", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.128", + "AAPL", + "98.63", + "100", + "NASDAQ", + "98.62", + "98.63", + ], + [ + "20160525 13:30:00.128", + "MSFT", + "51.92", + "100", + "ARCA", + "51.92", + "51.95", + ], + [ + "20160525 13:30:00.129", + "AAPL", + "98.62", + "100", + "NASDAQ", + "98.61", + "98.63", + ], + [ + "20160525 13:30:00.129", + "AAPL", + "98.62", + "10", + "NASDAQ", + "98.61", + "98.63", + ], + [ + "20160525 13:30:00.129", + "AAPL", + "98.62", + "59", + "NASDAQ", + "98.61", + "98.63", + ], + [ + "20160525 13:30:00.129", + "AAPL", + "98.62", + "31", + "NASDAQ", + "98.61", + "98.63", + ], + [ + "20160525 13:30:00.129", + "AAPL", + "98.62", + "69", + "NASDAQ", + "98.61", + "98.63", + ], + [ + "20160525 13:30:00.129", + "AAPL", + "98.62", + "12", + "NASDAQ", + "98.61", + "98.63", + ], + [ + "20160525 13:30:00.129", + "AAPL", + "98.62", + "12", + "EDGX", + "98.61", + "98.63", + ], + [ + "20160525 13:30:00.129", + "AAPL", + "98.62", + "100", + "ARCA", + "98.61", + "98.63", + ], + [ + "20160525 13:30:00.129", + "AAPL", + "98.62", + "100", + "ARCA", + "98.61", + "98.63", + ], + [ + "20160525 13:30:00.130", + "MSFT", + "51.95", + "317", + "ARCA", + "51.93", + "51.95", + ], + [ + "20160525 13:30:00.130", + "MSFT", + "51.95", + "283", + "ARCA", + "51.93", + "51.95", + ], + [ + "20160525 13:30:00.135", + "MSFT", + "51.93", + "100", + "EDGX", + "51.92", + "51.95", + ], + [ + "20160525 13:30:00.135", + "AAPL", + "98.62", + "100", + "ARCA", + "98.61", + "98.62", + ], + [ + "20160525 13:30:00.144", + "AAPL", + "98.62", + "12", + "NASDAQ", + "98.61", + "98.62", + ], + [ + "20160525 13:30:00.144", + "AAPL", + "98.62", + "88", + "NASDAQ", + "98.61", + "98.62", + ], + [ + "20160525 13:30:00.144", + "AAPL", + "98.62", + "162", + "NASDAQ", + "98.61", + "98.62", + ], + [ + "20160525 13:30:00.144", + "AAPL", + "98.61", + "100", + "BATS", + "98.61", + "98.62", + ], + [ + "20160525 13:30:00.144", + "AAPL", + "98.62", + "61", + "ARCA", + "98.61", + "98.62", + ], + [ + "20160525 13:30:00.144", + "AAPL", + "98.62", + "25", + "ARCA", + "98.61", + "98.62", + ], + [ + "20160525 13:30:00.144", + "AAPL", + "98.62", + "14", + "ARCA", + "98.61", + "98.62", + ], + [ + "20160525 13:30:00.145", + "AAPL", + "98.62", + "12", + "ARCA", + "98.6", + "98.63", + ], + [ + "20160525 13:30:00.145", + "AAPL", + "98.62", + "100", + "ARCA", + "98.6", + "98.63", + ], + [ + "20160525 13:30:00.145", + "AAPL", + "98.63", + "100", + "NASDAQ", + "98.6", + "98.63", + ], + [ + "20160525 13:30:00.145", + "AAPL", + "98.63", + "100", + "NASDAQ", + "98.6", + "98.63", + ], + ], + columns="time,ticker,price,quantity,marketCenter,bid,ask".split(","), + ) + expected["price"] = expected["price"].astype("float64") + expected["quantity"] = expected["quantity"].astype("int64") + expected["bid"] = expected["bid"].astype("float64") + expected["ask"] = expected["ask"].astype("float64") + expected = self.prep_data(expected) + + trades = pd.DataFrame( + [ + ["20160525 13:30:00.023", "MSFT", "51.9500", "75", "NASDAQ"], + ["20160525 13:30:00.038", "MSFT", "51.9500", "155", "NASDAQ"], + ["20160525 13:30:00.048", "GOOG", "720.7700", "100", "NASDAQ"], + ["20160525 13:30:00.048", "GOOG", "720.9200", "100", "NASDAQ"], + ["20160525 13:30:00.048", "GOOG", "720.9300", "200", "NASDAQ"], + ["20160525 13:30:00.048", "GOOG", "720.9300", "300", "NASDAQ"], + ["20160525 13:30:00.048", "GOOG", "720.9300", "600", "NASDAQ"], + ["20160525 13:30:00.048", "GOOG", "720.9300", "44", "NASDAQ"], + ["20160525 13:30:00.074", "AAPL", "98.6700", "478343", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.6700", "478343", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.6600", "6", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.6500", "30", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.6500", "75", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.6500", "20", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.6500", "35", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.6500", "10", "NASDAQ"], + ["20160525 13:30:00.075", "AAPL", "98.5500", "6", "ARCA"], + ["20160525 13:30:00.075", "AAPL", "98.5500", "6", "ARCA"], + ["20160525 13:30:00.076", "AAPL", "98.5600", "1000", "ARCA"], + ["20160525 13:30:00.076", "AAPL", "98.5600", "200", "ARCA"], + ["20160525 13:30:00.076", "AAPL", "98.5600", "300", "ARCA"], + ["20160525 13:30:00.076", "AAPL", "98.5600", "400", "ARCA"], + ["20160525 13:30:00.076", "AAPL", "98.5600", "600", "ARCA"], + ["20160525 13:30:00.076", "AAPL", "98.5600", "200", "ARCA"], + ["20160525 13:30:00.078", "MSFT", "51.9500", "783", "NASDAQ"], + ["20160525 13:30:00.078", "MSFT", "51.9500", "100", "NASDAQ"], + ["20160525 13:30:00.078", "MSFT", "51.9500", "100", "NASDAQ"], + ["20160525 13:30:00.084", "AAPL", "98.6400", "40", "NASDAQ"], + ["20160525 13:30:00.084", "AAPL", "98.5500", "149", "EDGX"], + ["20160525 13:30:00.086", "AAPL", "98.5600", "500", "ARCA"], + ["20160525 13:30:00.104", "AAPL", "98.6300", "647", "EDGX"], + ["20160525 13:30:00.104", "AAPL", "98.6300", "300", "EDGX"], + ["20160525 13:30:00.104", "AAPL", "98.6300", "50", "NASDAQ"], + ["20160525 13:30:00.104", "AAPL", "98.6300", "50", "NASDAQ"], + ["20160525 13:30:00.104", "AAPL", "98.6300", "70", "NASDAQ"], + ["20160525 13:30:00.104", "AAPL", "98.6300", "70", "NASDAQ"], + ["20160525 13:30:00.104", "AAPL", "98.6300", "1", "NASDAQ"], + ["20160525 13:30:00.104", "AAPL", "98.6300", "62", "NASDAQ"], + ["20160525 13:30:00.104", "AAPL", "98.6300", "10", "NASDAQ"], + ["20160525 13:30:00.104", "AAPL", "98.6300", "100", "ARCA"], + ["20160525 13:30:00.105", "AAPL", "98.6300", "100", "ARCA"], + ["20160525 13:30:00.105", "AAPL", "98.6300", "700", "ARCA"], + ["20160525 13:30:00.106", "AAPL", "98.6300", "61", "EDGX"], + ["20160525 13:30:00.107", "AAPL", "98.6300", "100", "ARCA"], + ["20160525 13:30:00.107", "AAPL", "98.6300", "53", "ARCA"], + ["20160525 13:30:00.108", "AAPL", "98.6300", "100", "ARCA"], + ["20160525 13:30:00.108", "AAPL", "98.6300", "839", "ARCA"], + ["20160525 13:30:00.115", "AAPL", "98.6300", "5", "EDGX"], + ["20160525 13:30:00.118", "AAPL", "98.6300", "295", "EDGX"], + ["20160525 13:30:00.118", "AAPL", "98.6300", "5", "EDGX"], + ["20160525 13:30:00.128", "AAPL", "98.6300", "100", "NASDAQ"], + ["20160525 13:30:00.128", "AAPL", "98.6300", "100", "NASDAQ"], + ["20160525 13:30:00.128", "MSFT", "51.9200", "100", "ARCA"], + ["20160525 13:30:00.129", "AAPL", "98.6200", "100", "NASDAQ"], + ["20160525 13:30:00.129", "AAPL", "98.6200", "10", "NASDAQ"], + ["20160525 13:30:00.129", "AAPL", "98.6200", "59", "NASDAQ"], + ["20160525 13:30:00.129", "AAPL", "98.6200", "31", "NASDAQ"], + ["20160525 13:30:00.129", "AAPL", "98.6200", "69", "NASDAQ"], + ["20160525 13:30:00.129", "AAPL", "98.6200", "12", "NASDAQ"], + ["20160525 13:30:00.129", "AAPL", "98.6200", "12", "EDGX"], + ["20160525 13:30:00.129", "AAPL", "98.6200", "100", "ARCA"], + ["20160525 13:30:00.129", "AAPL", "98.6200", "100", "ARCA"], + ["20160525 13:30:00.130", "MSFT", "51.9500", "317", "ARCA"], + ["20160525 13:30:00.130", "MSFT", "51.9500", "283", "ARCA"], + ["20160525 13:30:00.135", "MSFT", "51.9300", "100", "EDGX"], + ["20160525 13:30:00.135", "AAPL", "98.6200", "100", "ARCA"], + ["20160525 13:30:00.144", "AAPL", "98.6200", "12", "NASDAQ"], + ["20160525 13:30:00.144", "AAPL", "98.6200", "88", "NASDAQ"], + ["20160525 13:30:00.144", "AAPL", "98.6200", "162", "NASDAQ"], + ["20160525 13:30:00.144", "AAPL", "98.6100", "100", "BATS"], + ["20160525 13:30:00.144", "AAPL", "98.6200", "61", "ARCA"], + ["20160525 13:30:00.144", "AAPL", "98.6200", "25", "ARCA"], + ["20160525 13:30:00.144", "AAPL", "98.6200", "14", "ARCA"], + ["20160525 13:30:00.145", "AAPL", "98.6200", "12", "ARCA"], + ["20160525 13:30:00.145", "AAPL", "98.6200", "100", "ARCA"], + ["20160525 13:30:00.145", "AAPL", "98.6300", "100", "NASDAQ"], + ["20160525 13:30:00.145", "AAPL", "98.6300", "100", "NASDAQ"], + ], + columns="time,ticker,price,quantity,marketCenter".split(","), + ) + trades["price"] = trades["price"].astype("float64") + trades["quantity"] = trades["quantity"].astype("int64") + trades = self.prep_data(trades) + + quotes = pd.DataFrame( + [ + ["20160525 13:30:00.023", "GOOG", "720.50", "720.93"], + ["20160525 13:30:00.023", "MSFT", "51.95", "51.95"], + ["20160525 13:30:00.041", "MSFT", "51.95", "51.95"], + ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"], + ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"], + ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"], + ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"], + ["20160525 13:30:00.072", "GOOG", "720.50", "720.88"], + ["20160525 13:30:00.075", "AAPL", "98.55", "98.56"], + ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"], + ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"], + ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"], + ["20160525 13:30:00.078", "MSFT", "51.95", "51.95"], + ["20160525 13:30:00.078", "MSFT", "51.95", "51.95"], + ["20160525 13:30:00.078", "MSFT", "51.95", "51.95"], + ["20160525 13:30:00.078", "MSFT", "51.92", "51.95"], + ["20160525 13:30:00.079", "MSFT", "51.92", "51.95"], + ["20160525 13:30:00.080", "AAPL", "98.55", "98.56"], + ["20160525 13:30:00.084", "AAPL", "98.55", "98.56"], + ["20160525 13:30:00.086", "AAPL", "98.55", "98.63"], + ["20160525 13:30:00.088", "AAPL", "98.65", "98.63"], + ["20160525 13:30:00.089", "AAPL", "98.63", "98.63"], + ["20160525 13:30:00.104", "AAPL", "98.63", "98.63"], + ["20160525 13:30:00.104", "AAPL", "98.63", "98.63"], + ["20160525 13:30:00.104", "AAPL", "98.63", "98.63"], + ["20160525 13:30:00.104", "AAPL", "98.63", "98.63"], + ["20160525 13:30:00.104", "AAPL", "98.62", "98.63"], + ["20160525 13:30:00.105", "AAPL", "98.62", "98.63"], + ["20160525 13:30:00.107", "AAPL", "98.62", "98.63"], + ["20160525 13:30:00.115", "AAPL", "98.62", "98.63"], + ["20160525 13:30:00.115", "AAPL", "98.62", "98.63"], + ["20160525 13:30:00.118", "AAPL", "98.62", "98.63"], + ["20160525 13:30:00.128", "AAPL", "98.62", "98.63"], + ["20160525 13:30:00.128", "AAPL", "98.62", "98.63"], + ["20160525 13:30:00.129", "AAPL", "98.62", "98.63"], + ["20160525 13:30:00.129", "AAPL", "98.61", "98.63"], + ["20160525 13:30:00.129", "AAPL", "98.62", "98.63"], + ["20160525 13:30:00.129", "AAPL", "98.62", "98.63"], + ["20160525 13:30:00.129", "AAPL", "98.61", "98.63"], + ["20160525 13:30:00.130", "MSFT", "51.93", "51.95"], + ["20160525 13:30:00.130", "MSFT", "51.93", "51.95"], + ["20160525 13:30:00.130", "AAPL", "98.61", "98.63"], + ["20160525 13:30:00.131", "AAPL", "98.61", "98.62"], + ["20160525 13:30:00.131", "AAPL", "98.61", "98.62"], + ["20160525 13:30:00.135", "MSFT", "51.92", "51.95"], + ["20160525 13:30:00.135", "AAPL", "98.61", "98.62"], + ["20160525 13:30:00.136", "AAPL", "98.61", "98.62"], + ["20160525 13:30:00.136", "AAPL", "98.61", "98.62"], + ["20160525 13:30:00.144", "AAPL", "98.61", "98.62"], + ["20160525 13:30:00.144", "AAPL", "98.61", "98.62"], + ["20160525 13:30:00.145", "AAPL", "98.61", "98.62"], + ["20160525 13:30:00.145", "AAPL", "98.61", "98.63"], + ["20160525 13:30:00.145", "AAPL", "98.61", "98.63"], + ["20160525 13:30:00.145", "AAPL", "98.60", "98.63"], + ["20160525 13:30:00.145", "AAPL", "98.61", "98.63"], + ["20160525 13:30:00.145", "AAPL", "98.60", "98.63"], + ], + columns="time,ticker,bid,ask".split(","), + ) + quotes["bid"] = quotes["bid"].astype("float64") + quotes["ask"] = quotes["ask"].astype("float64") + quotes = self.prep_data(quotes, dedupe=True) result = merge_asof(trades, quotes, on="time", by="ticker") tm.assert_frame_equal(result, expected) @@ -535,14 +2416,14 @@ def test_valid_join_keys(self, trades, quotes): with pytest.raises(MergeError, match="can only asof on a key for left"): merge_asof(trades, quotes, by="ticker") - def test_with_duplicates(self, datapath, trades, quotes): + def test_with_duplicates(self, datapath, trades, quotes, asof): q = ( pd.concat([quotes, quotes]) .sort_values(["time", "ticker"]) .reset_index(drop=True) ) result = merge_asof(trades, q, on="time", by="ticker") - expected = self.read_data(datapath, "asof.csv") + expected = self.prep_data(asof) tm.assert_frame_equal(result, expected) def test_with_duplicates_no_on(self): From 9383ae3541c4d3829361772f183cafa99c9edfe1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 22 Oct 2023 19:55:22 -0700 Subject: [PATCH 2/4] Remove csvs --- .../merge/data/allow_exact_matches.csv | 28 ------- .../allow_exact_matches_and_tolerance.csv | 28 ------- pandas/tests/reshape/merge/data/asof.csv | 28 ------- pandas/tests/reshape/merge/data/asof2.csv | 78 ------------------- pandas/tests/reshape/merge/data/quotes.csv | 17 ---- pandas/tests/reshape/merge/data/quotes2.csv | 57 -------------- pandas/tests/reshape/merge/data/tolerance.csv | 28 ------- pandas/tests/reshape/merge/data/trades.csv | 28 ------- pandas/tests/reshape/merge/data/trades2.csv | 78 ------------------- 9 files changed, 370 deletions(-) delete mode 100644 pandas/tests/reshape/merge/data/allow_exact_matches.csv delete mode 100644 pandas/tests/reshape/merge/data/allow_exact_matches_and_tolerance.csv delete mode 100644 pandas/tests/reshape/merge/data/asof.csv delete mode 100644 pandas/tests/reshape/merge/data/asof2.csv delete mode 100644 pandas/tests/reshape/merge/data/quotes.csv delete mode 100644 pandas/tests/reshape/merge/data/quotes2.csv delete mode 100644 pandas/tests/reshape/merge/data/tolerance.csv delete mode 100644 pandas/tests/reshape/merge/data/trades.csv delete mode 100644 pandas/tests/reshape/merge/data/trades2.csv diff --git a/pandas/tests/reshape/merge/data/allow_exact_matches.csv b/pandas/tests/reshape/merge/data/allow_exact_matches.csv deleted file mode 100644 index 0446fb744c540..0000000000000 --- a/pandas/tests/reshape/merge/data/allow_exact_matches.csv +++ /dev/null @@ -1,28 +0,0 @@ -time,ticker,price,quantity,marketCenter,bid,ask -20160525 13:30:00.023,MSFT,51.95,75,NASDAQ,, -20160525 13:30:00.038,MSFT,51.95,155,NASDAQ,51.95,51.95 -20160525 13:30:00.048,GOOG,720.77,100,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.92,100,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,200,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,300,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,600,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,44,NASDAQ,720.5,720.93 -20160525 13:30:00.074,AAPL,98.67,478343,NASDAQ,, -20160525 13:30:00.075,AAPL,98.67,478343,NASDAQ,, -20160525 13:30:00.075,AAPL,98.66,6,NASDAQ,, -20160525 13:30:00.075,AAPL,98.65,30,NASDAQ,, -20160525 13:30:00.075,AAPL,98.65,75,NASDAQ,, -20160525 13:30:00.075,AAPL,98.65,20,NASDAQ,, -20160525 13:30:00.075,AAPL,98.65,35,NASDAQ,, -20160525 13:30:00.075,AAPL,98.65,10,NASDAQ,, -20160525 13:30:00.075,AAPL,98.55,6,ARCA,, -20160525 13:30:00.075,AAPL,98.55,6,ARCA,, -20160525 13:30:00.076,AAPL,98.56,1000,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,300,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,400,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,600,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 -20160525 13:30:00.078,MSFT,51.95,783,NASDAQ,51.95,51.95 -20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.95,51.95 -20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.95,51.95 diff --git a/pandas/tests/reshape/merge/data/allow_exact_matches_and_tolerance.csv b/pandas/tests/reshape/merge/data/allow_exact_matches_and_tolerance.csv deleted file mode 100644 index 0446fb744c540..0000000000000 --- a/pandas/tests/reshape/merge/data/allow_exact_matches_and_tolerance.csv +++ /dev/null @@ -1,28 +0,0 @@ -time,ticker,price,quantity,marketCenter,bid,ask -20160525 13:30:00.023,MSFT,51.95,75,NASDAQ,, -20160525 13:30:00.038,MSFT,51.95,155,NASDAQ,51.95,51.95 -20160525 13:30:00.048,GOOG,720.77,100,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.92,100,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,200,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,300,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,600,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,44,NASDAQ,720.5,720.93 -20160525 13:30:00.074,AAPL,98.67,478343,NASDAQ,, -20160525 13:30:00.075,AAPL,98.67,478343,NASDAQ,, -20160525 13:30:00.075,AAPL,98.66,6,NASDAQ,, -20160525 13:30:00.075,AAPL,98.65,30,NASDAQ,, -20160525 13:30:00.075,AAPL,98.65,75,NASDAQ,, -20160525 13:30:00.075,AAPL,98.65,20,NASDAQ,, -20160525 13:30:00.075,AAPL,98.65,35,NASDAQ,, -20160525 13:30:00.075,AAPL,98.65,10,NASDAQ,, -20160525 13:30:00.075,AAPL,98.55,6,ARCA,, -20160525 13:30:00.075,AAPL,98.55,6,ARCA,, -20160525 13:30:00.076,AAPL,98.56,1000,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,300,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,400,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,600,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 -20160525 13:30:00.078,MSFT,51.95,783,NASDAQ,51.95,51.95 -20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.95,51.95 -20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.95,51.95 diff --git a/pandas/tests/reshape/merge/data/asof.csv b/pandas/tests/reshape/merge/data/asof.csv deleted file mode 100644 index d7d061bc46ccc..0000000000000 --- a/pandas/tests/reshape/merge/data/asof.csv +++ /dev/null @@ -1,28 +0,0 @@ -time,ticker,price,quantity,marketCenter,bid,ask -20160525 13:30:00.023,MSFT,51.95,75,NASDAQ,51.95,51.95 -20160525 13:30:00.038,MSFT,51.95,155,NASDAQ,51.95,51.95 -20160525 13:30:00.048,GOOG,720.77,100,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.92,100,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,200,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,300,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,600,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,44,NASDAQ,720.5,720.93 -20160525 13:30:00.074,AAPL,98.67,478343,NASDAQ,, -20160525 13:30:00.075,AAPL,98.67,478343,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.66,6,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.65,30,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.65,75,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.65,20,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.65,35,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.65,10,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.55,6,ARCA,98.55,98.56 -20160525 13:30:00.075,AAPL,98.55,6,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,1000,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,300,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,400,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,600,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 -20160525 13:30:00.078,MSFT,51.95,783,NASDAQ,51.92,51.95 -20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.92,51.95 -20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.92,51.95 diff --git a/pandas/tests/reshape/merge/data/asof2.csv b/pandas/tests/reshape/merge/data/asof2.csv deleted file mode 100644 index 2c9c0392dd617..0000000000000 --- a/pandas/tests/reshape/merge/data/asof2.csv +++ /dev/null @@ -1,78 +0,0 @@ -time,ticker,price,quantity,marketCenter,bid,ask -20160525 13:30:00.023,MSFT,51.95,75,NASDAQ,51.95,51.95 -20160525 13:30:00.038,MSFT,51.95,155,NASDAQ,51.95,51.95 -20160525 13:30:00.048,GOOG,720.77,100,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.92,100,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,200,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,300,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,600,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,44,NASDAQ,720.5,720.93 -20160525 13:30:00.074,AAPL,98.67,478343,NASDAQ,, -20160525 13:30:00.075,AAPL,98.67,478343,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.66,6,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.65,30,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.65,75,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.65,20,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.65,35,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.65,10,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.55,6,ARCA,98.55,98.56 -20160525 13:30:00.075,AAPL,98.55,6,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,1000,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,300,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,400,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,600,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 -20160525 13:30:00.078,MSFT,51.95,783,NASDAQ,51.92,51.95 -20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.92,51.95 -20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.92,51.95 -20160525 13:30:00.084,AAPL,98.64,40,NASDAQ,98.55,98.56 -20160525 13:30:00.084,AAPL,98.55,149,EDGX,98.55,98.56 -20160525 13:30:00.086,AAPL,98.56,500,ARCA,98.55,98.63 -20160525 13:30:00.104,AAPL,98.63,647,EDGX,98.62,98.63 -20160525 13:30:00.104,AAPL,98.63,300,EDGX,98.62,98.63 -20160525 13:30:00.104,AAPL,98.63,50,NASDAQ,98.62,98.63 -20160525 13:30:00.104,AAPL,98.63,50,NASDAQ,98.62,98.63 -20160525 13:30:00.104,AAPL,98.63,70,NASDAQ,98.62,98.63 -20160525 13:30:00.104,AAPL,98.63,70,NASDAQ,98.62,98.63 -20160525 13:30:00.104,AAPL,98.63,1,NASDAQ,98.62,98.63 -20160525 13:30:00.104,AAPL,98.63,62,NASDAQ,98.62,98.63 -20160525 13:30:00.104,AAPL,98.63,10,NASDAQ,98.62,98.63 -20160525 13:30:00.104,AAPL,98.63,100,ARCA,98.62,98.63 -20160525 13:30:00.105,AAPL,98.63,100,ARCA,98.62,98.63 -20160525 13:30:00.105,AAPL,98.63,700,ARCA,98.62,98.63 -20160525 13:30:00.106,AAPL,98.63,61,EDGX,98.62,98.63 -20160525 13:30:00.107,AAPL,98.63,100,ARCA,98.62,98.63 -20160525 13:30:00.107,AAPL,98.63,53,ARCA,98.62,98.63 -20160525 13:30:00.108,AAPL,98.63,100,ARCA,98.62,98.63 -20160525 13:30:00.108,AAPL,98.63,839,ARCA,98.62,98.63 -20160525 13:30:00.115,AAPL,98.63,5,EDGX,98.62,98.63 -20160525 13:30:00.118,AAPL,98.63,295,EDGX,98.62,98.63 -20160525 13:30:00.118,AAPL,98.63,5,EDGX,98.62,98.63 -20160525 13:30:00.128,AAPL,98.63,100,NASDAQ,98.62,98.63 -20160525 13:30:00.128,AAPL,98.63,100,NASDAQ,98.62,98.63 -20160525 13:30:00.128,MSFT,51.92,100,ARCA,51.92,51.95 -20160525 13:30:00.129,AAPL,98.62,100,NASDAQ,98.61,98.63 -20160525 13:30:00.129,AAPL,98.62,10,NASDAQ,98.61,98.63 -20160525 13:30:00.129,AAPL,98.62,59,NASDAQ,98.61,98.63 -20160525 13:30:00.129,AAPL,98.62,31,NASDAQ,98.61,98.63 -20160525 13:30:00.129,AAPL,98.62,69,NASDAQ,98.61,98.63 -20160525 13:30:00.129,AAPL,98.62,12,NASDAQ,98.61,98.63 -20160525 13:30:00.129,AAPL,98.62,12,EDGX,98.61,98.63 -20160525 13:30:00.129,AAPL,98.62,100,ARCA,98.61,98.63 -20160525 13:30:00.129,AAPL,98.62,100,ARCA,98.61,98.63 -20160525 13:30:00.130,MSFT,51.95,317,ARCA,51.93,51.95 -20160525 13:30:00.130,MSFT,51.95,283,ARCA,51.93,51.95 -20160525 13:30:00.135,MSFT,51.93,100,EDGX,51.92,51.95 -20160525 13:30:00.135,AAPL,98.62,100,ARCA,98.61,98.62 -20160525 13:30:00.144,AAPL,98.62,12,NASDAQ,98.61,98.62 -20160525 13:30:00.144,AAPL,98.62,88,NASDAQ,98.61,98.62 -20160525 13:30:00.144,AAPL,98.62,162,NASDAQ,98.61,98.62 -20160525 13:30:00.144,AAPL,98.61,100,BATS,98.61,98.62 -20160525 13:30:00.144,AAPL,98.62,61,ARCA,98.61,98.62 -20160525 13:30:00.144,AAPL,98.62,25,ARCA,98.61,98.62 -20160525 13:30:00.144,AAPL,98.62,14,ARCA,98.61,98.62 -20160525 13:30:00.145,AAPL,98.62,12,ARCA,98.6,98.63 -20160525 13:30:00.145,AAPL,98.62,100,ARCA,98.6,98.63 -20160525 13:30:00.145,AAPL,98.63,100,NASDAQ,98.6,98.63 -20160525 13:30:00.145,AAPL,98.63,100,NASDAQ,98.6,98.63 diff --git a/pandas/tests/reshape/merge/data/quotes.csv b/pandas/tests/reshape/merge/data/quotes.csv deleted file mode 100644 index 3f31d2cfffe1b..0000000000000 --- a/pandas/tests/reshape/merge/data/quotes.csv +++ /dev/null @@ -1,17 +0,0 @@ -time,ticker,bid,ask -20160525 13:30:00.023,GOOG,720.50,720.93 -20160525 13:30:00.023,MSFT,51.95,51.95 -20160525 13:30:00.041,MSFT,51.95,51.95 -20160525 13:30:00.048,GOOG,720.50,720.93 -20160525 13:30:00.048,GOOG,720.50,720.93 -20160525 13:30:00.048,GOOG,720.50,720.93 -20160525 13:30:00.048,GOOG,720.50,720.93 -20160525 13:30:00.072,GOOG,720.50,720.88 -20160525 13:30:00.075,AAPL,98.55,98.56 -20160525 13:30:00.076,AAPL,98.55,98.56 -20160525 13:30:00.076,AAPL,98.55,98.56 -20160525 13:30:00.076,AAPL,98.55,98.56 -20160525 13:30:00.078,MSFT,51.95,51.95 -20160525 13:30:00.078,MSFT,51.95,51.95 -20160525 13:30:00.078,MSFT,51.95,51.95 -20160525 13:30:00.078,MSFT,51.92,51.95 diff --git a/pandas/tests/reshape/merge/data/quotes2.csv b/pandas/tests/reshape/merge/data/quotes2.csv deleted file mode 100644 index 7ade1e7faf1ae..0000000000000 --- a/pandas/tests/reshape/merge/data/quotes2.csv +++ /dev/null @@ -1,57 +0,0 @@ -time,ticker,bid,ask -20160525 13:30:00.023,GOOG,720.50,720.93 -20160525 13:30:00.023,MSFT,51.95,51.95 -20160525 13:30:00.041,MSFT,51.95,51.95 -20160525 13:30:00.048,GOOG,720.50,720.93 -20160525 13:30:00.048,GOOG,720.50,720.93 -20160525 13:30:00.048,GOOG,720.50,720.93 -20160525 13:30:00.048,GOOG,720.50,720.93 -20160525 13:30:00.072,GOOG,720.50,720.88 -20160525 13:30:00.075,AAPL,98.55,98.56 -20160525 13:30:00.076,AAPL,98.55,98.56 -20160525 13:30:00.076,AAPL,98.55,98.56 -20160525 13:30:00.076,AAPL,98.55,98.56 -20160525 13:30:00.078,MSFT,51.95,51.95 -20160525 13:30:00.078,MSFT,51.95,51.95 -20160525 13:30:00.078,MSFT,51.95,51.95 -20160525 13:30:00.078,MSFT,51.92,51.95 -20160525 13:30:00.079,MSFT,51.92,51.95 -20160525 13:30:00.080,AAPL,98.55,98.56 -20160525 13:30:00.084,AAPL,98.55,98.56 -20160525 13:30:00.086,AAPL,98.55,98.63 -20160525 13:30:00.088,AAPL,98.65,98.63 -20160525 13:30:00.089,AAPL,98.63,98.63 -20160525 13:30:00.104,AAPL,98.63,98.63 -20160525 13:30:00.104,AAPL,98.63,98.63 -20160525 13:30:00.104,AAPL,98.63,98.63 -20160525 13:30:00.104,AAPL,98.63,98.63 -20160525 13:30:00.104,AAPL,98.62,98.63 -20160525 13:30:00.105,AAPL,98.62,98.63 -20160525 13:30:00.107,AAPL,98.62,98.63 -20160525 13:30:00.115,AAPL,98.62,98.63 -20160525 13:30:00.115,AAPL,98.62,98.63 -20160525 13:30:00.118,AAPL,98.62,98.63 -20160525 13:30:00.128,AAPL,98.62,98.63 -20160525 13:30:00.128,AAPL,98.62,98.63 -20160525 13:30:00.129,AAPL,98.62,98.63 -20160525 13:30:00.129,AAPL,98.61,98.63 -20160525 13:30:00.129,AAPL,98.62,98.63 -20160525 13:30:00.129,AAPL,98.62,98.63 -20160525 13:30:00.129,AAPL,98.61,98.63 -20160525 13:30:00.130,MSFT,51.93,51.95 -20160525 13:30:00.130,MSFT,51.93,51.95 -20160525 13:30:00.130,AAPL,98.61,98.63 -20160525 13:30:00.131,AAPL,98.61,98.62 -20160525 13:30:00.131,AAPL,98.61,98.62 -20160525 13:30:00.135,MSFT,51.92,51.95 -20160525 13:30:00.135,AAPL,98.61,98.62 -20160525 13:30:00.136,AAPL,98.61,98.62 -20160525 13:30:00.136,AAPL,98.61,98.62 -20160525 13:30:00.144,AAPL,98.61,98.62 -20160525 13:30:00.144,AAPL,98.61,98.62 -20160525 13:30:00.145,AAPL,98.61,98.62 -20160525 13:30:00.145,AAPL,98.61,98.63 -20160525 13:30:00.145,AAPL,98.61,98.63 -20160525 13:30:00.145,AAPL,98.60,98.63 -20160525 13:30:00.145,AAPL,98.61,98.63 -20160525 13:30:00.145,AAPL,98.60,98.63 diff --git a/pandas/tests/reshape/merge/data/tolerance.csv b/pandas/tests/reshape/merge/data/tolerance.csv deleted file mode 100644 index d7d061bc46ccc..0000000000000 --- a/pandas/tests/reshape/merge/data/tolerance.csv +++ /dev/null @@ -1,28 +0,0 @@ -time,ticker,price,quantity,marketCenter,bid,ask -20160525 13:30:00.023,MSFT,51.95,75,NASDAQ,51.95,51.95 -20160525 13:30:00.038,MSFT,51.95,155,NASDAQ,51.95,51.95 -20160525 13:30:00.048,GOOG,720.77,100,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.92,100,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,200,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,300,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,600,NASDAQ,720.5,720.93 -20160525 13:30:00.048,GOOG,720.93,44,NASDAQ,720.5,720.93 -20160525 13:30:00.074,AAPL,98.67,478343,NASDAQ,, -20160525 13:30:00.075,AAPL,98.67,478343,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.66,6,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.65,30,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.65,75,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.65,20,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.65,35,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.65,10,NASDAQ,98.55,98.56 -20160525 13:30:00.075,AAPL,98.55,6,ARCA,98.55,98.56 -20160525 13:30:00.075,AAPL,98.55,6,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,1000,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,300,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,400,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,600,ARCA,98.55,98.56 -20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 -20160525 13:30:00.078,MSFT,51.95,783,NASDAQ,51.92,51.95 -20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.92,51.95 -20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.92,51.95 diff --git a/pandas/tests/reshape/merge/data/trades.csv b/pandas/tests/reshape/merge/data/trades.csv deleted file mode 100644 index b26a4ce714255..0000000000000 --- a/pandas/tests/reshape/merge/data/trades.csv +++ /dev/null @@ -1,28 +0,0 @@ -time,ticker,price,quantity,marketCenter -20160525 13:30:00.023,MSFT,51.9500,75,NASDAQ -20160525 13:30:00.038,MSFT,51.9500,155,NASDAQ -20160525 13:30:00.048,GOOG,720.7700,100,NASDAQ -20160525 13:30:00.048,GOOG,720.9200,100,NASDAQ -20160525 13:30:00.048,GOOG,720.9300,200,NASDAQ -20160525 13:30:00.048,GOOG,720.9300,300,NASDAQ -20160525 13:30:00.048,GOOG,720.9300,600,NASDAQ -20160525 13:30:00.048,GOOG,720.9300,44,NASDAQ -20160525 13:30:00.074,AAPL,98.6700,478343,NASDAQ -20160525 13:30:00.075,AAPL,98.6700,478343,NASDAQ -20160525 13:30:00.075,AAPL,98.6600,6,NASDAQ -20160525 13:30:00.075,AAPL,98.6500,30,NASDAQ -20160525 13:30:00.075,AAPL,98.6500,75,NASDAQ -20160525 13:30:00.075,AAPL,98.6500,20,NASDAQ -20160525 13:30:00.075,AAPL,98.6500,35,NASDAQ -20160525 13:30:00.075,AAPL,98.6500,10,NASDAQ -20160525 13:30:00.075,AAPL,98.5500,6,ARCA -20160525 13:30:00.075,AAPL,98.5500,6,ARCA -20160525 13:30:00.076,AAPL,98.5600,1000,ARCA -20160525 13:30:00.076,AAPL,98.5600,200,ARCA -20160525 13:30:00.076,AAPL,98.5600,300,ARCA -20160525 13:30:00.076,AAPL,98.5600,400,ARCA -20160525 13:30:00.076,AAPL,98.5600,600,ARCA -20160525 13:30:00.076,AAPL,98.5600,200,ARCA -20160525 13:30:00.078,MSFT,51.9500,783,NASDAQ -20160525 13:30:00.078,MSFT,51.9500,100,NASDAQ -20160525 13:30:00.078,MSFT,51.9500,100,NASDAQ diff --git a/pandas/tests/reshape/merge/data/trades2.csv b/pandas/tests/reshape/merge/data/trades2.csv deleted file mode 100644 index 64021faa68ce3..0000000000000 --- a/pandas/tests/reshape/merge/data/trades2.csv +++ /dev/null @@ -1,78 +0,0 @@ -time,ticker,price,quantity,marketCenter -20160525 13:30:00.023,MSFT,51.9500,75,NASDAQ -20160525 13:30:00.038,MSFT,51.9500,155,NASDAQ -20160525 13:30:00.048,GOOG,720.7700,100,NASDAQ -20160525 13:30:00.048,GOOG,720.9200,100,NASDAQ -20160525 13:30:00.048,GOOG,720.9300,200,NASDAQ -20160525 13:30:00.048,GOOG,720.9300,300,NASDAQ -20160525 13:30:00.048,GOOG,720.9300,600,NASDAQ -20160525 13:30:00.048,GOOG,720.9300,44,NASDAQ -20160525 13:30:00.074,AAPL,98.6700,478343,NASDAQ -20160525 13:30:00.075,AAPL,98.6700,478343,NASDAQ -20160525 13:30:00.075,AAPL,98.6600,6,NASDAQ -20160525 13:30:00.075,AAPL,98.6500,30,NASDAQ -20160525 13:30:00.075,AAPL,98.6500,75,NASDAQ -20160525 13:30:00.075,AAPL,98.6500,20,NASDAQ -20160525 13:30:00.075,AAPL,98.6500,35,NASDAQ -20160525 13:30:00.075,AAPL,98.6500,10,NASDAQ -20160525 13:30:00.075,AAPL,98.5500,6,ARCA -20160525 13:30:00.075,AAPL,98.5500,6,ARCA -20160525 13:30:00.076,AAPL,98.5600,1000,ARCA -20160525 13:30:00.076,AAPL,98.5600,200,ARCA -20160525 13:30:00.076,AAPL,98.5600,300,ARCA -20160525 13:30:00.076,AAPL,98.5600,400,ARCA -20160525 13:30:00.076,AAPL,98.5600,600,ARCA -20160525 13:30:00.076,AAPL,98.5600,200,ARCA -20160525 13:30:00.078,MSFT,51.9500,783,NASDAQ -20160525 13:30:00.078,MSFT,51.9500,100,NASDAQ -20160525 13:30:00.078,MSFT,51.9500,100,NASDAQ -20160525 13:30:00.084,AAPL,98.6400,40,NASDAQ -20160525 13:30:00.084,AAPL,98.5500,149,EDGX -20160525 13:30:00.086,AAPL,98.5600,500,ARCA -20160525 13:30:00.104,AAPL,98.6300,647,EDGX -20160525 13:30:00.104,AAPL,98.6300,300,EDGX -20160525 13:30:00.104,AAPL,98.6300,50,NASDAQ -20160525 13:30:00.104,AAPL,98.6300,50,NASDAQ -20160525 13:30:00.104,AAPL,98.6300,70,NASDAQ -20160525 13:30:00.104,AAPL,98.6300,70,NASDAQ -20160525 13:30:00.104,AAPL,98.6300,1,NASDAQ -20160525 13:30:00.104,AAPL,98.6300,62,NASDAQ -20160525 13:30:00.104,AAPL,98.6300,10,NASDAQ -20160525 13:30:00.104,AAPL,98.6300,100,ARCA -20160525 13:30:00.105,AAPL,98.6300,100,ARCA -20160525 13:30:00.105,AAPL,98.6300,700,ARCA -20160525 13:30:00.106,AAPL,98.6300,61,EDGX -20160525 13:30:00.107,AAPL,98.6300,100,ARCA -20160525 13:30:00.107,AAPL,98.6300,53,ARCA -20160525 13:30:00.108,AAPL,98.6300,100,ARCA -20160525 13:30:00.108,AAPL,98.6300,839,ARCA -20160525 13:30:00.115,AAPL,98.6300,5,EDGX -20160525 13:30:00.118,AAPL,98.6300,295,EDGX -20160525 13:30:00.118,AAPL,98.6300,5,EDGX -20160525 13:30:00.128,AAPL,98.6300,100,NASDAQ -20160525 13:30:00.128,AAPL,98.6300,100,NASDAQ -20160525 13:30:00.128,MSFT,51.9200,100,ARCA -20160525 13:30:00.129,AAPL,98.6200,100,NASDAQ -20160525 13:30:00.129,AAPL,98.6200,10,NASDAQ -20160525 13:30:00.129,AAPL,98.6200,59,NASDAQ -20160525 13:30:00.129,AAPL,98.6200,31,NASDAQ -20160525 13:30:00.129,AAPL,98.6200,69,NASDAQ -20160525 13:30:00.129,AAPL,98.6200,12,NASDAQ -20160525 13:30:00.129,AAPL,98.6200,12,EDGX -20160525 13:30:00.129,AAPL,98.6200,100,ARCA -20160525 13:30:00.129,AAPL,98.6200,100,ARCA -20160525 13:30:00.130,MSFT,51.9500,317,ARCA -20160525 13:30:00.130,MSFT,51.9500,283,ARCA -20160525 13:30:00.135,MSFT,51.9300,100,EDGX -20160525 13:30:00.135,AAPL,98.6200,100,ARCA -20160525 13:30:00.144,AAPL,98.6200,12,NASDAQ -20160525 13:30:00.144,AAPL,98.6200,88,NASDAQ -20160525 13:30:00.144,AAPL,98.6200,162,NASDAQ -20160525 13:30:00.144,AAPL,98.6100,100,BATS -20160525 13:30:00.144,AAPL,98.6200,61,ARCA -20160525 13:30:00.144,AAPL,98.6200,25,ARCA -20160525 13:30:00.144,AAPL,98.6200,14,ARCA -20160525 13:30:00.145,AAPL,98.6200,12,ARCA -20160525 13:30:00.145,AAPL,98.6200,100,ARCA -20160525 13:30:00.145,AAPL,98.6300,100,NASDAQ -20160525 13:30:00.145,AAPL,98.6300,100,NASDAQ From cceb6c6e1e2c11fc02a7ee49fc3d8997baacaeed Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 25 Oct 2023 10:33:36 -0700 Subject: [PATCH 3/4] Evaluate splits --- pandas/tests/groupby/test_apply.py | 56 ++++++++++++++++-------------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index e365bb127864f..601a47a120b44 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -39,18 +39,20 @@ def store(group): def test_apply_index_date(): # GH 5788 - ts = """2011-05-16 00:00 -2011-05-16 01:00 -2011-05-16 02:00 -2011-05-16 03:00 -2011-05-17 02:00 -2011-05-17 03:00 -2011-05-17 04:00 -2011-05-17 05:00 -2011-05-18 02:00 -2011-05-18 03:00 -2011-05-18 04:00 -2011-05-18 05:00""" + ts = [ + "2011-05-16 00:00", + "2011-05-16 01:00", + "2011-05-16 02:00", + "2011-05-16 03:00", + "2011-05-17 02:00", + "2011-05-17 03:00", + "2011-05-17 04:00", + "2011-05-17 05:00", + "2011-05-18 02:00", + "2011-05-18 03:00", + "2011-05-18 04:00", + "2011-05-18 05:00", + ] df = DataFrame( { "value": [ @@ -68,7 +70,7 @@ def test_apply_index_date(): 1.40649, ], }, - index=Index(pd.to_datetime(ts.split("\n")), name="date_time"), + index=Index(pd.to_datetime(ts, name="date_time")), ) expected = df.groupby(df.index.date).idxmax() result = df.groupby(df.index.date).apply(lambda x: x.idxmax()) @@ -78,19 +80,21 @@ def test_apply_index_date(): def test_apply_index_date_object(): # GH 5789 # don't auto coerce dates - ts = """2011-05-16 00:00 -2011-05-16 01:00 -2011-05-16 02:00 -2011-05-16 03:00 -2011-05-17 02:00 -2011-05-17 03:00 -2011-05-17 04:00 -2011-05-17 05:00 -2011-05-18 02:00 -2011-05-18 03:00 -2011-05-18 04:00 -2011-05-18 05:00""" - df = DataFrame([row.split() for row in ts.split("\n")], columns=["date", "time"]) + ts = [ + "2011-05-16 00:00", + "2011-05-16 01:00", + "2011-05-16 02:00", + "2011-05-16 03:00", + "2011-05-17 02:00", + "2011-05-17 03:00", + "2011-05-17 04:00", + "2011-05-17 05:00", + "2011-05-18 02:00", + "2011-05-18 03:00", + "2011-05-18 04:00", + "2011-05-18 05:00", + ] + df = DataFrame([row.split() for row in ts], columns=["date", "time"]) df["value"] = [ 1.40893, 1.40760, From 9f4f974ac3b4b8bfd5ca228dcdbf72a43bf27636 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 27 Oct 2023 10:01:57 -0700 Subject: [PATCH 4/4] Typo --- pandas/tests/groupby/test_apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 601a47a120b44..2f2648b9293c5 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -70,7 +70,7 @@ def test_apply_index_date(): 1.40649, ], }, - index=Index(pd.to_datetime(ts, name="date_time")), + index=Index(pd.to_datetime(ts), name="date_time"), ) expected = df.groupby(df.index.date).idxmax() result = df.groupby(df.index.date).apply(lambda x: x.idxmax())