Skip to content

PERF: changed default value of cache parameter to True in to_datetime function #26043

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Jul 4, 2019
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions asv_bench/benchmarks/io/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import numpy as np
import pandas.util.testing as tm
from pandas import DataFrame, Categorical, date_range, read_csv, to_datetime
from pandas.io.parsers import _parser_defaults
from io import StringIO

from ..pandas_vb_common import BaseIO
Expand Down Expand Up @@ -272,13 +271,12 @@ def setup(self, do_cache):
self.StringIO_input = StringIO(data)

def time_read_csv_cached(self, do_cache):
# The kwds dict is built conditionally so that this benchmark does not break
# on previous versions of pandas, where the cache_dates keyword does not exist.
kwds = {}
if 'cache_dates' in _parser_defaults:
kwds['cache_dates'] = do_cache
read_csv(self.data(self.StringIO_input), header=None,
parse_dates=[0], **kwds)
try:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TomAugspurger is this an OK method of handling it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems fine.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although... I worry it would incorrectly catch a TypeError in the function? The other way might be to check pandas.__version__?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, let me see what I can do.

read_csv(self.data(self.StringIO_input), header=None,
parse_dates=[0], cache_dates=do_cache)
except TypeError:
# cache_dates is a new keyword in 0.25
pass


class ReadCSVMemoryGrowth(BaseIO):
Expand Down Expand Up @@ -329,9 +327,14 @@ def setup(self, cache_dates):
self.StringIO_input = StringIO(data)

def time_read_csv_dayfirst(self, cache_dates):
read_csv(self.data(self.StringIO_input), sep=',', header=None,
names=['Date'], parse_dates=['Date'], cache_dates=cache_dates,
dayfirst=True)
try:
read_csv(self.data(self.StringIO_input), sep=',', header=None,
names=['Date'], parse_dates=['Date'], cache_dates=cache_dates,
dayfirst=True)
except TypeError:
# cache_dates is a new keyword in 0.25
pass


def time_to_datetime_dayfirst(self, cache_dates):
df = read_csv(self.data(self.StringIO_input),
Expand Down