✅ add/update tests

MarcoGorelli · MarcoGorelli · commit d1cdfd290760 · 2022-10-19T10:05:56.000+01:00
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
@@ -836,7 +836,8 @@ def test_with_dictlike_columns_with_datetime():
     df["author"] = ["X", "Y", "Z"]
     df["publisher"] = ["BBC", "NBC", "N24"]
     df["date"] = pd.to_datetime(
-        ["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"]
+        ["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"],
+        dayfirst=True,
     )
     result = df.apply(lambda x: {}, axis=1)
     expected = Series([{}, {}, {}])
diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py
@@ -405,11 +405,11 @@ def test_drop_level_nonunique_datetime(self):
         idx = Index([2, 3, 4, 4, 5], name="id")
         idxdt = pd.to_datetime(
             [
-                "201603231400",
-                "201603231500",
-                "201603231600",
-                "201603231600",
-                "201603231700",
+                "2016-03-23 14:00",
+                "2016-03-23 15:00",
+                "2016-03-23 16:00",
+                "2016-03-23 16:00",
+                "2016-03-23 17:00",
             ]
         )
         df = DataFrame(np.arange(10).reshape(5, 2), columns=list("ab"), index=idx)
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
@@ -27,7 +27,7 @@
 
 class TestDataFrameToCSV:
     def read_csv(self, path, **kwargs):
-        params = {"index_col": 0, "parse_dates": True}
+        params = {"index_col": 0}
         params.update(**kwargs)
 
         return read_csv(path, **params)
@@ -46,17 +46,17 @@ def test_to_csv_from_csv1(self, float_frame, datetime_frame):
             # freq does not roundtrip
             datetime_frame.index = datetime_frame.index._with_freq(None)
             datetime_frame.to_csv(path)
-            recons = self.read_csv(path)
+            recons = self.read_csv(path, parse_dates=True)
             tm.assert_frame_equal(datetime_frame, recons)
 
             datetime_frame.to_csv(path, index_label="index")
-            recons = self.read_csv(path, index_col=None)
+            recons = self.read_csv(path, index_col=None, parse_dates=True)
 
             assert len(recons.columns) == len(datetime_frame.columns) + 1
 
             # no index
             datetime_frame.to_csv(path, index=False)
-            recons = self.read_csv(path, index_col=None)
+            recons = self.read_csv(path, index_col=None, parse_dates=True)
             tm.assert_almost_equal(datetime_frame.values, recons.values)
 
             # corner case
@@ -1056,7 +1056,7 @@ def test_to_csv_date_format(self, datetime_frame):
 
             # test NaTs
             nat_index = to_datetime(
-                ["NaT"] * 10 + ["2000-01-01", "1/1/2000", "1-1-2000"]
+                ["NaT"] * 10 + ["2000-01-01", "2000-01-01", "2000-01-01"]
             )
             nat_frame = DataFrame({"A": nat_index}, index=nat_index)
             nat_frame.to_csv(path, date_format="%Y-%m-%d")
diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py
@@ -1042,10 +1042,18 @@ def test_datetimeindex_constructor_misc(self):
         arr = np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O")
         idx4 = DatetimeIndex(arr)
 
-        arr = to_datetime(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"])
+        # Can't be parsed consistently, need to parse each element individually
+        arr = [
+            to_datetime(date_string)
+            for date_string in ["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"]
+        ]
         idx5 = DatetimeIndex(arr)
 
-        arr = to_datetime(["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"])
+        # Can't be parsed consistently, need to parse each element individually
+        arr = [
+            to_datetime(date_string)
+            for date_string in ["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"]
+        ]
         idx6 = DatetimeIndex(arr)
 
         idx7 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -1185,10 +1185,16 @@ def test_equals_op_index_vs_mi_same_length(self):
         expected = np.array([False, False, False])
         tm.assert_numpy_array_equal(result, expected)
 
-    @pytest.mark.parametrize("dt_conv", [pd.to_datetime, pd.to_timedelta])
-    def test_dt_conversion_preserves_name(self, dt_conv):
+    @pytest.mark.parametrize(
+        "dt_conv, arg",
+        [
+            (pd.to_datetime, ["2000-01-01", "2000-01-02"]),
+            (pd.to_timedelta, ["01:02:03", "01:02:04"]),
+        ],
+    )
+    def test_dt_conversion_preserves_name(self, dt_conv, arg):
         # GH 10875
-        index = Index(["01:02:03", "01:02:04"], name="label")
+        index = Index(arg, name="label")
         assert index.name == dt_conv(index).name
 
     def test_cached_properties_not_settable(self):
diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
@@ -58,8 +58,8 @@ def _set_noconvert_columns(self):
             return CParserWrapper._set_noconvert_columns(self)
 
     data = """a,b,c,d,e
-0,1,20140101,0900,4
-0,1,20140102,1000,4"""
+0,1,2014-01-01,09:00,4
+0,1,2014-01-02,10:00,4"""
 
     parse_dates = [[1, 2]]
     cols = {
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
@@ -1666,9 +1666,9 @@ def test_parse_delimited_date_swap_no_warning(
 @pytest.mark.parametrize(
     "date_string,dayfirst,expected",
     [
-        # %d/%m/%Y; month > 12 thus replacement
+        # %d/%m/%Y; month > 12
         ("13/02/2019", False, datetime(2019, 2, 13)),
-        # %m/%d/%Y; day > 12 thus there will be no replacement
+        # %m/%d/%Y; day > 12
         ("02/13/2019", True, datetime(2019, 2, 13)),
     ],
 )
@@ -1677,7 +1677,10 @@ def test_parse_delimited_date_swap_with_warning(
 ):
     parser = all_parsers
     expected = DataFrame({0: [expected]}, dtype="datetime64[ns]")
-    warning_msg = "Specify a format to ensure consistent parsing"
+    warning_msg = (
+        "Parsing dates in .* format when dayfirst=.* was specified. "
+        "Pass `dayfirst=.*` or specify a format to silence this warning."
+    )
     result = parser.read_csv_check_warnings(
         UserWarning,
         warning_msg,
@@ -1691,13 +1694,11 @@ def test_parse_delimited_date_swap_with_warning(
 
 def test_parse_multiple_delimited_dates_with_swap_warnings():
     # GH46210
-    warning_msg = "Specify a format to ensure consistent parsing"
-    with tm.assert_produces_warning(UserWarning, match=warning_msg) as record:
+    with pytest.raises(
+        ValueError,
+        match=r"^time data '31/05/2000' does not match format '%m/%d/%Y' \(match\)$",
+    ):
         pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"])
-    assert len({str(warning.message) for warning in record}) == 1
-    # Using set(record) as repetitions of the same warning are suppressed
-    # https://docs.python.org/3/library/warnings.html
-    # and here we care to check that the warning is only shows once to users.
 
 
 def _helper_hypothesis_delimited_date(call, date_string, **kwargs):
@@ -1860,97 +1861,51 @@ def test_parse_dates_and_keep_orgin_column(all_parsers):
 
 def test_dayfirst_warnings():
     # GH 12585
-    warning_msg_day_first = (
-        r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) was "
-        r"specified. This may lead to inconsistently parsed dates! Specify a format "
-        r"to ensure consistent parsing."
-    )
-    warning_msg_month_first = (
-        "Parsing dates in MM/DD/YYYY format when dayfirst=True was "
-        "specified. This may lead to inconsistently parsed dates! Specify a format "
-        "to ensure consistent parsing."
-    )
 
     # CASE 1: valid input
     input = "date\n31/12/2014\n10/03/2011"
-    expected_consistent = DatetimeIndex(
+    expected = DatetimeIndex(
         ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None, name="date"
     )
-    expected_inconsistent = DatetimeIndex(
-        ["2014-12-31", "2011-10-03"], dtype="datetime64[ns]", freq=None, name="date"
+    warning_msg = (
+        "Parsing dates in .* format when dayfirst=.* was specified. "
+        "Pass `dayfirst=.*` or specify a format to silence this warning."
     )
 
     # A. dayfirst arg correct, no warning
     res1 = read_csv(
         StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date"
     ).index
-    tm.assert_index_equal(expected_consistent, res1)
+    tm.assert_index_equal(expected, res1)
 
-    # B. dayfirst arg incorrect, warning + incorrect output
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
+    # B. dayfirst arg incorrect, warning
+    with tm.assert_produces_warning(UserWarning, match=warning_msg):
         res2 = read_csv(
             StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
         ).index
-    tm.assert_index_equal(expected_inconsistent, res2)
-
-    # C. dayfirst default arg, same as B
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-        res3 = read_csv(
-            StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
-        ).index
-    tm.assert_index_equal(expected_inconsistent, res3)
-
-    # D. infer_datetime_format=True overrides dayfirst default
-    # no warning + correct result
-    res4 = read_csv(
-        StringIO(input),
-        parse_dates=["date"],
-        infer_datetime_format=True,
-        index_col="date",
-    ).index
-    tm.assert_index_equal(expected_consistent, res4)
+    tm.assert_index_equal(expected, res2)
 
     # CASE 2: invalid input
     # cannot consistently process with single format
-    # warnings *always* raised
+    # return to user unaltered
 
     # first in DD/MM/YYYY, second in MM/DD/YYYY
     input = "date\n31/12/2014\n03/30/2011"
-    expected = DatetimeIndex(
-        ["2014-12-31", "2011-03-30"], dtype="datetime64[ns]", freq=None, name="date"
-    )
+    expected = Index(["31/12/2014", "03/30/2011"], dtype="object", name="date")
 
     # A. use dayfirst=True
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_month_first):
-        res5 = read_csv(
-            StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date"
-        ).index
+    res5 = read_csv(
+        StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date"
+    ).index
     tm.assert_index_equal(expected, res5)
 
     # B. use dayfirst=False
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
+    with tm.assert_produces_warning(UserWarning, match=warning_msg):
         res6 = read_csv(
             StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
         ).index
     tm.assert_index_equal(expected, res6)
 
-    # C. use dayfirst default arg, same as B
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-        res7 = read_csv(
-            StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
-        ).index
-    tm.assert_index_equal(expected, res7)
-
-    # D. use infer_datetime_format=True
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-        res8 = read_csv(
-            StringIO(input),
-            parse_dates=["date"],
-            infer_datetime_format=True,
-            index_col="date",
-        ).index
-    tm.assert_index_equal(expected, res8)
-
 
 @pytest.mark.parametrize(
     "date_string, dayfirst",
@@ -1973,9 +1928,11 @@ def test_dayfirst_warnings_no_leading_zero(date_string, dayfirst):
     expected = DatetimeIndex(
         ["2014-01-31"], dtype="datetime64[ns]", freq=None, name="date"
     )
-    with tm.assert_produces_warning(
-        UserWarning, match=r"may lead to inconsistently parsed dates"
-    ):
+    warning_msg = (
+        "Parsing dates in .* format when dayfirst=.* was specified. "
+        "Pass `dayfirst=.*` or specify a format to silence this warning."
+    )
+    with tm.assert_produces_warning(UserWarning, match=warning_msg):
         res = read_csv(
             StringIO(initial_value),
             parse_dates=["date"],
diff --git a/pandas/tests/io/parser/usecols/test_parse_dates.py b/pandas/tests/io/parser/usecols/test_parse_dates.py
@@ -31,8 +31,8 @@
 def test_usecols_with_parse_dates(all_parsers, usecols):
     # see gh-9755
     data = """a,b,c,d,e
-0,1,20140101,0900,4
-0,1,20140102,1000,4"""
+0,1,2014-01-01,09:00,4
+0,1,2014-01-02,10:00,4"""
     parser = all_parsers
     parse_dates = [[1, 2]]
 
@@ -138,8 +138,8 @@ def test_usecols_with_parse_dates4(all_parsers):
 )
 def test_usecols_with_parse_dates_and_names(all_parsers, usecols, names):
     # see gh-9755
-    s = """0,1,20140101,0900,4
-0,1,20140102,1000,4"""
+    s = """0,1,2014-01-01,09:00,4
+0,1,2014-01-02,10:00,4"""
     parse_dates = [[1, 2]]
     parser = all_parsers
 
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
@@ -1386,7 +1386,7 @@ def test_sqlalchemy_type_mapping(self):
 
         # Test Timestamp objects (no datetime64 because of timezone) (GH9085)
         df = DataFrame(
-            {"time": to_datetime(["201412120154", "201412110254"], utc=True)}
+            {"time": to_datetime(["2014-12-12 01:54", "2014-12-11 02:54"], utc=True)}
         )
         db = sql.SQLDatabase(self.conn)
         table = sql.SQLTable("test_type", db, frame=df)
@@ -1595,7 +1595,7 @@ def test_sqlite_type_mapping(self):
 
         # Test Timestamp objects (no datetime64 because of timezone) (GH9085)
         df = DataFrame(
-            {"time": to_datetime(["201412120154", "201412110254"], utc=True)}
+            {"time": to_datetime(["2014-12-12 01:54", "2014-12-11 02:54"], utc=True)}
         )
         db = sql.SQLiteDatabase(self.conn)
         table = sql.SQLiteTable("test_type", db, frame=df)
diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py
@@ -457,7 +457,7 @@ def test_day_first_parse_dates(parser):
     )
 
     with tm.assert_produces_warning(
-        UserWarning, match="Parsing dates in DD/MM/YYYY format"
+        UserWarning, match="Parsing dates in %d/%m/%Y format"
     ):
         df_result = read_xml(xml, parse_dates=["date"], parser=parser)
         df_iter = read_xml_iterparse(
diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py
@@ -161,8 +161,8 @@ def dtc(self):
         return converter.DatetimeConverter()
 
     def test_convert_accepts_unicode(self, dtc):
-        r1 = dtc.convert("12:22", None, None)
-        r2 = dtc.convert("12:22", None, None)
+        r1 = dtc.convert("2000-01-01 12:22", None, None)
+        r2 = dtc.convert("2000-01-01 12:22", None, None)
         assert r1 == r2, "DatetimeConverter.convert should accept unicode"
 
     def test_conversion(self, dtc):
diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py
@@ -13,7 +13,7 @@
 
 class TestSeriesToCSV:
     def read_csv(self, path, **kwargs):
-        params = {"index_col": 0, "header": None, "parse_dates": True}
+        params = {"index_col": 0, "header": None}
         params.update(**kwargs)
 
         header = params.get("header")
@@ -30,7 +30,7 @@ def test_from_csv(self, datetime_series, string_series):
 
         with tm.ensure_clean() as path:
             datetime_series.to_csv(path, header=False)
-            ts = self.read_csv(path)
+            ts = self.read_csv(path, parse_dates=True)
             tm.assert_series_equal(datetime_series, ts, check_names=False)
 
             assert ts.name is None
@@ -55,7 +55,7 @@ def test_from_csv(self, datetime_series, string_series):
             with open(path, "w") as outfile:
                 outfile.write("1998-01-01|1.0\n1999-01-01|2.0")
 
-            series = self.read_csv(path, sep="|")
+            series = self.read_csv(path, sep="|", parse_dates=True)
             check_series = Series(
                 {datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}
             )
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py

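File: pandas/tests/frame/methods/test_drop.py (rendered table view of the same hunk shown in the unified diff above)
Original file line number	Diff line number	Diff line change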
`@@ -405,11 +405,11 @@ def test_drop_level_nonunique_datetime(self):`
`405`	`405`	`idx = Index([2, 3, 4, 4, 5], name="id")`
`406`	`406`	`idxdt = pd.to_datetime(`
`407`	`407`	`[`
`408`		`- "201603231400",`
`409`		`- "201603231500",`
`410`		`- "201603231600",`
`411`		`- "201603231600",`
`412`		`- "201603231700",`
	`408`	`+ "2016-03-23 14:00",`
	`409`	`+ "2016-03-23 15:00",`
	`410`	`+ "2016-03-23 16:00",`
	`411`	`+ "2016-03-23 16:00",`
	`412`	`+ "2016-03-23 17:00",`
`413`	`413`	`]`
`414`	`414`	`)`
`415`	`415`	`df = DataFrame(np.arange(10).reshape(5, 2), columns=list("ab"), index=idx)`
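File: pandas/tests/io/xml/test_xml_dtypes.py (rendered table view of the same hunk shown in the unified diff above)
Original file line number	Diff line number	Diff line change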
`@@ -457,7 +457,7 @@ def test_day_first_parse_dates(parser):`
`457`	`457`	`)`
`458`	`458`
`459`	`459`	`with tm.assert_produces_warning(`
`460`		`- UserWarning, match="Parsing dates in DD/MM/YYYY format"`
	`460`	`+ UserWarning, match="Parsing dates in %d/%m/%Y format"`
`461`	`461`	`):`
`462`	`462`	`df_result = read_xml(xml, parse_dates=["date"], parser=parser)`
`463`	`463`	`df_iter = read_xml_iterparse(`