pandas-dev · mproszewska · Apr 25, 2020 · Apr 25, 2020 · May 5, 2020 · May 5, 2020
diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py
@@ -11,7 +11,7 @@
 
 
 def _generate_dataframe():
-    N = 2000
+    N = 20000
     C = 5
     df = DataFrame(
         np.random.randn(N, C),
@@ -69,5 +69,9 @@ def time_read_excel(self, engine):
         fname = self.fname_odf if engine == "odf" else self.fname_excel
         read_excel(fname, engine=engine)
 
+    def time_read_excel_nrows(self, engine):
+        fname = self.fname_odf if engine == "odf" else self.fname_excel
+        read_excel(fname, engine=engine, nrows=1)
+
 
 from ..pandas_vb_common import setup  # noqa: F401 isort:skip
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -49,7 +49,7 @@
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import CategoricalDtype
-from pandas.core.dtypes.missing import isna
+from pandas.core.dtypes.missing import isna, notna
 
 from pandas.core import algorithms
 from pandas.core.arrays import Categorical
@@ -1322,6 +1322,31 @@ def _validate_parse_dates_arg(parse_dates):
     return parse_dates
 
 
+def _check_unexpected_data(columns, data, index_col):
+    """
+    Warn when ``data`` holds non-null entries beyond the expected columns.
+
+    Parameters
+    ----------
+    columns : list
+        Column names.
+    data : array-like
+        Parsed column data, one entry per column.
+    index_col : list, scalar, None or False
+        Column(s) used as the index; None and False mean no index column.
+    """
+    if index_col is None or index_col is False:
+        n_index = 0
+    else:
+        # A bare scalar (e.g. ``index_col=0``) names exactly one index column.
+        n_index = 1 if isinstance(index_col, (int, str)) else len(index_col)
+    expected_columns = len(columns) + n_index
+    # Surplus columns that are entirely null are tolerated without a warning.
+    if len(data) > expected_columns and notna(data[expected_columns:]).any():
+        msg = "Expected {} columns instead of {}".format(expected_columns, len(data))
+        warnings.warn(msg, ParserWarning, stacklevel=2)
+
+
 class ParserBase:
     def __init__(self, kwds):
         self.names = kwds.get("names")
@@ -2136,6 +2161,8 @@ def read(self, nrows=None):
 
             # columns as list
             alldata = [x[1] for x in data]
+            if self.usecols is None:
+                _check_unexpected_data(names, data, self.index_col)
 
             data = {k: v for k, (i, v) in zip(names, data)}
 
@@ -2144,7 +2171,6 @@ def read(self, nrows=None):
 
         # maybe create a mi on the columns
         names = self._maybe_make_multi_index_columns(names, self.col_names)
-
         return index, names, data
 
     def _filter_usecols(self, names):
@@ -2495,6 +2521,10 @@ def read(self, rows=None):
             content = content[1:]
 
         alldata = self._rows_to_cols(content)
+
+        if self.usecols is None:
+            _check_unexpected_data(columns, alldata, self.index_col)
+
         data = self._exclude_implicit_index(alldata)
 
         columns = self._maybe_dedup_names(self.columns)

diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
@@ -15,7 +15,7 @@
 import pytest
 
 from pandas._libs.tslib import Timestamp
-from pandas.errors import DtypeWarning, EmptyDataError, ParserError
+from pandas.errors import DtypeWarning, EmptyDataError, ParserError, ParserWarning
 import pandas.util._test_decorators as td
 
 from pandas import DataFrame, Index, MultiIndex, Series, compat, concat, option_context
@@ -1071,8 +1071,8 @@ def test_trailing_delimiters(all_parsers):
 4,5,6,
 7,8,9,"""
     parser = all_parsers
-    result = parser.read_csv(StringIO(data), index_col=False)
-
+    with tm.assert_produces_warning(ParserWarning):
+        result = parser.read_csv(StringIO(data), index_col=False)
     expected = DataFrame({"A": [1, 4, 7], "B": [2, 5, 8], "C": [3, 6, 9]})
     tm.assert_frame_equal(result, expected)
 
@@ -2178,7 +2178,8 @@ def test_no_header_two_extra_columns(all_parsers):
     ref = DataFrame([["foo", "bar", "baz"]], columns=column_names)
     stream = StringIO("foo,bar,baz,bam,blah")
     parser = all_parsers
-    df = parser.read_csv(stream, header=None, names=column_names, index_col=False)
+    with tm.assert_produces_warning(ParserWarning):
+        df = parser.read_csv(stream, header=None, names=column_names, index_col=False)
     tm.assert_frame_equal(df, ref)
 
 
@@ -2241,3 +2242,10 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter):
 
     with pytest.raises(ValueError, match=msg):
         parser.read_table(f, delim_whitespace=True, delimiter=delimiter)
+
+
+def test_first_row_length(all_parsers):
+    # Only the first data row carries a fourth, non-empty field ("X").
+    csv_text = "col1,col2,col3\n0,1,2,X\n4,5,6,\n6,7,8"
+    with tm.assert_produces_warning(ParserWarning):
+        all_parsers.read_csv(StringIO(csv_text), index_col=False)