pandas-dev · kinshukdua · Oct 21, 2021 · Oct 22, 2021 · Nov 12, 2021 · Dec 2, 2021
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -405,6 +405,39 @@ raise a ``ValueError`` if the operation could produce a result with more than
 
 .. ---------------------------------------------------------------------------
 
+.. _whatsnew_140.notable_bug_fixes.mean_implicit_conversion_to_numeric:
+
+Implicit conversion of string to numeric type in mean
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Previously, when computing the ``mean`` of a :class:`Series` or :class:`DataFrame` containing string values, the elements were
+concatenated into a single string and then implicitly coerced to a numeric type before the mean was computed. This could lead to unexpected results:
+
+.. code-block:: ipython
+
+    In [5]: df = DataFrame({
+                    "A": ["1", "2", "3"],
+                    "B": ["0", "1", "J"],
+                })
+    In [6]: df.mean(numeric_only=False)
+    Out[6]:
+    A     41.00000+0.00000j
+    B    0.000000+0.333333j
+    dtype: complex128
+
+Now, a ``TypeError`` is raised whenever ``mean`` is called on a string-type column or :class:`Series`:
+
+.. code-block:: ipython
+
+    In [7]: df = DataFrame({
+                    "A": ["1", "2", "3"],
+                    "B": ["0", "1", "J"],
+                })
+    In [8]: df.mean(numeric_only=False)
+    Out[8]:
+    ...
+    TypeError: cannot find the mean of type 'str'
+
 .. _whatsnew_140.notable_bug_fixes.groupby_apply_mutation:
 
 groupby.apply consistent transform detection

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
@@ -20,6 +20,7 @@
     iNaT,
     lib,
 )
+from pandas._libs.lib import infer_dtype
 from pandas._typing import (
     ArrayLike,
     Dtype,
@@ -695,7 +696,10 @@ def nanmean(
         dtype_count = dtype
 
     count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
-    the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))
+    the_sum = values.sum(axis, dtype=dtype_sum)
+    if infer_dtype(the_sum) in ("string", "byte", "mixed-integer", "mixed"):
+        raise TypeError("cannot find the mean of type 'str'")
+    the_sum = _ensure_numeric(the_sum)
 
     if axis is not None and getattr(the_sum, "ndim", False):
         count = cast(np.ndarray, count)

diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py
@@ -257,7 +257,10 @@ def test_agg_cython_table_raises_frame(df, func, expected, axis):
 )
 def test_agg_cython_table_raises_series(series, func, expected):
     # GH21224
-    msg = r"[Cc]ould not convert|can't multiply sequence by non-int of type"
+    msg = (
+        r"[Cc]ould not convert|can't multiply sequence by non-int of type"
+        r"|cannot find the mean of type 'str'"
+    )
     with pytest.raises(expected, match=msg):
         # e.g. Series('a b'.split()).cumprod() will raise
         series.agg(func)

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
@@ -513,6 +513,33 @@ def test_mean_mixed_string_decimal(self):
         expected = Series([2.7, 681.6], index=["A", "C"])
         tm.assert_series_equal(result, expected)
 
+    def test_mean_string(self):
+        # https://github.com/pandas-dev/pandas/issues/44008
+        # https://github.com/pandas-dev/pandas/issues/34671
+        # https://github.com/pandas-dev/pandas/issues/22642
+        # https://github.com/pandas-dev/pandas/issues/26927
+        # https://github.com/pandas-dev/pandas/issues/13916
+        # https://github.com/pandas-dev/pandas/issues/36703
+
+        df = DataFrame(
+            {
+                "A": ["1", "2", "3"],
+                "B": ["a", "b", "c"],
+                "C": [1, 2, 3],
+                "D": ["0", "1", "J"],
+            }
+        )
+        with tm.assert_produces_warning(FutureWarning, match="Dropping of nuisance"):
+            result = df.mean()
+        expected = Series([2.0], index=["C"])
+        tm.assert_series_equal(result, expected)
+        msg = "cannot find the mean of type 'str'"
+        with pytest.raises(TypeError, match=msg):
+            df.mean(numeric_only=False)
+        result = df.sum()
+        expected = Series(["123", "abc", 6, "01J"], index=["A", "B", "C", "D"])
+        tm.assert_series_equal(result, expected)
+
     def test_var_std(self, datetime_frame):
         result = datetime_frame.std(ddof=4)
         expected = datetime_frame.apply(lambda x: x.std(ddof=4))

diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
@@ -92,7 +92,7 @@ def test_cython_agg_nothing_to_agg():
     with pytest.raises(NotImplementedError, match="does not implement"):
         frame.groupby("a")["b"].mean(numeric_only=True)
 
-    with pytest.raises(TypeError, match="Could not convert (foo|bar)*"):
+    with pytest.raises(TypeError, match="cannot find the mean of*"):
         frame.groupby("a")["b"].mean()
 
     frame = DataFrame({"a": np.random.randint(0, 5, 50), "b": ["foo", "bar"] * 25})