From f8910eb987803097c7e07a827b939708f6ef8b53 Mon Sep 17 00:00:00 2001
From: Kinshuk Dua <kinshukdua@gmail.com>
Date: Thu, 21 Oct 2021 16:42:33 +0530
Subject: [PATCH 1/4] BUG: make `.mean()` raise an exception for strings

---
 pandas/core/nanops.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 52d2322b11f42..e66a8f9716316 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -44,6 +44,7 @@
     is_numeric_dtype,
     is_object_dtype,
     is_scalar,
+    is_string_dtype,
     is_timedelta64_dtype,
     needs_i8_conversion,
     pandas_dtype,
@@ -696,7 +697,11 @@ def nanmean(
         dtype_count = dtype
 
     count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
-    the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))
+    the_sum = values.sum(axis, dtype=dtype_sum)
+    if isinstance(the_sum, str) or is_string_dtype(the_sum):
+        raise TypeError("cannot find the mean of type 'str'")
+    else:
+        _ensure_numeric(the_sum)
 
     if axis is not None and getattr(the_sum, "ndim", False):
         count = cast(np.ndarray, count)

From adb58ce0851ef562d4f35f0a30efd9ab9aab3c7e Mon Sep 17 00:00:00 2001
From: Kinshuk Dua <kinshukdua@gmail.com>
Date: Fri, 22 Oct 2021 10:14:28 +0530
Subject: [PATCH 2/4] Use `infer_dtype` to detect strings; add new regex for
 tests

---
 pandas/core/nanops.py                         | 7 +++----
 pandas/tests/apply/test_invalid_arg.py        | 5 ++++-
 pandas/tests/groupby/aggregate/test_cython.py | 2 +-
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index e66a8f9716316..c72a7af420ca5 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -20,6 +20,7 @@
     iNaT,
     lib,
 )
+from pandas._libs.lib import infer_dtype
 from pandas._typing import (
     ArrayLike,
     Dtype,
@@ -44,7 +45,6 @@
     is_numeric_dtype,
     is_object_dtype,
     is_scalar,
-    is_string_dtype,
     is_timedelta64_dtype,
     needs_i8_conversion,
     pandas_dtype,
@@ -698,10 +698,9 @@ def nanmean(
 
     count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
     the_sum = values.sum(axis, dtype=dtype_sum)
-    if isinstance(the_sum, str) or is_string_dtype(the_sum):
+    if infer_dtype(the_sum) in ("string", "byte", "mixed-integer", "mixed"):
         raise TypeError("cannot find the mean of type 'str'")
-    else:
-        _ensure_numeric(the_sum)
+    the_sum = _ensure_numeric(the_sum)
 
     if axis is not None and getattr(the_sum, "ndim", False):
         count = cast(np.ndarray, count)
diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py
index b0faeba23a479..9aea08a998207 100644
--- a/pandas/tests/apply/test_invalid_arg.py
+++ b/pandas/tests/apply/test_invalid_arg.py
@@ -257,7 +257,10 @@ def test_agg_cython_table_raises_frame(df, func, expected, axis):
 )
 def test_agg_cython_table_raises_series(series, func, expected):
     # GH21224
-    msg = r"[Cc]ould not convert|can't multiply sequence by non-int of type"
+    msg = (
+        r"[Cc]ould not convert|can't multiply sequence by non-int of type"
+        r"|cannot find the mean of type 'str'"
+    )
     with pytest.raises(expected, match=msg):
         # e.g. Series('a b'.split()).cumprod() will raise
         series.agg(func)
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index d9372ba5cbb50..9a4daad61b97a 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -92,7 +92,7 @@ def test_cython_agg_nothing_to_agg():
     with pytest.raises(NotImplementedError, match="does not implement"):
         frame.groupby("a")["b"].mean(numeric_only=True)
 
-    with pytest.raises(TypeError, match="Could not convert (foo|bar)*"):
+    with pytest.raises(TypeError, match="cannot find the mean of*"):
         frame.groupby("a")["b"].mean()
 
     frame = DataFrame({"a": np.random.randint(0, 5, 50), "b": ["foo", "bar"] * 25})

From 1c81cf622e95a89a5cfd0c551a64921352b6aae3 Mon Sep 17 00:00:00 2001
From: Kinshuk Dua <kinshukdua@gmail.com>
Date: Fri, 12 Nov 2021 11:00:27 +0530
Subject: [PATCH 3/4] Add tests for mean with strings

---
 pandas/tests/frame/test_reductions.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 6402a08ca54a2..54da0f2796d7b 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -524,6 +524,33 @@ def test_mean_mixed_string_decimal(self):
         expected = Series([2.7, 681.6], index=["A", "C"])
         tm.assert_series_equal(result, expected)
 
+    def test_mean_string(self):
+        # https://github.com/pandas-dev/pandas/issues/44008
+        # https://github.com/pandas-dev/pandas/issues/34671
+        # https://github.com/pandas-dev/pandas/issues/22642
+        # https://github.com/pandas-dev/pandas/issues/26927
+        # https://github.com/pandas-dev/pandas/issues/13916
+        # https://github.com/pandas-dev/pandas/issues/36703
+
+        df = DataFrame(
+            {
+                "A": ["1", "2", "3"],
+                "B": ["a", "b", "c"],
+                "C": [1, 2, 3],
+                "D": ["0", "1", "J"],
+            }
+        )
+        with tm.assert_produces_warning(FutureWarning, match="Dropping of nuisance"):
+            result = df.mean()
+        expected = Series([2.0], index=["C"])
+        tm.assert_series_equal(result, expected)
+        msg = "cannot find the mean of type 'str'"
+        with pytest.raises(TypeError, match=msg):
+            df.mean(numeric_only=False)
+        result = df.sum()
+        expected = Series(["123", "abc", 6, "01J"], index=["A", "B", "C", "D"])
+        tm.assert_series_equal(result, expected)
+
     def test_var_std(self, datetime_frame):
         result = datetime_frame.std(ddof=4)
         expected = datetime_frame.apply(lambda x: x.std(ddof=4))

From be053cef25f5729eccc94abaf7ac57237a560346 Mon Sep 17 00:00:00 2001
From: Kinshuk Dua <kinshukdua@gmail.com>
Date: Thu, 2 Dec 2021 16:40:11 +0530
Subject: [PATCH 4/4] Add whatsnew;fix conflict

---
 doc/source/whatsnew/v1.4.0.rst | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index fd7cb6a69d955..4811902eac2dd 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -346,6 +346,39 @@ second column is instead renamed to ``a.2``.
 
     res
 
+.. _whatsnew_140.notable_bug_fixes.mean_implicit_conversion_to_numeric:
+
+Implicit conversion of string to numeric type in mean
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Previously, when computing the ``mean`` of a :class:`Series` or :class:`DataFrame` containing strings, the elements were concatenated
+into a single string and then implicitly coerced to a numeric type before the mean was computed. This could lead to unexpected results:
+
+.. code-block:: ipython
+
+    In [5]: df = DataFrame({
+                    "A": ["1", "2", "3"],
+                    "B": ["0", "1", "J"],
+                })
+    In [6]: df.mean(numeric_only=False)
+    Out[6]:
+    A     41.00000+0.00000j
+    B    0.000000+0.333333j
+    dtype: complex128
+
+Now, a ``TypeError`` is raised whenever ``mean`` is called on a string-type :class:`Series` or :class:`DataFrame` column.
+
+.. code-block:: ipython
+
+    In [7]: df = DataFrame({
+                    "A": ["1", "2", "3"],
+                    "B": ["0", "1", "J"],
+                })
+    In [8]: df.mean(numeric_only=False)
+    Out[8]:
+    ...
+    TypeError: cannot find the mean of type 'str'
+
 .. _whatsnew_140.notable_bug_fixes.notable_bug_fix3:
 
 notable_bug_fix3