From 398477a8a0001508d8a041d52ec4a76bf6dbc74b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Fri, 8 Mar 2024 09:42:20 -0300 Subject: [PATCH 01/92] REF Series: add a test to check that on dictionary constructor np.nan equals float('nan') --- pandas/tests/series/test_constructors.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index b00074c04257e..5249dd215681f 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1388,6 +1388,12 @@ def test_constructor_dict_nan_key(self, value): ) tm.assert_series_equal(result, expected) + def test_dict_np_nan_equals_floatnan(self): + d = {np.nan: 1} + result = Series(d, index=[float("nan")]) + expected = Series(d) + tm.assert_series_equal(result, expected) + def test_constructor_dict_datetime64_index(self): # GH 9456 From a15fa212f2d4c2bd98b1c7865d1b92ff02a526f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sat, 9 Mar 2024 11:42:33 -0300 Subject: [PATCH 02/92] REF Series: simplify Series._init_dict constructor --- pandas/core/series.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index bae95418c7641..c3d3eed009883 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -542,23 +542,20 @@ def _init_dict( # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')] # raises KeyError), so we iterate the entire dict, and align + values = list(data.values()) # Generating list of values- faster way if data: # GH:34717, issue was using zip to extract key and values from data. # using generators in effects the performance. 
- # Below is the new way of extracting the keys and values - + # Below is the new way of extracting the keys and values] keys = tuple(data.keys()) - values = list(data.values()) # Generating list of values- faster way elif index is not None: # fastpath for Series(data=None). Just use broadcasting a scalar # instead of reindexing. if len(index) or dtype is not None: values = na_value_for_dtype(pandas_dtype(dtype), compat=False) - else: - values = [] keys = index else: - keys, values = default_index(0), [] + keys = default_index(0) # Input is now list-like, so rely on "standard" construction: s = Series(values, index=keys, dtype=dtype) From e72cff15d4fabf3173a2e4da6c6639295b567f1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sat, 9 Mar 2024 16:00:54 -0300 Subject: [PATCH 03/92] REF Series: add tests to ensure that series dict constructor preserve dtype --- pandas/tests/series/test_constructors.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 5249dd215681f..e6df7d96f2ee5 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -2257,3 +2257,26 @@ def test_series_with_complex_nan(input_list): result = Series(ser.array) assert ser.dtype == "complex128" tm.assert_series_equal(ser, result) + + +@pytest.mark.parametrize( + "value", + [ + (1), + (1.0), + (1.0 + 1.0j), + ], +) +@pytest.mark.parametrize( + "unused_value", + [ + (True), + ("a"), + ], +) +def test_dict_constructor_preserve_dtype(value, unused_value): + d = {"a": value, "b": unused_value} + e = {"a": value} + s = Series(data=d, index=["a"]) + expected = Series(data=e) + tm.assert_series_equal(s, expected) From 0e6f2698929bf7d1002e7416459bf661565ae4e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 11 Mar 2024 09:27:28 -0300 Subject: [PATCH 04/92] REF Series: Move 'data is None' 
below --- pandas/core/series.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index c3d3eed009883..ff3e979ac681e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -414,13 +414,6 @@ def __init__( if dtype is not None: dtype = self._validate_dtype(dtype) - if data is None: - index = index if index is not None else default_index(0) - if len(index) or dtype is not None: - data = na_value_for_dtype(pandas_dtype(dtype), compat=False) - else: - data = [] - if isinstance(data, MultiIndex): raise NotImplementedError( "initializing a Series from a MultiIndex is not supported" @@ -480,6 +473,13 @@ def __init__( elif isinstance(data, ExtensionArray): pass else: + if data is None: + index = index if index is not None else default_index(0) + if len(index) or dtype is not None: + data = na_value_for_dtype(pandas_dtype(dtype), compat=False) + else: + data = [] + data = com.maybe_iterable_to_list(data) if is_list_like(data) and not len(data) and dtype is None: # GH 29405: Pre-2.0, this defaulted to float. From b696dc2a3832a807d1469e00a532ebb504fd6959 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 11 Mar 2024 12:03:26 -0300 Subject: [PATCH 05/92] REF Series: ensure dict is not series and Series(dict(),...) -> Series(None,...) --- pandas/core/series.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index ff3e979ac681e..c6d8b85173809 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -384,6 +384,11 @@ def __init__( return is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) + + if is_dict_like(data) and not is_pandas_object: + if len(data) == 0: + data = None # Stating that Series(dict(),...) = Series (None, ...) 
OK + data_dtype = getattr(data, "dtype", None) original_dtype = dtype @@ -420,7 +425,12 @@ def __init__( ) refs = None - if isinstance(data, Index): + + if is_dict_like(data) and not is_pandas_object: + data, index = self._init_dict(data, index, dtype) + dtype = None + copy = False + elif isinstance(data, Index): if dtype is not None: data = data.astype(dtype) @@ -444,10 +454,6 @@ def __init__( data = data.reindex(index) copy = False data = data._mgr - elif is_dict_like(data): - data, index = self._init_dict(data, index, dtype) - dtype = None - copy = False elif isinstance(data, SingleBlockManager): if index is None: index = data.index From 66793c39cc62bee9ed7372e1d52f259554436a1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 11 Mar 2024 13:01:27 -0300 Subject: [PATCH 06/92] REF Series: Series(dict(),...) extracted from _init_dict() which is now _init_non_empty_dict() --- pandas/core/series.py | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index c6d8b85173809..a877ff76e7a36 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -386,8 +386,16 @@ def __init__( is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) if is_dict_like(data) and not is_pandas_object: - if len(data) == 0: - data = None # Stating that Series(dict(),...) = Series (None, ...) OK + if data: + pass + else: + data = None + # if not len(data): + # data = None # Stating that Series(dict(),...) = Series (None, ...) 
OK + # elif index is None or (index is not None and not len(index)): + # pass + # # data = list(data.values()) + # # index = tuple(data.keys()) data_dtype = getattr(data, "dtype", None) original_dtype = dtype @@ -427,7 +435,7 @@ def __init__( refs = None if is_dict_like(data) and not is_pandas_object: - data, index = self._init_dict(data, index, dtype) + data, index = self._init_non_empty_dict(data, index, dtype) dtype = None copy = False elif isinstance(data, Index): @@ -523,7 +531,7 @@ def __init__( stacklevel=find_stack_level(), ) - def _init_dict( + def _init_non_empty_dict( self, data, index: Index | None = None, dtype: DtypeObj | None = None ): """ @@ -544,30 +552,19 @@ def _init_dict( _data : BlockManager for the new Series index : index for the new Series """ - keys: Index | tuple + # GH:34717, issue was using zip to extract key and values from data. + # using generators in effects the performance. + # Below is the new way of extracting the keys and values] + keys = tuple(data.keys()) + values = list(data.values()) # Generating list of values- faster way # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')] # raises KeyError), so we iterate the entire dict, and align - values = list(data.values()) # Generating list of values- faster way - if data: - # GH:34717, issue was using zip to extract key and values from data. - # using generators in effects the performance. - # Below is the new way of extracting the keys and values] - keys = tuple(data.keys()) - elif index is not None: - # fastpath for Series(data=None). Just use broadcasting a scalar - # instead of reindexing. 
- if len(index) or dtype is not None: - values = na_value_for_dtype(pandas_dtype(dtype), compat=False) - keys = index - else: - keys = default_index(0) - # Input is now list-like, so rely on "standard" construction: s = Series(values, index=keys, dtype=dtype) # Now we just make sure the order is respected, if any - if data and index is not None: + if index is not None: s = s.reindex(index) return s._mgr, s.index From 5e09cc89c9c97dd869b3da18e3900ba0074b20f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 11 Mar 2024 14:38:35 -0300 Subject: [PATCH 07/92] REF Series: bring dict data closer and make if-else structure identical --- pandas/core/series.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index a877ff76e7a36..e6c857254633a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -384,19 +384,6 @@ def __init__( return is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) - - if is_dict_like(data) and not is_pandas_object: - if data: - pass - else: - data = None - # if not len(data): - # data = None # Stating that Series(dict(),...) = Series (None, ...) 
OK - # elif index is None or (index is not None and not len(index)): - # pass - # # data = list(data.values()) - # # index = tuple(data.keys()) - data_dtype = getattr(data, "dtype", None) original_dtype = dtype @@ -434,10 +421,23 @@ def __init__( refs = None - if is_dict_like(data) and not is_pandas_object: - data, index = self._init_non_empty_dict(data, index, dtype) - dtype = None - copy = False + data_is_dict = is_dict_like(data) and not is_pandas_object + if data_is_dict: # + if data: + pass + # if index is None or (index is not None and not len(index)): + # pass + else: + data = None + data_is_dict = False + + if data_is_dict: + if data: + data, index = self._init_non_empty_dict(data, index, dtype) + dtype = None + copy = False + else: + pass elif isinstance(data, Index): if dtype is not None: data = data.astype(dtype) From f0f336dc04ebc65eb633445b9c6036b7ba125682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 11 Mar 2024 17:49:22 -0300 Subject: [PATCH 08/92] REF Series: Decouple dictionary from all other kinds of data --- pandas/core/series.py | 74 +++++++++---------------------------------- 1 file changed, 15 insertions(+), 59 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index e6c857254633a..80c97444b1e8a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -386,6 +386,7 @@ def __init__( is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) data_dtype = getattr(data, "dtype", None) original_dtype = dtype + refs = None if isinstance(data, (ExtensionArray, np.ndarray)): if copy is not False: @@ -419,26 +420,25 @@ def __init__( "initializing a Series from a MultiIndex is not supported" ) - refs = None - - data_is_dict = is_dict_like(data) and not is_pandas_object - if data_is_dict: # + if is_dict_like(data) and not is_pandas_object: if data: - pass - # if index is None or (index is not None and not len(index)): - # pass + # Looking for NaN in dict doesn't work ({np.nan 
: 1}[float('nan')] + # raises KeyError). Send it to Series for "standard" construction: + data = Series( + list(data.values()), index=tuple(data.keys()), dtype=dtype + ) + dtype = None else: data = None - data_is_dict = False - if data_is_dict: - if data: - data, index = self._init_non_empty_dict(data, index, dtype) - dtype = None - copy = False + if data is None: + index = index if index is not None else default_index(0) + if len(index) or dtype is not None: + data = na_value_for_dtype(pandas_dtype(dtype), compat=False) else: - pass - elif isinstance(data, Index): + data = [] + + if isinstance(data, Index): if dtype is not None: data = data.astype(dtype) @@ -487,13 +487,6 @@ def __init__( elif isinstance(data, ExtensionArray): pass else: - if data is None: - index = index if index is not None else default_index(0) - if len(index) or dtype is not None: - data = na_value_for_dtype(pandas_dtype(dtype), compat=False) - else: - data = [] - data = com.maybe_iterable_to_list(data) if is_list_like(data) and not len(data) and dtype is None: # GH 29405: Pre-2.0, this defaulted to float. @@ -531,43 +524,6 @@ def __init__( stacklevel=find_stack_level(), ) - def _init_non_empty_dict( - self, data, index: Index | None = None, dtype: DtypeObj | None = None - ): - """ - Derive the "_mgr" and "index" attributes of a new Series from a - dictionary input. - - Parameters - ---------- - data : dict or dict-like - Data used to populate the new Series. - index : Index or None, default None - Index for the new Series: if None, use dict keys. - dtype : np.dtype, ExtensionDtype, or None, default None - The dtype for the new Series: if None, infer from data. - - Returns - ------- - _data : BlockManager for the new Series - index : index for the new Series - """ - # GH:34717, issue was using zip to extract key and values from data. - # using generators in effects the performance. 
- # Below is the new way of extracting the keys and values] - keys = tuple(data.keys()) - values = list(data.values()) # Generating list of values- faster way - - # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')] - # raises KeyError), so we iterate the entire dict, and align - # Input is now list-like, so rely on "standard" construction: - s = Series(values, index=keys, dtype=dtype) - - # Now we just make sure the order is respected, if any - if index is not None: - s = s.reindex(index) - return s._mgr, s.index - # ---------------------------------------------------------------------- @property From ca87d9dbf5d7a8ca1e08a56aace44bffa0ff3bad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 11 Mar 2024 22:43:47 -0300 Subject: [PATCH 09/92] REF Series: Starting to decouple index and data is None/Not None edge cases --- pandas/core/series.py | 66 ++++++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 80c97444b1e8a..713a7fa39ef03 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -392,6 +392,7 @@ def __init__( if copy is not False: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): data = data.copy() + if copy is None: copy = False @@ -409,17 +410,9 @@ def __init__( name = ibase.maybe_extract_name(name, data, type(self)) - if index is not None: - index = ensure_index(index) - if dtype is not None: dtype = self._validate_dtype(dtype) - if isinstance(data, MultiIndex): - raise NotImplementedError( - "initializing a Series from a MultiIndex is not supported" - ) - if is_dict_like(data) and not is_pandas_object: if data: # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')] @@ -431,22 +424,43 @@ def __init__( else: data = None - if data is None: - index = index if index is not None else default_index(0) - if len(index) or dtype is not None: - data = 
na_value_for_dtype(pandas_dtype(dtype), compat=False) + if isinstance(data, MultiIndex): + raise NotImplementedError( + "initializing a Series from a MultiIndex is not supported" + ) + + if index is not None: + index = ensure_index(index) + + # Código 1 + if data is None: # TODO - simplificar a lógica e juntar com o código + if index is None: + index = default_index(0) + + if index is not None: + if len(index) or dtype is not None: + data = na_value_for_dtype(pandas_dtype(dtype), compat=False) + else: + data = [] else: - data = [] + if dtype is not None: + data = na_value_for_dtype(pandas_dtype(dtype), compat=False) + else: + data = [] if isinstance(data, Index): if dtype is not None: data = data.astype(dtype) - + if index is None: + index = default_index(len(data)) refs = data._references data = data._values copy = False elif isinstance(data, np.ndarray): + if index is None: + index = default_index(len(data)) + if len(data.dtype): # GH#13296 we are dealing with a compound dtype, which # should be treated as 2D @@ -485,18 +499,26 @@ def __init__( allow_mgr = True elif isinstance(data, ExtensionArray): - pass - else: + if index is None: + index = default_index(len(data)) + # pass + elif is_iterator(data) or is_list_like(data): data = com.maybe_iterable_to_list(data) - if is_list_like(data) and not len(data) and dtype is None: + if index is None: + index = default_index(len(data)) + + if not len(data) and dtype is None: # GH 29405: Pre-2.0, this defaulted to float. 
dtype = np.dtype(object) - - if index is None: - if not is_list_like(data): + else: + # código 2 - + # TODO: simplificar a lógica e juntar com o código 2 + if index is None: data = [data] - index = default_index(len(data)) - elif is_list_like(data): + index = default_index(len(data)) + + # Final requirement + if is_list_like(data): com.require_length_match(data, index) # create/copy the manager From 6e850ea12732f8dc9da95ad0c470378bbcb4a37f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 11 Mar 2024 23:09:11 -0300 Subject: [PATCH 10/92] REF Series: Further simplifications --- pandas/core/series.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 713a7fa39ef03..2d03c1c650187 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -436,14 +436,12 @@ def __init__( if data is None: # TODO - simplificar a lógica e juntar com o código if index is None: index = default_index(0) - - if index is not None: - if len(index) or dtype is not None: + if dtype is not None: data = na_value_for_dtype(pandas_dtype(dtype), compat=False) else: data = [] else: - if dtype is not None: + if len(index) or dtype is not None: data = na_value_for_dtype(pandas_dtype(dtype), compat=False) else: data = [] @@ -510,12 +508,16 @@ def __init__( if not len(data) and dtype is None: # GH 29405: Pre-2.0, this defaulted to float. 
dtype = np.dtype(object) - else: + elif data is None: # código 2 - # TODO: simplificar a lógica e juntar com o código 2 if index is None: data = [data] index = default_index(len(data)) + elif data is not None: + if index is None: + data = [data] + index = default_index(len(data)) # Final requirement if is_list_like(data): From 94f4fe54d3f155b1ab118c23e3cf714c249ff6a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 11 Mar 2024 23:58:51 -0300 Subject: [PATCH 11/92] REF Series: More simplifications --- pandas/core/series.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 2d03c1c650187..3a31e01b615dd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -446,6 +446,8 @@ def __init__( else: data = [] + # Here >>> Maybe data is list, data is not None + if isinstance(data, Index): if dtype is not None: data = data.astype(dtype) @@ -508,16 +510,10 @@ def __init__( if not len(data) and dtype is None: # GH 29405: Pre-2.0, this defaulted to float. dtype = np.dtype(object) - elif data is None: - # código 2 - - # TODO: simplificar a lógica e juntar com o código 2 + elif data is not None: # Possibly scalar! Check with 'hasattr(bar, '__len__')' if index is None: data = [data] - index = default_index(len(data)) - elif data is not None: - if index is None: - data = [data] - index = default_index(len(data)) + index = default_index(1) # Final requirement if is_list_like(data): From b4f71ac68bf9c02a26c45293044960ba14f7b2db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Tue, 12 Mar 2024 11:14:18 -0300 Subject: [PATCH 12/92] REF Series: Else if is joined and escalated from the most restrictive to the less restrictive case. With comments. 
--- pandas/core/series.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3a31e01b615dd..0231acd514d7d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -438,17 +438,39 @@ def __init__( index = default_index(0) if dtype is not None: data = na_value_for_dtype(pandas_dtype(dtype), compat=False) + # continue on scalar branch but does nothing. else: data = [] + data = com.maybe_iterable_to_list(data) + # if index is None: + # index = default_index(len(data)) + + if not len(data) and dtype is None: + # GH 29405: Pre-2.0, this defaulted to float. + dtype = np.dtype(object) + # continue on list branch + # list branch, index is not None, but 'len(index)==0'. + # Note that index = default_index(0). OK else: if len(index) or dtype is not None: data = na_value_for_dtype(pandas_dtype(dtype), compat=False) + # continue on scalar branch but does nothing. else: data = [] + # continue on list branch + # list branch, index is not None, but 'len(index)==0'. + # Note that index = ensure_index(index). + data = com.maybe_iterable_to_list(data) + # if index is None: + # index = default_index(len(data)) + + if not len(data) and dtype is None: + # GH 29405: Pre-2.0, this defaulted to float. + dtype = np.dtype(object) # Here >>> Maybe data is list, data is not None - if isinstance(data, Index): + elif isinstance(data, Index): if dtype is not None: data = data.astype(dtype) if index is None: @@ -502,7 +524,7 @@ def __init__( if index is None: index = default_index(len(data)) # pass - elif is_iterator(data) or is_list_like(data): + elif is_list_like(data) or is_iterator(data): data = com.maybe_iterable_to_list(data) if index is None: index = default_index(len(data)) @@ -510,7 +532,7 @@ def __init__( if not len(data) and dtype is None: # GH 29405: Pre-2.0, this defaulted to float. dtype = np.dtype(object) - elif data is not None: # Possibly scalar! 
Check with 'hasattr(bar, '__len__')' + elif data is not None: # scalar directly form input only. if index is None: data = [data] index = default_index(1) From c0c4554d1459b572da3189f1c8baf3c29ce11c20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Tue, 12 Mar 2024 12:09:47 -0300 Subject: [PATCH 13/92] REF Series: Clean unused code and comments. --- pandas/core/series.py | 65 +++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 43 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 0231acd514d7d..d40523e40856c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -413,16 +413,18 @@ def __init__( if dtype is not None: dtype = self._validate_dtype(dtype) + # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')] + # raises KeyError). Send it to Series for "standard" construction: if is_dict_like(data) and not is_pandas_object: - if data: - # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')] - # raises KeyError). Send it to Series for "standard" construction: - data = Series( - list(data.values()), index=tuple(data.keys()), dtype=dtype + data = ( + Series( + data=list(data.values()), + index=tuple(data.keys()), + dtype=dtype, ) - dtype = None - else: - data = None + if data + else None + ) if isinstance(data, MultiIndex): raise NotImplementedError( @@ -432,43 +434,17 @@ def __init__( if index is not None: index = ensure_index(index) - # Código 1 - if data is None: # TODO - simplificar a lógica e juntar com o código + if data is None: + na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) if index is None: index = default_index(0) - if dtype is not None: - data = na_value_for_dtype(pandas_dtype(dtype), compat=False) - # continue on scalar branch but does nothing. 
- else: - data = [] - data = com.maybe_iterable_to_list(data) - # if index is None: - # index = default_index(len(data)) - - if not len(data) and dtype is None: - # GH 29405: Pre-2.0, this defaulted to float. - dtype = np.dtype(object) - # continue on list branch - # list branch, index is not None, but 'len(index)==0'. - # Note that index = default_index(0). OK + data = na_value if dtype is not None else [] else: - if len(index) or dtype is not None: - data = na_value_for_dtype(pandas_dtype(dtype), compat=False) - # continue on scalar branch but does nothing. - else: - data = [] - # continue on list branch - # list branch, index is not None, but 'len(index)==0'. - # Note that index = ensure_index(index). - data = com.maybe_iterable_to_list(data) - # if index is None: - # index = default_index(len(data)) - - if not len(data) and dtype is None: - # GH 29405: Pre-2.0, this defaulted to float. - dtype = np.dtype(object) + data = na_value if len(index) or dtype is not None else [] - # Here >>> Maybe data is list, data is not None + if isinstance(data, list) and dtype is None: + # GH 29405: Pre-2.0, this defaulted to float. + dtype = np.dtype(object) elif isinstance(data, Index): if dtype is not None: @@ -490,6 +466,7 @@ def __init__( "Cannot construct a Series from an ndarray with " "compound dtype. Use DataFrame instead." ) + elif isinstance(data, Series): if index is None: index = data.index @@ -498,6 +475,7 @@ def __init__( data = data.reindex(index) copy = False data = data._mgr + elif isinstance(data, SingleBlockManager): if index is None: index = data.index @@ -523,7 +501,7 @@ def __init__( elif isinstance(data, ExtensionArray): if index is None: index = default_index(len(data)) - # pass + elif is_list_like(data) or is_iterator(data): data = com.maybe_iterable_to_list(data) if index is None: @@ -532,7 +510,8 @@ def __init__( if not len(data) and dtype is None: # GH 29405: Pre-2.0, this defaulted to float. 
dtype = np.dtype(object) - elif data is not None: # scalar directly form input only. + + else: # scalar directly from input only. Could be #elif data is not None: if index is None: data = [data] index = default_index(1) From ff139db32d94727882ecf7bd1b180a5da8639549 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 13 Mar 2024 07:09:23 -0300 Subject: [PATCH 14/92] REF Series: simplification --- pandas/core/series.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d40523e40856c..de5bb1ace8a68 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -393,7 +393,7 @@ def __init__( if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): data = data.copy() - if copy is None: + if copy is None: # Check if can go up or down. After pushing to pandas-dev copy = False if isinstance(data, SingleBlockManager) and not copy: @@ -431,22 +431,23 @@ def __init__( "initializing a Series from a MultiIndex is not supported" ) - if index is not None: + na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) + + if index is None: + # Next, decouple simple basic index operations + # from bulk data in operations. + pass + else: index = ensure_index(index) if data is None: - na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) if index is None: index = default_index(0) data = na_value if dtype is not None else [] else: data = na_value if len(index) or dtype is not None else [] - if isinstance(data, list) and dtype is None: - # GH 29405: Pre-2.0, this defaulted to float. - dtype = np.dtype(object) - - elif isinstance(data, Index): + if isinstance(data, Index): if dtype is not None: data = data.astype(dtype) if index is None: @@ -510,11 +511,11 @@ def __init__( if not len(data) and dtype is None: # GH 29405: Pre-2.0, this defaulted to float. dtype = np.dtype(object) - - else: # scalar directly from input only. 
Could be #elif data is not None: + else: # if data is not None: + # scalar, directly from input only. if index is None: - data = [data] index = default_index(1) + data = [data] # Final requirement if is_list_like(data): From 9a1843ed2a8ccbcb6b4efcfab3b1148404126b21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 13 Mar 2024 07:21:10 -0300 Subject: [PATCH 15/92] REF Series: Groupping common operations on index --- pandas/core/series.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index de5bb1ace8a68..0356cd1cc7af7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -431,20 +431,15 @@ def __init__( "initializing a Series from a MultiIndex is not supported" ) + # Common operations on index na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) - if index is None: - # Next, decouple simple basic index operations - # from bulk data in operations. - pass - else: - index = ensure_index(index) - - if data is None: - if index is None: + if data is None: index = default_index(0) data = na_value if dtype is not None else [] - else: + else: + index = ensure_index(index) + if data is None: data = na_value if len(index) or dtype is not None else [] if isinstance(data, Index): From 161d0ff0d668626413ba8de72b3a701ff9ea6e24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 13 Mar 2024 09:49:33 -0300 Subject: [PATCH 16/92] REF Series: selecting iterator on if-else data logic was no longer necessary --- pandas/core/series.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 0356cd1cc7af7..39e0bfc258050 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -8,6 +8,7 @@ Iterable, Mapping, Sequence, + Sized, ) import operator import sys @@ -437,16 +438,21 @@ def __init__( if data is None: index = 
default_index(0) data = na_value if dtype is not None else [] + else: + # if isinstance(data,(Series, SingleBlockManager, ...)): + pass else: index = ensure_index(index) if data is None: data = na_value if len(index) or dtype is not None else [] if isinstance(data, Index): - if dtype is not None: - data = data.astype(dtype) if index is None: index = default_index(len(data)) + + if dtype is not None: + data = data.astype(dtype) + refs = data._references data = data._values copy = False @@ -498,15 +504,22 @@ def __init__( if index is None: index = default_index(len(data)) - elif is_list_like(data) or is_iterator(data): - data = com.maybe_iterable_to_list(data) + elif is_list_like(data): + # Code below is getting some object that is not scalar, + # but somehow is_list_like() + # Does it have something to do with DateTime? + # data = com.maybe_iterable_to_list(data) + if not isinstance(data, Sized): + data = list(data) + if index is None: index = default_index(len(data)) if not len(data) and dtype is None: # GH 29405: Pre-2.0, this defaulted to float. dtype = np.dtype(object) - else: # if data is not None: + + else: # is_scalar(data) fails: #data is not None: OK # scalar, directly from input only. 
if index is None: index = default_index(1) From 4b208bb32f8c9e9c9cb3970f7cf89fdb2eb51e48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 13 Mar 2024 10:46:55 -0300 Subject: [PATCH 17/92] REF Series: Separating a peculiar data Sized type being catched by is_list_like on data manipulation --- pandas/core/series.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 39e0bfc258050..44d81b8513a63 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -384,11 +384,17 @@ def __init__( self.name = name return + # TASK: Capturing relevant data for latter access is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) data_dtype = getattr(data, "dtype", None) original_dtype = dtype refs = None + # TASK: Validating dtype + if dtype is not None: + dtype = self._validate_dtype(dtype) + + # TASK: Especial Data Manipulation if isinstance(data, (ExtensionArray, np.ndarray)): if copy is not False: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): @@ -411,8 +417,7 @@ def __init__( name = ibase.maybe_extract_name(name, data, type(self)) - if dtype is not None: - dtype = self._validate_dtype(dtype) + # ESPECIAL DATA MANIPULATIONS # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')] # raises KeyError). Send it to Series for "standard" construction: @@ -427,6 +432,10 @@ def __init__( else None ) + # Which type is this data? 
+ if is_list_like(data) and not isinstance(data, Sized): + data = list(data) + if isinstance(data, MultiIndex): raise NotImplementedError( "initializing a Series from a MultiIndex is not supported" @@ -446,6 +455,7 @@ def __init__( if data is None: data = na_value if len(index) or dtype is not None else [] + # Final data preparation manipulation if isinstance(data, Index): if index is None: index = default_index(len(data)) @@ -509,8 +519,6 @@ def __init__( # but somehow is_list_like() # Does it have something to do with DateTime? # data = com.maybe_iterable_to_list(data) - if not isinstance(data, Sized): - data = list(data) if index is None: index = default_index(len(data)) @@ -519,8 +527,10 @@ def __init__( # GH 29405: Pre-2.0, this defaulted to float. dtype = np.dtype(object) - else: # is_scalar(data) fails: #data is not None: OK - # scalar, directly from input only. + else: # data is not None and not is_iterator(data): + # is_scalar(data) fails: #data is not None: OK + # seems scalar, directly from input only. 
+ if index is None: index = default_index(1) data = [data] From 299169aeae597cd56b0e40b46a3ebac37864920f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 13 Mar 2024 11:32:20 -0300 Subject: [PATCH 18/92] REF Series: Repositioning code blocks according to tasks --- pandas/core/series.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 44d81b8513a63..7daf860b9c817 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -384,23 +384,30 @@ def __init__( self.name = name return - # TASK: Capturing relevant data for latter access + # Series TASK 1: Capturing relevant data for latter access is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) data_dtype = getattr(data, "dtype", None) original_dtype = dtype refs = None + name = ibase.maybe_extract_name(name, data, type(self)) - # TASK: Validating dtype + # Series TASK 1: Validating dtype if dtype is not None: dtype = self._validate_dtype(dtype) - # TASK: Especial Data Manipulation + # Series TASK 2: Data Preparation if isinstance(data, (ExtensionArray, np.ndarray)): if copy is not False: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): data = data.copy() - if copy is None: # Check if can go up or down. After pushing to pandas-dev + if isinstance(data, MultiIndex): + raise NotImplementedError( + "initializing a Series from a MultiIndex is not supported" + ) + + # It Fails if go to Task 1 + if copy is None: copy = False if isinstance(data, SingleBlockManager) and not copy: @@ -415,9 +422,9 @@ def __init__( stacklevel=2, ) - name = ibase.maybe_extract_name(name, data, type(self)) - - # ESPECIAL DATA MANIPULATIONS + # Dict is a just a special case of data preparation. + # Here it is being sent to Series, but it could different, for simplicity. + # It could be sent to array (for faster manipulation, for example). 
# Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')] # raises KeyError). Send it to Series for "standard" construction: @@ -432,15 +439,10 @@ def __init__( else None ) - # Which type is this data? + # TODO: Investigate. This is an unknown type that must be converted to list. if is_list_like(data) and not isinstance(data, Sized): data = list(data) - if isinstance(data, MultiIndex): - raise NotImplementedError( - "initializing a Series from a MultiIndex is not supported" - ) - # Common operations on index na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) if index is None: From b1f13203750401ed5354a7b37f50b8b23ea7236c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 13 Mar 2024 11:48:48 -0300 Subject: [PATCH 19/92] REF Series: Separating copy from dtype logic on final steps. Clean comments --- pandas/core/series.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7daf860b9c817..8d050c3f375e2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -396,6 +396,11 @@ def __init__( dtype = self._validate_dtype(dtype) # Series TASK 2: Data Preparation + + # TODO: Investigate. But below the logic changes! + # Is it so? ExtensionArray copies with None and True + # And BlockManagers copies only with True + # Since copy maybe None, if copy is None it will enter this if isinstance(data, (ExtensionArray, np.ndarray)): if copy is not False: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): @@ -422,7 +427,7 @@ def __init__( stacklevel=2, ) - # Dict is a just a special case of data preparation. + # COMMENT: Dict is a just a special case of data preparation. # Here it is being sent to Series, but it could different, for simplicity. # It could be sent to array (for faster manipulation, for example). 
@@ -517,11 +522,6 @@ def __init__( index = default_index(len(data)) elif is_list_like(data): - # Code below is getting some object that is not scalar, - # but somehow is_list_like() - # Does it have something to do with DateTime? - # data = com.maybe_iterable_to_list(data) - if index is None: index = default_index(len(data)) @@ -529,10 +529,9 @@ def __init__( # GH 29405: Pre-2.0, this defaulted to float. dtype = np.dtype(object) - else: # data is not None and not is_iterator(data): + else: # data is not None: # is_scalar(data) fails: #data is not None: OK # seems scalar, directly from input only. - if index is None: index = default_index(1) data = [data] @@ -545,7 +544,9 @@ def __init__( if isinstance(data, SingleBlockManager): if dtype is not None: data = data.astype(dtype=dtype, errors="ignore") - elif copy: + copy = False + + if copy: data = data.copy() else: data = sanitize_array(data, index, dtype, copy) From d7007c078b6057e7d7a818c8732e82cd20afa981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 13 Mar 2024 13:23:33 -0300 Subject: [PATCH 20/92] REF Series: Separating contexts. Who needs the SingleBlockManager and who doesn't. Copy if needed. 
--- pandas/core/series.py | 147 +++++++++++++++++++++--------------------- 1 file changed, 74 insertions(+), 73 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8d050c3f375e2..57ad129e6460e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -455,100 +455,101 @@ def __init__( index = default_index(0) data = na_value if dtype is not None else [] else: - # if isinstance(data,(Series, SingleBlockManager, ...)): pass else: index = ensure_index(index) if data is None: data = na_value if len(index) or dtype is not None else [] - # Final data preparation manipulation - if isinstance(data, Index): - if index is None: - index = default_index(len(data)) + # Preparing the SingleBlockManager + if isinstance(data, (Series, SingleBlockManager)): # has the manager. + if isinstance(data, Series): + if index is None: + index = data.index + data = data._mgr.copy(deep=False) + else: + data = data.reindex(index) # Copy the manager + copy = False + index = data.index + data = data._mgr + + elif isinstance(data, SingleBlockManager): + if index is None: + index = data.index + elif not data.index.equals(index) or copy: # TODO: FAIL FAST! + # GH#19275 SingleBlockManager input should only be called + # internally + raise AssertionError( + "Cannot pass both SingleBlockManager " + "`data` argument and a different " + "`index` argument. `copy` must be False." + ) + + if not allow_mgr: + warnings.warn( + f"Passing a {type(data).__name__} to {type(self).__name__} " + "is deprecated and will raise in a future version. 
" + "Use public APIs instead.", + DeprecationWarning, + stacklevel=2, + ) + allow_mgr = True if dtype is not None: - data = data.astype(dtype) + data = data.astype(dtype=dtype, errors="ignore") + copy = False - refs = data._references - data = data._values - copy = False + if copy: + data = data.copy() - elif isinstance(data, np.ndarray): - if index is None: - index = default_index(len(data)) + else: + if isinstance(data, Index): + if index is None: + index = default_index(len(data)) - if len(data.dtype): - # GH#13296 we are dealing with a compound dtype, which - # should be treated as 2D - raise ValueError( - "Cannot construct a Series from an ndarray with " - "compound dtype. Use DataFrame instead." - ) + if dtype is not None: + data = data.astype(dtype) - elif isinstance(data, Series): - if index is None: - index = data.index - data = data._mgr.copy(deep=False) - else: - data = data.reindex(index) + refs = data._references + data = data._values copy = False - data = data._mgr - - elif isinstance(data, SingleBlockManager): - if index is None: - index = data.index - elif not data.index.equals(index) or copy: - # GH#19275 SingleBlockManager input should only be called - # internally - raise AssertionError( - "Cannot pass both SingleBlockManager " - "`data` argument and a different " - "`index` argument. `copy` must be False." - ) - if not allow_mgr: - warnings.warn( - f"Passing a {type(data).__name__} to {type(self).__name__} " - "is deprecated and will raise in a future version. " - "Use public APIs instead.", - DeprecationWarning, - stacklevel=2, - ) - allow_mgr = True + elif isinstance(data, np.ndarray): + if index is None: + index = default_index(len(data)) - elif isinstance(data, ExtensionArray): - if index is None: - index = default_index(len(data)) + if len(data.dtype): + # GH#13296 we are dealing with a compound dtype, which + # should be treated as 2D + raise ValueError( + "Cannot construct a Series from an ndarray with " + "compound dtype. 
Use DataFrame instead." + ) - elif is_list_like(data): - if index is None: - index = default_index(len(data)) + elif isinstance(data, ExtensionArray): + if index is None: + index = default_index(len(data)) - if not len(data) and dtype is None: - # GH 29405: Pre-2.0, this defaulted to float. - dtype = np.dtype(object) + elif is_list_like(data): + if index is None: + index = default_index(len(data)) - else: # data is not None: - # is_scalar(data) fails: #data is not None: OK - # seems scalar, directly from input only. - if index is None: - index = default_index(1) - data = [data] + if not len(data) and dtype is None: + # GH 29405: Pre-2.0, this defaulted to float. + dtype = np.dtype(object) - # Final requirement - if is_list_like(data): - com.require_length_match(data, index) + else: # data is not None: + # is_scalar(data) fails: #data is not None: OK + # seems scalar, directly from input only. + if index is None: + index = default_index(1) + data = [data] - # create/copy the manager - if isinstance(data, SingleBlockManager): - if dtype is not None: - data = data.astype(dtype=dtype, errors="ignore") - copy = False + # Final requirement + if is_list_like(data): + com.require_length_match(data, index) - if copy: - data = data.copy() - else: + # create the manager data = sanitize_array(data, index, dtype, copy) data = SingleBlockManager.from_array(data, index, refs=refs) From 5d40174a7e4141c3a4b2f6a83d4c2daef659ad4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 13 Mar 2024 14:37:50 -0300 Subject: [PATCH 21/92] REF Series: organizing ideas for decoupling index and copy. 
While creating the manager --- pandas/core/series.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 57ad129e6460e..5ce04fcb3b1aa 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -454,8 +454,6 @@ def __init__( if data is None: index = default_index(0) data = na_value if dtype is not None else [] - else: - pass else: index = ensure_index(index) if data is None: @@ -463,16 +461,22 @@ def __init__( # Preparing the SingleBlockManager if isinstance(data, (Series, SingleBlockManager)): # has the manager. + # deep = True # <------ Next step, include this if isinstance(data, Series): + deep = True # <---- and remove this if index is None: index = data.index - data = data._mgr.copy(deep=False) + deep = False + data = data._mgr.copy(deep=deep) + else: - data = data.reindex(index) # Copy the manager - copy = False + key = index + data = data.reindex(index=key) # Copy the manager index = data.index data = data._mgr + copy = False + elif isinstance(data, SingleBlockManager): if index is None: index = data.index @@ -500,7 +504,8 @@ def __init__( copy = False if copy: - data = data.copy() + # data = data.copy(deep) <------ Next step, include this + data = data.copy() # <---- and remove this else: if isinstance(data, Index): From dccda089489b7f2fe303fa2cbfc416bd47a835ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 13 Mar 2024 14:46:32 -0300 Subject: [PATCH 22/92] REF Series - Constructor for Series and Manager: Making parameter deep explicit on copy --- pandas/core/series.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 5ce04fcb3b1aa..56a6c2bf5f183 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -461,9 +461,8 @@ def __init__( # Preparing the SingleBlockManager if isinstance(data, (Series, SingleBlockManager)): # has the manager. 
- # deep = True # <------ Next step, include this + deep = True # if isinstance(data, Series): - deep = True # <---- and remove this if index is None: index = data.index deep = False @@ -504,10 +503,9 @@ def __init__( copy = False if copy: - # data = data.copy(deep) <------ Next step, include this - data = data.copy() # <---- and remove this + data = data.copy(deep) - else: + else: # Creating the SingleBlockManager if isinstance(data, Index): if index is None: index = default_index(len(data)) From ac8ea67ac3de3a35655572a9271fd27626d0c224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 13 Mar 2024 14:59:33 -0300 Subject: [PATCH 23/92] REF Series: Rephrasing Comments --- pandas/core/series.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 56a6c2bf5f183..af3c13f4ef925 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -459,9 +459,11 @@ def __init__( if data is None: data = na_value if len(index) or dtype is not None else [] - # Preparing the SingleBlockManager - if isinstance(data, (Series, SingleBlockManager)): # has the manager. - deep = True # + if isinstance( + data, (Series, SingleBlockManager) + ): # Preparing the SingleBlockManager + deep = True + if isinstance(data, Series): if index is None: index = data.index From eb11e89a67265acc5bc7c654afb7380f89f39021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 13 Mar 2024 17:39:38 -0300 Subject: [PATCH 24/92] REF Series: Organizing TODOs for next steps. 
--- pandas/core/series.py | 68 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index af3c13f4ef925..ef6905eed0f32 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -384,20 +384,23 @@ def __init__( self.name = name return - # Series TASK 1: Capturing relevant data for latter access + # Series TASK 0: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. + + # Series TASK 1: CAPTURE DATA NECESSARY FOR WARNINGS AND CLOSING is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) data_dtype = getattr(data, "dtype", None) original_dtype = dtype refs = None name = ibase.maybe_extract_name(name, data, type(self)) - # Series TASK 1: Validating dtype + # Series TASK 2: VALIDATE BASIC TYPES (meanwhile, dtype only). if dtype is not None: dtype = self._validate_dtype(dtype) - # Series TASK 2: Data Preparation - - # TODO: Investigate. But below the logic changes! + # TODO 10.1: Codes to move to Series TASK copy below. CODE1, CODE 2 + # + # CODE 2.0, TRY TO move below, to copy. + # Note that the logic changes! # Is it so? ExtensionArray copies with None and True # And BlockManagers copies only with True # Since copy maybe None, if copy is None it will enter this @@ -406,18 +409,23 @@ def __init__( if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): data = data.copy() + # TODO 1: Move above, to TASK 0. if isinstance(data, MultiIndex): raise NotImplementedError( "initializing a Series from a MultiIndex is not supported" ) - # It Fails if go to Task 1 + # TODO 10.0: TRY TO UNIFY WITH CODE 2 + # CODE 2.1 if copy is None: copy = False + # TODO 9: + # CODE 1. 
Move to Series TASK 5.A if isinstance(data, SingleBlockManager) and not copy: data = data.copy(deep=False) + # TODO 8N-2: Try to move to Series TASK 7- WARNINGS if not allow_mgr: warnings.warn( f"Passing a {type(data).__name__} to {type(self).__name__} " @@ -427,6 +435,8 @@ def __init__( stacklevel=2, ) + # Series TASK 3: DATA TRANSFORMATION. + # COMMENT: Dict is a just a special case of data preparation. # Here it is being sent to Series, but it could different, for simplicity. # It could be sent to array (for faster manipulation, for example). @@ -444,10 +454,11 @@ def __init__( else None ) - # TODO: Investigate. This is an unknown type that must be converted to list. + # TODO 11: Investigate. This is an unknown type that must be converted to list. if is_list_like(data) and not isinstance(data, Sized): data = list(data) + # Series TASK 4: COMMON INDEX MANIPULATION # Common operations on index na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) if index is None: @@ -459,9 +470,9 @@ def __init__( if data is None: data = na_value if len(index) or dtype is not None else [] - if isinstance( - data, (Series, SingleBlockManager) - ): # Preparing the SingleBlockManager + # Series TASK 5: CREATING OR COPYING THE MANAGER. + # TODO 6: DECOUPLE MANAGER PREPARATION FROM COPYING + if isinstance(data, (Series, SingleBlockManager)): deep = True if isinstance(data, Series): @@ -481,7 +492,16 @@ def __init__( elif isinstance(data, SingleBlockManager): if index is None: index = data.index - elif not data.index.equals(index) or copy: # TODO: FAIL FAST! + + # TODO 3.0: Check if is possible to move to Series TASK-0. Above + # TODO 3.1: Move to Series Task 0 + # TODO 3.2: Unify if-else structure + if ( + isinstance(data, SingleBlockManager) + and index is not None + and not data.index.equals(index) + or copy + ): # GH#19275 SingleBlockManager input should only be called # internally raise AssertionError( @@ -490,6 +510,10 @@ def __init__( "`index` argument. 
`copy` must be False." ) + # TODO 4.0: Check if it is possible to move below to Series TASK 7. + # TODO 4.1: Recreate if + # TODO 4.2: and move. + # TODO 4.3: Unify if-else structure. if not allow_mgr: warnings.warn( f"Passing a {type(data).__name__} to {type(self).__name__} " @@ -500,6 +524,7 @@ def __init__( ) allow_mgr = True + # Series TASK 5.A: COPYING THE MANAGER. if dtype is not None: data = data.astype(dtype=dtype, errors="ignore") copy = False @@ -508,6 +533,10 @@ def __init__( data = data.copy(deep) else: # Creating the SingleBlockManager + # TODO Decouple single element from the other data. + # Use 'single_element' signature. + # TODO 8.0. Separate if-else single element; + # TODO 8.1. Group common 'index' definitions on 'not single_element' cases. if isinstance(data, Index): if index is None: index = default_index(len(data)) @@ -523,6 +552,10 @@ def __init__( if index is None: index = default_index(len(data)) + # TODO 6.0: Prepare if-signature to move to Series TASK-0 + # TODO 6.1: Try to move + # TODO 6.2: Move + # TODO 6.3: Unify if-else signature. if len(data.dtype): # GH#13296 we are dealing with a compound dtype, which # should be treated as 2D @@ -539,29 +572,36 @@ def __init__( if index is None: index = default_index(len(data)) + # TODO 7: Try to Move above, on data preparation. + # TODO 7.0: Prepare if-signature to move to Series TASK- + # TODO 7.1: Try to move + # TODO 7.2: Move + # TODO 7.3: Unify if-else signature. if not len(data) and dtype is None: # GH 29405: Pre-2.0, this defaulted to float. dtype = np.dtype(object) - else: # data is not None: + else: # data is not None: # TODO 12: FIND HOW TO CAPTURE THIS DATA TYPE. # is_scalar(data) fails: #data is not None: OK # seems scalar, directly from input only. if index is None: index = default_index(1) data = [data] - # Final requirement + # Series TASK 5.B: CREATING THE MANAGER. 
+ # Final requirements if is_list_like(data): com.require_length_match(data, index) - # create the manager data = sanitize_array(data, index, dtype, copy) data = SingleBlockManager.from_array(data, index, refs=refs) + # Series TASK 6: CREATE THE MANAGER NDFrame.__init__(self, data) self.name = name self._set_axis(0, index) + # Series TASK 7: RAISE WARNINGS if original_dtype is None and is_pandas_object and data_dtype == np.object_: if self.dtype != data_dtype: warnings.warn( From 5fcdefa9587a2f8623166c951781e6e064e5483f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 13 Mar 2024 20:12:01 -0300 Subject: [PATCH 25/92] REF Series - TODO 1: Move error when data is MultiIndex to 'Series TASK 0' --- pandas/core/series.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index ef6905eed0f32..849094baf2f09 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -385,6 +385,10 @@ def __init__( return # Series TASK 0: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. + if isinstance(data, MultiIndex): + raise NotImplementedError( + "initializing a Series from a MultiIndex is not supported" + ) # Series TASK 1: CAPTURE DATA NECESSARY FOR WARNINGS AND CLOSING is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) @@ -409,12 +413,6 @@ def __init__( if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): data = data.copy() - # TODO 1: Move above, to TASK 0. - if isinstance(data, MultiIndex): - raise NotImplementedError( - "initializing a Series from a MultiIndex is not supported" - ) - # TODO 10.0: TRY TO UNIFY WITH CODE 2 # CODE 2.1 if copy is None: @@ -492,7 +490,8 @@ def __init__( elif isinstance(data, SingleBlockManager): if index is None: index = data.index - + # TODO 1: DONE + # TODO 2: (I possibly just skipped the number) # TODO 3.0: Check if is possible to move to Series TASK-0. 
Above # TODO 3.1: Move to Series Task 0 # TODO 3.2: Unify if-else structure From 28ca344a7ca7641a7b165808223a5a477d8360b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 13 Mar 2024 21:25:19 -0300 Subject: [PATCH 26/92] REF Series - TODO 3: Move Manager AssertionError to Series TASK 1 --- pandas/core/series.py | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 849094baf2f09..fa632594d4795 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -362,6 +362,7 @@ def __init__( name=None, copy: bool | None = None, ) -> None: + # TODO 2: Send to Series Task 7, below. URGENT. Classify this. allow_mgr = False if ( isinstance(data, SingleBlockManager) @@ -384,12 +385,27 @@ def __init__( self.name = name return - # Series TASK 0: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. + # Series TASK 1: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. if isinstance(data, MultiIndex): raise NotImplementedError( "initializing a Series from a MultiIndex is not supported" ) + # TODO 3.0: OK Create if-else-logic to move + # TODO 3.1: OK Check if is possible to move to Series TASK-0. Above + # TODO 3.2: Move to Series Task 0 + # TODO 3.3: Unify if-else structure + if isinstance(data, SingleBlockManager): + # DeMorgan Rule + if not (data.index.equals(index) or index is None) or copy: + # GH#19275 SingleBlockManager input should only be called + # internally + raise AssertionError( + "Cannot pass both SingleBlockManager " + "`data` argument and a different " + "`index` argument. `copy` must be False." 
+ ) + # Series TASK 1: CAPTURE DATA NECESSARY FOR WARNINGS AND CLOSING is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) data_dtype = getattr(data, "dtype", None) @@ -490,24 +506,6 @@ def __init__( elif isinstance(data, SingleBlockManager): if index is None: index = data.index - # TODO 1: DONE - # TODO 2: (I possibly just skipped the number) - # TODO 3.0: Check if is possible to move to Series TASK-0. Above - # TODO 3.1: Move to Series Task 0 - # TODO 3.2: Unify if-else structure - if ( - isinstance(data, SingleBlockManager) - and index is not None - and not data.index.equals(index) - or copy - ): - # GH#19275 SingleBlockManager input should only be called - # internally - raise AssertionError( - "Cannot pass both SingleBlockManager " - "`data` argument and a different " - "`index` argument. `copy` must be False." - ) # TODO 4.0: Check if it is possible to move below to Series TASK 7. # TODO 4.1: Recreate if From ae97b19d5e851198d10e98ea293fdf3f29b87b62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 13 Mar 2024 23:06:23 -0300 Subject: [PATCH 27/92] REF Series TODO 3: Joined with another TODO and detailed the tasks. --- pandas/core/series.py | 91 +++++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 33 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index fa632594d4795..dda812d182f96 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -362,28 +362,29 @@ def __init__( name=None, copy: bool | None = None, ) -> None: - # TODO 2: Send to Series Task 7, below. URGENT. Classify this. allow_mgr = False - if ( - isinstance(data, SingleBlockManager) - and index is None - and dtype is None - and (copy is False or copy is None) - ): - if not allow_mgr: - # GH#52419 - warnings.warn( - f"Passing a {type(data).__name__} to {type(self).__name__} " - "is deprecated and will raise in a future version. 
" - "Use public APIs instead.", - DeprecationWarning, - stacklevel=2, - ) - data = data.copy(deep=False) - # GH#33357 called with just the SingleBlockManager - NDFrame.__init__(self, data) - self.name = name - return + + # # TODO 2: Send to Series Task 7, below. URGENT. Classify this. + # if ( + # isinstance(data, SingleBlockManager) + # and index is None + # and dtype is None + # and (copy is False or copy is None) + # ): + # if not allow_mgr: + # # GH#52419 + # warnings.warn( + # f"Passing a {type(data).__name__} to {type(self).__name__} " + # "is deprecated and will raise in a future version. " + # "Use public APIs instead.", + # DeprecationWarning, + # stacklevel=2, + # ) + # data = data.copy(deep=False) + # # GH#33357 called with just the SingleBlockManager + # NDFrame.__init__(self, data) + # self.name = name + # return # Series TASK 1: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. if isinstance(data, MultiIndex): @@ -391,10 +392,11 @@ def __init__( "initializing a Series from a MultiIndex is not supported" ) + # DONE # TODO 3.0: OK Create if-else-logic to move # TODO 3.1: OK Check if is possible to move to Series TASK-0. Above - # TODO 3.2: Move to Series Task 0 - # TODO 3.3: Unify if-else structure + # TODO 3.2: OK Move to Series Task 0 + # TODO 3.3: OK Unify if-else structure if isinstance(data, SingleBlockManager): # DeMorgan Rule if not (data.index.equals(index) or index is None) or copy: @@ -417,25 +419,48 @@ def __init__( if dtype is not None: dtype = self._validate_dtype(dtype) - # TODO 10.1: Codes to move to Series TASK copy below. CODE1, CODE 2 - # - # CODE 2.0, TRY TO move below, to copy. + # TODO 10: Codes to move to Series TASK 5.B. Copying the Manager, below. + # TRY TO move below, to copy. # Note that the logic changes! - # Is it so? ExtensionArray copies with None and True - # And BlockManagers copies only with True + # Does ExtensionArray copies with None and True? 
+ # BlockManagers copies only with True # Since copy maybe None, if copy is None it will enter this if isinstance(data, (ExtensionArray, np.ndarray)): if copy is not False: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): data = data.copy() - - # TODO 10.0: TRY TO UNIFY WITH CODE 2 - # CODE 2.1 if copy is None: copy = False - # TODO 9: - # CODE 1. Move to Series TASK 5.A + # WORKING HERE!!!!! + # TODO 2: Decouple warning/Manager manipulation IN THE TWO CALLS BELOW. + # TODO 2.1: Join the two Manager checks below into a single if-else + # TODO 2.2: Decouple warnings / DATA MANIPULATION. + # TODO 2.3: Slide the warnings to Series Task 7. + # TODO 2.4: Slide copying the manager to Series TASK 5.A + # TODO 2.5: Check if it is possible to separate NDFrame.__init__ to + # --------- To Series Task 6. + if ( + isinstance(data, SingleBlockManager) + and index is None + and dtype is None + and not copy + ): + if not allow_mgr: + # GH#52419 + warnings.warn( + f"Passing a {type(data).__name__} to {type(self).__name__} " + "is deprecated and will raise in a future version. " + "Use public APIs instead.", + DeprecationWarning, + stacklevel=2, + ) + data = data.copy(deep=False) + # GH#33357 called with just the SingleBlockManager + NDFrame.__init__(self, data) + self.name = name + return + # Move to Series TASK 5 if isinstance(data, SingleBlockManager) and not copy: data = data.copy(deep=False) From ac74810aa19e57687e691c1309cd10ed85270b0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Thu, 14 Mar 2024 06:31:35 -0300 Subject: [PATCH 28/92] REF Series - TODO 2. Organizing two blocks of code with Manager (on TASK 2). 
Step 2.1.1 --- pandas/core/series.py | 44 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index dda812d182f96..86eafed669948 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -432,36 +432,34 @@ def __init__( if copy is None: copy = False - # WORKING HERE!!!!! + # This one I will do a single commit documenting each sub-step, so that + # other programmers can understand the method I am using on this refactoring. # TODO 2: Decouple warning/Manager manipulation IN THE TWO CALLS BELOW. - # TODO 2.1: Join the two Manager checks below into a single if-else + # TODO 2.1.1: <--- Make the two if-else checks below have a common pattern + # TODO 2.1.2: Join the two Manager checks below into a single if-else # TODO 2.2: Decouple warnings / DATA MANIPULATION. # TODO 2.3: Slide the warnings to Series Task 7. # TODO 2.4: Slide copying the manager to Series TASK 5.A # TODO 2.5: Check if it is possible to separate NDFrame.__init__ to # --------- To Series Task 6. - if ( - isinstance(data, SingleBlockManager) - and index is None - and dtype is None - and not copy - ): - if not allow_mgr: - # GH#52419 - warnings.warn( - f"Passing a {type(data).__name__} to {type(self).__name__} " - "is deprecated and will raise in a future version. " - "Use public APIs instead.", - DeprecationWarning, - stacklevel=2, - ) - data = data.copy(deep=False) - # GH#33357 called with just the SingleBlockManager - NDFrame.__init__(self, data) - self.name = name - return + if isinstance(data, SingleBlockManager) and not copy: # <--- + if index is None and dtype is None: # <--- + if not allow_mgr: + # GH#52419 + warnings.warn( + f"Passing a {type(data).__name__} to {type(self).__name__} " + "is deprecated and will raise in a future version. 
" + "Use public APIs instead.", + DeprecationWarning, + stacklevel=2, + ) + data = data.copy(deep=False) + # GH#33357 called with just the SingleBlockManager + NDFrame.__init__(self, data) + self.name = name + return # Move to Series TASK 5 - if isinstance(data, SingleBlockManager) and not copy: + if isinstance(data, SingleBlockManager) and not copy: # <--- data = data.copy(deep=False) # TODO 8N-2: Try to move to Series TASK 7- WARNINGS From b5a372c07341d3bcf367930e48c9a16be1d1fdfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Thu, 14 Mar 2024 06:53:20 -0300 Subject: [PATCH 29/92] REF Series - TODO 2: Organizing two blocks of code with Manager (on TASK 2). Step 2.1.2 --- pandas/core/series.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 86eafed669948..c6fd684403330 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -435,8 +435,8 @@ def __init__( # This one I will do a single commit documenting each sub-step, so that # other programmers can understand the method I am using on this refactoring. # TODO 2: Decouple warning/Manager manipulation IN THE TWO CALLS BELOW. - # TODO 2.1.1: <--- Make the two if-else checks below have a common pattern - # TODO 2.1.2: Join the two Manager checks below into a single if-else + # TODO 2.1.1: (DONE) Make the two if-else checks below have a common pattern + # TODO 2.1.2: <--- Join the two Manager checks below into a single if-else # TODO 2.2: Decouple warnings / DATA MANIPULATION. # TODO 2.3: Slide the warnings to Series Task 7. 
# TODO 2.4: Slide copying the manager to Series TASK 5.A @@ -458,12 +458,8 @@ def __init__( NDFrame.__init__(self, data) self.name = name return - # Move to Series TASK 5 - if isinstance(data, SingleBlockManager) and not copy: # <--- - data = data.copy(deep=False) - # TODO 8N-2: Try to move to Series TASK 7- WARNINGS - if not allow_mgr: + if not allow_mgr: # <--- warnings.warn( f"Passing a {type(data).__name__} to {type(self).__name__} " "is deprecated and will raise in a future version. " @@ -474,7 +470,8 @@ def __init__( # Series TASK 3: DATA TRANSFORMATION. - # COMMENT: Dict is a just a special case of data preparation. + # COMMENT: Dict is SPECIAL case, since it's data has + # data values and index keys. # Here it is being sent to Series, but it could different, for simplicity. # It could be sent to array (for faster manipulation, for example). From 29eb542d3debf07ad9d88ece0daba7c24ba95c68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Thu, 14 Mar 2024 07:51:41 -0300 Subject: [PATCH 30/92] REF Series - TODO 2. Move code on TASK 2 to proper places. Step 2.2 Decouple warnings and data manipulations. --- pandas/core/series.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index c6fd684403330..b5ed565dff45c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -436,14 +436,17 @@ def __init__( # other programmers can understand the method I am using on this refactoring. # TODO 2: Decouple warning/Manager manipulation IN THE TWO CALLS BELOW. # TODO 2.1.1: (DONE) Make the two if-else checks below have a common pattern - # TODO 2.1.2: <--- Join the two Manager checks below into a single if-else - # TODO 2.2: Decouple warnings / DATA MANIPULATION. + # TODO 2.1.2: (DONE) Join the two Manager checks below into a single if-else + # TODO 2.2: <--- Decouple warnings / DATA MANIPULATION. # TODO 2.3: Slide the warnings to Series Task 7. 
# TODO 2.4: Slide copying the manager to Series TASK 5.A - # TODO 2.5: Check if it is possible to separate NDFrame.__init__ to - # --------- To Series Task 6. + # TODO 2.5: (DONE) Check if it is possible to separate copying from + # --------- Data Frame Creation. + # TODO 2.5.1: Move both blocks to TASK 5.A + # TODO 2.5.2: Decouple Copying DataFrame Creation to TASKS 5.A AND 6 + # TODO 2.5.3: Move DataFrame Creation to 'Series Task 6'. if isinstance(data, SingleBlockManager) and not copy: # <--- - if index is None and dtype is None: # <--- + if index is None and dtype is None: # <--- to TASK 7 if not allow_mgr: # GH#52419 warnings.warn( @@ -453,13 +456,8 @@ def __init__( DeprecationWarning, stacklevel=2, ) - data = data.copy(deep=False) - # GH#33357 called with just the SingleBlockManager - NDFrame.__init__(self, data) - self.name = name - return - if not allow_mgr: # <--- + if not allow_mgr: # <--- to TASK 7 warnings.warn( f"Passing a {type(data).__name__} to {type(self).__name__} " "is deprecated and will raise in a future version. " @@ -468,6 +466,14 @@ def __init__( stacklevel=2, ) + if index is None and dtype is None: # TODO 2.5 Starts here. + data = data.copy(deep=False) # <--- to TASK 5.A + # GH#33357 called with just the SingleBlockManager + + NDFrame.__init__(self, data) # < --- to TASK 6 + self.name = name + return + # Series TASK 3: DATA TRANSFORMATION. # COMMENT: Dict is SPECIAL case, since it's data has From cf5070fd00ac0136945e9ca5a8a365f351291c9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Thu, 14 Mar 2024 08:38:44 -0300 Subject: [PATCH 31/92] REF Series - TODO 2. Decouple warnings / data manipulation. 
Steps 2.3.1-2.3.3 --- pandas/core/series.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index b5ed565dff45c..12f95bbef14bb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -412,6 +412,7 @@ def __init__( is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) data_dtype = getattr(data, "dtype", None) original_dtype = dtype + original_data_type = type(data) # For warning in the end refs = None name = ibase.maybe_extract_name(name, data, type(self)) @@ -437,15 +438,32 @@ def __init__( # TODO 2: Decouple warning/Manager manipulation IN THE TWO CALLS BELOW. # TODO 2.1.1: (DONE) Make the two if-else checks below have a common pattern # TODO 2.1.2: (DONE) Join the two Manager checks below into a single if-else - # TODO 2.2: <--- Decouple warnings / DATA MANIPULATION. - # TODO 2.3: Slide the warnings to Series Task 7. + # TODO 2.2: (DONE) Decouple warnings / DATA MANIPULATION. + # TODO 2.3: (Working) Here Slide the warnings to Series Task 7. + # Decouple and Move in one commit. + # TODO 2.3.1 <--- Decouple Dataframe Creation from warning. + # TODO 2.3.2 <--- Check if 'allow_mgr' doesn't changes signature. + # TODO 2.3.3 <--- Capture 'original_data_type' to raise warnings latter. + # TODO 2.3.4 Move # TODO 2.4: Slide copying the manager to Series TASK 5.A # TODO 2.5: (DONE) Check if it is possible to separate copying from # --------- Data Frame Creation. # TODO 2.5.1: Move both blocks to TASK 5.A # TODO 2.5.2: Decouple Copying DataFrame Creation to TASKS 5.A AND 6 # TODO 2.5.3: Move DataFrame Creation to 'Series Task 6'. - if isinstance(data, SingleBlockManager) and not copy: # <--- + if isinstance(data, SingleBlockManager) and not copy: + if index is None and dtype is None: # TODO 2.5 Starts here. 
+ data = data.copy(deep=False) # <--- to TASK 5.A + # GH#33357 called with just the SingleBlockManager + + NDFrame.__init__(self, data) # < --- to TASK 6 + self.name = name + return + + if ( + original_data_type is SingleBlockManager and not copy + ): # <--- Warnings (Change signature) + # if isinstance(data, SingleBlockManager) and not copy: # <--- Warnings if index is None and dtype is None: # <--- to TASK 7 if not allow_mgr: # GH#52419 @@ -466,14 +484,6 @@ def __init__( stacklevel=2, ) - if index is None and dtype is None: # TODO 2.5 Starts here. - data = data.copy(deep=False) # <--- to TASK 5.A - # GH#33357 called with just the SingleBlockManager - - NDFrame.__init__(self, data) # < --- to TASK 6 - self.name = name - return - # Series TASK 3: DATA TRANSFORMATION. # COMMENT: Dict is SPECIAL case, since it's data has From 2edf3ca6e7e7d32a6f276d371b3db0981bb8e23f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Fri, 15 Mar 2024 11:07:50 -0300 Subject: [PATCH 32/92] REF Series - TODO 2. Decouple warnings / data manipulation. Steps 2.3.4 and 2.3.5 --- pandas/core/series.py | 129 ++++++++++++++++++++---------------------- 1 file changed, 60 insertions(+), 69 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 12f95bbef14bb..c07917ca22deb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -13,6 +13,7 @@ import operator import sys from textwrap import dedent +from types import NoneType from typing import ( IO, TYPE_CHECKING, @@ -364,39 +365,13 @@ def __init__( ) -> None: allow_mgr = False - # # TODO 2: Send to Series Task 7, below. URGENT. Classify this. - # if ( - # isinstance(data, SingleBlockManager) - # and index is None - # and dtype is None - # and (copy is False or copy is None) - # ): - # if not allow_mgr: - # # GH#52419 - # warnings.warn( - # f"Passing a {type(data).__name__} to {type(self).__name__} " - # "is deprecated and will raise in a future version. 
" - # "Use public APIs instead.", - # DeprecationWarning, - # stacklevel=2, - # ) - # data = data.copy(deep=False) - # # GH#33357 called with just the SingleBlockManager - # NDFrame.__init__(self, data) - # self.name = name - # return - # Series TASK 1: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. if isinstance(data, MultiIndex): raise NotImplementedError( "initializing a Series from a MultiIndex is not supported" ) - # DONE - # TODO 3.0: OK Create if-else-logic to move - # TODO 3.1: OK Check if is possible to move to Series TASK-0. Above - # TODO 3.2: OK Move to Series Task 0 - # TODO 3.3: OK Unify if-else structure + # TODO FINAL: Check GH#19275 if isinstance(data, SingleBlockManager): # DeMorgan Rule if not (data.index.equals(index) or index is None) or copy: @@ -408,15 +383,21 @@ def __init__( "`index` argument. `copy` must be False." ) - # Series TASK 1: CAPTURE DATA NECESSARY FOR WARNINGS AND CLOSING + # Series TASK 1: CAPTURE INPUT SIGNATURE + # COMMENT: NECESSARY FOR WARNINGS AND ERRORS is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) - data_dtype = getattr(data, "dtype", None) + original_copy = copy if copy else False # convert None to False original_dtype = dtype + original_index_type = type(index) original_data_type = type(data) # For warning in the end + original_data_dtype = getattr(data, "dtype", None) refs = None name = ibase.maybe_extract_name(name, data, type(self)) # Series TASK 2: VALIDATE BASIC TYPES (meanwhile, dtype only). + + # TODO FINAL: Try to move copy validation to here. 
+ # copy = copy if copy else False # convert None to False if dtype is not None: dtype = self._validate_dtype(dtype) @@ -430,6 +411,8 @@ def __init__( if copy is not False: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): data = data.copy() + + # TODO: NEXT Try to if copy is None: copy = False @@ -439,18 +422,24 @@ def __init__( # TODO 2.1.1: (DONE) Make the two if-else checks below have a common pattern # TODO 2.1.2: (DONE) Join the two Manager checks below into a single if-else # TODO 2.2: (DONE) Decouple warnings / DATA MANIPULATION. - # TODO 2.3: (Working) Here Slide the warnings to Series Task 7. + # TODO 2.3 (DONE): Here Slide the warnings to Series Task 7. # Decouple and Move in one commit. - # TODO 2.3.1 <--- Decouple Dataframe Creation from warning. - # TODO 2.3.2 <--- Check if 'allow_mgr' doesn't changes signature. - # TODO 2.3.3 <--- Capture 'original_data_type' to raise warnings latter. - # TODO 2.3.4 Move + # TODO 2.3.1 (DONE) Decouple Dataframe Creation from warning. + # TODO 2.3.2 (DONE) Capture 'original_data_type' to raise warnings latter. + # TODO 2.3.3 (DONE) Check if 'allow_mgr' doesn't changes signature. + # This variable is used only for warnings. Possibly to block on or more + # similar warnings after the first one was raised. Investigate in the end. + # TODO 2.3.4 <--- (DONE) Capture signature 'original_index_dtype' + # and 'original_copy' + # TODO 2.3.5 <--- (DONE) Move # TODO 2.4: Slide copying the manager to Series TASK 5.A # TODO 2.5: (DONE) Check if it is possible to separate copying from # --------- Data Frame Creation. # TODO 2.5.1: Move both blocks to TASK 5.A # TODO 2.5.2: Decouple Copying DataFrame Creation to TASKS 5.A AND 6 # TODO 2.5.3: Move DataFrame Creation to 'Series Task 6'. + # TODO 1: DONE + # TODO 3: DONE if isinstance(data, SingleBlockManager) and not copy: if index is None and dtype is None: # TODO 2.5 Starts here. 
data = data.copy(deep=False) # <--- to TASK 5.A @@ -460,30 +449,6 @@ def __init__( self.name = name return - if ( - original_data_type is SingleBlockManager and not copy - ): # <--- Warnings (Change signature) - # if isinstance(data, SingleBlockManager) and not copy: # <--- Warnings - if index is None and dtype is None: # <--- to TASK 7 - if not allow_mgr: - # GH#52419 - warnings.warn( - f"Passing a {type(data).__name__} to {type(self).__name__} " - "is deprecated and will raise in a future version. " - "Use public APIs instead.", - DeprecationWarning, - stacklevel=2, - ) - - if not allow_mgr: # <--- to TASK 7 - warnings.warn( - f"Passing a {type(data).__name__} to {type(self).__name__} " - "is deprecated and will raise in a future version. " - "Use public APIs instead.", - DeprecationWarning, - stacklevel=2, - ) - # Series TASK 3: DATA TRANSFORMATION. # COMMENT: Dict is SPECIAL case, since it's data has @@ -555,6 +520,12 @@ def __init__( DeprecationWarning, stacklevel=2, ) + + # TODO FINAL: Review warnings + # This not used after this point. + # This variable is used only for warnings. + # Possibly to block on or more similar warnings + # after the first one was raised. allow_mgr = True # Series TASK 5.A: COPYING THE MANAGER. @@ -566,7 +537,7 @@ def __init__( data = data.copy(deep) else: # Creating the SingleBlockManager - # TODO Decouple single element from the other data. + # TODO 8: Decouple single element from the other data. # Use 'single_element' signature. # TODO 8.0. Separate if-else single element; # TODO 8.1. Group common 'index' definitions on 'not single_element' cases. @@ -635,16 +606,36 @@ def __init__( self._set_axis(0, index) # Series TASK 7: RAISE WARNINGS - if original_dtype is None and is_pandas_object and data_dtype == np.object_: - if self.dtype != data_dtype: - warnings.warn( - "Dtype inference on a pandas object " - "(Series, Index, ExtensionArray) is deprecated. The Series " - "constructor will keep the original dtype in the future. 
" - "Call `infer_objects` on the result to get the old behavior.", - FutureWarning, - stacklevel=find_stack_level(), - ) + if ( + original_dtype is None + and is_pandas_object + and original_data_dtype == np.object_ + and self.dtype != original_data_dtype + ): + warnings.warn( + "Dtype inference on a pandas object " + "(Series, Index, ExtensionArray) is deprecated. The Series " + "constructor will keep the original dtype in the future. " + "Call `infer_objects` on the result to get the old behavior.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if original_data_type is SingleBlockManager and not original_copy: + if not allow_mgr: + if original_index_type is NoneType and original_dtype is None: + # TODO FINAL: Check GH#52419 + # This is somewhat peculiar, because the same warning was being + # presented twice. Check if there is a reason for that, + # If so, come back to that code and create a new test. + warnings.warn( + f"Passing a {original_data_type.__name__}" + "to {type(self).__name__} " + "is deprecated and will raise in a future version. " + "Use public APIs instead.", + DeprecationWarning, + stacklevel=2, + ) # ---------------------------------------------------------------------- From 83ef80f4cd8bd47efa448d955241bf724d05a855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Fri, 15 Mar 2024 11:53:20 -0300 Subject: [PATCH 33/92] REF Series - TODO 2. Decouple warnings / data manipulation. Step 2.5.3 fast path on manager arg --- pandas/core/series.py | 67 ++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index c07917ca22deb..de613f6adc865 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -365,17 +365,17 @@ def __init__( ) -> None: allow_mgr = False - # Series TASK 1: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. + # Series TASK 0: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. 
if isinstance(data, MultiIndex): raise NotImplementedError( "initializing a Series from a MultiIndex is not supported" ) - # TODO FINAL: Check GH#19275 if isinstance(data, SingleBlockManager): # DeMorgan Rule if not (data.index.equals(index) or index is None) or copy: - # GH#19275 SingleBlockManager input should only be called + # TODO FINAL: Check GitHub Issue + # GH #19275 SingleBlockManager input should only be called # internally raise AssertionError( "Cannot pass both SingleBlockManager " @@ -416,39 +416,6 @@ def __init__( if copy is None: copy = False - # This one I will do a single commit documenting each sub-step, so that - # other programmers can understand the method I am using on this refactoring. - # TODO 2: Decouple warning/Manager manipulation IN THE TWO CALLS BELOW. - # TODO 2.1.1: (DONE) Make the two if-else checks below have a common pattern - # TODO 2.1.2: (DONE) Join the two Manager checks below into a single if-else - # TODO 2.2: (DONE) Decouple warnings / DATA MANIPULATION. - # TODO 2.3 (DONE): Here Slide the warnings to Series Task 7. - # Decouple and Move in one commit. - # TODO 2.3.1 (DONE) Decouple Dataframe Creation from warning. - # TODO 2.3.2 (DONE) Capture 'original_data_type' to raise warnings latter. - # TODO 2.3.3 (DONE) Check if 'allow_mgr' doesn't changes signature. - # This variable is used only for warnings. Possibly to block on or more - # similar warnings after the first one was raised. Investigate in the end. - # TODO 2.3.4 <--- (DONE) Capture signature 'original_index_dtype' - # and 'original_copy' - # TODO 2.3.5 <--- (DONE) Move - # TODO 2.4: Slide copying the manager to Series TASK 5.A - # TODO 2.5: (DONE) Check if it is possible to separate copying from - # --------- Data Frame Creation. - # TODO 2.5.1: Move both blocks to TASK 5.A - # TODO 2.5.2: Decouple Copying DataFrame Creation to TASKS 5.A AND 6 - # TODO 2.5.3: Move DataFrame Creation to 'Series Task 6'. 
- # TODO 1: DONE - # TODO 3: DONE - if isinstance(data, SingleBlockManager) and not copy: - if index is None and dtype is None: # TODO 2.5 Starts here. - data = data.copy(deep=False) # <--- to TASK 5.A - # GH#33357 called with just the SingleBlockManager - - NDFrame.__init__(self, data) # < --- to TASK 6 - self.name = name - return - # Series TASK 3: DATA TRANSFORMATION. # COMMENT: Dict is SPECIAL case, since it's data has @@ -505,6 +472,34 @@ def __init__( copy = False elif isinstance(data, SingleBlockManager): + # todo management + # This one I will do a single commit documenting each sub-step, so + # that other programmers can understand the refactoring procedure. + # TODO 1: DONE + # TODO 3: DONE + # TODO 2.1: (DONE) Organize if-else logic to visualize decoupling + # TODO 2.2: (DONE) Decouple warnings / DATA MANIPULATION. + # TODO 2.3 (DONE): Here Slide the warnings to Series Task 7. + # TODO 2.4: (DONE) Slide copying the manager to Series TASK 5.A + # TODO 2.5.0: (DONE) Check if it is possible to separate copying + # --------- from DataFrame Creation. + # TODO 2.5.1: (DONE) Move block to TASK 5.A + # TODO 2.5.2: (DONE) Decouple DF Copying from Creation. + # Send to to TASKS 5.A AND 6. + # TODO 2: Decouple warning/Manager manipulation IN THE TWO CALLS BELOW. + # TODO 2.5.3: <--- Grouping again because it is a Fast Path + # --------- for DataFrame Creation + # TODO 2.5.4: Implement fast path logic + # TODO 2.5.5: Move DataFrame Creation to 'Series Task 6'. 
+ if not copy: + # GH#33357 called with just the SingleBlockManager + # Note, this is a fast track to + if index is None and dtype is None: + data = data.copy(deep=False) + NDFrame.__init__(self, data) # < --- MOVE TO TASK 6 + self.name = name # < --- MOVE + return # < --- MOVE + if index is None: index = data.index From 7ea6b8e84db6f4e7cc8f569d15ff085ecb7a4e34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Fri, 15 Mar 2024 11:56:40 -0300 Subject: [PATCH 34/92] REF Series - TODO 2. Decouple warnings / data manipulation. Organizing TODOs... --- pandas/core/series.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index de613f6adc865..8d1955f9d1247 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -363,6 +363,24 @@ def __init__( name=None, copy: bool | None = None, ) -> None: + # todo management + # This one I will do a single commit documenting each sub-step, so + # that other programmers can understand the refactoring procedure. + # TODO 1: DONE + # TODO 3: DONE + # TODO 2.1: (DONE) Organize if-else logic to visualize decoupling + # TODO 2.2: (DONE) Decouple warnings / DATA MANIPULATION. + # TODO 2.3 (DONE): Here Slide the warnings to Series Task 7. + # TODO 2.4: (DONE) Slide copying the manager to Series TASK 5.A + # TODO 2.5.0: (DONE) Check if it is possible to separate copying + # --------- from DataFrame Creation. + # TODO 2.5.1: (DONE) Move block to TASK 5.A + # TODO 2.5.2: (DONE) Decouple DF Copying from Creation. + # Send to to TASKS 5.A AND 6. + # TODO 2: Decouple warning/Manager manipulation IN THE TWO CALLS BELOW. + # TODO 2.5.3: (DONE) Grouping again because it is a Fast Path + # --------- for DataFrame Creation + allow_mgr = False # Series TASK 0: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. 
@@ -472,24 +490,7 @@ def __init__( copy = False elif isinstance(data, SingleBlockManager): - # todo management - # This one I will do a single commit documenting each sub-step, so - # that other programmers can understand the refactoring procedure. - # TODO 1: DONE - # TODO 3: DONE - # TODO 2.1: (DONE) Organize if-else logic to visualize decoupling - # TODO 2.2: (DONE) Decouple warnings / DATA MANIPULATION. - # TODO 2.3 (DONE): Here Slide the warnings to Series Task 7. - # TODO 2.4: (DONE) Slide copying the manager to Series TASK 5.A - # TODO 2.5.0: (DONE) Check if it is possible to separate copying - # --------- from DataFrame Creation. - # TODO 2.5.1: (DONE) Move block to TASK 5.A - # TODO 2.5.2: (DONE) Decouple DF Copying from Creation. - # Send to to TASKS 5.A AND 6. - # TODO 2: Decouple warning/Manager manipulation IN THE TWO CALLS BELOW. - # TODO 2.5.3: <--- Grouping again because it is a Fast Path - # --------- for DataFrame Creation - # TODO 2.5.4: Implement fast path logic + # TODO 2.5.4: < --- Implement fast path logic # TODO 2.5.5: Move DataFrame Creation to 'Series Task 6'. if not copy: # GH#33357 called with just the SingleBlockManager From 8dd002d2957dafac4febcec9f3fcc136fc329eda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Fri, 15 Mar 2024 12:35:09 -0300 Subject: [PATCH 35/92] REF Series - TODO 2. Decouple warnings / data manipulation. Step 2.5.4 and 2.5.5. Fast path and Move --- pandas/core/series.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8d1955f9d1247..122984fb3a012 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -382,6 +382,7 @@ def __init__( # --------- for DataFrame Creation allow_mgr = False + fast_path_manager = False # Series TASK 0: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. 
if isinstance(data, MultiIndex): @@ -490,19 +491,22 @@ def __init__( copy = False elif isinstance(data, SingleBlockManager): - # TODO 2.5.4: < --- Implement fast path logic - # TODO 2.5.5: Move DataFrame Creation to 'Series Task 6'. - if not copy: - # GH#33357 called with just the SingleBlockManager - # Note, this is a fast track to - if index is None and dtype is None: - data = data.copy(deep=False) - NDFrame.__init__(self, data) # < --- MOVE TO TASK 6 - self.name = name # < --- MOVE - return # < --- MOVE + # GH#33357 called with just the SingleBlockManager + # Note, this is a fast track to DF Creation + # TODO 2.5.4: (DONE) Implement fast path logic + # TODO 2.5.5: (DONE) Move DataFrame Creation to 'Series Task 6'. if index is None: - index = data.index + if not copy and dtype is None: + deep = False + fast_path_manager = True + data = data.copy(deep) + # NDFrame.__init__(self, data) # < --- DUPLICATED + # self.name = name # < --- same + # return # < --- Not needed. + # TODO FINAL: Avoid warnings on fast_path_manager? + else: + index = data.index # TODO 4.0: Check if it is possible to move below to Series TASK 7. # TODO 4.1: Recreate if @@ -529,7 +533,7 @@ def __init__( data = data.astype(dtype=dtype, errors="ignore") copy = False - if copy: + if copy or fast_path_manager: data = data.copy(deep) else: # Creating the SingleBlockManager @@ -599,7 +603,8 @@ def __init__( # Series TASK 6: CREATE THE MANAGER NDFrame.__init__(self, data) self.name = name - self._set_axis(0, index) + if not fast_path_manager: + self._set_axis(0, index) # Series TASK 7: RAISE WARNINGS if ( From ff9b4cef505cb1e11362882edef03df45b1e1fb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Fri, 15 Mar 2024 13:52:29 -0300 Subject: [PATCH 36/92] REF Series - TODO 2. Decouple warnings / data manipulation. Reorganizing TODOs... 
--- pandas/core/series.py | 172 +++++++++++++++++++++++------------------- 1 file changed, 94 insertions(+), 78 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 122984fb3a012..2156a4a480236 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -366,20 +366,61 @@ def __init__( # todo management # This one I will do a single commit documenting each sub-step, so # that other programmers can understand the refactoring procedure. - # TODO 1: DONE - # TODO 3: DONE - # TODO 2.1: (DONE) Organize if-else logic to visualize decoupling - # TODO 2.2: (DONE) Decouple warnings / DATA MANIPULATION. - # TODO 2.3 (DONE): Here Slide the warnings to Series Task 7. - # TODO 2.4: (DONE) Slide copying the manager to Series TASK 5.A - # TODO 2.5.0: (DONE) Check if it is possible to separate copying - # --------- from DataFrame Creation. - # TODO 2.5.1: (DONE) Move block to TASK 5.A - # TODO 2.5.2: (DONE) Decouple DF Copying from Creation. - # Send to to TASKS 5.A AND 6. - # TODO 2: Decouple warning/Manager manipulation IN THE TWO CALLS BELOW. - # TODO 2.5.3: (DONE) Grouping again because it is a Fast Path - # --------- for DataFrame Creation + # DONE 1: + # DONE 2.1: Organize if-else logic to visualize decoupling + # DONE 2.2: Decouple warnings / DATA MANIPULATION. + # DONE 2.3: Here Slide the warnings to Series Task 7. + # DONE 2.4: Slide copying the manager to Series TASK 5.A + # DONE 2.5.0: Check if it is possible to separate copying from DF Creation. + # DONE 2.5.1: Move block to TASK 5.A + # DONE 2.5.2: Decouple DF Copying from Creation. Send to to TASKS 5.A AND 6. + # DONE 2: Decouple warning/Manager manipulation IN THE TWO CALLS BELOW. + # DONE 2.5.3: Grouping again because it is a Fast Path for DataFrame Creation + # DONE 2.5.4: Implement fast path logic + # DONE 2.5.5: Move DataFrame Creation to 'Series Task 6'. + # DONE 3: (DONE) FINAL: Recover the FINAL Steps used on that for final register. 
+ # TODO 4: Move Warning to Series TASK 7 + # TODO 4.1: Recreate if + # TODO 4.2: and move. + # TODO 4.3: Unify if-else structure. + # TODO 5.0: Prepare if-signature to move to Series TASK-0 + # TODO 5.1: Try to move + # TODO 5.2: Move + # TODO 5.3: Unify if-else signature. + # TODO 6: <--- DECOUPLE MANAGER PREPARATION FROM COPYING. + # I realize it will help with the other tasks if I do this first! Let's do it. + + # TODO 6.1: (NEXT) Avoid copying twice the manager when + # type(data) is SingleBlockManager + # TODO 6.2: Unify the copying signature when + # type(data) is Series (index is None: ... else: ...) + # TODO 6.3: Move the copying logic on the series to below. + # TODO 6.4: Unify the if-else logic within the Series+SingleBlockManager) case. + + # TODO 7: Code that changes dtype to object when data satisfies: + # is_list_like, empty and with None, should be moved to Series TASK-1. + # TODO 7.0: Prepare if-signature to move to Series TASK- + # TODO 7.1: Try to move + # TODO 7.2: Move + # TODO 7.3: Unify if-else signature. + # TODO 8: Decouple single element from the other data. + # Use 'single_element' signature. + # TODO 8.0. Separate if-else single element; + # TODO 8.1. Group common 'index' definitions on 'not single_element' cases. + # TODO 10: Codes for Copying ExtensionArray to TASK 5.B. + # TODO 10.1: Try to move copy validation to Series TASK-1 + # TODO 10.2: If possible, use: + # --------- 'copy = copy if copy else False' to convert None to False + # TODO 11: Investigate. This is an unknown type that is being converted to list. + # TODO 12: Review warnings 'allow_mgr' is not used below. + # ------- This variable is used only for warnings. Possibly to block + # ------- one or more similar warnings after the first one was raised. + # TODO 13: Try capture final data type that seems scalar. + # -------- But does not satisfy is_scalar(). It comes directly from args. 
+ # TODO 14: Check GH#52419 + # This is somewhat peculiar, because the same warning was being + # presented twice. Check if there is a reason for that, + # If so, come back to that code and create a new test. allow_mgr = False fast_path_manager = False @@ -402,49 +443,42 @@ def __init__( "`index` argument. `copy` must be False." ) - # Series TASK 1: CAPTURE INPUT SIGNATURE - # COMMENT: NECESSARY FOR WARNINGS AND ERRORS + # Series TASK 1: CAPTURE INPUT SIGNATURE. NECESSARY FOR WARNINGS AND ERRORS. is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) - original_copy = copy if copy else False # convert None to False + original_copy = copy if copy else False # proper way to convert None to False original_dtype = dtype original_index_type = type(index) - original_data_type = type(data) # For warning in the end + original_data_type = type(data) original_data_dtype = getattr(data, "dtype", None) refs = None name = ibase.maybe_extract_name(name, data, type(self)) # Series TASK 2: VALIDATE BASIC TYPES (meanwhile, dtype only). - - # TODO FINAL: Try to move copy validation to here. - # copy = copy if copy else False # convert None to False if dtype is not None: dtype = self._validate_dtype(dtype) - # TODO 10: Codes to move to Series TASK 5.B. Copying the Manager, below. - # TRY TO move below, to copy. - # Note that the logic changes! - # Does ExtensionArray copies with None and True? - # BlockManagers copies only with True - # Since copy maybe None, if copy is None it will enter this + # TODO 10: Codes for Copying ExtensionArray to TASK 5.B. + # TRY TO move below, to copy. Note that the logic changes! + # Does ExtensionArray copies with None and True? BlockManagers copies + # only with True! Since copy maybe None, if copy is None it will enter this. 
if isinstance(data, (ExtensionArray, np.ndarray)): if copy is not False: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): data = data.copy() - - # TODO: NEXT Try to if copy is None: copy = False # Series TASK 3: DATA TRANSFORMATION. - # COMMENT: Dict is SPECIAL case, since it's data has - # data values and index keys. - # Here it is being sent to Series, but it could different, for simplicity. - # It could be sent to array (for faster manipulation, for example). - - # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')] - # raises KeyError). Send it to Series for "standard" construction: if is_dict_like(data) and not is_pandas_object: + # COMMENT: Dict is SPECIAL case, since it's data has + # data values and index keys. + # Here it is being sent to Series, but it could different, for simplicity. + # It could be sent to array (for faster manipulation, for example). + + # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')] + # raises KeyError). Send it to Series for "standard" construction: + data = ( Series( data=list(data.values()), @@ -460,7 +494,6 @@ def __init__( data = list(data) # Series TASK 4: COMMON INDEX MANIPULATION - # Common operations on index na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) if index is None: if data is None: @@ -471,11 +504,21 @@ def __init__( if data is None: data = na_value if len(index) or dtype is not None else [] - # Series TASK 5: CREATING OR COPYING THE MANAGER. - # TODO 6: DECOUPLE MANAGER PREPARATION FROM COPYING + # Series TASK 5: CREATING OR COPYING THE MANAGER. A: PREPARE. B: COPY. + + # TODO 6: DECOUPLE MANAGER PREPARATION FROM COPYING. I realize it will help + # ------ with the other tasks if I do this first! Let's do it. + # TODO 6.1 (NEXT) Avoid copying twice the manager when + # type(data) is SingleBlockManager + # TODO 6.2 Unify the copying signature when + # type(data) is Series (index is None: ... else: ...) 
+ # TODO 6.3 Move the copying logic on the series to below. + # TODO 6.4 Unify the if-else logic within the (Series, SingleBlockManager) case. + if isinstance(data, (Series, SingleBlockManager)): deep = True + # Series TASK 5.A: ADAPTING DATA AND INDEX ON SERIES EACH CASE. if isinstance(data, Series): if index is None: index = data.index @@ -491,27 +534,18 @@ def __init__( copy = False elif isinstance(data, SingleBlockManager): - # GH#33357 called with just the SingleBlockManager - # Note, this is a fast track to DF Creation - - # TODO 2.5.4: (DONE) Implement fast path logic - # TODO 2.5.5: (DONE) Move DataFrame Creation to 'Series Task 6'. if index is None: if not copy and dtype is None: + # TODO FINAL: Avoid warnings on fast_path_manager? + # GH#33357 called with just the SingleBlockManager + # Note, this is a fast track to DF Creation deep = False fast_path_manager = True data = data.copy(deep) - # NDFrame.__init__(self, data) # < --- DUPLICATED - # self.name = name # < --- same - # return # < --- Not needed. - # TODO FINAL: Avoid warnings on fast_path_manager? else: index = data.index - # TODO 4.0: Check if it is possible to move below to Series TASK 7. - # TODO 4.1: Recreate if - # TODO 4.2: and move. - # TODO 4.3: Unify if-else structure. + # TODO 4: Move Warning to TASK 7 if not allow_mgr: warnings.warn( f"Passing a {type(data).__name__} to {type(self).__name__} " @@ -520,15 +554,10 @@ def __init__( DeprecationWarning, stacklevel=2, ) - - # TODO FINAL: Review warnings - # This not used after this point. - # This variable is used only for warnings. - # Possibly to block on or more similar warnings - # after the first one was raised. + # TODO 12: Review warnings 'allow_mgr' is not used below. allow_mgr = True - # Series TASK 5.A: COPYING THE MANAGER. + # Series TASK 5.B: COPYING THE MANAGER. 
if dtype is not None: data = data.astype(dtype=dtype, errors="ignore") copy = False @@ -538,9 +567,6 @@ def __init__( else: # Creating the SingleBlockManager # TODO 8: Decouple single element from the other data. - # Use 'single_element' signature. - # TODO 8.0. Separate if-else single element; - # TODO 8.1. Group common 'index' definitions on 'not single_element' cases. if isinstance(data, Index): if index is None: index = default_index(len(data)) @@ -556,10 +582,7 @@ def __init__( if index is None: index = default_index(len(data)) - # TODO 6.0: Prepare if-signature to move to Series TASK-0 - # TODO 6.1: Try to move - # TODO 6.2: Move - # TODO 6.3: Unify if-else signature. + # TODO 5.0: Prepare if-signature to move to Series TASK-0 if len(data.dtype): # GH#13296 we are dealing with a compound dtype, which # should be treated as 2D @@ -576,18 +599,14 @@ def __init__( if index is None: index = default_index(len(data)) - # TODO 7: Try to Move above, on data preparation. - # TODO 7.0: Prepare if-signature to move to Series TASK- - # TODO 7.1: Try to move - # TODO 7.2: Move - # TODO 7.3: Unify if-else signature. + # TODO 7: Code that changes dtype to object when data satisfies: + # is_list_like, empty and with None, should be moved to Series TASK-1. if not len(data) and dtype is None: # GH 29405: Pre-2.0, this defaulted to float. dtype = np.dtype(object) - else: # data is not None: # TODO 12: FIND HOW TO CAPTURE THIS DATA TYPE. - # is_scalar(data) fails: #data is not None: OK - # seems scalar, directly from input only. + else: # elif data is not None: # this works too + # TODO 13: Try capture final data type that seems scalar. 
if index is None: index = default_index(1) data = [data] @@ -625,10 +644,7 @@ def __init__( if original_data_type is SingleBlockManager and not original_copy: if not allow_mgr: if original_index_type is NoneType and original_dtype is None: - # TODO FINAL: Check GH#52419 - # This is somewhat peculiar, because the same warning was being - # presented twice. Check if there is a reason for that, - # If so, come back to that code and create a new test. + # TODO 14: Check GH#52419 (Review main and use it here.) warnings.warn( f"Passing a {original_data_type.__name__}" "to {type(self).__name__} " From b6d463652ccaa5a849df6480642a8bb29887dc4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Fri, 15 Mar 2024 15:42:02 -0300 Subject: [PATCH 37/92] REF Series - TODO 6. DECOUPLE MANAGER PREPARATION FROM COPYING. Step 6.1 Avoid copying twice the manager when type(data) is SingleBlockManager --- pandas/core/series.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 2156a4a480236..d1f2bfd138e57 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -421,6 +421,9 @@ def __init__( # This is somewhat peculiar, because the same warning was being # presented twice. Check if there is a reason for that, # If so, come back to that code and create a new test. + # TODO 15: Check GitHub Issue + # TODO 16: GH#33357 called with just the SingleBlockManager, + # -------- Avoid warnings on fast_path_manager? allow_mgr = False fast_path_manager = False @@ -434,7 +437,7 @@ def __init__( if isinstance(data, SingleBlockManager): # DeMorgan Rule if not (data.index.equals(index) or index is None) or copy: - # TODO FINAL: Check GitHub Issue + # TODO 15: Check GitHub Issue # GH #19275 SingleBlockManager input should only be called # internally raise AssertionError( @@ -508,8 +511,7 @@ def __init__( # TODO 6: DECOUPLE MANAGER PREPARATION FROM COPYING. 
I realize it will help # ------ with the other tasks if I do this first! Let's do it. - # TODO 6.1 (NEXT) Avoid copying twice the manager when - # type(data) is SingleBlockManager + # DONE 6.1 Avoid copying twice the manager when type(data) is SingleBlockManager # TODO 6.2 Unify the copying signature when # type(data) is Series (index is None: ... else: ...) # TODO 6.3 Move the copying logic on the series to below. @@ -536,12 +538,9 @@ def __init__( elif isinstance(data, SingleBlockManager): if index is None: if not copy and dtype is None: - # TODO FINAL: Avoid warnings on fast_path_manager? - # GH#33357 called with just the SingleBlockManager - # Note, this is a fast track to DF Creation + # TODO 16: # GH#33357 called with just the SingleBlockManager deep = False fast_path_manager = True - data = data.copy(deep) else: index = data.index From cf12b428485015d774fee8059c66f27f12c9e390 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Fri, 15 Mar 2024 16:11:49 -0300 Subject: [PATCH 38/92] REF Series - TODO 4: Move Warning to Series TASK 7. --- pandas/core/series.py | 75 +++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 39 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d1f2bfd138e57..374a8bae3b0ca 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -13,7 +13,6 @@ import operator import sys from textwrap import dedent -from types import NoneType from typing import ( IO, TYPE_CHECKING, @@ -379,10 +378,10 @@ def __init__( # DONE 2.5.4: Implement fast path logic # DONE 2.5.5: Move DataFrame Creation to 'Series Task 6'. # DONE 3: (DONE) FINAL: Recover the FINAL Steps used on that for final register. - # TODO 4: Move Warning to Series TASK 7 - # TODO 4.1: Recreate if - # TODO 4.2: and move. - # TODO 4.3: Unify if-else structure. + # DONE 4: Move Warning to Series TASK 7 + # DONE 4.1: Recreate if + # DONE 4.2: and move. + # DONE 4.3: Unify if-else structure. 
# TODO 5.0: Prepare if-signature to move to Series TASK-0 # TODO 5.1: Try to move # TODO 5.2: Move @@ -390,8 +389,7 @@ def __init__( # TODO 6: <--- DECOUPLE MANAGER PREPARATION FROM COPYING. # I realize it will help with the other tasks if I do this first! Let's do it. - # TODO 6.1: (NEXT) Avoid copying twice the manager when - # type(data) is SingleBlockManager + # DONE 6.1 Avoid copying twice the manager when type(data) is SingleBlockManager # TODO 6.2: Unify the copying signature when # type(data) is Series (index is None: ... else: ...) # TODO 6.3: Move the copying logic on the series to below. @@ -412,15 +410,12 @@ def __init__( # TODO 10.2: If possible, use: # --------- 'copy = copy if copy else False' to convert None to False # TODO 11: Investigate. This is an unknown type that is being converted to list. - # TODO 12: Review warnings 'allow_mgr' is not used below. - # ------- This variable is used only for warnings. Possibly to block - # ------- one or more similar warnings after the first one was raised. + # DONE 12: 'allow_mgr' were not used anyware. # TODO 13: Try capture final data type that seems scalar. # -------- But does not satisfy is_scalar(). It comes directly from args. - # TODO 14: Check GH#52419 - # This is somewhat peculiar, because the same warning was being - # presented twice. Check if there is a reason for that, - # If so, come back to that code and create a new test. + # TODO 14: Check GH#52419. This is somewhat peculiar. There were 3 identical + # -------- warnings. Check if there is a reason for it. If so: + # -------- fix and create a new test. # TODO 15: Check GitHub Issue # TODO 16: GH#33357 called with just the SingleBlockManager, # -------- Avoid warnings on fast_path_manager? @@ -448,9 +443,9 @@ def __init__( # Series TASK 1: CAPTURE INPUT SIGNATURE. NECESSARY FOR WARNINGS AND ERRORS. 
is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) - original_copy = copy if copy else False # proper way to convert None to False + # original_copy = copy if copy else False # proper way to convert None to False original_dtype = dtype - original_index_type = type(index) + # original_index_type = type(index) original_data_type = type(data) original_data_dtype = getattr(data, "dtype", None) refs = None @@ -544,18 +539,6 @@ def __init__( else: index = data.index - # TODO 4: Move Warning to TASK 7 - if not allow_mgr: - warnings.warn( - f"Passing a {type(data).__name__} to {type(self).__name__} " - "is deprecated and will raise in a future version. " - "Use public APIs instead.", - DeprecationWarning, - stacklevel=2, - ) - # TODO 12: Review warnings 'allow_mgr' is not used below. - allow_mgr = True - # Series TASK 5.B: COPYING THE MANAGER. if dtype is not None: data = data.astype(dtype=dtype, errors="ignore") @@ -640,18 +623,32 @@ def __init__( stacklevel=find_stack_level(), ) - if original_data_type is SingleBlockManager and not original_copy: + if original_data_type is SingleBlockManager: if not allow_mgr: - if original_index_type is NoneType and original_dtype is None: - # TODO 14: Check GH#52419 (Review main and use it here.) - warnings.warn( - f"Passing a {original_data_type.__name__}" - "to {type(self).__name__} " - "is deprecated and will raise in a future version. " - "Use public APIs instead.", - DeprecationWarning, - stacklevel=2, - ) + warnings.warn( + f"Passing a {type(data).__name__} to {type(self).__name__} " + "is deprecated and will raise in a future version. " + "Use public APIs instead.", + DeprecationWarning, + stacklevel=2, + ) + # DONE 12: Review warnings 'allow_mgr' is not used below. + # This code is no longer necessary. 
+ # allow_mgr = True + + # THIS IS NO LONGER NECESSARY + # if original_data_type is SingleBlockManager and not original_copy: + # if not allow_mgr: + # if original_index_type is NoneType and original_dtype is None: + # # DONE 14: Check GH#52419 (Review main and use it here.) + # warnings.warn( + # f"Passing a {original_data_type.__name__}" + # "to {type(self).__name__} " + # "is deprecated and will raise in a future version. " + # "Use public APIs instead.", + # DeprecationWarning, + # stacklevel=2, + # ) # ---------------------------------------------------------------------- From 4b7215cc063188e6ffd0552d6e3d7306f2e9cec0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Fri, 15 Mar 2024 17:40:33 -0300 Subject: [PATCH 39/92] REF Series - TODO 10. Move copy code for ExtendedArrays and NDArrays to TASK 5.B. Steps 10.1-4 --- pandas/core/series.py | 90 ++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 35 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 374a8bae3b0ca..710d41172caf5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -382,10 +382,7 @@ def __init__( # DONE 4.1: Recreate if # DONE 4.2: and move. # DONE 4.3: Unify if-else structure. - # TODO 5.0: Prepare if-signature to move to Series TASK-0 - # TODO 5.1: Try to move - # TODO 5.2: Move - # TODO 5.3: Unify if-else signature. + # DONE 5.0: Move ndarray ValueError to TASK-0. # TODO 6: <--- DECOUPLE MANAGER PREPARATION FROM COPYING. # I realize it will help with the other tasks if I do this first! Let's do it. @@ -405,10 +402,17 @@ def __init__( # Use 'single_element' signature. # TODO 8.0. Separate if-else single element; # TODO 8.1. Group common 'index' definitions on 'not single_element' cases. - # TODO 10: Codes for Copying ExtensionArray to TASK 5.B. 
- # TODO 10.1: Try to move copy validation to Series TASK-1 - # TODO 10.2: If possible, use: - # --------- 'copy = copy if copy else False' to convert None to False + + # TODO 10: <---Move codes for Copying ExtensionArray to TASK 5.B. + # DONE 10.1: <--- Understand that the logic is different for + # --------- ExtensionArrays + Arrays vs + # --------- Managers, Series, etc. + # DONE 10.2: <--- Split if-else logic for Extension Arrays and arrays + # DONE 10.3: <--- Move np.ndarray + # DONE 10.4: <--- Move ExtensionArray + # TODO 10.5: Unify if-else np.ndarray + # TODO 10.6: Unify if-else ExtensionArray + # TODO 11: Investigate. This is an unknown type that is being converted to list. # DONE 12: 'allow_mgr' were not used anyware. # TODO 13: Try capture final data type that seems scalar. @@ -419,10 +423,22 @@ def __init__( # TODO 15: Check GitHub Issue # TODO 16: GH#33357 called with just the SingleBlockManager, # -------- Avoid warnings on fast_path_manager? + # TODO 17: Invert the name 'Series TASK 0' and 'Series TASK 2'. + # -------- There is an array error that most be done after validating allow_mgr = False fast_path_manager = False + # TODO 17: Invert the name 'Series TASK 0' and 'Series TASK 2'. + # Series TASK 2: VALIDATE BASIC TYPES (meanwhile, dtype only). + if dtype is not None: + dtype = self._validate_dtype(dtype) + + copy_arrays = copy is True or copy is None # for Arrays and ExtendedArrays + # original_copy_arrays = copy_arrays + copy = copy is True # This is for Series and Block Manager + # original_copy = copy + # Series TASK 0: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. 
if isinstance(data, MultiIndex): raise NotImplementedError( @@ -433,17 +449,25 @@ def __init__( # DeMorgan Rule if not (data.index.equals(index) or index is None) or copy: # TODO 15: Check GitHub Issue - # GH #19275 SingleBlockManager input should only be called - # internally + # GH #19275 SingleBlockManager input should only be called internally raise AssertionError( "Cannot pass both SingleBlockManager " "`data` argument and a different " "`index` argument. `copy` must be False." ) + if isinstance(data, np.ndarray): + if len(data.dtype): + # GH#13296 we are dealing with a compound dtype, + # which should be treated as 2D. + raise ValueError( + "Cannot construct a Series from an ndarray with " + "compound dtype. Use DataFrame instead." + ) + # Series TASK 1: CAPTURE INPUT SIGNATURE. NECESSARY FOR WARNINGS AND ERRORS. is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) - # original_copy = copy if copy else False # proper way to convert None to False + # original_copy = original_copy # defined above. original_dtype = dtype # original_index_type = type(index) original_data_type = type(data) @@ -451,21 +475,6 @@ def __init__( refs = None name = ibase.maybe_extract_name(name, data, type(self)) - # Series TASK 2: VALIDATE BASIC TYPES (meanwhile, dtype only). - if dtype is not None: - dtype = self._validate_dtype(dtype) - - # TODO 10: Codes for Copying ExtensionArray to TASK 5.B. - # TRY TO move below, to copy. Note that the logic changes! - # Does ExtensionArray copies with None and True? BlockManagers copies - # only with True! Since copy maybe None, if copy is None it will enter this. - if isinstance(data, (ExtensionArray, np.ndarray)): - if copy is not False: - if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): - data = data.copy() - if copy is None: - copy = False - # Series TASK 3: DATA TRANSFORMATION. 
if is_dict_like(data) and not is_pandas_object: @@ -548,6 +557,26 @@ def __init__( data = data.copy(deep) else: # Creating the SingleBlockManager + # TODO 10: Move codes for Copying ExtensionArray to TASK 5.B. + # DONE 10.1: Understand that the logic is different for + # --------- ExtensionArrays + Arrays vs + # --------- Managers, Series, etc. + # DONE 10.2: Split if-else logic for Extension Arrays and arrays + # DONE 10.3: Move np.ndarray + # DONE 10.4: Move ExtensionArray + # TODO 10.5: Unify if-else np.ndarray + # TODO 10.6: Unify if-else ExtensionArray + + if isinstance(data, np.ndarray): + if copy_arrays: + if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): + data = data.copy() + + if isinstance(data, ExtensionArray): + if copy_arrays: + if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): + data = data.copy() + # TODO 8: Decouple single element from the other data. if isinstance(data, Index): if index is None: @@ -564,15 +593,6 @@ def __init__( if index is None: index = default_index(len(data)) - # TODO 5.0: Prepare if-signature to move to Series TASK-0 - if len(data.dtype): - # GH#13296 we are dealing with a compound dtype, which - # should be treated as 2D - raise ValueError( - "Cannot construct a Series from an ndarray with " - "compound dtype. Use DataFrame instead." - ) - elif isinstance(data, ExtensionArray): if index is None: index = default_index(len(data)) From 6b9a79274d6f5e040539b56e82a6b2a1608e0087 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Fri, 15 Mar 2024 17:48:48 -0300 Subject: [PATCH 40/92] REF Series - TODO 10. Move copy code for ExtendedArrays and NDArrays to TASK 5.B. 
Steps --- pandas/core/series.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 710d41172caf5..6194ea456d9fa 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -567,16 +567,6 @@ def __init__( # TODO 10.5: Unify if-else np.ndarray # TODO 10.6: Unify if-else ExtensionArray - if isinstance(data, np.ndarray): - if copy_arrays: - if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): - data = data.copy() - - if isinstance(data, ExtensionArray): - if copy_arrays: - if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): - data = data.copy() - # TODO 8: Decouple single element from the other data. if isinstance(data, Index): if index is None: @@ -593,10 +583,18 @@ def __init__( if index is None: index = default_index(len(data)) + if copy_arrays: + if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): + data = data.copy() + elif isinstance(data, ExtensionArray): if index is None: index = default_index(len(data)) + if copy_arrays: + if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): + data = data.copy() + elif is_list_like(data): if index is None: index = default_index(len(data)) From 18f70decbd2eba4107d65777604169fde18b731e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Fri, 15 Mar 2024 18:37:46 -0300 Subject: [PATCH 41/92] REF Series - TODO 7 - dtype Series with arguments equivalent to empty list, with dtype=None, must be object. --- pandas/core/series.py | 47 +++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 6194ea456d9fa..bd612e8b6355e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -392,26 +392,23 @@ def __init__( # TODO 6.3: Move the copying logic on the series to below. # TODO 6.4: Unify the if-else logic within the Series+SingleBlockManager) case. 
- # TODO 7: Code that changes dtype to object when data satisfies: - # is_list_like, empty and with None, should be moved to Series TASK-1. - # TODO 7.0: Prepare if-signature to move to Series TASK- - # TODO 7.1: Try to move - # TODO 7.2: Move - # TODO 7.3: Unify if-else signature. + # DONE 7: Move code to Final requirements. Task 5. + # ------ dtype Series with arguments equivalent to empty list, + # ------ with dtype=None, must be object. # TODO 8: Decouple single element from the other data. # Use 'single_element' signature. # TODO 8.0. Separate if-else single element; # TODO 8.1. Group common 'index' definitions on 'not single_element' cases. - # TODO 10: <---Move codes for Copying ExtensionArray to TASK 5.B. - # DONE 10.1: <--- Understand that the logic is different for + # DONE 10: Move codes for Copying ExtensionArray to TASK 5.B. + # DONE 10.1: Understand that the logic is different for # --------- ExtensionArrays + Arrays vs # --------- Managers, Series, etc. - # DONE 10.2: <--- Split if-else logic for Extension Arrays and arrays - # DONE 10.3: <--- Move np.ndarray - # DONE 10.4: <--- Move ExtensionArray - # TODO 10.5: Unify if-else np.ndarray - # TODO 10.6: Unify if-else ExtensionArray + # DONE 10.2: Split if-else logic for Extension Arrays and arrays + # DONE 10.3: Move np.ndarray + # DONE 10.4: Move ExtensionArray + # OTHERTODO: 10.5: Unify if-else np.ndarray --- Unnecessary. + # OTHERTODO: 10.6: Unify if-else ExtensionArray --- Solves in OTHERTODO. # TODO 11: Investigate. This is an unknown type that is being converted to list. # DONE 12: 'allow_mgr' were not used anyware. @@ -557,15 +554,7 @@ def __init__( data = data.copy(deep) else: # Creating the SingleBlockManager - # TODO 10: Move codes for Copying ExtensionArray to TASK 5.B. - # DONE 10.1: Understand that the logic is different for - # --------- ExtensionArrays + Arrays vs - # --------- Managers, Series, etc. 
- # DONE 10.2: Split if-else logic for Extension Arrays and arrays - # DONE 10.3: Move np.ndarray - # DONE 10.4: Move ExtensionArray - # TODO 10.5: Unify if-else np.ndarray - # TODO 10.6: Unify if-else ExtensionArray + list_like_input = False # TODO 8: Decouple single element from the other data. if isinstance(data, Index): @@ -599,11 +588,7 @@ def __init__( if index is None: index = default_index(len(data)) - # TODO 7: Code that changes dtype to object when data satisfies: - # is_list_like, empty and with None, should be moved to Series TASK-1. - if not len(data) and dtype is None: - # GH 29405: Pre-2.0, this defaulted to float. - dtype = np.dtype(object) + list_like_input = True else: # elif data is not None: # this works too # TODO 13: Try capture final data type that seems scalar. @@ -611,11 +596,17 @@ def __init__( index = default_index(1) data = [data] - # Series TASK 5.B: CREATING THE MANAGER. # Final requirements if is_list_like(data): com.require_length_match(data, index) + if list_like_input and dtype is None: + if not len(data): + # GH 29405: Pre-2.0, this defaulted to float. + dtype = np.dtype(object) + + # Series TASK 5.B: CREATING THE MANAGER. + data = sanitize_array(data, index, dtype, copy) data = SingleBlockManager.from_array(data, index, refs=refs) From 0be3e2daf5678e55809db89fc1a3c3e60d089739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Fri, 15 Mar 2024 18:49:47 -0300 Subject: [PATCH 42/92] REF Series: Reorganizing TODOs --- pandas/core/series.py | 55 +++++++++++-------------------------------- 1 file changed, 14 insertions(+), 41 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index bd612e8b6355e..3d0a9cd799650 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -383,7 +383,7 @@ def __init__( # DONE 4.2: and move. # DONE 4.3: Unify if-else structure. # DONE 5.0: Move ndarray ValueError to TASK-0. - # TODO 6: <--- DECOUPLE MANAGER PREPARATION FROM COPYING. 
+ # TODO 6: DECOUPLE MANAGER PREPARATION FROM COPYING. # I realize it will help with the other tasks if I do this first! Let's do it. # DONE 6.1 Avoid copying twice the manager when type(data) is SingleBlockManager @@ -392,7 +392,7 @@ def __init__( # TODO 6.3: Move the copying logic on the series to below. # TODO 6.4: Unify the if-else logic within the Series+SingleBlockManager) case. - # DONE 7: Move code to Final requirements. Task 5. + # DONE 7: <--- Move code to Final Requirements. Task 5. # ------ dtype Series with arguments equivalent to empty list, # ------ with dtype=None, must be object. # TODO 8: Decouple single element from the other data. @@ -410,33 +410,30 @@ def __init__( # OTHERTODO: 10.5: Unify if-else np.ndarray --- Unnecessary. # OTHERTODO: 10.6: Unify if-else ExtensionArray --- Solves in OTHERTODO. - # TODO 11: Investigate. This is an unknown type that is being converted to list. - # DONE 12: 'allow_mgr' were not used anyware. - # TODO 13: Try capture final data type that seems scalar. + # DONE 11: Invert the name 'Series TASK 0' and 'Series TASK 2'. + # -------- There is an array error that most be done after validating + # TODO 12: Investigate. This is an unknown type that is being converted to list. + # DONE 13: 'allow_mgr' were not used anyware. + # TODO 14: Try capture final data type that seems scalar. # -------- But does not satisfy is_scalar(). It comes directly from args. - # TODO 14: Check GH#52419. This is somewhat peculiar. There were 3 identical + # TODO 15: Check GH#52419. This is somewhat peculiar. There were 3 identical # -------- warnings. Check if there is a reason for it. If so: # -------- fix and create a new test. - # TODO 15: Check GitHub Issue - # TODO 16: GH#33357 called with just the SingleBlockManager, + # TODO 16: Check GitHub Issue + # TODO 17: GH#33357 called with just the SingleBlockManager, # -------- Avoid warnings on fast_path_manager? - # TODO 17: Invert the name 'Series TASK 0' and 'Series TASK 2'. 
- # -------- There is an array error that most be done after validating allow_mgr = False fast_path_manager = False - # TODO 17: Invert the name 'Series TASK 0' and 'Series TASK 2'. - # Series TASK 2: VALIDATE BASIC TYPES (meanwhile, dtype only). + # Series TASK 0: VALIDATE BASIC TYPES. if dtype is not None: dtype = self._validate_dtype(dtype) - copy_arrays = copy is True or copy is None # for Arrays and ExtendedArrays - # original_copy_arrays = copy_arrays - copy = copy is True # This is for Series and Block Manager - # original_copy = copy + copy_arrays = copy is True or copy is None # Arrays and ExtendedArrays + copy = copy is True # Series and Manager - # Series TASK 0: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. + # Series TASK 1: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. if isinstance(data, MultiIndex): raise NotImplementedError( "initializing a Series from a MultiIndex is not supported" @@ -464,16 +461,13 @@ def __init__( # Series TASK 1: CAPTURE INPUT SIGNATURE. NECESSARY FOR WARNINGS AND ERRORS. is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) - # original_copy = original_copy # defined above. original_dtype = dtype - # original_index_type = type(index) original_data_type = type(data) original_data_dtype = getattr(data, "dtype", None) refs = None name = ibase.maybe_extract_name(name, data, type(self)) # Series TASK 3: DATA TRANSFORMATION. - if is_dict_like(data) and not is_pandas_object: # COMMENT: Dict is SPECIAL case, since it's data has # data values and index keys. @@ -482,7 +476,6 @@ def __init__( # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')] # raises KeyError). Send it to Series for "standard" construction: - data = ( Series( data=list(data.values()), @@ -517,7 +510,6 @@ def __init__( # type(data) is Series (index is None: ... else: ...) # TODO 6.3 Move the copying logic on the series to below. # TODO 6.4 Unify the if-else logic within the (Series, SingleBlockManager) case. 
- if isinstance(data, (Series, SingleBlockManager)): deep = True @@ -591,7 +583,6 @@ def __init__( list_like_input = True else: # elif data is not None: # this works too - # TODO 13: Try capture final data type that seems scalar. if index is None: index = default_index(1) data = [data] @@ -606,7 +597,6 @@ def __init__( dtype = np.dtype(object) # Series TASK 5.B: CREATING THE MANAGER. - data = sanitize_array(data, index, dtype, copy) data = SingleBlockManager.from_array(data, index, refs=refs) @@ -641,23 +631,6 @@ def __init__( DeprecationWarning, stacklevel=2, ) - # DONE 12: Review warnings 'allow_mgr' is not used below. - # This code is no longer necessary. - # allow_mgr = True - - # THIS IS NO LONGER NECESSARY - # if original_data_type is SingleBlockManager and not original_copy: - # if not allow_mgr: - # if original_index_type is NoneType and original_dtype is None: - # # DONE 14: Check GH#52419 (Review main and use it here.) - # warnings.warn( - # f"Passing a {original_data_type.__name__}" - # "to {type(self).__name__} " - # "is deprecated and will raise in a future version. " - # "Use public APIs instead.", - # DeprecationWarning, - # stacklevel=2, - # ) # ---------------------------------------------------------------------- From 07091c890077d1bb4b6286624bad1c7c7a1ff158 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sat, 16 Mar 2024 10:17:10 -0300 Subject: [PATCH 43/92] REF Series - TODO 10. DECOUPLE MANAGER PREPARATION FROM COPYING. Step 6.3 - Simplify if-else logic on series and manager --- pandas/core/series.py | 46 +++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3d0a9cd799650..bed0d4fa74090 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -383,7 +383,7 @@ def __init__( # DONE 4.2: and move. # DONE 4.3: Unify if-else structure. # DONE 5.0: Move ndarray ValueError to TASK-0. 
- # TODO 6: DECOUPLE MANAGER PREPARATION FROM COPYING. + # TODO 6: <--- WORKING HERE!! DECOUPLE MANAGER PREPARATION FROM COPYING. # I realize it will help with the other tasks if I do this first! Let's do it. # DONE 6.1 Avoid copying twice the manager when type(data) is SingleBlockManager @@ -392,7 +392,7 @@ def __init__( # TODO 6.3: Move the copying logic on the series to below. # TODO 6.4: Unify the if-else logic within the Series+SingleBlockManager) case. - # DONE 7: <--- Move code to Final Requirements. Task 5. + # DONE 7: Move code to Final Requirements. Task 5. # ------ dtype Series with arguments equivalent to empty list, # ------ with dtype=None, must be object. # TODO 8: Decouple single element from the other data. @@ -422,6 +422,7 @@ def __init__( # TODO 16: Check GitHub Issue # TODO 17: GH#33357 called with just the SingleBlockManager, # -------- Avoid warnings on fast_path_manager? + # TODO 18: Check if DataFrame.astype() copies allow_mgr = False fast_path_manager = False @@ -503,43 +504,42 @@ def __init__( # Series TASK 5: CREATING OR COPYING THE MANAGER. A: PREPARE. B: COPY. + # START WORKING HERE 2024-03-16 Saturday Morning! # TODO 6: DECOUPLE MANAGER PREPARATION FROM COPYING. I realize it will help # ------ with the other tasks if I do this first! Let's do it. # DONE 6.1 Avoid copying twice the manager when type(data) is SingleBlockManager - # TODO 6.2 Unify the copying signature when + # DONE 6.2 Unify the copying signature when # type(data) is Series (index is None: ... else: ...) - # TODO 6.3 Move the copying logic on the series to below. - # TODO 6.4 Unify the if-else logic within the (Series, SingleBlockManager) case. + # DONE 6.3 Simplify logic on copy for both Series and manager. + # -------- + # TODO 6.4 Move the copying logic on the series to below. + # TODO 6.5 Unify the if-else logic within the (Series, SingleBlockManager) case. 
if isinstance(data, (Series, SingleBlockManager)): deep = True # Series TASK 5.A: ADAPTING DATA AND INDEX ON SERIES EACH CASE. if isinstance(data, Series): - if index is None: - index = data.index - deep = False - data = data._mgr.copy(deep=deep) + copy = True if index is None else False # I think it's better verbose. + deep = False if index is None else True - else: - key = index - data = data.reindex(index=key) # Copy the manager - index = data.index - data = data._mgr + if index is not None: + data = data.reindex(index) # Copy the manager - copy = False + data = data._mgr + index = data.index elif isinstance(data, SingleBlockManager): - if index is None: - if not copy and dtype is None: - # TODO 16: # GH#33357 called with just the SingleBlockManager - deep = False - fast_path_manager = True - else: - index = data.index + if index is None and not copy and dtype is None: + # TODO 16: # GH#33357 called with just the SingleBlockManager + deep = False + fast_path_manager = True + + index = data.index # Series TASK 5.B: COPYING THE MANAGER. if dtype is not None: - data = data.astype(dtype=dtype, errors="ignore") + # TODO 18: Check if DataFrame.astype() copies + data = data.astype(dtype=dtype, errors="ignore") # Copy the manager? copy = False if copy or fast_path_manager: From a80760f73de23115ff31a0a390e5dd9e6aa5cb22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sat, 16 Mar 2024 11:32:53 -0300 Subject: [PATCH 44/92] REF Series - TODO 10. DECOUPLE MANAGER PREPARATION FROM COPYING. Step 6.3 - Simplify if-else logic on series and manager. part 2 --- pandas/core/series.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index bed0d4fa74090..9e44d2b9f7509 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -504,7 +504,6 @@ def __init__( # Series TASK 5: CREATING OR COPYING THE MANAGER. A: PREPARE. B: COPY. 
- # START WORKING HERE 2024-03-16 Saturday Morning! # TODO 6: DECOUPLE MANAGER PREPARATION FROM COPYING. I realize it will help # ------ with the other tasks if I do this first! Let's do it. # DONE 6.1 Avoid copying twice the manager when type(data) is SingleBlockManager @@ -519,24 +518,23 @@ def __init__( # Series TASK 5.A: ADAPTING DATA AND INDEX ON SERIES EACH CASE. if isinstance(data, Series): - copy = True if index is None else False # I think it's better verbose. - deep = False if index is None else True + # copy logic is delicate and maybe has no been fully implemented. + # Each data instance has it's own logic. + copy = True if index is None else False + deep = not copy if index is not None: data = data.reindex(index) # Copy the manager data = data._mgr - index = data.index elif isinstance(data, SingleBlockManager): - if index is None and not copy and dtype is None: - # TODO 16: # GH#33357 called with just the SingleBlockManager - deep = False - fast_path_manager = True + fast_path_manager = index is None and not copy and dtype is None - index = data.index + index = data.index # Series TASK 5.B: COPYING THE MANAGER. + deep = deep if not fast_path_manager else False if dtype is not None: # TODO 18: Check if DataFrame.astype() copies data = data.astype(dtype=dtype, errors="ignore") # Copy the manager? From d35153801f2c16c9e79c3055100f337aa03bff0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sat, 16 Mar 2024 11:36:26 -0300 Subject: [PATCH 45/92] REF Series - TODO 18. Done --- pandas/core/series.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9e44d2b9f7509..eea93e3bb0821 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -422,7 +422,7 @@ def __init__( # TODO 16: Check GitHub Issue # TODO 17: GH#33357 called with just the SingleBlockManager, # -------- Avoid warnings on fast_path_manager? 
- # TODO 18: Check if DataFrame.astype() copies + # DONE 18: Check if DataFrame.astype() copies. 'copy: bool, default True'. allow_mgr = False fast_path_manager = False @@ -536,8 +536,7 @@ def __init__( # Series TASK 5.B: COPYING THE MANAGER. deep = deep if not fast_path_manager else False if dtype is not None: - # TODO 18: Check if DataFrame.astype() copies - data = data.astype(dtype=dtype, errors="ignore") # Copy the manager? + data = data.astype(dtype=dtype, errors="ignore") # Copy the manager copy = False if copy or fast_path_manager: From bc14bbb488a9f82a476e2640e53b5e0933fc30e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sat, 16 Mar 2024 13:29:19 -0300 Subject: [PATCH 46/92] REF Series - TODO 6. DECOUPLE MANAGER PREPARATION FROM COPYING. Working on NDArrays/Extension case + simplify empty list with none dtype --- pandas/core/series.py | 53 +++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index eea93e3bb0821..e7ea77c0c108f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -383,14 +383,22 @@ def __init__( # DONE 4.2: and move. # DONE 4.3: Unify if-else structure. # DONE 5.0: Move ndarray ValueError to TASK-0. - # TODO 6: <--- WORKING HERE!! DECOUPLE MANAGER PREPARATION FROM COPYING. - # I realize it will help with the other tasks if I do this first! Let's do it. + # TODO 6: DECOUPLE MANAGER PREPARATION FROM COPYING. I realize it will help + # ------ with the other tasks if I do this first! Let's do it. # DONE 6.1 Avoid copying twice the manager when type(data) is SingleBlockManager - # TODO 6.2: Unify the copying signature when + # DONE 6.2 Unify the copying signature when # type(data) is Series (index is None: ... else: ...) - # TODO 6.3: Move the copying logic on the series to below. - # TODO 6.4: Unify the if-else logic within the Series+SingleBlockManager) case. 
+ # DONE 6.3 Simplify logic on copy for both Series and manager. + # -------- + # DONE 6.4 Move the copying logic on the series to below. + # DONE 6.5 Unify the if-else logic within the (Series, SingleBlockManager) case. + # DONE 6.6 Use deep arg for NDArrays and Extension Array + # TODO 6.7 Simplify dtype = object for (Index, arrays, is_list) + + # -------- single element group. + # TODO 6.7 DO TASK 8 HERE + # TODO 6.8 Move single element to outside the (Index, arrays, is_list) group + # TODO 6.9 Separate the index is None case on the group (Index, arrays, is_list) # DONE 7: Move code to Final Requirements. Task 5. # ------ dtype Series with arguments equivalent to empty list, @@ -426,6 +434,7 @@ def __init__( allow_mgr = False fast_path_manager = False + deep = True # deep copy, by standard. # Series TASK 0: VALIDATE BASIC TYPES. if dtype is not None: @@ -511,11 +520,15 @@ def __init__( # type(data) is Series (index is None: ... else: ...) # DONE 6.3 Simplify logic on copy for both Series and manager. # -------- - # TODO 6.4 Move the copying logic on the series to below. - # TODO 6.5 Unify the if-else logic within the (Series, SingleBlockManager) case. + # DONE 6.4 Move the copying logic on the series to below. + # DONE 6.5 Unify the if-else logic within the (Series, SingleBlockManager) case. + # DONE 6.6 Use deep arg for NDArrays and Extension Array + # TODO 6.7 Simplify dtype = object for (Index, arrays, is_list) + + # -------- single element group. + # TODO 6.7 DO TASK 8 HERE + # TODO 6.8 Move single element to outside the (Index, arrays, is_list) group + # TODO 6.9 Separate the index is None case on the group (Index, arrays, is_list) if isinstance(data, (Series, SingleBlockManager)): - deep = True - # Series TASK 5.A: ADAPTING DATA AND INDEX ON SERIES EACH CASE. if isinstance(data, Series): # copy logic is delicate and maybe has no been fully implemented. 
@@ -557,15 +570,7 @@ def __init__( data = data._values copy = False - elif isinstance(data, np.ndarray): - if index is None: - index = default_index(len(data)) - - if copy_arrays: - if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): - data = data.copy() - - elif isinstance(data, ExtensionArray): + elif isinstance(data, (np.ndarray, ExtensionArray)): if index is None: index = default_index(len(data)) @@ -588,10 +593,14 @@ def __init__( if is_list_like(data): com.require_length_match(data, index) - if list_like_input and dtype is None: - if not len(data): - # GH 29405: Pre-2.0, this defaulted to float. - dtype = np.dtype(object) + # GH 29405: Pre-2.0, this defaulted to float. + empty_list = list_like_input and dtype is None and not len(data) + dtype = np.dtype(object) if empty_list else dtype + + # if list_like_input and dtype is None: + # if not len(data): + # # GH 29405: Pre-2.0, this defaulted to float. + # dtype = np.dtype(object) # Series TASK 5.B: CREATING THE MANAGER. data = sanitize_array(data, index, dtype, copy) From 1dfa4b12a6c7c6cbf2eebb31fda5561e2fea60b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sat, 16 Mar 2024 14:01:49 -0300 Subject: [PATCH 47/92] REF Series - TODO 8: Decouple single element from the other data. --- pandas/core/series.py | 96 ++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 56 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index e7ea77c0c108f..c74c9bda857ce 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -386,27 +386,27 @@ def __init__( # TODO 6: DECOUPLE MANAGER PREPARATION FROM COPYING. I realize it will help # ------ with the other tasks if I do this first! Let's do it. 
- # DONE 6.1 Avoid copying twice the manager when type(data) is SingleBlockManager - # DONE 6.2 Unify the copying signature when + # DONE 6.1: Avoid copying twice when type(data) is SingleBlockManager + # DONE 6.2: Unify the copying signature when # type(data) is Series (index is None: ... else: ...) - # DONE 6.3 Simplify logic on copy for both Series and manager. + # DONE 6.3: Simplify logic on copy for both Series and manager. # -------- - # DONE 6.4 Move the copying logic on the series to below. - # DONE 6.5 Unify the if-else logic within the (Series, SingleBlockManager) case. - # DONE 6.6 Use deep arg for NDArrays and Extension Array - # TODO 6.7 Simplify dtype = object for (Index, arrays, is_list) + + # DONE 6.4: Move the copying logic on the series to below. + # DONE 6.5: Unify the if-else within the (Series, SingleBlockManager) case. + # DONE 6.6: Use deep arg for NDArrays and Extension Array + # DONE 6.7: Simplify dtype = object for (Index, arrays, is_list) + # -------- single element group. - # TODO 6.7 DO TASK 8 HERE - # TODO 6.8 Move single element to outside the (Index, arrays, is_list) group + # DONE 6.7: DO TASK 8 HERE + # DONE 6.8 Move single element to outside the (Index, arrays, is_list) group # TODO 6.9 Separate the index is None case on the group (Index, arrays, is_list) # DONE 7: Move code to Final Requirements. Task 5. # ------ dtype Series with arguments equivalent to empty list, # ------ with dtype=None, must be object. - # TODO 8: Decouple single element from the other data. + # DONE 8: Decouple single element from the other data. # Use 'single_element' signature. - # TODO 8.0. Separate if-else single element; - # TODO 8.1. Group common 'index' definitions on 'not single_element' cases. + # DONE 8.0. Separate if-else single element; + # DONE 8.1. Group common 'index' definitions on 'not single_element' cases. # DONE 10: Move codes for Copying ExtensionArray to TASK 5.B. 
# DONE 10.1: Understand that the logic is different for @@ -450,7 +450,6 @@ def __init__( ) if isinstance(data, SingleBlockManager): - # DeMorgan Rule if not (data.index.equals(index) or index is None) or copy: # TODO 15: Check GitHub Issue # GH #19275 SingleBlockManager input should only be called internally @@ -514,20 +513,7 @@ def __init__( # Series TASK 5: CREATING OR COPYING THE MANAGER. A: PREPARE. B: COPY. # TODO 6: DECOUPLE MANAGER PREPARATION FROM COPYING. I realize it will help - # ------ with the other tasks if I do this first! Let's do it. - # DONE 6.1 Avoid copying twice the manager when type(data) is SingleBlockManager - # DONE 6.2 Unify the copying signature when - # type(data) is Series (index is None: ... else: ...) - # DONE 6.3 Simplify logic on copy for both Series and manager. - # -------- - # DONE 6.4 Move the copying logic on the series to below. - # DONE 6.5 Unify the if-else logic within the (Series, SingleBlockManager) case. - # DONE 6.6 Use deep arg for NDArrays and Extension Array - # TODO 6.7 Simplify dtype = object for (Index, arrays, is_list) + - # -------- single element group. - # TODO 6.7 DO TASK 8 HERE - # TODO 6.8 Move single element to outside the (Index, arrays, is_list) group - # TODO 6.9 Separate the index is None case on the group (Index, arrays, is_list) + # TODO 6.9: Separate the index is None case on the group (Index,arrays,is_list) if isinstance(data, (Series, SingleBlockManager)): # Series TASK 5.A: ADAPTING DATA AND INDEX ON SERIES EACH CASE. if isinstance(data, Series): @@ -557,34 +543,37 @@ def __init__( else: # Creating the SingleBlockManager list_like_input = False + if isinstance(data, (Index, np.ndarray, ExtensionArray)) or is_list_like( + data + ): + if isinstance(data, Index): + if index is None: + index = default_index(len(data)) - # TODO 8: Decouple single element from the other data. 
- if isinstance(data, Index): - if index is None: - index = default_index(len(data)) + if dtype is not None: + data = data.astype(dtype) - if dtype is not None: - data = data.astype(dtype) + refs = data._references + data = data._values + copy = False - refs = data._references - data = data._values - copy = False + elif isinstance(data, (np.ndarray, ExtensionArray)): + if index is None: + index = default_index(len(data)) - elif isinstance(data, (np.ndarray, ExtensionArray)): - if index is None: - index = default_index(len(data)) + if copy_arrays: + if dtype is None or astype_is_view( + data.dtype, pandas_dtype(dtype) + ): + data = data.copy() # not np.ndarray.copy(deep=...) - if copy_arrays: - if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): - data = data.copy() + elif is_list_like(data): + if index is None: + index = default_index(len(data)) - elif is_list_like(data): - if index is None: - index = default_index(len(data)) + list_like_input = True - list_like_input = True - - else: # elif data is not None: # this works too + else: # elif data is not None: # this works too # possibly single_element. if index is None: index = default_index(1) data = [data] @@ -593,14 +582,9 @@ def __init__( if is_list_like(data): com.require_length_match(data, index) - # GH 29405: Pre-2.0, this defaulted to float. - empty_list = list_like_input and dtype is None and not len(data) - dtype = np.dtype(object) if empty_list else dtype - - # if list_like_input and dtype is None: - # if not len(data): - # # GH 29405: Pre-2.0, this defaulted to float. - # dtype = np.dtype(object) + # GH 29405: Pre-2.0, this defaulted to float. + empty_list = list_like_input and dtype is None and not len(data) + dtype = np.dtype(object) if empty_list else dtype # Series TASK 5.B: CREATING THE MANAGER. 
data = sanitize_array(data, index, dtype, copy) From 8a7f2b6fc9b544dbc58cde0382befccb570aba48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sat, 16 Mar 2024 14:31:50 -0300 Subject: [PATCH 48/92] REF Series - TODO 6. DECOUPLE MANAGER PREPARATION FROM COPYING. Step 6.9 Isolate `index is None` --- pandas/core/series.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index c74c9bda857ce..255f28a4171c7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -547,9 +547,6 @@ def __init__( data ): if isinstance(data, Index): - if index is None: - index = default_index(len(data)) - if dtype is not None: data = data.astype(dtype) @@ -558,9 +555,6 @@ def __init__( copy = False elif isinstance(data, (np.ndarray, ExtensionArray)): - if index is None: - index = default_index(len(data)) - if copy_arrays: if dtype is None or astype_is_view( data.dtype, pandas_dtype(dtype) @@ -568,16 +562,14 @@ def __init__( data = data.copy() # not np.ndarray.copy(deep=...) elif is_list_like(data): - if index is None: - index = default_index(len(data)) - list_like_input = True else: # elif data is not None: # this works too # possibly single_element. if index is None: - index = default_index(1) data = [data] + index = index if index is not None else default_index(len(data)) + # Final requirements if is_list_like(data): com.require_length_match(data, index) From da2bdac98f963387d73a322ef2cd761b7accab2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sun, 17 Mar 2024 10:43:00 -0300 Subject: [PATCH 49/92] Revert "REF Series: add tests to ensure that series dict constructor preserve dtype" This reverts commit e72cff15d4fabf3173a2e4da6c6639295b567f1d. 
--- pandas/tests/series/test_constructors.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 7efc428cf740e..f428ec5d8990b 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -2257,26 +2257,3 @@ def test_series_with_complex_nan(input_list): result = Series(ser.array) assert ser.dtype == "complex128" tm.assert_series_equal(ser, result) - - -@pytest.mark.parametrize( - "value", - [ - (1), - (1.0), - (1.0 + 1.0j), - ], -) -@pytest.mark.parametrize( - "unused_value", - [ - (True), - ("a"), - ], -) -def test_dict_constructor_preserve_dtype(value, unused_value): - d = {"a": value, "b": unused_value} - e = {"a": value} - s = Series(data=d, index=["a"]) - expected = Series(data=e) - tm.assert_series_equal(s, expected) From 43d5592fec07a8c63ae65868d99463ae01c9eabd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sun, 17 Mar 2024 11:33:03 -0300 Subject: [PATCH 50/92] REF Series - TODO 6. DECOUPLE MANAGER PREPARATION FROM COPYING. Step 6.10 Index, lists, arrays and single element --- pandas/core/series.py | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 965bca57b4382..286a4714485e2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -470,6 +470,7 @@ def __init__( ) # Series TASK 1: CAPTURE INPUT SIGNATURE. NECESSARY FOR WARNINGS AND ERRORS. + is_array = isinstance(data, (np.ndarray, ExtensionArray)) is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) original_dtype = dtype original_data_type = type(data) @@ -514,7 +515,8 @@ def __init__( # Series TASK 5: CREATING OR COPYING THE MANAGER. A: PREPARE. B: COPY. # TODO 6: DECOUPLE MANAGER PREPARATION FROM COPYING. 
I realize it will help - # TODO 6.9: Separate the index is None case on the group (Index,arrays,is_list) + # DONE 6.9: Separate the index is None case on the group (Index,arrays,is_list) + # TODO 6.10: Separate copy logic when data is ndarray or extended array if isinstance(data, (Series, SingleBlockManager)): # Series TASK 5.A: ADAPTING DATA AND INDEX ON SERIES EACH CASE. if isinstance(data, Series): @@ -544,26 +546,19 @@ def __init__( else: # Creating the SingleBlockManager list_like_input = False - if isinstance(data, (Index, np.ndarray, ExtensionArray)) or is_list_like( - data - ): - if isinstance(data, Index): - if dtype is not None: - data = data.astype(dtype) - - refs = data._references - data = data._values - copy = False - - elif isinstance(data, (np.ndarray, ExtensionArray)): - if copy_arrays: - if dtype is None or astype_is_view( - data.dtype, pandas_dtype(dtype) - ): - data = data.copy() # not np.ndarray.copy(deep=...) - - elif is_list_like(data): - list_like_input = True + if isinstance(data, Index): + if dtype is not None: + data = data.astype(dtype) + + refs = data._references + data = data._values + copy = False + + elif is_array: + pass + + elif is_list_like(data): + list_like_input = True else: # elif data is not None: # this works too # possibly single_element. if index is None: @@ -579,6 +574,12 @@ def __init__( empty_list = list_like_input and dtype is None and not len(data) dtype = np.dtype(object) if empty_list else dtype + # copy + if is_array and copy_arrays: + if copy_arrays: + if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): + data = data.copy() # not np.ndarray.copy(deep=...) + # Series TASK 5.B: CREATING THE MANAGER. 
data = sanitize_array(data, index, dtype, copy) data = SingleBlockManager.from_array(data, index, refs=refs) From 9877d2fc914058eee578a0a53ef7aeea214266f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sun, 17 Mar 2024 11:58:38 -0300 Subject: [PATCH 51/92] REF Series - TODO 6. DECOUPLE MANAGER PREPARATION FROM COPYING. Everybody --- pandas/core/series.py | 48 +++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 286a4714485e2..cfdf775227af0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -384,8 +384,7 @@ def __init__( # DONE 4.2: and move. # DONE 4.3: Unify if-else structure. # DONE 5.0: Move ndarray ValueError to TASK-0. - - # TODO 6: DECOUPLE MANAGER PREPARATION FROM COPYING. I realize it will help + # DONE 6: DECOUPLE MANAGER PREPARATION FROM COPYING. I realize it will help # ------ with the other tasks if I do this first! Let's do it. # DONE 6.1: Avoid copying twice when type(data) is SingleBlockManager # DONE 6.2: Unify the copying signature when @@ -399,8 +398,9 @@ def __init__( # -------- single element group. # DONE 6.7: DO TASK 8 HERE # DONE 6.8 Move single element to outside the (Index, arrays, is_list) group - # TODO 6.9 Separate the index is None case on the group (Index, arrays, is_list) - + # DONE 6.9: Separate the index is None case on the group (Index,arrays,is_list) + # DONE 6.10: Separate copy logic when data is ndarray or extended array + # DONE 6.11: Move copy logic below preparation. # DONE 7: Move code to Final Requirements. Task 5. # ------ dtype Series with arguments equivalent to empty list, # ------ with dtype=None, must be object. @@ -512,13 +512,10 @@ def __init__( if data is None: data = na_value if len(index) or dtype is not None else [] - # Series TASK 5: CREATING OR COPYING THE MANAGER. A: PREPARE. B: COPY. - - # TODO 6: DECOUPLE MANAGER PREPARATION FROM COPYING. 
I realize it will help - # DONE 6.9: Separate the index is None case on the group (Index,arrays,is_list) - # TODO 6.10: Separate copy logic when data is ndarray or extended array + # Series TASK 5: PREPARING THE MANAGER if isinstance(data, (Series, SingleBlockManager)): - # Series TASK 5.A: ADAPTING DATA AND INDEX ON SERIES EACH CASE. + require_manager = False + if isinstance(data, Series): # copy logic is delicate and maybe has no been fully implemented. # Each data instance has it's own logic. @@ -535,16 +532,8 @@ def __init__( index = data.index - # Series TASK 5.B: COPYING THE MANAGER. - deep = deep if not fast_path_manager else False - if dtype is not None: - data = data.astype(dtype=dtype, errors="ignore") # Copy the manager - copy = False - - if copy or fast_path_manager: - data = data.copy(deep) - - else: # Creating the SingleBlockManager + else: # Creating the manager + require_manager = True list_like_input = False if isinstance(data, Index): if dtype is not None: @@ -560,7 +549,7 @@ def __init__( elif is_list_like(data): list_like_input = True - else: # elif data is not None: # this works too # possibly single_element. + else: # elif data is not None: # possibly single_element. if index is None: data = [data] @@ -574,23 +563,32 @@ def __init__( empty_list = list_like_input and dtype is None and not len(data) dtype = np.dtype(object) if empty_list else dtype - # copy + # Series TASK 6: COPYING THE MANAGER. + if require_manager: if is_array and copy_arrays: if copy_arrays: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): data = data.copy() # not np.ndarray.copy(deep=...) - # Series TASK 5.B: CREATING THE MANAGER. 
data = sanitize_array(data, index, dtype, copy) data = SingleBlockManager.from_array(data, index, refs=refs) - # Series TASK 6: CREATE THE MANAGER + else: + deep = deep if not fast_path_manager else False + if dtype is not None: + data = data.astype(dtype=dtype, errors="ignore") # Copy the manager + copy = False + + if copy or fast_path_manager: + data = data.copy(deep) + + # Series TASK 7: CREATE THE DATAFRAME NDFrame.__init__(self, data) self.name = name if not fast_path_manager: self._set_axis(0, index) - # Series TASK 7: RAISE WARNINGS + # Series TASK 8: RAISE WARNINGS if ( original_dtype is None and is_pandas_object From eea989dd42c9d1897b398ef9ec3d07a1e7b3af0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sun, 17 Mar 2024 13:11:56 -0300 Subject: [PATCH 52/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. --- pandas/core/series.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index cfdf775227af0..7f5c6b2afe58a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -503,10 +503,22 @@ def __init__( # Series TASK 4: COMMON INDEX MANIPULATION na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) + + # single_element = False + single_element = ( + not is_pandas_object and not is_array and not is_list_like(data) + ) + if index is None: if data is None: index = default_index(0) data = na_value if dtype is not None else [] + else: + pass + # single_element = (not is_pandas_object and + # not is_array and + # not is_list_like(data)) + else: index = ensure_index(index) if data is None: @@ -549,7 +561,7 @@ def __init__( elif is_list_like(data): list_like_input = True - else: # elif data is not None: # possibly single_element. + elif single_element: # elif data is not None: # possibly single_element. 
if index is None: data = [data] From 9692594854c41bddba01c72c29c89584729461ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sun, 17 Mar 2024 13:55:28 -0300 Subject: [PATCH 53/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 2 --- pandas/core/series.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7f5c6b2afe58a..00362398322d7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -504,26 +504,25 @@ def __init__( # Series TASK 4: COMMON INDEX MANIPULATION na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) - # single_element = False - single_element = ( - not is_pandas_object and not is_array and not is_list_like(data) - ) - if index is None: if data is None: index = default_index(0) data = na_value if dtype is not None else [] else: pass - # single_element = (not is_pandas_object and - # not is_array and - # not is_list_like(data)) else: index = ensure_index(index) if data is None: data = na_value if len(index) or dtype is not None else [] + scalar_input = ( + not is_pandas_object + and not is_array + and not is_list_like(data) + and not isinstance(data, SingleBlockManager) + ) + # Series TASK 5: PREPARING THE MANAGER if isinstance(data, (Series, SingleBlockManager)): require_manager = False @@ -547,7 +546,12 @@ def __init__( else: # Creating the manager require_manager = True list_like_input = False - if isinstance(data, Index): + + if scalar_input: # elif data is not None: # possibly single_element. + if index is None: + data = [data] + + elif isinstance(data, Index): if dtype is not None: data = data.astype(dtype) @@ -561,10 +565,6 @@ def __init__( elif is_list_like(data): list_like_input = True - elif single_element: # elif data is not None: # possibly single_element. 
- if index is None: - data = [data] - index = index if index is not None else default_index(len(data)) # Final requirements From d230651a905b153ac7db7a166280fb662d7d57f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sun, 17 Mar 2024 14:35:29 -0300 Subject: [PATCH 54/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 3 --- pandas/core/series.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 00362398322d7..49c61aad30856 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -522,6 +522,7 @@ def __init__( and not is_list_like(data) and not isinstance(data, SingleBlockManager) ) + is_list = is_list_like(data) # Series TASK 5: PREPARING THE MANAGER if isinstance(data, (Series, SingleBlockManager)): @@ -562,7 +563,7 @@ def __init__( elif is_array: pass - elif is_list_like(data): + elif is_list: # _like(data): list_like_input = True index = index if index is not None else default_index(len(data)) From 2c3b4153f4b47d8aea1d49efe63c740b8bb5f7bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sun, 17 Mar 2024 15:29:12 -0300 Subject: [PATCH 55/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 
4 --- pandas/core/series.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 49c61aad30856..152b5e7ddb04d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -504,26 +504,33 @@ def __init__( # Series TASK 4: COMMON INDEX MANIPULATION na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) + is_scalar_without_index = False + if index is None: if data is None: index = default_index(0) data = na_value if dtype is not None else [] - else: - pass + + is_scalar_without_index = ( + not is_pandas_object + and not is_array + and not is_list_like(data) + and not isinstance(data, SingleBlockManager) + ) else: index = ensure_index(index) if data is None: data = na_value if len(index) or dtype is not None else [] - scalar_input = ( - not is_pandas_object - and not is_array - and not is_list_like(data) - and not isinstance(data, SingleBlockManager) - ) is_list = is_list_like(data) + list_like_input = False + + if is_scalar_without_index: # elif data is not None: # possibly single_element. + if index is None: + data = [data] + # Series TASK 5: PREPARING THE MANAGER if isinstance(data, (Series, SingleBlockManager)): require_manager = False @@ -546,13 +553,8 @@ def __init__( else: # Creating the manager require_manager = True - list_like_input = False - - if scalar_input: # elif data is not None: # possibly single_element. 
- if index is None: - data = [data] - elif isinstance(data, Index): + if isinstance(data, Index): if dtype is not None: data = data.astype(dtype) @@ -563,7 +565,7 @@ def __init__( elif is_array: pass - elif is_list: # _like(data): + elif is_list: list_like_input = True index = index if index is not None else default_index(len(data)) From 092a9181bb5f67cc31b1b2f464aba326fd633f26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sun, 17 Mar 2024 16:58:09 -0300 Subject: [PATCH 56/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 5 --- pandas/core/series.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 152b5e7ddb04d..c909cf799ee2d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -504,19 +504,20 @@ def __init__( # Series TASK 4: COMMON INDEX MANIPULATION na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) - is_scalar_without_index = False + is_scalar = ( + not is_pandas_object + and not is_array + and not is_list_like(data) + and not isinstance(data, SingleBlockManager) + ) if index is None: if data is None: index = default_index(0) data = na_value if dtype is not None else [] - - is_scalar_without_index = ( - not is_pandas_object - and not is_array - and not is_list_like(data) - and not isinstance(data, SingleBlockManager) - ) + else: + if is_scalar: + data = [data] else: index = ensure_index(index) @@ -527,10 +528,6 @@ def __init__( list_like_input = False - if is_scalar_without_index: # elif data is not None: # possibly single_element. 
- if index is None: - data = [data] - # Series TASK 5: PREPARING THE MANAGER if isinstance(data, (Series, SingleBlockManager)): require_manager = False From 5e6f9b04ff190392302457e8d543e2a576642a1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sun, 17 Mar 2024 19:29:31 -0300 Subject: [PATCH 57/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 6 --- pandas/core/series.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index c909cf799ee2d..e06a5e9bec197 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -477,6 +477,18 @@ def __init__( original_data_dtype = getattr(data, "dtype", None) refs = None name = ibase.maybe_extract_name(name, data, type(self)) + na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) + + # TODO 11: Investigate. This is an unknown type that must be converted to list. + if is_list_like(data) and not isinstance(data, Sized): + data = list(data) + + is_scalar = ( + not is_pandas_object + and not is_array + and not is_list_like(data) + and not isinstance(data, SingleBlockManager) + ) # Series TASK 3: DATA TRANSFORMATION. if is_dict_like(data) and not is_pandas_object: @@ -497,20 +509,7 @@ def __init__( else None ) - # TODO 11: Investigate. This is an unknown type that must be converted to list. 
- if is_list_like(data) and not isinstance(data, Sized): - data = list(data) - # Series TASK 4: COMMON INDEX MANIPULATION - na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) - - is_scalar = ( - not is_pandas_object - and not is_array - and not is_list_like(data) - and not isinstance(data, SingleBlockManager) - ) - if index is None: if data is None: index = default_index(0) @@ -525,7 +524,6 @@ def __init__( data = na_value if len(index) or dtype is not None else [] is_list = is_list_like(data) - list_like_input = False # Series TASK 5: PREPARING THE MANAGER From 08c27eb4200f2f6a2b151c3d19074c396aa19988 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Sun, 17 Mar 2024 21:11:22 -0300 Subject: [PATCH 58/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 7 --- pandas/core/series.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index e06a5e9bec197..f215a90a60cc1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -523,7 +523,6 @@ def __init__( if data is None: data = na_value if len(index) or dtype is not None else [] - is_list = is_list_like(data) list_like_input = False # Series TASK 5: PREPARING THE MANAGER @@ -560,7 +559,7 @@ def __init__( elif is_array: pass - elif is_list: + elif is_list_like(data): list_like_input = True index = index if index is not None else default_index(len(data)) From a58ab1bb9125453d89a46402d0869815bdd42425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 00:12:49 -0300 Subject: [PATCH 59/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 
8 --- pandas/core/series.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index f215a90a60cc1..0ac84139b90f2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -518,11 +518,17 @@ def __init__( if is_scalar: data = [data] + # if not isinstance(data, (Series, SingleBlockManager)): + # index = index if index is not None else default_index(len(data)) + else: index = ensure_index(index) if data is None: data = na_value if len(index) or dtype is not None else [] + if not isinstance(data, (Series, SingleBlockManager)): + index = index if index is not None else default_index(len(data)) + list_like_input = False # Series TASK 5: PREPARING THE MANAGER From 61f7908de2306ea60c93b0e8209a8da6fbd3a100 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 00:19:05 -0300 Subject: [PATCH 60/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 9 --- pandas/core/series.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 0ac84139b90f2..a8f8991229af2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -517,10 +517,6 @@ def __init__( else: if is_scalar: data = [data] - - # if not isinstance(data, (Series, SingleBlockManager)): - # index = index if index is not None else default_index(len(data)) - else: index = ensure_index(index) if data is None: @@ -530,6 +526,7 @@ def __init__( index = index if index is not None else default_index(len(data)) list_like_input = False + require_manager = True # Series TASK 5: PREPARING THE MANAGER if isinstance(data, (Series, SingleBlockManager)): @@ -552,8 +549,6 @@ def __init__( index = data.index else: # Creating the manager - require_manager = True - if isinstance(data, Index): if dtype is not None: data = data.astype(dtype) @@ -568,7 +563,7 @@ def __init__( elif is_list_like(data): list_like_input = 
True - index = index if index is not None else default_index(len(data)) + # index = index if index is not None else default_index(len(data)) # Final requirements if is_list_like(data): From 316c6007e27d9820e7af9fddf2e8851bb0f6cf89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 00:38:37 -0300 Subject: [PATCH 61/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 10 --- pandas/core/series.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index a8f8991229af2..01013973dfb6a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -514,16 +514,26 @@ def __init__( if data is None: index = default_index(0) data = na_value if dtype is not None else [] + else: if is_scalar: data = [data] + index = default_index(1) + + if index is None: + if not isinstance(data, (Series, SingleBlockManager)): + index = default_index(len(data)) + else: index = ensure_index(index) if data is None: data = na_value if len(index) or dtype is not None else [] - if not isinstance(data, (Series, SingleBlockManager)): - index = index if index is not None else default_index(len(data)) + # if index is None: + # if not isinstance(data, (Series, SingleBlockManager)): + # index = default_index(len(data)) + + # index = index if index is not None else default_index(len(data)) list_like_input = False require_manager = True From 023f4dbb3679671792dacf5111b65c3467e494d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 00:45:07 -0300 Subject: [PATCH 62/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 
11 --- pandas/core/series.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 01013973dfb6a..95508f514c710 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -515,26 +515,17 @@ def __init__( index = default_index(0) data = na_value if dtype is not None else [] - else: - if is_scalar: - data = [data] - index = default_index(1) + elif is_scalar: + data = [data] - if index is None: - if not isinstance(data, (Series, SingleBlockManager)): - index = default_index(len(data)) + if not isinstance(data, (Series, SingleBlockManager)): + index = index if index is not None else default_index(len(data)) else: index = ensure_index(index) if data is None: data = na_value if len(index) or dtype is not None else [] - # if index is None: - # if not isinstance(data, (Series, SingleBlockManager)): - # index = default_index(len(data)) - - # index = index if index is not None else default_index(len(data)) - list_like_input = False require_manager = True @@ -573,8 +564,6 @@ def __init__( elif is_list_like(data): list_like_input = True - # index = index if index is not None else default_index(len(data)) - # Final requirements if is_list_like(data): com.require_length_match(data, index) From 0b8eeeab60cf94b9e80bfcd122f0a69f46c96c5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 00:56:21 -0300 Subject: [PATCH 63/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 
12 --- pandas/core/series.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 95508f514c710..97ccf67be967b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -511,9 +511,13 @@ def __init__( # Series TASK 4: COMMON INDEX MANIPULATION if index is None: - if data is None: + if data is None and dtype is None: + index = default_index(0) + data = [] + + elif data is None and dtype is not None: index = default_index(0) - data = na_value if dtype is not None else [] + data = na_value elif is_scalar: data = [data] From 37cfa65048513521300e66e4db2d598a1298f2fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 01:01:34 -0300 Subject: [PATCH 64/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 13 --- pandas/core/series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 97ccf67be967b..273f37376cb06 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -512,12 +512,12 @@ def __init__( # Series TASK 4: COMMON INDEX MANIPULATION if index is None: if data is None and dtype is None: - index = default_index(0) + # index = default_index(0) data = [] elif data is None and dtype is not None: index = default_index(0) - data = na_value + # data = na_value elif is_scalar: data = [data] From 82270dbd3f8f671433cd49efd21ba64ca619a272 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 01:16:22 -0300 Subject: [PATCH 65/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 
14 --- pandas/core/series.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 273f37376cb06..89a96f1baf085 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -511,13 +511,9 @@ def __init__( # Series TASK 4: COMMON INDEX MANIPULATION if index is None: - if data is None and dtype is None: - # index = default_index(0) - data = [] - - elif data is None and dtype is not None: - index = default_index(0) - # data = na_value + if data is None: + data = [] if dtype is None else data + index = default_index(0) if dtype is not None else index elif is_scalar: data = [data] From 9c8628d4fe4b759be5a172c20e17a2c45aaa25a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 01:22:31 -0300 Subject: [PATCH 66/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 15 --- pandas/core/series.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 89a96f1baf085..fca9986c07ceb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -488,6 +488,7 @@ def __init__( and not is_array and not is_list_like(data) and not isinstance(data, SingleBlockManager) + and data is not None ) # Series TASK 3: DATA TRANSFORMATION. @@ -515,7 +516,7 @@ def __init__( data = [] if dtype is None else data index = default_index(0) if dtype is not None else index - elif is_scalar: + if is_scalar: # None is scalar data = [data] if not isinstance(data, (Series, SingleBlockManager)): From 87bf240abcfb227480a8159a1c6d127b987b64a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 09:00:02 -0300 Subject: [PATCH 67/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 
16 --- pandas/core/series.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index fca9986c07ceb..620dc2ce76544 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -511,20 +511,28 @@ def __init__( ) # Series TASK 4: COMMON INDEX MANIPULATION + + # Default empty Series with dtype + if index is None: + if data is None and dtype is not None: + index = default_index(0) + else: + index = ensure_index(index) + + # Series TASK 4: COMMON INDEX MANIPULATION ** this will have data manipulation if index is None: if data is None: data = [] if dtype is None else data - index = default_index(0) if dtype is not None else index - if is_scalar: # None is scalar + if is_scalar: data = [data] if not isinstance(data, (Series, SingleBlockManager)): index = index if index is not None else default_index(len(data)) else: - index = ensure_index(index) if data is None: + # TODO FINAL: avoid using na_value data = na_value if len(index) or dtype is not None else [] list_like_input = False From 434c1fca493eca91071469750f679be8afb78b2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 09:16:50 -0300 Subject: [PATCH 68/92] REF Series - TODO 14: Try capture final data type that seems scalar. Temptative working steps. 
17 --- pandas/core/series.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 620dc2ce76544..bca82995125e3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -527,14 +527,18 @@ def __init__( if is_scalar: data = [data] - if not isinstance(data, (Series, SingleBlockManager)): - index = index if index is not None else default_index(len(data)) + # if not isinstance(data, (Series, SingleBlockManager)): + # index = index if index is not None else default_index(len(data)) else: if data is None: # TODO FINAL: avoid using na_value data = na_value if len(index) or dtype is not None else [] + if index is None: + if not isinstance(data, (Series, SingleBlockManager)): + index = index if index is not None else default_index(len(data)) + list_like_input = False require_manager = True From 5d24ed4467aab128543e0b2e8be1432fef8573a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 09:43:30 -0300 Subject: [PATCH 69/92] REF Series - TODO 14: Try capture final data type that seems scalar. Decoupling Scalar to Data preparation. 
--- pandas/core/series.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index bca82995125e3..44236d5312156 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -516,19 +516,14 @@ def __init__( if index is None: if data is None and dtype is not None: index = default_index(0) - else: - index = ensure_index(index) # Series TASK 4: COMMON INDEX MANIPULATION ** this will have data manipulation if index is None: if data is None: data = [] if dtype is None else data - if is_scalar: - data = [data] - - # if not isinstance(data, (Series, SingleBlockManager)): - # index = index if index is not None else default_index(len(data)) + if index is None and is_scalar: + data = [data] else: if data is None: @@ -539,6 +534,9 @@ def __init__( if not isinstance(data, (Series, SingleBlockManager)): index = index if index is not None else default_index(len(data)) + if index is not None: + index = ensure_index(index) + list_like_input = False require_manager = True From a8cedb053a562c1a8496310a75d82edea28748b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 10:01:26 -0300 Subject: [PATCH 70/92] REF Series - TODO 14: Try capture final data type that seems scalar. Decoupled Scalar to Data preparation. --- pandas/core/series.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 44236d5312156..fcdbddb41fa4e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -479,15 +479,12 @@ def __init__( name = ibase.maybe_extract_name(name, data, type(self)) na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) - # TODO 11: Investigate. This is an unknown type that must be converted to list. 
- if is_list_like(data) and not isinstance(data, Sized): - data = list(data) - is_scalar = ( not is_pandas_object and not is_array and not is_list_like(data) and not isinstance(data, SingleBlockManager) + and not (is_list_like(data) and not isinstance(data, Sized)) # and data is not None ) @@ -510,6 +507,13 @@ def __init__( else None ) + if is_scalar and index is None: + data = [data] + + # TODO 11: Investigate. This is an unknown type that must be converted to list. + if is_list_like(data) and not isinstance(data, Sized): + data = list(data) + # Series TASK 4: COMMON INDEX MANIPULATION # Default empty Series with dtype @@ -522,10 +526,10 @@ def __init__( if data is None: data = [] if dtype is None else data - if index is None and is_scalar: - data = [data] + # if index is None and is_scalar: + # data = [data] - else: + if index is not None: if data is None: # TODO FINAL: avoid using na_value data = na_value if len(index) or dtype is not None else [] From 92c25da2b2a454eccdca5a01dfdcc93c15fac35b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 10:54:51 -0300 Subject: [PATCH 71/92] REF Series - TODO 14: Refactor data transformation on edge cases. --- pandas/core/series.py | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index fcdbddb41fa4e..9d5a4169ad53a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -437,14 +437,14 @@ def __init__( fast_path_manager = False deep = True # deep copy, by standard. - # Series TASK 0: VALIDATE BASIC TYPES. + # Series TASK 1: VALIDATE BASIC TYPES. if dtype is not None: dtype = self._validate_dtype(dtype) copy_arrays = copy is True or copy is None # Arrays and ExtendedArrays copy = copy is True # Series and Manager - # Series TASK 1: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. + # Series TASK 2: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. 
if isinstance(data, MultiIndex): raise NotImplementedError( "initializing a Series from a MultiIndex is not supported" @@ -469,7 +469,7 @@ def __init__( "compound dtype. Use DataFrame instead." ) - # Series TASK 1: CAPTURE INPUT SIGNATURE. NECESSARY FOR WARNINGS AND ERRORS. + # Series TASK 3: CAPTURE INPUT SIGNATURE. NECESSARY FOR WARNINGS AND ERRORS. is_array = isinstance(data, (np.ndarray, ExtensionArray)) is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) original_dtype = dtype @@ -488,7 +488,7 @@ def __init__( and data is not None ) - # Series TASK 3: DATA TRANSFORMATION. + # Series TASK 4: DATA TRANSFORMATIONS. if is_dict_like(data) and not is_pandas_object: # COMMENT: Dict is SPECIAL case, since it's data has # data values and index keys. @@ -514,32 +514,28 @@ def __init__( if is_list_like(data) and not isinstance(data, Sized): data = list(data) - # Series TASK 4: COMMON INDEX MANIPULATION + # Series TASK 5: DATA AND INDEX TRANSFORMATION ON EDGE CASES + + # TASK 5.A: INDEX - # Default empty Series with dtype if index is None: if data is None and dtype is not None: index = default_index(0) + else: + index = ensure_index(index) - # Series TASK 4: COMMON INDEX MANIPULATION ** this will have data manipulation - if index is None: - if data is None: - data = [] if dtype is None else data - - # if index is None and is_scalar: - # data = [data] + # TASK 5.B: DATA - if index is not None: - if data is None: - # TODO FINAL: avoid using na_value - data = na_value if len(index) or dtype is not None else [] + if data is None: + if index is None: + data = [] if dtype is None else data + elif index is not None: + data = [] if not len(index) and dtype is None else na_value - if index is None: - if not isinstance(data, (Series, SingleBlockManager)): - index = index if index is not None else default_index(len(data)) + # TASK 5.B: COUPLING - if index is not None: - index = ensure_index(index) + if not isinstance(data, (Series, SingleBlockManager)): + index = 
index if index is not None else default_index(len(data)) list_like_input = False require_manager = True From 0178074d50ba1302aa96d7a7938470f86534624c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 11:11:43 -0300 Subject: [PATCH 72/92] EF Series - TODO 14: Refactor data transformation on edge cases. Index logic is dtype independent. --- pandas/core/series.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9d5a4169ad53a..22b59c1a38b14 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -514,33 +514,28 @@ def __init__( if is_list_like(data) and not isinstance(data, Sized): data = list(data) - # Series TASK 5: DATA AND INDEX TRANSFORMATION ON EDGE CASES - + # Series TASK 5: TRANSFORMATION ON EDGE CASES # TASK 5.A: INDEX - if index is None: - if data is None and dtype is not None: + if data is None: index = default_index(0) else: index = ensure_index(index) # TASK 5.B: DATA - if data is None: if index is None: data = [] if dtype is None else data elif index is not None: data = [] if not len(index) and dtype is None else na_value - # TASK 5.B: COUPLING - + # TASK 5.C: COUPLING if not isinstance(data, (Series, SingleBlockManager)): index = index if index is not None else default_index(len(data)) + # Series TASK 6: DETAILS FOR SERIES AND MANAGER. CREATES OTHERWISE list_like_input = False require_manager = True - - # Series TASK 5: PREPARING THE MANAGER if isinstance(data, (Series, SingleBlockManager)): require_manager = False @@ -560,7 +555,7 @@ def __init__( index = data.index - else: # Creating the manager + else: if isinstance(data, Index): if dtype is not None: data = data.astype(dtype) @@ -583,7 +578,7 @@ def __init__( empty_list = list_like_input and dtype is None and not len(data) dtype = np.dtype(object) if empty_list else dtype - # Series TASK 6: COPYING THE MANAGER. + # Series TASK 7: COPYING THE MANAGER. 
if require_manager: if is_array and copy_arrays: if copy_arrays: @@ -602,13 +597,13 @@ def __init__( if copy or fast_path_manager: data = data.copy(deep) - # Series TASK 7: CREATE THE DATAFRAME + # Series TASK 8: CREATE THE DATAFRAME NDFrame.__init__(self, data) self.name = name if not fast_path_manager: self._set_axis(0, index) - # Series TASK 8: RAISE WARNINGS + # Series TASK 9: RAISE WARNINGS if ( original_dtype is None and is_pandas_object From 8f7cf6b33e734f40034c99142dfa452d6fa39780 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 11:22:12 -0300 Subject: [PATCH 73/92] REF Series: change variable name require_manager <-> has_manager --- pandas/core/series.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 22b59c1a38b14..0caf550d21b62 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -535,10 +535,10 @@ def __init__( # Series TASK 6: DETAILS FOR SERIES AND MANAGER. CREATES OTHERWISE list_like_input = False - require_manager = True - if isinstance(data, (Series, SingleBlockManager)): - require_manager = False - + # require_manager = True + data_has_manager = isinstance(data, (Series, SingleBlockManager)) + # if isinstance(data, (Series, SingleBlockManager)): + if data_has_manager: if isinstance(data, Series): # copy logic is delicate and maybe has no been fully implemented. # Each data instance has it's own logic. @@ -579,7 +579,8 @@ def __init__( dtype = np.dtype(object) if empty_list else dtype # Series TASK 7: COPYING THE MANAGER. 
- if require_manager: + # if require_manager: + if not data_has_manager: if is_array and copy_arrays: if copy_arrays: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): From 07c1664957c1317238c7157b2aa9c4e65c76e05f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 12:16:42 -0300 Subject: [PATCH 74/92] REF Series - TODO 14: Refactor data transformation on edge cases. Simplifying logic. --- pandas/core/series.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 0caf550d21b62..3051bff21e78c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -525,9 +525,18 @@ def __init__( # TASK 5.B: DATA if data is None: if index is None: - data = [] if dtype is None else data + data = [] elif index is not None: - data = [] if not len(index) and dtype is None else na_value + if not len(index): + if dtype is None: + data = [] + else: + pass + elif len(index): + if dtype is None: + data = na_value # np.float64 (tem que poder retirar.) + else: + data = na_value # na_value # exchange for 'pass' # TASK 5.C: COUPLING if not isinstance(data, (Series, SingleBlockManager)): @@ -581,6 +590,14 @@ def __init__( # Series TASK 7: COPYING THE MANAGER. # if require_manager: if not data_has_manager: + # # Final requirements + # if is_list_like(data): + # com.require_length_match(data, index) + + # # GH 29405: Pre-2.0, this defaulted to float. + # empty_series = list_like_input and dtype is None and not len(data) + # dtype = np.dtype(object) if empty_series else dtype + if is_array and copy_arrays: if copy_arrays: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): From d7a2798befad8c26dca3f332d972efe0e5519db9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Mon, 18 Mar 2024 13:31:26 -0300 Subject: [PATCH 75/92] REF Series - TODO 14: Refactor data transformation on edge cases. 
Simplifying logic. Step 2 --- pandas/core/series.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3051bff21e78c..59de615cfe8bf 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -522,21 +522,18 @@ def __init__( else: index = ensure_index(index) + require_manager = True # TASK 5.B: DATA if data is None: if index is None: - data = [] + data = [] # needed for consistency + elif index is not None: if not len(index): if dtype is None: - data = [] - else: - pass - elif len(index): - if dtype is None: - data = na_value # np.float64 (tem que poder retirar.) - else: - data = na_value # na_value # exchange for 'pass' + data = [] # needed to to make dtype=np.object + else: + data = na_value # Check tests # TASK 5.C: COUPLING if not isinstance(data, (Series, SingleBlockManager)): @@ -544,10 +541,8 @@ def __init__( # Series TASK 6: DETAILS FOR SERIES AND MANAGER. CREATES OTHERWISE list_like_input = False - # require_manager = True - data_has_manager = isinstance(data, (Series, SingleBlockManager)) - # if isinstance(data, (Series, SingleBlockManager)): - if data_has_manager: + if isinstance(data, (Series, SingleBlockManager)): + require_manager = False if isinstance(data, Series): # copy logic is delicate and maybe has no been fully implemented. # Each data instance has it's own logic. @@ -565,6 +560,7 @@ def __init__( index = data.index else: + require_manager = True if isinstance(data, Index): if dtype is not None: data = data.astype(dtype) @@ -588,8 +584,7 @@ def __init__( dtype = np.dtype(object) if empty_list else dtype # Series TASK 7: COPYING THE MANAGER. 
- # if require_manager: - if not data_has_manager: + if require_manager: # # Final requirements # if is_list_like(data): # com.require_length_match(data, index) From 7e488871bf53442944a21fa9913c17ec3a79542d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Tue, 19 Mar 2024 13:30:37 -0300 Subject: [PATCH 76/92] REF Series: saving memory on dict input --- pandas/core/series.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 59de615cfe8bf..78b43d6948586 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -489,7 +489,7 @@ def __init__( ) # Series TASK 4: DATA TRANSFORMATIONS. - if is_dict_like(data) and not is_pandas_object: + if is_dict_like(data) and not is_pandas_object and data is not None: # COMMENT: Dict is SPECIAL case, since it's data has # data values and index keys. # Here it is being sent to Series, but it could different, for simplicity. @@ -497,15 +497,16 @@ def __init__( # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')] # raises KeyError). Send it to Series for "standard" construction: - data = ( - Series( + + # index = tuple(data.keys()) consumes more memory (up to 25%). + if data: + data = Series( data=list(data.values()), - index=tuple(data.keys()), + index=data.keys(), dtype=dtype, ) - if data - else None - ) + else: + data = None if is_scalar and index is None: data = [data] @@ -541,6 +542,7 @@ def __init__( # Series TASK 6: DETAILS FOR SERIES AND MANAGER. CREATES OTHERWISE list_like_input = False + if isinstance(data, (Series, SingleBlockManager)): require_manager = False if isinstance(data, Series): @@ -585,14 +587,6 @@ def __init__( # Series TASK 7: COPYING THE MANAGER. if require_manager: - # # Final requirements - # if is_list_like(data): - # com.require_length_match(data, index) - - # # GH 29405: Pre-2.0, this defaulted to float. 
- # empty_series = list_like_input and dtype is None and not len(data) - # dtype = np.dtype(object) if empty_series else dtype - if is_array and copy_arrays: if copy_arrays: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): From d698c57c81acd8a54b1bf2635e5c36006900fbf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Tue, 19 Mar 2024 14:49:55 -0300 Subject: [PATCH 77/92] REF Series: Unifying index treatment in a single place --- pandas/core/series.py | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 78b43d6948586..1fceb9d867b12 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -423,7 +423,7 @@ def __init__( # -------- There is an array error that most be done after validating # TODO 12: Investigate. This is an unknown type that is being converted to list. # DONE 13: 'allow_mgr' were not used anyware. - # TODO 14: Try capture final data type that seems scalar. + # DONE 14: capture final data type that seems scalar. # -------- But does not satisfy is_scalar(). It comes directly from args. # TODO 15: Check GH#52419. This is somewhat peculiar. There were 3 identical # -------- warnings. Check if there is a reason for it. If so: @@ -516,10 +516,19 @@ def __init__( data = list(data) # Series TASK 5: TRANSFORMATION ON EDGE CASES - # TASK 5.A: INDEX + # TASK 5.A: ENSURE that there is always an index below. 'index is not == True' + # Except for Series, whose index can be None. 
if index is None: if data is None: index = default_index(0) + else: + if isinstance(data, SingleBlockManager): + index = data.index + if isinstance(data, Series): + pass + else: + index = default_index(len(data)) + else: index = ensure_index(index) @@ -536,11 +545,7 @@ def __init__( else: data = na_value # Check tests - # TASK 5.C: COUPLING - if not isinstance(data, (Series, SingleBlockManager)): - index = index if index is not None else default_index(len(data)) - - # Series TASK 6: DETAILS FOR SERIES AND MANAGER. CREATES OTHERWISE + # # Series TASK 6: DETAILS FOR SERIES AND MANAGER. CREATES OTHERWISE list_like_input = False if isinstance(data, (Series, SingleBlockManager)): @@ -559,7 +564,7 @@ def __init__( elif isinstance(data, SingleBlockManager): fast_path_manager = index is None and not copy and dtype is None - index = data.index + index = data.index # Pode subir para Series else: require_manager = True @@ -577,14 +582,22 @@ def __init__( elif is_list_like(data): list_like_input = True + # list_like_input = (not isinstance(data, (Series, SingleBlockManager)) + # # and not isinstance(data, Index) + # and not is_array + # and not isinstance(data, np.ndarray) + # and is_list_like(data) + # ) + + # GH 29405: Pre-2.0, this defaulted to float. + default_empty_series = list_like_input and dtype is None and not len(data) + + dtype = np.dtype(object) if default_empty_series else dtype + # Final requirements if is_list_like(data): com.require_length_match(data, index) - # GH 29405: Pre-2.0, this defaulted to float. - empty_list = list_like_input and dtype is None and not len(data) - dtype = np.dtype(object) if empty_list else dtype - # Series TASK 7: COPYING THE MANAGER. if require_manager: if is_array and copy_arrays: From c16a0e9a229d30c40684a02e0c1b064bdee591b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Tue, 19 Mar 2024 14:59:49 -0300 Subject: [PATCH 78/92] REF Series: removing dead code and if (True). 
--- pandas/core/series.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 1fceb9d867b12..8d31928c9b033 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -535,15 +535,11 @@ def __init__( require_manager = True # TASK 5.B: DATA if data is None: - if index is None: - data = [] # needed for consistency - - elif index is not None: - if not len(index): - if dtype is None: - data = [] # needed to to make dtype=np.object - else: - data = na_value # Check tests + if not len(index): + if dtype is None: + data = [] # needed to to make dtype=np.object + else: + data = na_value # Check tests # # Series TASK 6: DETAILS FOR SERIES AND MANAGER. CREATES OTHERWISE list_like_input = False From cc9154d563a924805a079bcbed5b32cc5f980de3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Tue, 19 Mar 2024 17:16:37 -0300 Subject: [PATCH 79/92] REF Series: Unifying index treatment in a single place. step 2 --- pandas/core/series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8d31928c9b033..e6bc3ff99f435 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -518,6 +518,7 @@ def __init__( # Series TASK 5: TRANSFORMATION ON EDGE CASES # TASK 5.A: ENSURE that there is always an index below. 'index is not == True' # Except for Series, whose index can be None. + original_index = index if index is None: if data is None: index = default_index(0) @@ -528,7 +529,6 @@ def __init__( pass else: index = default_index(len(data)) - else: index = ensure_index(index) @@ -549,7 +549,7 @@ def __init__( if isinstance(data, Series): # copy logic is delicate and maybe has no been fully implemented. # Each data instance has it's own logic. 
- copy = True if index is None else False + copy = True if original_index is None else False deep = not copy if index is not None: From f8063d24d0a1f1d2ea7e2e9f5ea7344a185db203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Tue, 19 Mar 2024 18:13:29 -0300 Subject: [PATCH 80/92] REF Series: Unifying index treatment in a single place. step 3 --- pandas/core/series.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index e6bc3ff99f435..adabc2a13113b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -526,7 +526,7 @@ def __init__( if isinstance(data, SingleBlockManager): index = data.index if isinstance(data, Series): - pass + index = data.index else: index = default_index(len(data)) else: @@ -552,13 +552,15 @@ def __init__( copy = True if original_index is None else False deep = not copy - if index is not None: + if original_index is not None: data = data.reindex(index) # Copy the manager data = data._mgr elif isinstance(data, SingleBlockManager): - fast_path_manager = index is None and not copy and dtype is None + fast_path_manager = ( + original_index is None and not copy and dtype is None + ) index = data.index # Pode subir para Series From c6064643965e4c05ece119de4e4ec55777c49d7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Tue, 19 Mar 2024 18:22:37 -0300 Subject: [PATCH 81/92] REF Series: Unifying index treatment in a single place. 
step 4 --- pandas/core/series.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index adabc2a13113b..dc7a3ee20b0c9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -554,6 +554,7 @@ def __init__( if original_index is not None: data = data.reindex(index) # Copy the manager + index = data.index data = data._mgr @@ -562,8 +563,6 @@ def __init__( original_index is None and not copy and dtype is None ) - index = data.index # Pode subir para Series - else: require_manager = True if isinstance(data, Index): From f88bbe407f09bc248213abe979250024f7698a97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Tue, 19 Mar 2024 18:56:16 -0300 Subject: [PATCH 82/92] REF Series: simplify if-else structure on data manipulation. Step 1 --- pandas/core/series.py | 63 +++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index dc7a3ee20b0c9..e5c5a2682c90c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -544,50 +544,43 @@ def __init__( # # Series TASK 6: DETAILS FOR SERIES AND MANAGER. CREATES OTHERWISE list_like_input = False - if isinstance(data, (Series, SingleBlockManager)): + if isinstance(data, Series): require_manager = False - if isinstance(data, Series): - # copy logic is delicate and maybe has no been fully implemented. - # Each data instance has it's own logic. - copy = True if original_index is None else False - deep = not copy - - if original_index is not None: - data = data.reindex(index) # Copy the manager - index = data.index + # copy logic is delicate and maybe has no been fully implemented. + # Each data instance has it's own logic. 
+ copy = True if original_index is None else False + deep = not copy - data = data._mgr + if original_index is not None: + data = data.reindex(index) # Copy the manager + index = data.index - elif isinstance(data, SingleBlockManager): - fast_path_manager = ( - original_index is None and not copy and dtype is None - ) + data = data._mgr - else: - require_manager = True - if isinstance(data, Index): - if dtype is not None: - data = data.astype(dtype) + elif isinstance(data, SingleBlockManager): + require_manager = False + fast_path_manager = original_index is None and not copy and dtype is None - refs = data._references - data = data._values - copy = False + elif isinstance(data, Index): + if dtype is not None: + data = data.astype(dtype) - elif is_array: - pass + refs = data._references + data = data._values + copy = False - elif is_list_like(data): - list_like_input = True + elif is_array: + pass - # list_like_input = (not isinstance(data, (Series, SingleBlockManager)) - # # and not isinstance(data, Index) - # and not is_array - # and not isinstance(data, np.ndarray) - # and is_list_like(data) - # ) + elif is_list_like(data): + list_like_input = True + # Series TASK 7: COPYING THE MANAGER. + if require_manager: # GH 29405: Pre-2.0, this defaulted to float. - default_empty_series = list_like_input and dtype is None and not len(data) + default_empty_series = False + if list_like_input: + default_empty_series = dtype is None and not len(data) dtype = np.dtype(object) if default_empty_series else dtype @@ -595,8 +588,6 @@ def __init__( if is_list_like(data): com.require_length_match(data, index) - # Series TASK 7: COPYING THE MANAGER. 
- if require_manager: if is_array and copy_arrays: if copy_arrays: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): From a21ad6db846b253a40075b78529e4fe95b113526 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Tue, 19 Mar 2024 19:27:13 -0300 Subject: [PATCH 83/92] REF Series: simplify if-else structure on data manipulation. Step 2 --- pandas/core/series.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index e5c5a2682c90c..e375570af4584 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -515,7 +515,7 @@ def __init__( if is_list_like(data) and not isinstance(data, Sized): data = list(data) - # Series TASK 5: TRANSFORMATION ON EDGE CASES + # Series TASK 5: TRANSFORMATION ON INDEX - # TASK 5.A: ENSURE that there is always an index below. 'index is not == True' # Except for Series, whose index can be None. original_index = index @@ -532,17 +532,14 @@ def __init__( else: index = ensure_index(index) + # Series TASK 6: TRANSFORMATIONS ON DATA. + # WITH REQUISITES FOR COPYING AND MANAGER CREATION (WHEN NEEDED). + list_like_input = False require_manager = True - # TASK 5.B: DATA - if data is None: - if not len(index): - if dtype is None: - data = [] # needed to to make dtype=np.object - else: - data = na_value # Check tests - # # Series TASK 6: DETAILS FOR SERIES AND MANAGER. CREATES OTHERWISE - list_like_input = False + if data is None: + if len(index): + data = na_value if isinstance(data, Series): require_manager = False From 83518c555bb4f7f510793707a097eb8ba6b04ff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Tue, 19 Mar 2024 19:39:11 -0300 Subject: [PATCH 84/92] REF Series: simplify if-else structure on data manipulation.
Done --- pandas/core/series.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index e375570af4584..0161a96608e80 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -537,11 +537,10 @@ def __init__( list_like_input = False require_manager = True - if data is None: - if len(index): - data = na_value + if data is None and len(index): + data = na_value - if isinstance(data, Series): + elif isinstance(data, Series): require_manager = False # copy logic is delicate and maybe has no been fully implemented. # Each data instance has it's own logic. From 3dfcbb206797c49540e56cd03282a194d7bba4ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Tue, 19 Mar 2024 19:54:13 -0300 Subject: [PATCH 85/92] REF Series: Simplify identification of default_empty_series to change its dtype. --- pandas/core/series.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 0161a96608e80..d00b750be6e8c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -434,7 +434,6 @@ def __init__( # DONE 18: Check if DataFrame.astype() copies. 'copy: bool, default True'. allow_mgr = False - fast_path_manager = False deep = True # deep copy, by standard. # Series TASK 1: VALIDATE BASIC TYPES. @@ -516,7 +515,7 @@ def __init__( data = list(data) # Series TASK 5: TRANSFORMATION ON INDEX - # TASK 5.A: ENSURE that there is always an index below. 'index is not == True' + # ENSURE that there is always an index below. # Except for Series, whose index can be None. original_index = index if index is None: @@ -536,7 +535,7 @@ def __init__( # WITH REQUISITES FOR COPYING AND MANAGER CREATION (WHEN NEEDED). list_like_input = False require_manager = True - + fast_path_manager = False if data is None and len(index): data = na_value @@ -574,11 +573,9 @@ def __init__( # Series TASK 7: COPYING THE MANAGER.
if require_manager: # GH 29405: Pre-2.0, this defaulted to float. - default_empty_series = False - if list_like_input: - default_empty_series = dtype is None and not len(data) - - dtype = np.dtype(object) if default_empty_series else dtype + default_empty_series = list_like_input and not len(data) and dtype is None + if default_empty_series: + dtype = np.dtype(object) # Final requirements if is_list_like(data): From 09fdd0373f099c321e49bf418026b8afdaff96cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 20 Mar 2024 12:40:54 -0300 Subject: [PATCH 86/92] REF Series: Erasing comments, and DONEs and TODOs --- pandas/core/series.py | 82 +++---------------------------------------- 1 file changed, 4 insertions(+), 78 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d00b750be6e8c..89dcaaec43135 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -363,76 +363,6 @@ def __init__( name=None, copy: bool | None = None, ) -> None: - # todo management - # This one I will do a single commit documenting each sub-step, so - # that other programmers can understand the refactoring procedure. - # DONE 1: - # DONE 2.1: Organize if-else logic to visualize decoupling - # DONE 2.2: Decouple warnings / DATA MANIPULATION. - # DONE 2.3: Here Slide the warnings to Series Task 7. - # DONE 2.4: Slide copying the manager to Series TASK 5.A - # DONE 2.5.0: Check if it is possible to separate copying from DF Creation. - # DONE 2.5.1: Move block to TASK 5.A - # DONE 2.5.2: Decouple DF Copying from Creation. Send to to TASKS 5.A AND 6. - # DONE 2: Decouple warning/Manager manipulation IN THE TWO CALLS BELOW. - # DONE 2.5.3: Grouping again because it is a Fast Path for DataFrame Creation - # DONE 2.5.4: Implement fast path logic - # DONE 2.5.5: Move DataFrame Creation to 'Series Task 6'. - # DONE 3: (DONE) FINAL: Recover the FINAL Steps used on that for final register. 
- # DONE 4: Move Warning to Series TASK 7 - # DONE 4.1: Recreate if - # DONE 4.2: and move. - # DONE 4.3: Unify if-else structure. - # DONE 5.0: Move ndarray ValueError to TASK-0. - # DONE 6: DECOUPLE MANAGER PREPARATION FROM COPYING. I realize it will help - # ------ with the other tasks if I do this first! Let's do it. - # DONE 6.1: Avoid copying twice when type(data) is SingleBlockManager - # DONE 6.2: Unify the copying signature when - # type(data) is Series (index is None: ... else: ...) - # DONE 6.3: Simplify logic on copy for both Series and manager. - # -------- - # DONE 6.4: Move the copying logic on the series to below. - # DONE 6.5: Unify the if-else within the (Series, SingleBlockManager) case. - # DONE 6.6: Use deep arg for NDArrays and Extension Array - # DONE 6.7: Simplify dtype = object for (Index, arrays, is_list) + - # -------- single element group. - # DONE 6.7: DO TASK 8 HERE - # DONE 6.8 Move single element to outside the (Index, arrays, is_list) group - # DONE 6.9: Separate the index is None case on the group (Index,arrays,is_list) - # DONE 6.10: Separate copy logic when data is ndarray or extended array - # DONE 6.11: Move copy logic below preparation. - # DONE 7: Move code to Final Requirements. Task 5. - # ------ dtype Series with arguments equivalent to empty list, - # ------ with dtype=None, must be object. - # DONE 8: Decouple single element from the other data. - # Use 'single_element' signature. - # DONE 8.0. Separate if-else single element; - # DONE 8.1. Group common 'index' definitions on 'not single_element' cases. - - # DONE 10: Move codes for Copying ExtensionArray to TASK 5.B. - # DONE 10.1: Understand that the logic is different for - # --------- ExtensionArrays + Arrays vs - # --------- Managers, Series, etc. - # DONE 10.2: Split if-else logic for Extension Arrays and arrays - # DONE 10.3: Move np.ndarray - # DONE 10.4: Move ExtensionArray - # OTHERTODO: 10.5: Unify if-else np.ndarray --- Unnecessary. 
- # OTHERTODO: 10.6: Unify if-else ExtensionArray --- Solves in OTHERTODO. - - # DONE 11: Invert the name 'Series TASK 0' and 'Series TASK 2'. - # -------- There is an array error that most be done after validating - # TODO 12: Investigate. This is an unknown type that is being converted to list. - # DONE 13: 'allow_mgr' were not used anyware. - # DONE 14: capture final data type that seems scalar. - # -------- But does not satisfy is_scalar(). It comes directly from args. - # TODO 15: Check GH#52419. This is somewhat peculiar. There were 3 identical - # -------- warnings. Check if there is a reason for it. If so: - # -------- fix and create a new test. - # TODO 16: Check GitHub Issue - # TODO 17: GH#33357 called with just the SingleBlockManager, - # -------- Avoid warnings on fast_path_manager? - # DONE 18: Check if DataFrame.astype() copies. 'copy: bool, default True'. - allow_mgr = False deep = True # deep copy, by standard. @@ -451,7 +381,6 @@ def __init__( if isinstance(data, SingleBlockManager): if not (data.index.equals(index) or index is None) or copy: - # TODO 15: Check GitHub Issue # GH #19275 SingleBlockManager input should only be called internally raise AssertionError( "Cannot pass both SingleBlockManager " @@ -510,19 +439,18 @@ def __init__( if is_scalar and index is None: data = [data] - # TODO 11: Investigate. This is an unknown type that must be converted to list. + # TODO: Investigate. This is an unknown type that must be converted to list. if is_list_like(data) and not isinstance(data, Sized): data = list(data) # Series TASK 5: TRANSFORMATION ON INDEX # ENSURE that there is always an index below. - # Except for Series, whose index can be None. 
original_index = index if index is None: if data is None: index = default_index(0) else: - if isinstance(data, SingleBlockManager): + if isinstance(data, SingleBlockManager): # TODO: GROUP SERIES AND MANAG index = data.index if isinstance(data, Series): index = data.index @@ -532,7 +460,7 @@ def __init__( index = ensure_index(index) # Series TASK 6: TRANSFORMATIONS ON DATA. - # WITH REQUISITES FOR COPYING AND MANAGER CREATION (WHEN NEEDED). + # REQUIREMENTS FOR COPYING AND MANAGER CREATION (WHEN NEEDED). list_like_input = False require_manager = True fast_path_manager = False @@ -541,13 +469,11 @@ def __init__( elif isinstance(data, Series): require_manager = False - # copy logic is delicate and maybe has no been fully implemented. - # Each data instance has it's own logic. copy = True if original_index is None else False deep = not copy if original_index is not None: - data = data.reindex(index) # Copy the manager + data = data.reindex(index) index = data.index data = data._mgr From daaaeb88dfb1b2348d8cc1be6100165374b488eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 20 Mar 2024 12:47:35 -0300 Subject: [PATCH 87/92] REF Series: group two identical cases on if-else logic of index construction --- pandas/core/series.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 89dcaaec43135..2797fc330579e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -450,9 +450,7 @@ def __init__( if data is None: index = default_index(0) else: - if isinstance(data, SingleBlockManager): # TODO: GROUP SERIES AND MANAG - index = data.index - if isinstance(data, Series): + if isinstance(data, (SingleBlockManager, Series)): index = data.index else: index = default_index(len(data)) From c68c73e8be015bb92449459d13a410be272c9928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Wed, 20 Mar 2024 12:56:07 -0300 Subject: [PATCH 88/92] REF 
Series: simplifying comments --- pandas/core/series.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 2797fc330579e..0ab633c796100 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -364,7 +364,7 @@ def __init__( copy: bool | None = None, ) -> None: allow_mgr = False - deep = True # deep copy, by standard. + deep = True # deep copy # Series TASK 1: VALIDATE BASIC TYPES. if dtype is not None: @@ -373,7 +373,7 @@ def __init__( copy_arrays = copy is True or copy is None # Arrays and ExtendedArrays copy = copy is True # Series and Manager - # Series TASK 2: RAISE ERRORS ON KNOWN UNACEPPTED CASES, ETC. + # Series TASK 2: RAISE ERRORS ON KNOWN UNACEPPTED CASES. if isinstance(data, MultiIndex): raise NotImplementedError( "initializing a Series from a MultiIndex is not supported" @@ -397,7 +397,7 @@ def __init__( "compound dtype. Use DataFrame instead." ) - # Series TASK 3: CAPTURE INPUT SIGNATURE. NECESSARY FOR WARNINGS AND ERRORS. + # Series TASK 3: CAPTURE INPUT SIGNATURE. is_array = isinstance(data, (np.ndarray, ExtensionArray)) is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) original_dtype = dtype @@ -418,10 +418,7 @@ def __init__( # Series TASK 4: DATA TRANSFORMATIONS. if is_dict_like(data) and not is_pandas_object and data is not None: - # COMMENT: Dict is SPECIAL case, since it's data has - # data values and index keys. - # Here it is being sent to Series, but it could different, for simplicity. - # It could be sent to array (for faster manipulation, for example). + # Dict is SPECIAL case, since it's data has data values and index keys. # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')] # raises KeyError). 
Send it to Series for "standard" construction: @@ -443,8 +440,7 @@ def __init__( if is_list_like(data) and not isinstance(data, Sized): data = list(data) - # Series TASK 5: TRANSFORMATION ON INDEX - # ENSURE that there is always an index below. + # Series TASK 5: TRANSFORMATION ON INDEX. There is always an index after this. original_index = index if index is None: if data is None: @@ -471,7 +467,7 @@ def __init__( deep = not copy if original_index is not None: - data = data.reindex(index) + data = data.reindex(index) # copy index = data.index data = data._mgr From e312146c5ed7f75a78debd7d8adcc1ffe6653cd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Thu, 21 Mar 2024 08:55:13 -0300 Subject: [PATCH 89/92] REF Series: simplifying detection/manipulation of scalar data --- pandas/core/series.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 08dcf7ab1ae24..b446637052d33 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -407,13 +407,8 @@ def __init__( name = ibase.maybe_extract_name(name, data, type(self)) na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) - is_scalar = ( - not is_pandas_object - and not is_array - and not is_list_like(data) - and not isinstance(data, SingleBlockManager) - and not (is_list_like(data) and not isinstance(data, Sized)) # - and data is not None + data_is_scalar = not is_list_like(data) and ( + is_scalar(data) or not isinstance(data, Sized) ) # Series TASK 4: DATA TRANSFORMATIONS. @@ -433,7 +428,7 @@ def __init__( else: data = None - if is_scalar and index is None: + if data_is_scalar and index is None and data is not None: data = [data] # TODO: Investigate. This is an unknown type that must be converted to list. 
From 5304d75754f0104f4f1a7151fac7a67ba9885b2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Thu, 21 Mar 2024 09:07:54 -0300 Subject: [PATCH 90/92] REF Series: simplifying detection/manipulation of scalar data. Step 2 --- pandas/core/series.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index b446637052d33..f5605aedf736d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -407,10 +407,6 @@ def __init__( name = ibase.maybe_extract_name(name, data, type(self)) na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) - data_is_scalar = not is_list_like(data) and ( - is_scalar(data) or not isinstance(data, Sized) - ) - # Series TASK 4: DATA TRANSFORMATIONS. if is_dict_like(data) and not is_pandas_object and data is not None: # Dict is SPECIAL case, since it's data has data values and index keys. @@ -428,13 +424,22 @@ def __init__( else: data = None - if data_is_scalar and index is None and data is not None: - data = [data] - # TODO: Investigate. This is an unknown type that must be converted to list. if is_list_like(data) and not isinstance(data, Sized): data = list(data) + # if data_is_scalar and index is None and data is not None: + if ( + (is_scalar(data) or not isinstance(data, Sized)) + and index is None + and data is not None + ): + data = [data] + + # # TODO: Investigate. This is an unknown type that must be converted to list. + # if is_list_like(data) and not isinstance(data, Sized): + # data = list(data) + # Series TASK 5: TRANSFORMATION ON INDEX. There is always an index after this. 
original_index = index if index is None: From 32dda5b5d470326e4e237fdb53bc7a46700a6ec7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Thu, 21 Mar 2024 11:05:13 -0300 Subject: [PATCH 91/92] REF Series: simplifying logic for single element becoming a list --- pandas/core/series.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index f5605aedf736d..b9104b9788301 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -424,11 +424,9 @@ def __init__( else: data = None - # TODO: Investigate. This is an unknown type that must be converted to list. if is_list_like(data) and not isinstance(data, Sized): data = list(data) - # if data_is_scalar and index is None and data is not None: if ( (is_scalar(data) or not isinstance(data, Sized)) and index is None @@ -436,10 +434,6 @@ def __init__( ): data = [data] - # # TODO: Investigate. This is an unknown type that must be converted to list. - # if is_list_like(data) and not isinstance(data, Sized): - # data = list(data) - # Series TASK 5: TRANSFORMATION ON INDEX. There is always an index after this. original_index = index if index is None: From f3967ca7d1bad4174493d018b692872e19e04694 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marco=20Aur=C3=A9lio=20Barbosa?= Date: Thu, 21 Mar 2024 11:15:18 -0300 Subject: [PATCH 92/92] REF Series: incorporate changes from #57889 --- pandas/core/series.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index b9104b9788301..7f059d708d055 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -408,7 +408,8 @@ def __init__( na_value = na_value_for_dtype(pandas_dtype(dtype), compat=False) # Series TASK 4: DATA TRANSFORMATIONS. 
- if is_dict_like(data) and not is_pandas_object and data is not None: + if isinstance(data, Mapping): + # if is_dict_like(data) and not is_pandas_object and data is not None: # Dict is SPECIAL case, since it's data has data values and index keys. # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]