From 47717cfeb757b409074f423db744dc90cd3be41b Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Thu, 18 Apr 2024 12:27:29 +0530 Subject: [PATCH 01/13] fixed PR01,SA01 in docstring for pandas.HDFStore.append --- pandas/io/pytables.py | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 5ecf7e287ea58..336098b6c5484 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1261,15 +1261,25 @@ def append( Table format. Write as a PyTables Table structure which may perform worse but allow more flexible operations like searching / selecting subsets of the data. + axes : default None + This parameter is currently not accepted. index : bool, default True Write DataFrame index as a column. append : bool, default True Append the input data to the existing. - data_columns : list of columns, or True, default None - List of columns to create as indexed data columns for on-disk - queries, or True to use all columns. By default only the axes - of the object are indexed. See `here - `__. + complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' + Specifies the compression library to be used. + These additional compressors for Blosc are supported + (default if no compressor specified: 'blosc:blosclz'): + {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', + 'blosc:zlib', 'blosc:zstd'}. + Specifying a compression library which is not available issues + a ValueError. + complevel : int, 0-9, default None + Specifies a compression level for data. + A value of 0 or None disables compression. + columns : default None + This parameter is currently not accepted, try data_columns. min_itemsize : int, dict, or None Dict of columns that specify minimum str sizes. nan_rep : str @@ -1278,11 +1288,26 @@ def append( Size to chunk the writing. expectedrows : int Expected TOTAL row size of this table. - encoding : default None - Provide an encoding for str. 
dropna : bool, default False, optional Do not write an ALL nan row to the store settable by the option 'io.hdf.dropna_table'. + data_columns : list of columns, or True, default None + List of columns to create as indexed data columns for on-disk + queries, or True to use all columns. By default only the axes + of the object are indexed. See `here + `__. + encoding : default None + Provide an encoding for str. + errors : str, default 'strict' + The error handling scheme to use for encoding errors. + The default is 'strict' meaning that encoding errors raise a + UnicodeEncodeError. Other possible values are 'ignore', 'replace' and + 'xmlcharrefreplace' as well as any other name registered with + codecs.register_error that can handle UnicodeEncodeErrors. + + See Also + -------- + HDFStore.append_to_multiple : Append to multiple tables. Notes ----- From a027192fbf2e7260174409423192afc6629ff23c Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Thu, 18 Apr 2024 12:30:25 +0530 Subject: [PATCH 02/13] removed method pandas.HDFStore.append --- ci/code_checks.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 66f6bfd7195f9..a982e4cc70f3f 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -154,7 +154,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DatetimeTZDtype.tz SA01" \ -i "pandas.DatetimeTZDtype.unit SA01" \ -i "pandas.Grouper PR02,SA01" \ - -i "pandas.HDFStore.append PR01,SA01" \ -i "pandas.HDFStore.get SA01" \ -i "pandas.HDFStore.groups SA01" \ -i "pandas.HDFStore.info RT03,SA01" \ From f3e9155f94ee08582bef20dcb4b54a93054f25f8 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Fri, 19 Apr 2024 12:31:02 +0530 Subject: [PATCH 03/13] changed default value to None --- pandas/io/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f2d52d9e35b77..0c4457603ccf9 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ 
-1267,7 +1267,7 @@ def append( Write DataFrame index as a column. append : bool, default True Append the input data to the existing. - complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' + complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default None Specifies the compression library to be used. These additional compressors for Blosc are supported (default if no compressor specified: 'blosc:blosclz'): From 06a788991fc0d1dadbdad15bd22d986b3db61bad Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Fri, 19 Apr 2024 17:05:31 +0530 Subject: [PATCH 04/13] commented out from complib onwards --- pandas/io/pytables.py | 74 +++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 0c4457603ccf9..db783bc11fb2a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1267,43 +1267,43 @@ def append( Write DataFrame index as a column. append : bool, default True Append the input data to the existing. - complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default None - Specifies the compression library to be used. - These additional compressors for Blosc are supported - (default if no compressor specified: 'blosc:blosclz'): - {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', - 'blosc:zlib', 'blosc:zstd'}. - Specifying a compression library which is not available issues - a ValueError. - complevel : int, 0-9, default None - Specifies a compression level for data. - A value of 0 or None disables compression. - columns : default None - This parameter is currently not accepted, try data_columns. - min_itemsize : int, dict, or None - Dict of columns that specify minimum str sizes. - nan_rep : str - Str to use as str nan representation. - chunksize : int or None - Size to chunk the writing. - expectedrows : int - Expected TOTAL row size of this table. 
- dropna : bool, default False, optional - Do not write an ALL nan row to the store settable - by the option 'io.hdf.dropna_table'. - data_columns : list of columns, or True, default None - List of columns to create as indexed data columns for on-disk - queries, or True to use all columns. By default only the axes - of the object are indexed. See `here - `__. - encoding : default None - Provide an encoding for str. - errors : str, default 'strict' - The error handling scheme to use for encoding errors. - The default is 'strict' meaning that encoding errors raise a - UnicodeEncodeError. Other possible values are 'ignore', 'replace' and - 'xmlcharrefreplace' as well as any other name registered with - codecs.register_error that can handle UnicodeEncodeErrors. + # complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default None + # Specifies the compression library to be used. + # These additional compressors for Blosc are supported + # (default if no compressor specified: 'blosc:blosclz'): + # {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', + # 'blosc:zlib', 'blosc:zstd'}. + # Specifying a compression library which is not available issues + # a ValueError. + # complevel : int, 0-9, default None + # Specifies a compression level for data. + # A value of 0 or None disables compression. + # columns : default None + # This parameter is currently not accepted, try data_columns. + # min_itemsize : int, dict, or None + # Dict of columns that specify minimum str sizes. + # nan_rep : str + # Str to use as str nan representation. + # chunksize : int or None + # Size to chunk the writing. + # expectedrows : int + # Expected TOTAL row size of this table. + # dropna : bool, default False, optional + # Do not write an ALL nan row to the store settable + # by the option 'io.hdf.dropna_table'. + # data_columns : list of columns, or True, default None + # List of columns to create as indexed data columns for on-disk + # queries, or True to use all columns. 
By default only the axes + # of the object are indexed. See `here + # <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#query-via-data-columns>`__. + # encoding : default None + # Provide an encoding for str. + # errors : str, default 'strict' + # The error handling scheme to use for encoding errors. + # The default is 'strict' meaning that encoding errors raise a + # UnicodeEncodeError. Other possible values are 'ignore', 'replace' and + # 'xmlcharrefreplace' as well as any other name registered with + # codecs.register_error that can handle UnicodeEncodeErrors. See Also -------- From e532fbd5acf14f9f165f126a843ac1457c442470 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Fri, 19 Apr 2024 18:02:33 +0530 Subject: [PATCH 05/13] restored complib --- pandas/io/pytables.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index db783bc11fb2a..8ec6eafb2e1eb 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1267,14 +1267,14 @@ def append( Write DataFrame index as a column. append : bool, default True Append the input data to the existing. - # complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default None - # Specifies the compression library to be used. - # These additional compressors for Blosc are supported - # (default if no compressor specified: 'blosc:blosclz'): - # {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', - # 'blosc:zlib', 'blosc:zstd'}. - # Specifying a compression library which is not available issues - # a ValueError. + complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default None + Specifies the compression library to be used. + These additional compressors for Blosc are supported + (default if no compressor specified: 'blosc:blosclz'): + {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', + 'blosc:zlib', 'blosc:zstd'}. + Specifying a compression library which is not available issues + a ValueError. # complevel : int, 0-9, default None # Specifies a compression level for data. 
# A value of 0 or None disables compression. From 5d5d0c11e5df5e432b92a24ffcfa85929ece542f Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Fri, 19 Apr 2024 18:26:02 +0530 Subject: [PATCH 06/13] commented out half of complib --- pandas/io/pytables.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 8ec6eafb2e1eb..fef2512480737 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1269,12 +1269,12 @@ def append( Append the input data to the existing. complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default None Specifies the compression library to be used. - These additional compressors for Blosc are supported - (default if no compressor specified: 'blosc:blosclz'): - {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', - 'blosc:zlib', 'blosc:zstd'}. - Specifying a compression library which is not available issues - a ValueError. + # These additional compressors for Blosc are supported + # (default if no compressor specified: 'blosc:blosclz'): + # {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', + # 'blosc:zlib', 'blosc:zstd'}. + # Specifying a compression library which is not available issues + # a ValueError. # complevel : int, 0-9, default None # Specifies a compression level for data. # A value of 0 or None disables compression. From 782bc8531792971af57039af9e1bfcc95e56358e Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Fri, 19 Apr 2024 18:45:09 +0530 Subject: [PATCH 07/13] commented out set from complib --- pandas/io/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fef2512480737..2697a5cc30620 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1267,7 +1267,7 @@ def append( Write DataFrame index as a column. append : bool, default True Append the input data to the existing. 
- complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default None + complib : default None Specifies the compression library to be used. # These additional compressors for Blosc are supported # (default if no compressor specified: 'blosc:blosclz'): From 6032af7eb0c451fc85898363349d6e813e71a47b Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Fri, 19 Apr 2024 19:07:42 +0530 Subject: [PATCH 08/13] changed the type of complib --- pandas/io/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 2697a5cc30620..47c7a6fbe1d19 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1267,7 +1267,7 @@ def append( Write DataFrame index as a column. append : bool, default True Append the input data to the existing. - complib : default None + complib : str Specifies the compression library to be used. # These additional compressors for Blosc are supported # (default if no compressor specified: 'blosc:blosclz'): From 673826732b940cabd178c85e9b7547f5465ece21 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Sat, 20 Apr 2024 08:08:42 +0530 Subject: [PATCH 09/13] uncommented all other parameters --- pandas/io/pytables.py | 58 +++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 47c7a6fbe1d19..441abcd622c90 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1275,35 +1275,35 @@ def append( # 'blosc:zlib', 'blosc:zstd'}. # Specifying a compression library which is not available issues # a ValueError. - # complevel : int, 0-9, default None - # Specifies a compression level for data. - # A value of 0 or None disables compression. - # columns : default None - # This parameter is currently not accepted, try data_columns. - # min_itemsize : int, dict, or None - # Dict of columns that specify minimum str sizes. - # nan_rep : str - # Str to use as str nan representation. 
- # chunksize : int or None - # Size to chunk the writing. - # expectedrows : int - # Expected TOTAL row size of this table. - # dropna : bool, default False, optional - # Do not write an ALL nan row to the store settable - # by the option 'io.hdf.dropna_table'. - # data_columns : list of columns, or True, default None - # List of columns to create as indexed data columns for on-disk - # queries, or True to use all columns. By default only the axes - # of the object are indexed. See `here - # <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#query-via-data-columns>`__. - # encoding : default None - # Provide an encoding for str. - # errors : str, default 'strict' - # The error handling scheme to use for encoding errors. - # The default is 'strict' meaning that encoding errors raise a - # UnicodeEncodeError. Other possible values are 'ignore', 'replace' and - # 'xmlcharrefreplace' as well as any other name registered with - # codecs.register_error that can handle UnicodeEncodeErrors. + complevel : int, 0-9, default None + Specifies a compression level for data. + A value of 0 or None disables compression. + columns : default None + This parameter is currently not accepted, try data_columns. + min_itemsize : int, dict, or None + Dict of columns that specify minimum str sizes. + nan_rep : str + Str to use as str nan representation. + chunksize : int or None + Size to chunk the writing. + expectedrows : int + Expected TOTAL row size of this table. + dropna : bool, default False, optional + Do not write an ALL nan row to the store settable + by the option 'io.hdf.dropna_table'. + data_columns : list of columns, or True, default None + List of columns to create as indexed data columns for on-disk + queries, or True to use all columns. By default only the axes + of the object are indexed. See `here + <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#query-via-data-columns>`__. + encoding : default None + Provide an encoding for str. + errors : str, default 'strict' + The error handling scheme to use for encoding errors. + The default is 'strict' meaning that encoding errors raise a + UnicodeEncodeError. 
Other possible values are 'ignore', 'replace' and + 'xmlcharrefreplace' as well as any other name registered with + codecs.register_error that can handle UnicodeEncodeErrors. See Also -------- From f71d19a2f5db9058dcdebe1187323227578776ef Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Sat, 20 Apr 2024 08:29:31 +0530 Subject: [PATCH 10/13] removed # --- pandas/io/pytables.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 441abcd622c90..952ebd583ed02 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1269,12 +1269,6 @@ def append( Append the input data to the existing. complib : str Specifies the compression library to be used. - # These additional compressors for Blosc are supported - # (default if no compressor specified: 'blosc:blosclz'): - # {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', - # 'blosc:zlib', 'blosc:zstd'}. - # Specifying a compression library which is not available issues - # a ValueError. complevel : int, 0-9, default None Specifies a compression level for data. A value of 0 or None disables compression. From 9e4da39be2009c4ec98607616bbdd93f00dc4b24 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Sat, 20 Apr 2024 09:08:21 +0530 Subject: [PATCH 11/13] changed default value to None --- pandas/io/pytables.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 952ebd583ed02..86ddd85dea789 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1267,8 +1267,10 @@ def append( Write DataFrame index as a column. append : bool, default True Append the input data to the existing. - complib : str + complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default None Specifies the compression library to be used. + Specifying a compression library which is not available issues + a ValueError. complevel : int, 0-9, default None Specifies a compression level for data. 
A value of 0 or None disables compression. From 609dbc1c7b237fa320ec26fa2dd5223034fcd900 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Sat, 20 Apr 2024 10:15:08 +0530 Subject: [PATCH 12/13] changed default value to None --- client.py | 8 ++++++++ pandas/io/pytables.py | 6 ++---- 2 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 client.py diff --git a/client.py b/client.py new file mode 100644 index 0000000000000..8ddaccdabc923 --- /dev/null +++ b/client.py @@ -0,0 +1,8 @@ +import pandas as pd + +df1 = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) +store = pd.HDFStore("store.h5", "w") # doctest: +SKIP +store.put("data", df1, format="table") # doctest: +SKIP +df2 = pd.DataFrame([[5, 6], [7, 8]], columns=["A", "B"]) +store.append("data", df2,complib="blah") # doctest: +SKIP +store.close() # doctest: +SKIP \ No newline at end of file diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 86ddd85dea789..28eb467236832 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1267,10 +1267,8 @@ def append( Write DataFrame index as a column. append : bool, default True Append the input data to the existing. - complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default None - Specifies the compression library to be used. - Specifying a compression library which is not available issues - a ValueError. + complib : default None + This parameter is currently not accepted. complevel : int, 0-9, default None Specifies a compression level for data. A value of 0 or None disables compression. 
From 8f219626756e6c70c73b9a1e8946218f55291a03 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Sat, 20 Apr 2024 10:16:22 +0530 Subject: [PATCH 13/13] deleted client.py --- client.py | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 client.py diff --git a/client.py b/client.py deleted file mode 100644 index 8ddaccdabc923..0000000000000 --- a/client.py +++ /dev/null @@ -1,8 +0,0 @@ -import pandas as pd - -df1 = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) -store = pd.HDFStore("store.h5", "w") # doctest: +SKIP -store.put("data", df1, format="table") # doctest: +SKIP -df2 = pd.DataFrame([[5, 6], [7, 8]], columns=["A", "B"]) -store.append("data", df2,complib="blah") # doctest: +SKIP -store.close() # doctest: +SKIP \ No newline at end of file