Merge pull request pandas-dev#794 from shashank88/no_bson

shashank88 · web-flow · commit 37a744d16806 · 2019-07-05T18:35:00.000+01:00
Add a config option to skip bson encoding in pickle store
diff --git a/arctic/_config.py b/arctic/_config.py
@@ -103,3 +103,10 @@ class FwPointersCfg(Enum):
 # Flag used for indicating caching levels. For now just for list_libraries.
 # -------------------------------
 ENABLE_CACHE = not bool(os.environ.get('ARCTIC_DISABLE_CACHE'))
+
+# -------------------------------
+# Currently we try to bson encode if the data is less than a given size and store it in
+# the version collection, but pickling might be preferable if we have characters that don't
+# play well with the bson encoder or if you always want your data in the data collection.
+# -------------------------------
+SKIP_BSON_ENCODE_PICKLE_STORE = bool(os.environ.get('SKIP_BSON_ENCODE_PICKLE_STORE'))
diff --git a/arctic/store/_pickle_store.py b/arctic/store/_pickle_store.py
@@ -11,6 +11,8 @@
 from ._version_store_utils import checksum, pickle_compat_load, version_base_or_id
 from .._compression import decompress, compress_array
 from ..exceptions import UnsupportedPickleStoreVersion
+from .._config import SKIP_BSON_ENCODE_PICKLE_STORE
+
 
 # new versions of chunked pickled objects MUST begin with __chunked__
 _MAGIC_CHUNKED = '__chunked__'
@@ -75,15 +77,19 @@ def read(self, mongoose_lib, version, symbol, **kwargs):
     def read_options():
         return []
 
-    def write(self, arctic_lib, version, symbol, item, previous_version):
-        try:
-            # If it's encodeable, then ship it
-            b = bson.BSON.encode({'data': item})
-            if len(b) < _MAX_BSON_ENCODE:
-                version['data'] = item
-                return
-        except InvalidDocument:
-            pass
+    def write(self, arctic_lib, version, symbol, item, _previous_version):
+        # Currently we try to bson encode if the data is less than a given size and store it in
+        # the version collection, but pickling might be preferable if we have characters that don't
+        # play well with the bson encoder or if you always want your data in the data collection.
+        if not SKIP_BSON_ENCODE_PICKLE_STORE:
+            try:
+                # If it's encodeable, then ship it
+                b = bson.BSON.encode({'data': item})
+                if len(b) < _MAX_BSON_ENCODE:
+                    version['data'] = item
+                    return
+            except InvalidDocument:
+                pass
 
         # Pickle, chunk and store the data
         collection = arctic_lib.get_top_level_collection()