From 3fdd10e06683e5300b1efbf9b9f230d6adb9eef5 Mon Sep 17 00:00:00 2001 From: Neradoc Date: Wed, 5 Mar 2025 21:02:29 +0100 Subject: [PATCH 1/3] iterator on objects --- adafruit_json_stream.py | 35 +++++++++++++++++++++++++++++++++++ tests/test_json_stream.py | 19 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/adafruit_json_stream.py b/adafruit_json_stream.py index 4b9da92..fe8d8ef 100644 --- a/adafruit_json_stream.py +++ b/adafruit_json_stream.py @@ -239,6 +239,41 @@ def __getitem__(self, key): self.done = self.data.fast_forward(",") raise KeyError(key) + def __iter__(self): + return self + + def _next_item(self): + """Return the next item as a (key, value) pair, regardless of key.""" + if self.active_child: + self.active_child.finish() + self.done = self.data.fast_forward(",") + self.active_child = None + if self.done: + raise StopIteration() + + current_key = self.data.next_value(":") + if current_key is None: + self.done = True + raise StopIteration() + + next_value = self.data.next_value(",") + if self.data.last_char == ord("}"): + self.done = True + if isinstance(next_value, Transient): + self.active_child = next_value + return (current_key, next_value) + + def __next__(self): + return self._next_item()[0] + + def items(self): + """Return iterator ine the dictionary’s items ((key, value) pairs).""" + try: + while not self.done: + yield self._next_item() + except StopIteration: + return + def load(data_iter): """Returns an object to represent the top level of the given JSON stream.""" diff --git a/tests/test_json_stream.py b/tests/test_json_stream.py index 04f4faa..1a5697c 100644 --- a/tests/test_json_stream.py +++ b/tests/test_json_stream.py @@ -685,3 +685,22 @@ def test_as_object_grabbing_multiple_subscriptable_levels_again_after_passed_rai assert next(dict_1["sub_list"]) == "a" with pytest.raises(KeyError, match="sub_dict"): dict_1["sub_dict"]["sub_dict_name"] + + +def test_iterating_keys(dict_with_keys): + """Iterate through keys of a simple object""" + + bytes_io_chunk = BytesChunkIO(dict_with_keys.encode()) + stream = adafruit_json_stream.load(bytes_io_chunk) + output = list(stream) + assert output == ["field_1", "field_2", "field_3"] + + +def test_iterating_items(dict_with_keys): + """Iterate through items of a simple object""" + + bytes_io_chunk = BytesChunkIO(dict_with_keys.encode()) + stream = adafruit_json_stream.load(bytes_io_chunk) + output = list(stream.items()) + assert output == [("field_1", 1), ("field_2", 2), ("field_3", 3)] + From f2ed1a1ce7bc6cc81ba1e1317819c9b0ed8783ca Mon Sep 17 00:00:00 2001 From: Neradoc Date: Tue, 18 Mar 2025 01:49:49 +0100 Subject: [PATCH 2/3] add example that uses .items() and tests for iterating after already accessing an item --- examples/json_stream_advanced.py | 63 ++++++++++++++++++++++++++++++++ tests/test_json_stream.py | 23 +++++++++++- 2 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 examples/json_stream_advanced.py diff --git a/examples/json_stream_advanced.py b/examples/json_stream_advanced.py new file mode 100644 index 0000000..39e0448 --- /dev/null +++ b/examples/json_stream_advanced.py @@ -0,0 +1,63 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 Scott Shawcroft for Adafruit Industries +# +# SPDX-License-Identifier: Unlicense + +import sys +import time + +import adafruit_json_stream as json_stream + +# import json_stream + + +class FakeResponse: + def __init__(self, file): + self.file = file + + def iter_content(self, chunk_size): + while True: + yield self.file.read(chunk_size) + + +f = open(sys.argv[1], "rb") # pylint: disable=consider-using-with +obj = json_stream.load(FakeResponse(f).iter_content(32)) + + +def find_keys(obj, keys): + """If we don't know the order in which the keys are, + go through all of them and pick the ones we want""" + out = dict() + # iterate on the items of an object + for key, value in obj.items(): + if key in keys: + # if it's a sub object, get it all + if isinstance(value, json_stream.Transient): + value = value.as_object() + out[key] = value + return out + +def time_to_date(stamp): + tt = time.localtime(stamp) + month = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"][tt.tm_mon] + return f"{tt.tm_mday:2d}th of {month}" + +def ftoc(temp): + return (temp - 32) * 5 / 9 + +currently = obj["currently"] +print("Currently:") +print(" ", time_to_date(currently["time"])) +print(" ", currently["icon"]) + +# iterate on the content of a list +for i, day in enumerate(obj["daily"]["data"]): + day_items = find_keys(day, ("time", "summary", "temperatureHigh")) + date = time_to_date(day_items["time"]) + print( + f'On {date}: {day_items["summary"]},', + f'Max: {int(day_items["temperatureHigh"])}F', + f'({int(ftoc(day_items["temperatureHigh"]))}C)' + ) + + if i > 4: + break diff --git a/tests/test_json_stream.py b/tests/test_json_stream.py index 1a5697c..1afc2d7 100644 --- a/tests/test_json_stream.py +++ b/tests/test_json_stream.py @@ -688,7 +688,7 @@ def test_as_object_grabbing_multiple_subscriptable_levels_again_after_passed_rai def test_iterating_keys(dict_with_keys): - """Iterate through keys of a simple object""" + """Iterate through keys of a simple object.""" bytes_io_chunk = BytesChunkIO(dict_with_keys.encode()) stream = adafruit_json_stream.load(bytes_io_chunk) @@ -697,10 +697,29 @@ def test_iterating_keys(dict_with_keys): def test_iterating_items(dict_with_keys): - """Iterate through items of a simple object""" + """Iterate through items of a simple object.""" bytes_io_chunk = BytesChunkIO(dict_with_keys.encode()) stream = adafruit_json_stream.load(bytes_io_chunk) output = list(stream.items()) assert output == [("field_1", 1), ("field_2", 2), ("field_3", 3)] + +def test_iterating_keys_after_get(dict_with_keys): + """Iterate through keys of a simple object after an item has already been read.""" + + bytes_io_chunk = BytesChunkIO(dict_with_keys.encode()) + stream = adafruit_json_stream.load(bytes_io_chunk) + assert stream["field_1"] == 1 + output = list(stream) + assert output == ["field_2", "field_3"] + + +def test_iterating_items_after_get(dict_with_keys): + """Iterate through items of a simple object after an item has already been read.""" + + bytes_io_chunk = BytesChunkIO(dict_with_keys.encode()) + stream = adafruit_json_stream.load(bytes_io_chunk) + assert stream["field_1"] == 1 + output = list(stream.items()) + assert output == [("field_2", 2), ("field_3", 3)] From a2739d70aa007640c7de73e30ced9eb1f1af256e Mon Sep 17 00:00:00 2001 From: Neradoc Date: Tue, 18 Mar 2025 21:11:14 +0100 Subject: [PATCH 3/3] Rework iteration to only retrieve the key, enable getting the value of the current key. Use common active_key, fix finish(), etc. Rename example and use the key iteration. Additional tests. --- adafruit_json_stream.py | 55 ++++++++++++------- ....py => json_stream_local_file_advanced.py} | 37 ++++++++++--- tests/test_json_stream.py | 37 +++++++++++++ 3 files changed, 100 insertions(+), 29 deletions(-) rename examples/{json_stream_advanced.py => json_stream_local_file_advanced.py} (71%) diff --git a/adafruit_json_stream.py b/adafruit_json_stream.py index fe8d8ef..b5172d7 100644 --- a/adafruit_json_stream.py +++ b/adafruit_json_stream.py @@ -154,7 +154,7 @@ def __init__(self, stream): self.finish_char = "" def finish(self): - """Consume all of the characters for this list from the stream.""" + """Consume all of the characters for this container from the stream.""" if not self.done: if self.active_child: self.active_child.finish() @@ -163,7 +163,8 @@ def finish(self): self.done = True def as_object(self): - """Consume all of the characters for this list from the stream and return as an object.""" + """Consume all of the characters for this container from the stream + and return as an object.""" if self.has_read: raise BufferError("Object has already been partly read.") @@ -207,10 +208,17 @@ class TransientObject(Transient): def __init__(self, stream): super().__init__(stream) self.finish_char = "}" - self.active_child_key = None + self.active_key = None + + def finish(self): + """Consume all of the characters for this container from the stream.""" + if self.active_key and not self.active_child: + self.done = self.data.fast_forward(",") + self.active_key = None + super().finish() def __getitem__(self, key): - if self.active_child and self.active_child_key == key: + if self.active_child and self.active_key == key: return self.active_child self.has_read = True @@ -219,12 +227,16 @@ def __getitem__(self, key): self.active_child.finish() self.done = self.data.fast_forward(",") self.active_child = None - self.active_child_key = None + self.active_key = None if self.done: raise KeyError(key) while not self.done: - current_key = self.data.next_value(":") + if self.active_key: + current_key = self.active_key + self.active_key = None + else: + current_key = self.data.next_value(":") if current_key is None: self.done = True break @@ -234,7 +246,7 @@ def __getitem__(self, key): self.done = True if isinstance(next_value, Transient): self.active_child = next_value - self.active_child_key = key + self.active_key = key return next_value self.done = self.data.fast_forward(",") raise KeyError(key) @@ -242,35 +254,36 @@ def __getitem__(self, key): def __iter__(self): return self - def _next_item(self): - """Return the next item as a (key, value) pair, regardless of key.""" - if self.active_child: - self.active_child.finish() + def _next_key(self): + """Return the next item's key, without consuming the value.""" + if self.active_key: + if self.active_child: + self.active_child.finish() + self.active_child = None self.done = self.data.fast_forward(",") - self.active_child = None + self.active_key = None if self.done: raise StopIteration() + self.has_read = True + current_key = self.data.next_value(":") if current_key is None: self.done = True raise StopIteration() - next_value = self.data.next_value(",") - if self.data.last_char == ord("}"): - self.done = True - if isinstance(next_value, Transient): - self.active_child = next_value - return (current_key, next_value) + self.active_key = current_key + return current_key def __next__(self): - return self._next_item()[0] + return self._next_key() def items(self): - """Return iterator ine the dictionary’s items ((key, value) pairs).""" + """Return iterator in the dictionary’s items ((key, value) pairs).""" try: while not self.done: - yield self._next_item() + key = self._next_key() + yield (key, self[key]) except StopIteration: return diff --git a/examples/json_stream_advanced.py b/examples/json_stream_local_file_advanced.py similarity index 71% rename from examples/json_stream_advanced.py rename to examples/json_stream_local_file_advanced.py index 39e0448..2920619 100644 --- a/examples/json_stream_advanced.py +++ b/examples/json_stream_local_file_advanced.py @@ -23,31 +23,52 @@ def iter_content(self, chunk_size): obj = json_stream.load(FakeResponse(f).iter_content(32)) -def find_keys(obj, keys): +def find_keys(haystack, keys): """If we don't know the order in which the keys are, go through all of them and pick the ones we want""" - out = dict() + out = {} # iterate on the items of an object - for key, value in obj.items(): + for key in haystack: if key in keys: + # retrieve the value only if needed + value = haystack[key] # if it's a sub object, get it all - if isinstance(value, json_stream.Transient): + if hasattr(value, "as_object"): value = value.as_object() out[key] = value return out + +months = [ + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December", +] + + def time_to_date(stamp): tt = time.localtime(stamp) - month = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"][tt.tm_mon] + month = months[tt.tm_mon] return f"{tt.tm_mday:2d}th of {month}" + def ftoc(temp): return (temp - 32) * 5 / 9 + currently = obj["currently"] print("Currently:") -print(" ", time_to_date(currently["time"])) -print(" ", currently["icon"]) +print(" ", time_to_date(currently["time"])) +print(" ", currently["icon"]) # iterate on the content of a list for i, day in enumerate(obj["daily"]["data"]): @@ -56,7 +77,7 @@ def ftoc(temp): print( f'On {date}: {day_items["summary"]},', f'Max: {int(day_items["temperatureHigh"])}F', - f'({int(ftoc(day_items["temperatureHigh"]))}C)' + f'({int(ftoc(day_items["temperatureHigh"]))}C)', ) if i > 4: diff --git a/tests/test_json_stream.py b/tests/test_json_stream.py index 1afc2d7..7ed05c9 100644 --- a/tests/test_json_stream.py +++ b/tests/test_json_stream.py @@ -696,6 +696,18 @@ def test_iterating_keys(dict_with_keys): assert output == ["field_1", "field_2", "field_3"] +def test_iterating_keys_get(dict_with_keys): + """Iterate through keys of a simple object and get values.""" + + the_dict = json.loads(dict_with_keys) + + bytes_io_chunk = BytesChunkIO(dict_with_keys.encode()) + stream = adafruit_json_stream.load(bytes_io_chunk) + for key in stream: + value = stream[key] + assert value == the_dict[key] + + def test_iterating_items(dict_with_keys): """Iterate through items of a simple object.""" @@ -723,3 +735,28 @@ def test_iterating_items_after_get(dict_with_keys): assert stream["field_1"] == 1 output = list(stream.items()) assert output == [("field_2", 2), ("field_3", 3)] + + +def test_iterating_complex_dict(complex_dict): + """Mix iterating over items of objects in objects in arrays.""" + + names = ["one", "two", "three", "four"] + sub_values = [None, "two point one", "three point one", None] + + stream = adafruit_json_stream.load(BytesChunkIO(complex_dict.encode())) + + thing_num = 0 + for (index, item) in enumerate(stream.items()): + key, a_list = item + assert key == f"list_{index+1}" + for thing in a_list: + assert thing["dict_name"] == names[thing_num] + for sub_key in thing["sub_dict"]: + # break after getting a key with or without the value + # (testing finish() called from the parent list) + if sub_key == "sub_dict_name": + if thing_num in {1, 2}: + value = thing["sub_dict"][sub_key] + assert value == sub_values[thing_num] + break + thing_num += 1