Commit 28e2040

heitorlessa authored and rubenfonseca committed
docs: fix seek positioning and byte size
1 parent 23f672a commit 28e2040

File tree

3 files changed: +6, -8 lines changed


docs/utilities/streaming.md (+2, -2)

@@ -107,7 +107,7 @@ For example, let's imagine you have a large CSV file, each row has a non-uniform
 
 You found out the last row has exactly 30 bytes. We can use `seek()` to skip to the end of the file, read 30 bytes, then transform to CSV.
 
-```python title="Reading only the last CSV row" hl_lines="16 18"
+```python title="Reading only the last CSV row" hl_lines="16 19"
 --8<-- "examples/streaming/src/s3_csv_stream_non_uniform_seek.py"
 ```

@@ -121,7 +121,7 @@ You can also solve with `seek`, but let's take a large uniform CSV file to make
 --8<-- "examples/streaming/src/uniform_sample.csv"
 ```
 
-You found out that each row has 8 bytes, the header line has 22 bytes, and every new line has 1 byte.
+You found out that each row has 8 bytes, the header line has 21 bytes, and every new line has 1 byte.
 
 You want to skip the first 100 lines.

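The 22 → 21 correction can be sanity-checked in plain Python: joining the example's `CSV_HEADERS` with commas reproduces the header row, and its length is 21 bytes before the newline (a minimal sketch, assuming the header row is exactly the comma-joined field names):

```python
# Sanity check for the byte sizes the corrected docs quote.
CSV_HEADERS = ["reading", "position", "type"]

header_row = ",".join(CSV_HEADERS)
print(len(header_row))  # 21 -> header data, excluding the 1-byte newline
```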
examples/streaming/src/s3_csv_stream_non_uniform_seek.py (+2, -4)

@@ -12,10 +12,8 @@
 def lambda_handler(event: Dict[str, str], context: LambdaContext):
     sample_csv = S3Object(bucket=event["bucket"], key="sample.csv")
 
-    # Jump to the end of the file
-    sample_csv.seek(0, io.SEEK_END)
-    # From the current position, jump exactly 30 bytes
-    sample_csv.seek(sample_csv.tell() - LAST_ROW_SIZE, io.SEEK_SET)
+    # From the end of the file, jump exactly 30 bytes backwards
+    sample_csv.seek(-LAST_ROW_SIZE, io.SEEK_END)
 
     # Transform portion of data into CSV with our headers
     sample_csv.transform(CsvTransform(fieldnames=CSV_HEADERS), in_place=True)

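The replacement relies on standard Python file-object semantics: `seek()` with a negative offset and `io.SEEK_END` positions relative to the end of the stream, so the previous seek-to-end-then-`tell()` arithmetic collapses into a single call. A minimal sketch with `io.BytesIO` standing in for the S3-backed object (the data is made up; only the file-like `seek`/`read` interface matters):

```python
import io

LAST_ROW_SIZE = 30

# 970 bytes of filler followed by a 30-byte "last row".
buf = io.BytesIO(b"x" * 970 + b"y" * LAST_ROW_SIZE)

# Negative offset + SEEK_END: position 30 bytes before the end in one call.
buf.seek(-LAST_ROW_SIZE, io.SEEK_END)
last_row = buf.read()
print(len(last_row))  # 30
```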
examples/streaming/src/s3_csv_stream_seek.py (+2, -2)

@@ -17,7 +17,7 @@
 
 CSV_HEADERS = ["reading", "position", "type"]
 ROW_SIZE = 8 + 1 # 1 byte newline
-HEADER_SIZE = 22 + 1 # 1 byte newline
+HEADER_SIZE = 21 + 1 # 1 byte newline
 LINES_TO_JUMP = 100
 

@@ -28,7 +28,7 @@ def lambda_handler(event: Dict[str, str], context: LambdaContext):
     sample_csv.seek(HEADER_SIZE, io.SEEK_SET)
 
     # Jump 100 lines of 9 bytes each (8 bytes of data + 1 byte newline)
-    sample_csv.seek(LINES_TO_JUMP * ROW_SIZE, io.SEEK_SET)
+    sample_csv.seek(LINES_TO_JUMP * ROW_SIZE, io.SEEK_CUR)
 
     sample_csv.transform(CsvTransform(), in_place=True)
     for row in sample_csv:

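The `SEEK_SET` → `SEEK_CUR` change matters because the handler has already seeked past the header: `io.SEEK_CUR` advances relative to the current position, while `io.SEEK_SET` measures from byte 0 and would land `HEADER_SIZE` bytes short, in the middle of a row. A minimal sketch with `io.BytesIO` and made-up 8-byte rows in place of the S3 object:

```python
import io

ROW_SIZE = 8 + 1      # 8 bytes of data + 1 byte newline
HEADER_SIZE = 21 + 1  # 21 bytes of header + 1 byte newline
LINES_TO_JUMP = 2     # the docs use 100; 2 keeps the sketch small

# Hypothetical uniform CSV: every data row is exactly 9 bytes.
header = b"reading,position,type\n"
rows = [f"{i},10,bar\n".encode() for i in range(5)]
buf = io.BytesIO(header + b"".join(rows))

buf.seek(HEADER_SIZE, io.SEEK_SET)  # skip the header first
# SEEK_CUR keeps the header offset; SEEK_SET would restart from byte 0
# and land HEADER_SIZE bytes short, mid-row.
buf.seek(LINES_TO_JUMP * ROW_SIZE, io.SEEK_CUR)
print(buf.readline())  # b'2,10,bar\n'
```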