Skip to content

Commit 52d4d6a

Browse files
committed
fix: multiprocessing example
1 parent 0dcc35b commit 52d4d6a

File tree

1 file changed

+61
-61
lines changed

1 file changed

+61
-61
lines changed

examples/import_data_set_multiprocessing.py

Lines changed: 61 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
https://github.com/toddwschneider/nyc-taxi-data
55
"""
66
import concurrent.futures
7+
import gzip
78
import io
89
import multiprocessing
910
from collections import OrderedDict
@@ -92,10 +93,10 @@ def parse_row(row: OrderedDict):
9293

9394
return Point("taxi-trip-data") \
9495
.tag("dispatching_base_num", row['dispatching_base_num']) \
95-
.tag("PULocationID", row['PULocationID']) \
96-
.tag("DOLocationID", row['DOLocationID']) \
96+
.tag("PULocationID", row['PUlocationID']) \
97+
.tag("DOLocationID", row['DOlocationID']) \
9798
.tag("SR_Flag", row['SR_Flag']) \
98-
.field("dropoff_datetime", row['dropoff_datetime']) \
99+
.field("dropoff_datetime", row['dropOff_datetime']) \
99100
.time(row['pickup_datetime']) \
100101
.to_line_protocol()
101102

@@ -141,80 +142,79 @@ def init_counter(counter, progress, queue):
141142
progress_ = Value('i', 0)
142143
startTime = datetime.now()
143144

144-
url = "https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2019-01.csv"
145-
# url = "file:///Users/bednar/Developer/influxdata/influxdb-client-python/examples/fhv_tripdata_2019-01.csv"
145+
url = "https://github.com/DataTalksClub/nyc-tlc-data/releases/download/fhv/fhv_tripdata_2019-01.csv.gz"
146146

147147
"""
148148
Open URL and stream data
149149
"""
150150
response = urlopen(url)
151151
if response.headers:
152152
content_length = response.headers['Content-length']
153-
io_wrapper = ProgressTextIOWrapper(response)
154-
io_wrapper.progress = progress_
155153

156154
"""
157-
Start writer as a new process
155+
Open GZIP stream
158156
"""
159-
writer = InfluxDBWriter(queue_)
160-
writer.start()
157+
with gzip.open(response, 'rb') as stream:
158+
io_wrapper = ProgressTextIOWrapper(stream, encoding='utf-8')
159+
io_wrapper.progress = progress_
161160

162-
"""
163-
Create process pool for parallel encoding into LineProtocol
164-
"""
165-
cpu_count = multiprocessing.cpu_count()
166-
with concurrent.futures.ProcessPoolExecutor(cpu_count, initializer=init_counter,
167-
initargs=(counter_, progress_, queue_)) as executor:
168161
"""
169-
Converts incoming HTTP stream into sequence of LineProtocol
162+
Start writer as a new process
170163
"""
171-
data = rx \
172-
.from_iterable(DictReader(io_wrapper)) \
173-
.pipe(ops.buffer_with_count(10_000),
174-
# Parse 10_000 rows into LineProtocol on subprocess
175-
ops.flat_map(lambda rows: executor.submit(parse_rows, rows, content_length)))
164+
writer = InfluxDBWriter(queue_)
165+
writer.start()
176166

177167
"""
178-
Write data into InfluxDB
168+
Create process pool for parallel encoding into LineProtocol
179169
"""
180-
data.subscribe(on_next=lambda x: None, on_error=lambda ex: print(f'Unexpected error: {ex}'))
181-
182-
"""
183-
Terminate Writer
184-
"""
185-
queue_.put(None)
186-
queue_.join()
170+
cpu_count = multiprocessing.cpu_count()
171+
with concurrent.futures.ProcessPoolExecutor(cpu_count, initializer=init_counter,
172+
initargs=(counter_, progress_, queue_)) as executor:
173+
"""
174+
Converts incoming HTTP stream into sequence of LineProtocol
175+
"""
176+
data = rx \
177+
.from_iterable(DictReader(io_wrapper)) \
178+
.pipe(ops.buffer_with_count(10_000),
179+
# Parse 10_000 rows into LineProtocol on subprocess
180+
ops.map(lambda rows: executor.submit(parse_rows, rows, content_length)))
181+
182+
"""
183+
Write data into InfluxDB
184+
"""
185+
data.subscribe(on_next=lambda x: None, on_error=lambda ex: print(f'Unexpected error: {ex}'))
187186

188-
print()
189-
print(f'Import finished in: {datetime.now() - startTime}')
190-
print()
191-
192-
"""
193-
Querying 10 pickups from dispatching 'B00008'
194-
"""
195-
query = 'from(bucket:"my-bucket")' \
196-
'|> range(start: 2019-01-01T00:00:00Z, stop: now()) ' \
197-
'|> filter(fn: (r) => r._measurement == "taxi-trip-data")' \
198-
'|> filter(fn: (r) => r.dispatching_base_num == "B00008")' \
199-
'|> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")' \
200-
'|> rename(columns: {_time: "pickup_datetime"})' \
201-
'|> drop(columns: ["_start", "_stop"])|> limit(n:10, offset: 0)'
202-
203-
client = InfluxDBClient(url="http://localhost:8086", token="my-token", org="my-org", debug=False)
204-
result = client.query_api().query(query=query)
187+
"""
188+
Terminate Writer
189+
"""
190+
queue_.put(None)
191+
queue_.join()
205192

206-
"""
207-
Processing results
208-
"""
209-
print()
210-
print("=== Querying 10 pickups from dispatching 'B00008' ===")
211-
print()
212-
for table in result:
213-
for record in table.records:
214-
print(
215-
f'Dispatching: {record["dispatching_base_num"]} pickup: {record["pickup_datetime"]} dropoff: {record["dropoff_datetime"]}')
193+
print()
194+
print(f'Import finished in: {datetime.now() - startTime}')
195+
print()
216196

217-
"""
218-
Close client
219-
"""
220-
client.close()
197+
"""
198+
Querying 10 pickups from dispatching 'B00008'
199+
"""
200+
query = 'from(bucket:"my-bucket")' \
201+
'|> range(start: 2019-01-01T00:00:00Z, stop: now()) ' \
202+
'|> filter(fn: (r) => r._measurement == "taxi-trip-data")' \
203+
'|> filter(fn: (r) => r.dispatching_base_num == "B00008")' \
204+
'|> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")' \
205+
'|> rename(columns: {_time: "pickup_datetime"})' \
206+
'|> drop(columns: ["_start", "_stop"])|> limit(n:10, offset: 0)'
207+
208+
with InfluxDBClient(url="http://localhost:8086", token="my-token", org="my-org", debug=False) as client:
209+
result = client.query_api().query(query=query)
210+
211+
"""
212+
Processing results
213+
"""
214+
print()
215+
print("=== Querying 10 pickups from dispatching 'B00008' ===")
216+
print()
217+
for table in result:
218+
for record in table.records:
219+
print(
220+
f'Dispatching: {record["dispatching_base_num"]} pickup: {record["pickup_datetime"]} dropoff: {record["dropoff_datetime"]}')

0 commit comments

Comments
 (0)