Skip to content

Commit 441833d

Browse files
authored
Merge pull request #24 from tinybirdco/fix_log
* avoid panic when trying to parse response
* fix/reorder logs to get better visibility of flow
* improve Dockerfile to launch in container with a mysql db
2 parents a998e4c + d193a21 commit 441833d

File tree

5 files changed

+83
-23
lines changed

5 files changed

+83
-23
lines changed

Dockerfile

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,24 @@
11
#FROM python:3.8
2-
FROM ubuntu:20.04 as build
3-
COPY requirements.txt requirements.txt
4-
RUN apt-get update && apt-get install -y python3-pip libmysqlclient-dev openssh-client mysql-client nano -y iputils-ping
2+
FROM ubuntu:22.04 as build
3+
#COPY requirements.txt requirements.txt
4+
5+
ARG DEBIAN_FRONTEND=noninteractive
6+
run apt update && apt install -y mysql-server software-properties-common
7+
run add-apt-repository ppa:deadsnakes/ppa
8+
9+
RUN apt update && apt install -y python3-pip python3.8 python3.8-venv python3.8-dev libssl-dev libcurl4-openssl-dev libpython3-dev build-essential libmysqlclient-dev autossh mysql-client pkg-config
10+
11+
run mkdir -p /mnt/disks/tb/tinybird_mysql_connector/syncer_files
12+
run chmod 0755 /mnt/disks/tb/tinybird_mysql_connector/
13+
run chmod 0755 /mnt/disks/tb/tinybird_mysql_connector/syncer_files
14+
515

6-
RUN pip3 install --upgrade pip
7-
RUN pip3 install mysqlclient
8-
RUN pip3 install -r requirements.txt
916
ENV LC_ALL=C.UTF-8
1017
ENV LANG=C.UTF-8
11-
#COPY . .
12-
#RUN chmod +x dev_run_config_file.sh
13-
#CMD ["/bin/bash", "dev_run_config_file.sh"]
18+
COPY . .
19+
20+
run python3.8 -m pip wheel --wheel-dir=/tmp/clickhouse-mysql-data-reader/ .
21+
run find /tmp/clickhouse-mysql-data-reader/ -name '*.whl' -exec pip install {} \;
22+
23+
ENTRYPOINT ["./init-docker.sh"]
24+
cmd ["--config-file=./clickhouse-mysql-docker.conf"]

clickhouse-mysql-docker.conf

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
tb_host=https://api.tinybird.co
2+
tb_token=
3+
4+
log_file=/mnt/disks/tb/tinybird_mysql_connector/main.log
5+
log_level=debug
6+
nice_pause=1
7+
8+
binlog_position_file=/mnt/disks/tb/tinybird_mysql_connector/syncer_files/bl-raw_landing-local-pos
9+
10+
mempool=yes
11+
mempool_max_events_num=10
12+
mempool_max_flush_interval=5
13+
csvpool=yes
14+
csvpool_file_path_prefix=/mnt/disks/tb/tinybird_mysql_connector/syncer_files/raw_landing_local_
15+
#csvpool_keep_files=yes
16+
pump_data=yes
17+
18+
src_server_id=1
19+
src_host=127.0.0.1
20+
#src_port=3306
21+
src_user=clickhouse_mysql_reader
22+
src_password=1234
23+
src_schemas=clickhouse_mysql_reader
24+
src_tables=one,two
25+
src_wait=yes
26+
src_resume=yes
27+
28+
dst_table=raw_landing

clickhouse_mysql/writer/csvwriter.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,9 @@ def close(self):
203203
def destroy(self):
204204
if self.delete and os.path.isfile(self.path):
205205
self.close()
206+
if self.next_writer_builder and self.next_writer_builder.get().not_uploaded:
207+
logging.error(f"CSV { self.path } not uploaded into TB")
208+
return
206209
os.remove(self.path)
207210

208211
if __name__ == '__main__':

clickhouse_mysql/writer/tbcsvwriter.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ class TBCSVWriter(Writer):
2222
tb_host = None
2323
tb_token = None
2424

25+
not_uploaded = None
26+
2527
def __init__(
2628
self,
2729
tb_host,
@@ -30,6 +32,7 @@ def __init__(
3032
dst_table=None,
3133
dst_table_prefix=None,
3234
dst_distribute=False,
35+
not_uploaded=False,
3336
):
3437
# if dst_distribute and dst_schema is not None:
3538
# dst_schema += "_all"
@@ -44,16 +47,18 @@ def __init__(
4447
if self.tb_host is None or self.tb_token is None:
4548
logging.critical(
4649
f" Host: {self.tb_host} or token {self.tb_token} is missing")
47-
return None
50+
return
4851

4952
self.dst_schema = dst_schema
5053
self.dst_table = dst_table
5154
self.dst_table_prefix = dst_table_prefix
5255
self.dst_distribute = dst_distribute
56+
self.not_uploaded = not_uploaded
5357

5458

5559
def uploadCSV(self, table, filename, tries=1):
5660
limit_of_retries = 3
61+
self.not_uploaded = False
5762
params = {
5863
'name': table,
5964
'mode': 'append',
@@ -76,32 +81,32 @@ def uploadCSV(self, table, filename, tries=1):
7681
params=params,
7782
verify=False)
7883

79-
# logging.debug(response.text)
80-
logging.info(response.json())
84+
logging.info(response.content) # this is ugly, but we need to check what is in the response for some detected errors
8185
if response.status_code == 200:
8286
json_object = json.loads(response.content)
8387
logging.debug(f"Import id: {json_object['import_id']}")
8488
elif response.status_code == 429:
8589
retry_after = int(response.headers['Retry-After']) + tries
86-
logging.error(
87-
f"Too many requests retrying in {retry_after} seconds to upload {filename } to {table}")
90+
logging.error(f"Too many requests retrying in {retry_after} seconds to upload {filename} to {table}")
8891
time.sleep(retry_after)
8992
self.uploadCSV(table, filename, tries + 1)
9093
else:
91-
# In case of error let's retry only
92-
logging.exception(response.json())
93-
time.sleep(tries)
94-
logging.info(f"Retrying { tries } of { limit_of_retries }")
94+
# In case of error let's retry only `limit_of_retries` times
95+
logging.exception(response.content)
9596
if tries > limit_of_retries:
97+
self.not_uploaded = True
9698
return
99+
logging.info(f"Retrying {filename} when status {response.status_code}, try {tries} of {limit_of_retries}")
100+
time.sleep(tries)
97101
self.uploadCSV(table, filename, tries + 1)
98102
except Exception as e:
99103
logging.exception(e)
100-
# We wait tries^2 sec to try again
101-
time.sleep(tries * tries)
102-
logging.info(f"Retrying { tries } of { limit_of_retries }")
103104
if tries > limit_of_retries:
105+
self.not_uploaded = True
104106
return
107+
# We wait tries^2 sec to try again
108+
logging.info(f"Retrying {filename} when exception: try {tries} of {limit_of_retries}")
109+
time.sleep(tries * tries)
105110
self.uploadCSV(table, filename, tries + 1)
106111

107112
def insert(self, event_or_events=None):
@@ -124,7 +129,7 @@ def insert(self, event_or_events=None):
124129
logging.debug('class:%s insert %d rows', __class__, len(events))
125130

126131
for event in events:
127-
#schema = self.dst_schema if self.dst_schema else event.schema
132+
# schema = self.dst_schema if self.dst_schema else event.schema
128133
table = self.dst_table if self.dst_table else event.table
129134
self.uploadCSV(table, event.filename)
130135

@@ -231,7 +236,7 @@ def update(self, event_or_events=None):
231236
# )
232237

233238
# choptions = ""
234-
# if self.host:
239+
# if self.host:
235240
# choptions += " --host=" + shlex.quote(self.host)
236241
# if self.port:
237242
# choptions += " --port=" + str(self.port)

init-docker.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/bash
2+
3+
mysqld &
4+
sleep 5
5+
mysql -e "create user 'clickhouse_mysql_reader'@'%' identified by '1234';"
6+
mysql -e "create database clickhouse_mysql_reader"
7+
mysql -e "grant super on *.* to 'clickhouse_mysql_reader';"
8+
mysql -e "grant replication slave on *.* to 'clickhouse_mysql_reader';"
9+
10+
mysql -e "use clickhouse_mysql_reader; create table one (id int, field varchar(10));"
11+
mysql -e "use clickhouse_mysql_reader; create table two (id int, field varchar(10));"
12+
13+
clickhouse-mysql $@ #--config-file=./clickhouse-mysql.conf

0 commit comments

Comments
 (0)