7
7
import copy
8
8
import time
9
9
import uuid
10
+ import json
10
11
11
12
from clickhouse_mysql .writer .writer import Writer
12
13
from clickhouse_mysql .event .event import Event
@@ -125,9 +126,17 @@ def insert(self, event_or_events):
125
126
126
127
event_converted = self .convert (event )
127
128
rows = event_converted .pymysqlreplication_event .rows
128
- headers = list (rows [0 ]['values' ].keys ())
129
+ if 'after_values' in rows [0 ].keys ():
130
+ headers = list (rows [0 ]['after_values' ].keys ())
131
+ else :
132
+ headers = list (rows [0 ]['values' ].keys ())
129
133
headers .insert (0 , 'operation' )
130
134
headers .insert (1 , 'tb_upd' )
135
+ headers .insert (2 , 'table' )
136
+ headers .insert (3 , 'schema' )
137
+ headers .insert (4 , 'log_pos' )
138
+ headers .insert (5 , 'binlog_timestamp' )
139
+ headers .insert (6 , 'payload' )
131
140
132
141
# self.fieldnames = sorted(self.convert(copy.copy(event.first_row())).keys())
133
142
self .fieldnames = headers
@@ -136,59 +145,7 @@ def insert(self, event_or_events):
136
145
if self .dst_table is None :
137
146
self .dst_table = event .table
138
147
139
- self .writer = csv .DictWriter (self .file , fieldnames = self .fieldnames , quoting = csv .QUOTE_NONNUMERIC )
140
- if not self .header_written :
141
- self .writer .writeheader ()
142
-
143
- for event in events :
144
- if not event .verify :
145
- logging .warning ('Event verification failed. Skip one event. Event: %s Class: %s' , event .meta (), __class__ )
146
- continue # for event
147
- self .generate_row (event )
148
-
149
- def delete_row (self , event_or_events ):
150
-
151
- # event_or_events = [
152
- # event: {
153
- # row: {'id': 3, 'a': 3}
154
- # },
155
- # event: {
156
- # row: {'id': 3, 'a': 3}
157
- # },
158
- # ]
159
-
160
- logging .debug ("Delete CSV Writer" )
161
-
162
- events = self .listify (event_or_events )
163
- if len (events ) < 1 :
164
- logging .warning ('No events to delete. class: %s' , __class__ )
165
- return
166
-
167
- # assume we have at least one Event
168
-
169
- logging .debug ('class:%s delete %d events' , __class__ , len (events ))
170
-
171
- if not self .opened ():
172
- self .open ()
173
-
174
- if not self .writer :
175
- # pick any event from the list
176
- event = events [0 ]
177
- if not event .verify :
178
- logging .warning ('Event verification failed. Skip insert(). Event: %s Class: %s' , event .meta (), __class__ )
179
- return
180
-
181
- event_converted = self .convert (event )
182
- rows = event_converted .pymysqlreplication_event .rows
183
- headers = list (rows [0 ]['values' ].keys ())
184
- headers .insert (0 , 'operation' )
185
- headers .insert (1 , 'tb_upd' )
186
-
187
- self .fieldnames = headers
188
- if self .dst_schema is None :
189
- self .dst_schema = event .schema
190
- if self .dst_table is None :
191
- self .dst_table = event .table
148
+ self .fieldnames = self .fieldnames [0 :7 ] # get only operation, tb_upd, table and payload
192
149
193
150
self .writer = csv .DictWriter (self .file , fieldnames = self .fieldnames , quoting = csv .QUOTE_NONNUMERIC )
194
151
if not self .header_written :
@@ -200,72 +157,6 @@ def delete_row(self, event_or_events):
200
157
continue # for event
201
158
self .generate_row (event )
202
159
203
-
204
-
205
- def update (self , event_or_events ):
206
-
207
- # event_or_events = [
208
- # event: {
209
- # row: {
210
- # 'before_values': {'id': 3, 'a': 3},
211
- # 'after_values': {'id': 3, 'a': 2}
212
- # }
213
- # },
214
- # event: {
215
- # row: {
216
- # 'before_values': {'id': 2, 'a': 3},
217
- # 'after_values': {'id': 2, 'a': 2}
218
- # }
219
- # },
220
- # ]
221
-
222
- logging .debug ("Update CSV Writer" )
223
-
224
- events = self .listify (event_or_events )
225
- if len (events ) < 1 :
226
- logging .warning ('No events to update. class: %s' , __class__ )
227
- return
228
-
229
- # assume we have at least one Event
230
-
231
- logging .debug ('class:%s updated %d events' , __class__ , len (events ))
232
-
233
- if not self .opened ():
234
- self .open ()
235
-
236
- if not self .writer :
237
- # pick any event from the list
238
- event = events [0 ]
239
- if not event .verify :
240
- logging .warning ('Event verification failed. Skip insert(). Event: %s Class: %s' , event .meta (), __class__ )
241
- return
242
-
243
- event_converted = self .convert (event )
244
- rows = event_converted .pymysqlreplication_event .rows
245
- headers = list (rows [0 ]['after_values' ].keys ())
246
- headers .insert (0 , 'operation' )
247
- headers .insert (1 , 'tb_upd' )
248
-
249
- # self.fieldnames = sorted(headers)
250
- self .fieldnames = headers
251
- if self .dst_schema is None :
252
- self .dst_schema = event .schema
253
- if self .dst_table is None :
254
- self .dst_table = event .table
255
-
256
- self .writer = csv .DictWriter (self .file , fieldnames = self .fieldnames , quoting = csv .QUOTE_NONNUMERIC )
257
- if not self .header_written :
258
- self .writer .writeheader ()
259
-
260
- for event in events :
261
- if not event .verify :
262
- logging .warning ('Event verification failed. Skip one event. Event: %s Class: %s' , event .meta (), __class__ )
263
- continue # for event
264
-
265
- event_converted = self .convert (event )
266
- self .generate_row (event_converted )
267
-
268
-
269
160
def convert_null_values (self , row ):
270
161
""" We need to mark those fields that are null to be able to distinguish between NULL and empty strings """
271
162
for key in list (row .keys ()):
@@ -274,26 +165,22 @@ def convert_null_values(self, row):
274
165
275
166
def generate_row(self, event):
    """Write one CSV row per replication row carried by *event*.

    When using mempool or csvpool, events are cached, so a single list can
    mix insert/update/delete events; the operation code is therefore derived
    from the concrete pymysqlreplication event type here rather than in the
    caller. Each output row carries the metadata columns (operation, tb_upd,
    table, schema, log_pos, binlog_timestamp) plus the full source row
    serialized as JSON in the 'payload' column.
    """
    src_event = event.pymysqlreplication_event

    # Operation code and binlog metadata are per-event, not per-row:
    # compute them once, outside the row loop.
    if isinstance(src_event, WriteRowsEvent):
        operation = 0   # INSERT
    elif isinstance(src_event, DeleteRowsEvent):
        operation = 2   # DELETE
    else:
        operation = 1   # UPDATE (any other row event type)

    # NOTE(review): the schema name is truncated at the first '_' —
    # presumably to strip a shard/environment suffix; confirm against the
    # schema naming convention used by the callers.
    schema = str(event.schema).split('_')[0]
    log_pos = src_event.packet.log_pos
    binlog_timestamp = src_event.timestamp

    for row in event:
        # Build a fresh dict per row so no key can leak between iterations.
        out_row = {
            'operation': operation,
            # datetime.utcnow() is deprecated since Python 3.12; kept here
            # for output compatibility (naive UTC timestamp, no offset).
            'tb_upd': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S.%f'),
            'table': event.table,
            'schema': schema,
            'log_pos': log_pos,
            'binlog_timestamp': binlog_timestamp,
        }
        # Mark NULL fields before serializing so NULL vs empty string
        # survives the JSON round-trip.
        self.convert_null_values(row)
        # default=str stringifies non-JSON-native values (dates, decimals).
        out_row['payload'] = json.dumps(row, default=str)
        self.writer.writerow(self.convert(out_row))
297
184
298
185
def push (self ):
299
186
if not self .next_writer_builder or not self .fieldnames :
0 commit comments