Skip to content

Commit c7a3c17

Browse files
author
Dylan Terry
committed
This commit addresses an issue where acknowledgments (ACKs) were sometimes sent to the master before binlog events were fully written and fsynced to disk during backup operations. Sending ACKs prematurely in semi-synchronous replication could lead to data loss if the replica fails after sending the ACK but before persisting the event.
Key changes:
- Introduced an `EventHandler` interface with a `HandleEvent` method for processing binlog events. This allows custom event handling logic to be injected into the replication stream.
- Added an `eventHandler` field to `BinlogSyncer` and provided a `SetEventHandler` method to assign an event handler. This enables `BinlogSyncer` to delegate event processing to the assigned handler.
- Implemented `BackupEventHandler`, which writes binlog events to disk and ensures that each event is fsynced before returning. This ensures data durability before ACKs are sent.
- Modified the `onStream` method in `BinlogSyncer` to separate event parsing (`parseEvent`) from event handling and ACK sending (`handleEventAndACK`). This adheres to the single-responsibility principle and makes the code cleaner.
- Moved state updates (e.g., updating `b.nextPos`) and GTID set handling from `parseEvent` to `handleEventAndACK` to avoid side effects during parsing.
- Ensured that ACKs are sent only after the event has been fully processed and fsynced, by sending the ACK in `handleEventAndACK` after event handling.
1 parent 3a665d0 commit c7a3c17

File tree

3 files changed

+253
-72
lines changed

3 files changed

+253
-72
lines changed

replication/backup.go

+112-45
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"io"
66
"os"
77
"path"
8+
"sync"
89
"time"
910

1011
. "github.com/go-mysql-org/go-mysql/mysql"
@@ -41,77 +42,143 @@ func (b *BinlogSyncer) StartBackupWithHandler(p Position, timeout time.Duration,
4142
// Force use raw mode
4243
b.parser.SetRawMode(true)
4344

45+
// Set up the backup event handler
46+
backupHandler := &BackupEventHandler{
47+
handler: handler,
48+
}
49+
50+
if b.cfg.SyncMode == SyncModeSync {
51+
// Set the event handler in BinlogSyncer for synchronous mode
52+
b.SetEventHandler(backupHandler)
53+
}
54+
4455
s, err := b.StartSync(p)
4556
if err != nil {
4657
return errors.Trace(err)
4758
}
4859

49-
var filename string
50-
var offset uint32
51-
52-
var w io.WriteCloser
5360
defer func() {
54-
var closeErr error
55-
if w != nil {
56-
closeErr = w.Close()
57-
}
58-
if retErr == nil {
59-
retErr = closeErr
61+
b.SetEventHandler(nil) // Reset the event handler
62+
if backupHandler.w != nil {
63+
closeErr := backupHandler.w.Close()
64+
if retErr == nil {
65+
retErr = closeErr
66+
}
6067
}
6168
}()
6269

63-
for {
64-
ctx, cancel := context.WithTimeout(context.Background(), timeout)
65-
e, err := s.GetEvent(ctx)
66-
cancel()
70+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
71+
defer cancel()
6772

68-
if err == context.DeadlineExceeded {
73+
if b.cfg.SyncMode == SyncModeSync {
74+
// Synchronous mode: wait for completion or error
75+
select {
76+
case <-ctx.Done():
6977
return nil
70-
}
71-
72-
if err != nil {
78+
case <-b.ctx.Done():
79+
return nil
80+
case err := <-s.ech:
7381
return errors.Trace(err)
7482
}
83+
} else {
84+
// Asynchronous mode: consume events from the streamer
85+
for {
86+
select {
87+
case <-ctx.Done():
88+
return nil
89+
case <-b.ctx.Done():
90+
return nil
91+
case err := <-s.ech:
92+
return errors.Trace(err)
93+
case e := <-s.ch:
94+
err = backupHandler.HandleEvent(e)
95+
if err != nil {
96+
return errors.Trace(err)
97+
}
98+
}
99+
}
100+
}
101+
}
75102

76-
offset = e.Header.LogPos
103+
// BackupEventHandler handles writing events for backup
104+
type BackupEventHandler struct {
105+
handler func(binlogFilename string) (io.WriteCloser, error)
106+
w io.WriteCloser
107+
mutex sync.Mutex
108+
fsyncedChan chan struct{}
109+
eventCount int // eventCount used for testing
77110

78-
if e.Header.EventType == ROTATE_EVENT {
79-
rotateEvent := e.Event.(*RotateEvent)
80-
filename = string(rotateEvent.NextLogName)
111+
filename string
112+
}
81113

82-
if e.Header.Timestamp == 0 || offset == 0 {
83-
// fake rotate event
84-
continue
85-
}
86-
} else if e.Header.EventType == FORMAT_DESCRIPTION_EVENT {
87-
// FormateDescriptionEvent is the first event in binlog, we will close old one and create a new
114+
func (h *BackupEventHandler) HandleEvent(e *BinlogEvent) error {
115+
h.mutex.Lock()
116+
defer h.mutex.Unlock()
88117

89-
if w != nil {
90-
if err = w.Close(); err != nil {
91-
w = nil
92-
return errors.Trace(err)
93-
}
94-
}
118+
var err error
95119

96-
if len(filename) == 0 {
97-
return errors.Errorf("empty binlog filename for FormateDescriptionEvent")
98-
}
120+
// Update the offset
121+
offset := e.Header.LogPos
99122

100-
w, err = handler(filename)
101-
if err != nil {
102-
return errors.Trace(err)
103-
}
123+
if e.Header.EventType == ROTATE_EVENT {
124+
rotateEvent := e.Event.(*RotateEvent)
125+
h.filename = string(rotateEvent.NextLogName)
104126

105-
// write binlog header fe'bin'
106-
if _, err = w.Write(BinLogFileHeader); err != nil {
127+
if e.Header.Timestamp == 0 || offset == 0 {
128+
// Fake rotate event, skip processing
129+
return nil
130+
}
131+
} else if e.Header.EventType == FORMAT_DESCRIPTION_EVENT {
132+
// Close the current writer and open a new one
133+
if h.w != nil {
134+
if err = h.w.Close(); err != nil {
135+
h.w = nil
107136
return errors.Trace(err)
108137
}
109138
}
110139

111-
if n, err := w.Write(e.RawData); err != nil {
140+
if len(h.filename) == 0 {
141+
return errors.Errorf("empty binlog filename for FormatDescriptionEvent")
142+
}
143+
144+
h.w, err = h.handler(h.filename)
145+
if err != nil {
146+
return errors.Trace(err)
147+
}
148+
149+
// Write binlog header fe'bin'
150+
_, err = h.w.Write(BinLogFileHeader)
151+
if err != nil {
112152
return errors.Trace(err)
113-
} else if n != len(e.RawData) {
153+
}
154+
}
155+
156+
// Write raw event data to the current writer
157+
if h.w != nil {
158+
n, err := h.w.Write(e.RawData)
159+
if err != nil {
160+
return errors.Trace(err)
161+
}
162+
if n != len(e.RawData) {
114163
return errors.Trace(io.ErrShortWrite)
115164
}
165+
166+
// Perform Sync if the writer supports it
167+
if f, ok := h.w.(*os.File); ok {
168+
if err := f.Sync(); err != nil {
169+
return errors.Trace(err)
170+
}
171+
// Signal that fsync has completed
172+
if h.fsyncedChan != nil {
173+
h.fsyncedChan <- struct{}{}
174+
}
175+
}
176+
} else {
177+
// If writer is nil and event is not FORMAT_DESCRIPTION_EVENT, we can't write
178+
// This should not happen if events are in expected order
179+
return errors.New("writer is not initialized")
116180
}
181+
182+
h.eventCount++
183+
return nil
117184
}

replication/backup_test.go

+46
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,49 @@ func (t *testSyncerSuite) TestStartBackupEndInGivenTime() {
4747
t.T().Fatal("time out error")
4848
}
4949
}
50+
51+
// TestAsyncBackup runs the backup process in asynchronous mode and verifies binlog file creation.
52+
func (t *testSyncerSuite) TestAsyncBackup() {
53+
testSyncModeBackup(t, SyncModeAsync)
54+
}
55+
56+
// TestSyncBackup runs the backup process in synchronous mode and verifies binlog file creation.
57+
func (t *testSyncerSuite) TestSyncBackup() {
58+
testSyncModeBackup(t, SyncModeSync)
59+
}
60+
61+
// testSyncModeBackup is a helper function that runs the backup process for a given sync mode and checks that at least one binlog file was written to the backup directory.
62+
func testSyncModeBackup(t *testSyncerSuite, syncMode SyncMode) {
63+
t.setupTest(mysql.MySQLFlavor)
64+
t.b.cfg.SemiSyncEnabled = false // Ensure semi-sync is disabled
65+
t.b.cfg.SyncMode = syncMode // Set the sync mode
66+
67+
binlogDir := "./var"
68+
os.RemoveAll(binlogDir)
69+
timeout := 3 * time.Second
70+
71+
done := make(chan bool)
72+
73+
// Start the backup process in a goroutine
74+
go func() {
75+
err := t.b.StartBackup(binlogDir, mysql.Position{Name: "", Pos: uint32(0)}, timeout)
76+
require.NoError(t.T(), err)
77+
done <- true
78+
}()
79+
80+
failTimeout := 2 * timeout
81+
ctx, cancel := context.WithTimeout(context.Background(), failTimeout)
82+
defer cancel()
83+
84+
// Wait for the backup to complete or timeout
85+
select {
86+
case <-done:
87+
// Check if binlog files are written to the specified directory
88+
files, err := os.ReadDir(binlogDir)
89+
require.NoError(t.T(), err, "Failed to read binlog directory")
90+
require.Greater(t.T(), len(files), 0, "Binlog files were not written to the directory")
91+
t.T().Logf("Backup completed successfully in %v mode with %d binlog file(s).", syncMode, len(files))
92+
case <-ctx.Done():
93+
t.T().Fatalf("Timeout error during backup in %v mode.", syncMode)
94+
}
95+
}

0 commit comments

Comments
 (0)