Skip to content

Commit 521a7ae

Browse files
committed
Bug#36750146 Ndb : Graceful node shutdown does not avoid event duplicates
Graceful node stop is intended to carefully hand over responsibility for forwarding event streams from the stopping node to the still-running nodes, so that event subscribers are not sent duplicate events, as can happen after an unexpected node failure. However, this was not working correctly, which could lead to issues with event merge and other event consumers even when performing a graceful node stop. The problem is fixed, and two new tests covering the problem with graceful stop are added: test_event -n EventConsumer_Graceful and test_event -n MergeEventConsumer_Graceful. Change-Id: I2effa2e0908c920e48ac235a5e0f3a1fce10413a
1 parent 7337680 commit 521a7ae

File tree

3 files changed

+523
-12
lines changed

3 files changed

+523
-12
lines changed

storage/ndb/src/kernel/blocks/suma/Suma.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4758,6 +4758,11 @@ void Suma::doFIRE_TRIG_ORD(Signal *signal, LinearSectionPtr lsptr[3]) {
47584758

47594759
Uint32 bucket = hashValue % c_no_of_buckets;
47604760
m_max_seen_gci = (gci > m_max_seen_gci ? gci : m_max_seen_gci);
4761+
/**
4762+
* Normally a bucket is either in the active or switchover set, or neither.
4763+
* Exception is during add/drop NG when an active bucket may be in the
4764+
* switchover set for notifying subscribers of change.
4765+
*/
47614766
if (m_active_buckets.get(bucket) ||
47624767
(m_switchover_buckets.get(bucket) && (check_switchover(bucket, gci)))) {
47634768
jam();
@@ -6207,9 +6212,10 @@ void Suma::execSUMA_HANDOVER_REQ(Signal *signal) {
62076212
nodegroup.clear(nodeId);
62086213
if (get_responsible_node(i) == nodeId &&
62096214
get_responsible_node(i, nodegroup) == getOwnNodeId()) {
6210-
// I'm will be running this bucket when nodeId shutdown
6215+
// I will be running this bucket when nodeId shutdown
62116216
jam();
62126217
tmp.set(i);
6218+
ndbassert(!m_active_buckets.get(i));
62136219
m_switchover_buckets.set(i);
62146220
c_buckets[i].m_switchover_gci = (Uint64(start_gci) << 32) - 1;
62156221
c_buckets[i].m_state |= Bucket::BUCKET_SHUTDOWN_TO;
@@ -6295,6 +6301,7 @@ void Suma::execSUMA_HANDOVER_CONF(Signal *signal) {
62956301
gci, buf, c_no_of_buckets);
62966302
g_eventLogger->info("Suma: handover from node %u gci: %u buckets: %s (%u)",
62976303
nodeId, gci, buf, c_no_of_buckets);
6304+
ndbassert(!m_active_buckets.overlaps(tmp));
62986305
m_switchover_buckets.bitOR(tmp);
62996306
ndbrequire(c_startup.m_handover_nodes.get(nodeId));
63006307
c_startup.m_handover_nodes.clear(nodeId);
@@ -6317,6 +6324,7 @@ void Suma::execSUMA_HANDOVER_CONF(Signal *signal) {
63176324
buf, c_no_of_buckets);
63186325
g_eventLogger->info("Suma: handover to node %u gci: %u buckets: %s (%u)",
63196326
nodeId, gci, buf, c_no_of_buckets);
6327+
m_active_buckets.bitANDC(tmp);
63206328
m_switchover_buckets.bitOR(tmp);
63216329
c_startup.m_handover_nodes.clear(nodeId);
63226330
DBUG_VOID_RETURN;
@@ -6681,6 +6689,7 @@ void Suma::start_resend(Signal *signal, Uint32 buck) {
66816689
bucket->m_switchover_node = get_responsible_node(buck);
66826690
bucket->m_switchover_gci = max;
66836691

6692+
ndbassert(!m_active_buckets.get(buck));
66846693
m_switchover_buckets.set(buck);
66856694

66866695
signal->theData[0] = SumaContinueB::RESEND_BUCKET;

0 commit comments

Comments
 (0)