Skip to content

Commit 823b2c4

Browse files
committed
Bug#35840020 Non distribution aware node selection is imbalanced
Unhinted transactions, or hintable transactions where hinting is disabled, will use a round-robin (RR) algorithm, balancing the transaction load across a set of candidate data nodes. The ndb_optimized_node_selection option controls which nodes are candidates:

  ndb_optimized_node_selection & 1
    All data nodes    0
    Local data nodes  1

Locality is defined or inferred by a number of NdbApi-level concepts, such as:

  Config   : LocationDomain
  Api      : DataNodeNeighbour
  Internal : tryBind remote address->Group
  Config   : Group

In both cases there can be multiple candidates, so NdbApi attempts to spread the load across the candidates. However, the state memory required to do this is part of the Ndb (session) object, and is reset (to the same 'next' position) in every new Ndb object.

In some use cases new Ndb objects are always used, such as when a SQL user connects, issues an unhinted query and then disconnects. In this case there is no RR balance, resulting in resource usage imbalances and limiting system scalability and capacity.

This is fixed so that in these cases the first transaction issued by a connection is also handled in a round-robin way, avoiding persistent imbalance.

The MTR testcase ndb_optimized_node_selection is extended to cover this scenario.

Change-Id: Ia491b3bc4ae96e5e57a27baa375d2d84b3068d60
1 parent b58f0f1 commit 823b2c4

File tree

6 files changed

+106
-11
lines changed

6 files changed

+106
-11
lines changed

mysql-test/suite/ndb/r/ndb_optimized_node_selection.result

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ READ PRIMARY o-n-s 0 case Autocommit update by pk
4747
node_id balance
4848
1 HALF
4949
2 HALF
50+
READ PRIMARY o-n-s 0 case Unhinted table scan on new connection
51+
node_id balance
52+
1 HALF
53+
2 HALF
5054
set ndb_optimized_node_selection=1;
5155
show variables like 'ndb_optimized_node_selection';
5256
Variable_name Value
@@ -63,6 +67,10 @@ READ PRIMARY o-n-s 1 case Autocommit update by pk
6367
node_id balance
6468
1 HALF
6569
2 HALF
70+
READ PRIMARY o-n-s 1 case Unhinted table scan on new connection
71+
node_id balance
72+
1 HALF
73+
2 HALF
6674
set ndb_optimized_node_selection=2;
6775
show variables like 'ndb_optimized_node_selection';
6876
Variable_name Value
@@ -79,6 +87,10 @@ READ PRIMARY o-n-s 2 case Autocommit update by pk
7987
node_id balance
8088
1 NONE
8189
2 ALL
90+
READ PRIMARY o-n-s 2 case Unhinted table scan on new connection
91+
node_id balance
92+
1 HALF
93+
2 HALF
8294
set ndb_optimized_node_selection=3;
8395
show variables like 'ndb_optimized_node_selection';
8496
Variable_name Value
@@ -95,6 +107,10 @@ READ PRIMARY o-n-s 3 case Autocommit update by pk
95107
node_id balance
96108
1 NONE
97109
2 ALL
110+
READ PRIMARY o-n-s 3 case Unhinted table scan on new connection
111+
node_id balance
112+
1 HALF
113+
2 HALF
98114
drop table basecounts;
99115
drop table t1;
100116
create table t1 (a int primary key, b int) engine=ndb comment="NDB_TABLE=READ_BACKUP=1";
@@ -116,6 +132,10 @@ READ BACKUP o-n-s 0 case Autocommit update by pk
116132
node_id balance
117133
1 HALF
118134
2 HALF
135+
READ BACKUP o-n-s 0 case Unhinted table scan on new connection
136+
node_id balance
137+
1 HALF
138+
2 HALF
119139
set ndb_optimized_node_selection=1;
120140
show variables like 'ndb_optimized_node_selection';
121141
Variable_name Value
@@ -132,6 +152,10 @@ READ BACKUP o-n-s 1 case Autocommit update by pk
132152
node_id balance
133153
1 HALF
134154
2 HALF
155+
READ BACKUP o-n-s 1 case Unhinted table scan on new connection
156+
node_id balance
157+
1 HALF
158+
2 HALF
135159
set ndb_optimized_node_selection=2;
136160
show variables like 'ndb_optimized_node_selection';
137161
Variable_name Value
@@ -148,6 +172,10 @@ READ BACKUP o-n-s 2 case Autocommit update by pk
148172
node_id balance
149173
1 HALF
150174
2 HALF
175+
READ BACKUP o-n-s 2 case Unhinted table scan on new connection
176+
node_id balance
177+
1 HALF
178+
2 HALF
151179
set ndb_optimized_node_selection=3;
152180
show variables like 'ndb_optimized_node_selection';
153181
Variable_name Value
@@ -164,6 +192,10 @@ READ BACKUP o-n-s 3 case Autocommit update by pk
164192
node_id balance
165193
1 HALF
166194
2 HALF
195+
READ BACKUP o-n-s 3 case Unhinted table scan on new connection
196+
node_id balance
197+
1 HALF
198+
2 HALF
167199
drop table basecounts;
168200
drop table t1;
169201
create table t1 (a int primary key, b int) engine=ndb comment="NDB_TABLE=FULLY_REPLICATED=1";
@@ -185,6 +217,10 @@ FULLY REPLICATED o-n-s 0 case Autocommit update by pk
185217
node_id balance
186218
1 HALF
187219
2 HALF
220+
FULLY REPLICATED o-n-s 0 case Unhinted table scan on new connection
221+
node_id balance
222+
1 HALF
223+
2 HALF
188224
set ndb_optimized_node_selection=1;
189225
show variables like 'ndb_optimized_node_selection';
190226
Variable_name Value
@@ -201,6 +237,10 @@ FULLY REPLICATED o-n-s 1 case Autocommit update by pk
201237
node_id balance
202238
1 HALF
203239
2 HALF
240+
FULLY REPLICATED o-n-s 1 case Unhinted table scan on new connection
241+
node_id balance
242+
1 HALF
243+
2 HALF
204244
set ndb_optimized_node_selection=2;
205245
show variables like 'ndb_optimized_node_selection';
206246
Variable_name Value
@@ -217,6 +257,10 @@ FULLY REPLICATED o-n-s 2 case Autocommit update by pk
217257
node_id balance
218258
1 HALF
219259
2 HALF
260+
FULLY REPLICATED o-n-s 2 case Unhinted table scan on new connection
261+
node_id balance
262+
1 HALF
263+
2 HALF
220264
set ndb_optimized_node_selection=3;
221265
show variables like 'ndb_optimized_node_selection';
222266
Variable_name Value
@@ -233,5 +277,9 @@ FULLY REPLICATED o-n-s 3 case Autocommit update by pk
233277
node_id balance
234278
1 HALF
235279
2 HALF
280+
FULLY REPLICATED o-n-s 3 case Unhinted table scan on new connection
281+
node_id balance
282+
1 HALF
283+
2 HALF
236284
drop table basecounts;
237285
drop table t1;

mysql-test/suite/ndb/t/ndb_optimized_node_selection.test

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ while ($dist < 3)
6464
show variables like 'ndb_optimized_node_selection';
6565

6666
--let $case=0
67-
while ($case < 3)
67+
while ($case < 4)
6868
{
6969
--disable_query_log
7070
--disable_result_log
@@ -78,7 +78,8 @@ while ($dist < 3)
7878
where counter_name="TRANSACTIONS"
7979
group by node_id;
8080

81-
--let $iter=10
81+
--let $iter=100
82+
--let $errbound=$iter/10
8283
--let $rpt=$iter
8384
while ($rpt)
8485
{
@@ -100,6 +101,19 @@ while ($dist < 3)
100101
--let $casename=Autocommit update by pk
101102
update test.t1 set b=2 where a=0;
102103
}
104+
if ($case == 3)
105+
{
106+
--let $casename=Unhinted table scan on new connection
107+
connect(testcon, localhost, root,,);
108+
--connection testcon
109+
--eval set ndb_optimized_node_selection=$val;
110+
begin;
111+
select * from test.t1;
112+
select * from test.t1 where a=0;
113+
rollback;
114+
--connection default
115+
disconnect testcon;
116+
}
103117
--dec $rpt
104118
}
105119
--enable_result_log
@@ -111,10 +125,10 @@ while ($dist < 3)
111125
# test.
112126
--echo $distname o-n-s $val case $casename
113127
eval select x.node_id,
114-
if (x.transcount >= $iter, "ALL",
115-
if (x.transcount <= 1, "NONE",
116-
if (x.transcount BETWEEN (($iter/2)-1)
117-
AND (($iter/2)+1),
128+
if (x.transcount >= ($iter-($errbound)), "ALL",
129+
if (x.transcount <= ($errbound), "NONE",
130+
if (x.transcount BETWEEN (($iter/2)-($errbound))
131+
AND (($iter/2)+($errbound)),
118132
"HALF", "ERROR"))) as balance
119133
from (
120134
select a.node_id, b.transcount - a.transcount as transcount

storage/ndb/src/ndbapi/Ndbif.cpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (c) 2003, 2021, Oracle and/or its affiliates.
2+
Copyright (c) 2003, 2024, Oracle and/or its affiliates.
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License, version 2.0,
@@ -116,6 +116,28 @@ Ndb::init(int aMaxNoOfTransactions)
116116
theFirstTransId |= theImpl->m_ndb_cluster_connection.
117117
get_next_transid(theNdbBlockNumber);
118118

119+
/**
120+
* Initialise current connect index and node iterator to get balanced
121+
* node selection offset for first request using a new Ndb object
122+
*/
123+
{
124+
/**
125+
* Use ndb_cluster_connection given id to provide RR variation
126+
* of 'starting offset' for unhinted TC node choice.
127+
*/
128+
const Uint32 tcNodeChoiceOffset =
129+
(nodeId + theImpl->theCurrentConnectIndex) %
130+
(theImpl->theNoOfDBnodes ? theImpl->theNoOfDBnodes : MAX_NDB_NODES);
131+
132+
/* Configure RR + proximity aware unhinted iterators */
133+
theImpl->theCurrentConnectIndex = tcNodeChoiceOffset;
134+
theImpl->m_ndb_cluster_connection.init_get_next_node(theImpl->m_node_iter);
135+
for (Uint32 i = 0; i < tcNodeChoiceOffset; i++)
136+
{
137+
theImpl->m_ndb_cluster_connection.get_next_node(theImpl->m_node_iter);
138+
}
139+
}
140+
119141
/* Init cached min node version */
120142
theFacade->lock_poll_mutex();
121143
theCachedMinDbNodeVersion = theFacade->getMinDbNodeVersion();

storage/ndb/src/ndbapi/Ndbinit.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,10 @@ void Ndb::setup(Ndb_cluster_connection *ndb_cluster_connection,
125125
}
126126
}
127127

128-
theImpl->m_ndb_cluster_connection.link_ndb_object(this);
128+
const Uint64 id = theImpl->m_ndb_cluster_connection.link_ndb_object(this);
129+
130+
/* Give unhinted TC choice alg an offset to use in init() */
131+
theImpl->theCurrentConnectIndex = id;
129132

130133
DBUG_VOID_RETURN;
131134
}

storage/ndb/src/ndbapi/ndb_cluster_connection.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,7 @@ Ndb_cluster_connection_impl(const char * connect_string,
453453
m_latest_error_msg(),
454454
m_latest_error(0),
455455
m_data_node_neighbour(0),
456+
m_num_created_ndb_objects(0),
456457
m_multi_wait_group(0),
457458
m_uri_scheme(NULL),
458459
m_uri_host(NULL),
@@ -639,7 +640,7 @@ Ndb_cluster_connection::get_next_ndb_object(const Ndb* p)
639640
return p->theImpl->m_next_ndb_object;
640641
}
641642

642-
void
643+
Uint64
643644
Ndb_cluster_connection_impl::link_ndb_object(Ndb* p)
644645
{
645646
lock_ndb_objects();
@@ -650,11 +651,15 @@ Ndb_cluster_connection_impl::link_ndb_object(Ndb* p)
650651

651652
p->theImpl->m_next_ndb_object = m_first_ndb_object;
652653
m_first_ndb_object = p;
653-
654+
655+
const Uint64 id = m_num_created_ndb_objects++;
656+
654657
// Wake up anyone waiting for changes to the Ndb instance list
655658
NdbCondition_Broadcast(m_new_delete_ndb_cond);
656659

657660
unlock_ndb_objects();
661+
662+
return id;
658663
}
659664

660665
void

storage/ndb/src/ndbapi/ndb_cluster_connection_impl.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ class Ndb_cluster_connection_impl : public Ndb_cluster_connection
176176
NdbMutex* m_new_delete_ndb_mutex;
177177
NdbCondition* m_new_delete_ndb_cond;
178178
Ndb* m_first_ndb_object;
179-
void link_ndb_object(Ndb*);
179+
Uint64 link_ndb_object(Ndb*);
180180
void unlink_ndb_object(Ndb*);
181181

182182
BaseString m_latest_error_msg;
@@ -193,6 +193,9 @@ class Ndb_cluster_connection_impl : public Ndb_cluster_connection
193193
// Closest data node neighbour
194194
Uint32 m_data_node_neighbour;
195195

196+
// Number of Ndb object creations
197+
Uint64 m_num_created_ndb_objects;
198+
196199
// Base offset for stats, from Ndb objects that are no
197200
// longer with us
198201
Uint64 globalApiStatsBaseline[ Ndb::NumClientStatistics ];

0 commit comments

Comments
 (0)