Skip to content

Commit b673ec1

Browse files
committed
Bug#36495761 PB2: ndb_restore_schema_tolerance fail sporadically creating table
1/2 Patch for 7.6 only. Context: The MTR test ndb_restore_schema_tolerance fails sporadically due to an error in DBUTIL. UTIL has a pool of prepared operations (preparedOperationPool) that stores the prepared operations until they are executed. The pool has a fixed size of 6, of which 4 are 'reserved' for some operations on special tables, so in fact there are only 2 available slots in preparedOperationPool. When UTIL handles many prepare operations simultaneously, the pool can get exhausted, and UTIL then fails to seize a free object in the pool to store the new prepared op. In a similar way, the runningPrepares pool, used by UTIL to store the running prepare operations, can also be exhausted. The pool size is 1, but there is nothing preventing TRIX/DICT from sending many UTIL_PREPARE_REQ signals in parallel to UTIL. In that scenario UTIL will fail to seize a free object for the new running prepare operation. To check the described scenarios, 2 new API-level tests are added to test_event. 1 - ExhaustedPreparedPoolsApiOps: Checks whether the DBUTIL PreparedOperationPool and runningPrepares pool get exhausted when many getEvent operations run in parallel. 2 - ExhaustedPreparedPoolsInternalOps: Checks that, if the DBUTIL PreparedOperationPool and/or runningPrepares pool get exhausted due to events, internal operations still succeed. The test launches 32 threads running getEvent operations + 1 thread running drop/create index in parallel in order to force UTIL to handle many UTIL_PREPARE_REQ signals from both DICT and TRIX; this way the prepared op pool and runningPrepares pool can get exhausted, since the pool sizes are small and there is nothing preventing UTIL from handling many UTIL_PREPARE_REQ signals simultaneously. The test fails only when the operation that finds the pool exhausted is internal (from TRIX). Change-Id: I12f3ff7f92ab4310dda32ecc0c825609720ce933
1 parent 930dfa1 commit b673ec1

File tree

4 files changed

+162
-2
lines changed

4 files changed

+162
-2
lines changed

storage/ndb/src/kernel/blocks/ERROR_codes.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ Next LGMAN 15002
4040
Next TSMAN 16002
4141
Next DBSPJ 17000
4242
Next TRIX 18004
43-
Next DBUTIL 19001
43+
Next DBUTIL 19002
4444

4545
TESTING NODE FAILURE, ARBITRATION
4646
---------------------------------
@@ -910,6 +910,7 @@ Index stats:
910910
DBUTIL
911911
-------
912912
19000: fail UTIL_PREPARE_REQ with PREPARE_SEIZE_ERROR
913+
19001: crash in UTIL_PREPARE_REQ if preparedOperationPool or runningPrepares pool is exhausted
913914

914915
NDBFS
915916
-----

storage/ndb/src/kernel/blocks/dbutil/DbUtil.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (c) 2003, 2022, Oracle and/or its affiliates.
2+
Copyright (c) 2003, 2024, Oracle and/or its affiliates.
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License, version 2.0,
@@ -1137,6 +1137,10 @@ DbUtil::execUTIL_PREPARE_REQ(Signal* signal)
11371137
releaseSections(handle);
11381138
sendUtilPrepareRef(signal, UtilPrepareRef::PREPARE_SEIZE_ERROR,
11391139
senderRef, senderData);
1140+
if (ERROR_INSERTED(19001)) {
1141+
/* Should never fail to seize a record */
1142+
ndbrequire(false);
1143+
}
11401144
return;
11411145
};
11421146
handle.getSection(ptr, UtilPrepareReq::PROPERTIES_SECTION);
@@ -1355,6 +1359,10 @@ DbUtil::prepareOperation(Signal* signal,
13551359
sendUtilPrepareRef(signal, UtilPrepareRef::PREPARED_OPERATION_SEIZE_ERROR,
13561360
prepPtr.p->clientRef, prepPtr.p->clientData);
13571361
releasePrepare(prepPtr);
1362+
if (ERROR_INSERTED(19001)) {
1363+
/* Should never fail to seize a record */
1364+
ndbrequire(false);
1365+
}
13581366
return;
13591367
}
13601368
prepPtr.p->prepOpPtr = prepOpPtr;

storage/ndb/test/ndbapi/test_event.cpp

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,122 @@ static int runCreateEvent(NDBT_Context* ctx, NDBT_Step* step)
197197
return NDBT_OK;
198198
}
199199

200+
201+
/**
 * Test step: read the "ErrorCode" test property and ask NdbRestarter to
 * insert that error code in all nodes.
 *
 * @return NDBT_OK if the insert call reported success, NDBT_FAILED otherwise.
 */
int runInsertError(NDBT_Context *ctx, NDBT_Step *step) {
  const int errorCode = ctx->getProperty("ErrorCode");
  NdbRestarter restarter;

  const bool inserted = (restarter.insertErrorInAllNodes(errorCode) == 0);
  if (inserted) {
    return NDBT_OK;
  }

  ndbout << "Could not insert error in all nodes " << endl;
  return NDBT_FAILED;
}
211+
212+
/**
 * Test step: clear any previously inserted error by asking NdbRestarter to
 * insert error code 0 in all nodes.
 *
 * @return NDBT_OK if the call reported success, NDBT_FAILED otherwise.
 */
int runClearError(NDBT_Context *ctx, NDBT_Step *step) {
  NdbRestarter restarter;

  const bool cleared = (restarter.insertErrorInAllNodes(0) == 0);
  if (cleared) {
    return NDBT_OK;
  }

  ndbout << "Could not clear error in all nodes " << endl;
  return NDBT_FAILED;
}
221+
222+
/**
 * Test step: repeatedly look up the event "<table name>_EVENT" through the
 * dictionary, once per configured test loop, or until the test is stopped.
 *
 * When the loop ends normally the step stops the whole test, so that steps
 * running "until stopped" in parallel terminate as well.
 *
 * @return NDBT_OK when every lookup succeeded, NDBT_FAILED if the dictionary
 *         is unavailable or a lookup returns no event.
 */
static int runGetEvent(NDBT_Context *ctx, NDBT_Step *step) {
  Ndb *pNdb = GETNDB(step);
  int loops = ctx->getNumLoops();
  NdbDictionary::Dictionary *myDict = pNdb->getDictionary();

  if (!myDict) {
    g_err << "Dictionary not found " << pNdb->getNdbError().code << " "
          << pNdb->getNdbError().message << endl;
    return NDBT_FAILED;
  }

  char eventName[1024];
  const NdbDictionary::Table &tab = *ctx->getTab();
  // Bounded formatting: never write past the end of eventName.
  snprintf(eventName, sizeof(eventName), "%s_EVENT", tab.getName());

  while (loops-- && !ctx->isTestStopped()) {
    const NdbDictionary::Event *ev = myDict->getEvent(eventName);
    if (ev == NULL) {
      g_err << "getEvent (" << step->getStepNo() << "): Event not found. "
            << myDict->getNdbError().code << " "
            << myDict->getNdbError().message << endl;
      return NDBT_FAILED;
    }
    // The NDB API documents that the Event returned by getEvent() must be
    // deleted by the caller; without this every iteration leaks one object.
    delete ev;
  }
  ctx->stopTest();
  return NDBT_OK;
}
249+
250+
/**
 * Test step: create the table named by the "tableName" test property.
 *
 * The table has two Unsigned columns: "a" (primary key) and "b" (not
 * nullable). After creation the table is fetched back from the dictionary
 * to verify that it is retrievable.
 *
 * @return NDBT_OK on success; NDBT_FAILED if the property is not set, the
 *         dictionary is unavailable, or create/get of the table fails.
 */
int runCreateTable(NDBT_Context *ctx, NDBT_Step *step) {
  Ndb *pNdb = GETNDB(step);
  const char *tableName = ctx->getProperty("tableName", (char *)NULL);
  if (tableName == NULL) {
    // The property lookup falls back to NULL; do not hand that to the API.
    g_err << "Property 'tableName' is not set" << endl;
    return NDBT_FAILED;
  }

  NdbDictionary::Dictionary *pDict = pNdb->getDictionary();
  if (pDict == NULL) {
    g_err << "Dictionary not found " << pNdb->getNdbError().code << " "
          << pNdb->getNdbError().message << endl;
    return NDBT_FAILED;
  }

  NdbDictionary::Table tab(tableName);
  {
    NdbDictionary::Column col("a");
    col.setType(NdbDictionary::Column::Unsigned);
    col.setPrimaryKey(true);
    tab.addColumn(col);
  }
  {
    NdbDictionary::Column col("b");
    col.setType(NdbDictionary::Column::Unsigned);
    col.setNullable(false);
    tab.addColumn(col);
  }
  if (pDict->createTable(tab) != 0) {
    g_err << "Failed to create table : " << pDict->getNdbError() << endl;
    return NDBT_FAILED;
  }
  if (!pDict->getTable(tableName)) {
    g_err << "Failed to get table : " << pDict->getNdbError() << endl;
    return NDBT_FAILED;
  }
  return NDBT_OK;
}
278+
279+
/**
 * Test step: drop the table named by the "tableName" test property.
 *
 * @return NDBT_OK on success; NDBT_FAILED if the property is not set, the
 *         dictionary is unavailable, or the drop fails.
 */
int runDropTable(NDBT_Context *ctx, NDBT_Step *step) {
  Ndb *pNdb = GETNDB(step);
  const char *tableName = ctx->getProperty("tableName", (char *)NULL);
  if (tableName == NULL) {
    // The property lookup falls back to NULL; do not hand that to the API.
    g_err << "Property 'tableName' is not set" << endl;
    return NDBT_FAILED;
  }

  NdbDictionary::Dictionary *pDict = pNdb->getDictionary();
  if (pDict == NULL) {
    g_err << "Dictionary not found " << pNdb->getNdbError().code << " "
          << pNdb->getNdbError().message << endl;
    return NDBT_FAILED;
  }

  if (pDict->dropTable(tableName) != 0) {
    g_err << "Failed to drop table : " << pDict->getNdbError() << endl;
    return NDBT_FAILED;
  }
  return NDBT_OK;
}
289+
290+
/**
 * Test step: until the test is stopped, repeatedly create and then drop the
 * non-logged ordered index "idx_te" on column "b" of the table named by the
 * "tableName" test property.
 *
 * @return NDBT_OK once the test has been stopped; NDBT_FAILED if the
 *         property is not set, the dictionary is unavailable, or any index
 *         create/drop fails.
 */
int runCreateDropIndex(NDBT_Context *ctx, NDBT_Step *step) {
  Ndb *pNdb = GETNDB(step);
  NdbDictionary::Dictionary *pDict = pNdb->getDictionary();
  if (pDict == NULL) {
    g_err << "Dictionary not found " << pNdb->getNdbError().code << " "
          << pNdb->getNdbError().message << endl;
    return NDBT_FAILED;
  }

  const char *tableName = ctx->getProperty("tableName", (char *)NULL);
  if (tableName == NULL) {
    // The property lookup falls back to NULL; do not hand that to the API.
    g_err << "Property 'tableName' is not set" << endl;
    return NDBT_FAILED;
  }

  while (!ctx->isTestStopped()) {
    NdbDictionary::Index ind("idx_te");
    ind.setTable(tableName);
    ind.setType(NdbDictionary::Index::OrderedIndex);
    ind.setLogging(false);
    ind.addColumn("b");

    if (pDict->createIndex(ind) != 0) {
      g_err << "Failed to create index : " << pDict->getNdbError() << endl;
      return NDBT_FAILED;
    }
    g_err << "index created" << endl;

    if (pDict->dropIndex("idx_te", tableName) != 0) {
      g_err << "Failed to drop index : " << pDict->getNdbError() << endl;
      return NDBT_FAILED;
    }
    g_err << "index dropped" << endl;
  }
  return NDBT_OK;
}
315+
200316
Uint32 setAnyValue(Ndb* ndb, NdbTransaction* trans, int rowid, int updVal)
201317
{
202318
/* XOR 2 32bit words of transid together */
@@ -7735,6 +7851,33 @@ TESTCASE("DelayedEventDrop",
77357851
STEP(runInsertDeleteUntilStopped);
77367852
FINALIZER(runDropEvent);
77377853
}
7854+
// Runs getEvent lookups while error insert 19001 is active. Adjacent string
// literals are concatenated verbatim, so each description fragment must carry
// its own trailing space.
TESTCASE("ExhaustedPreparedPoolsApiOps",
         "Check that DBUTIL PreparedOperationPool and runningPrepares pool do "
         "not get Exhausted when N (=1 for now) getEvent operation run in "
         "parallel") {
  TC_PROPERTY("ErrorCode", 19001);
  INITIALIZER(runCreateEvent);
  INITIALIZER(runInsertError);  // set error insert
  STEPS(runGetEvent, 1);  // Only 1 parallel getEvent for now, idea is to
                          // increase the concurrency in the future.
  FINALIZER(runClearError);  // clear error insert
  FINALIZER(runDropEvent);
}
7866+
// Runs many parallel getEvent lookups alongside a create/drop index loop.
// Adjacent string literals are concatenated verbatim, so each description
// fragment must carry its own trailing space.
TESTCASE("ExhaustedPreparedPoolsInternalOps",
         "Check that when DBUTIL PreparedOperationPool and/or runningPrepares "
         "get Exhausted due to events, internal operations (generated by "
         "create table/create index in this case) still succeed") {
  TC_PROPERTY("tableName", "table_te");
  INITIALIZER(runCreateEvent);
  INITIALIZER(runCreateTable);
  STEPS(runGetEvent,
        32);  // 32 parallel GetEvent just to ensure that util pools will get
              // exhausted, 32 is higher than the pool size.
  STEP(runCreateDropIndex);
  FINALIZER(runDropTable);
  FINALIZER(runDropEvent);
}
7880+
77387881
#if 0
77397882
TESTCASE("BackwardCompatiblePollCOverflowEB",
77407883
"Check whether backward compatibility of pollEvents manually"

storage/ndb/test/run-test/daily-devel--07-tests.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,11 @@ cmd: testNdbApi
297297
args: -n SetVarbinaryWithSetValue WIDE_2COL
298298
max-time: 180
299299

300+
cmd: test_event
301+
args: -n ExhaustedPreparedPoolsApiOps T1 -l 10000
302+
max-time: 240
303+
304+
cmd: test_event
305+
args: -n ExhaustedPreparedPoolsInternalOps T1 -l 1000
306+
max-time: 240
307+

0 commit comments

Comments
 (0)