Skip to content

Commit e66a42f

Browse files
committed
Bug#30529132 REVIEW OUTDATED LCPSCANFRAGWATCHDOG AND BUG24664 USING ERROR INSERT 10039
This fix replaces the use of the old error code 10039 with the new error code 10055 for stalling local checkpoints (LCPs) in testcases where that is required. Error code 10039 was modified in the 7.6 release so that it no longer stalled the LCP, and so the testcases were not giving the coverage they were designed for. This fix restores the testcase coverage. Testcases affected: testNodeRestart -n LcpScanFragWatchdog -n LcpScanFragWatchdogDisable -n LcpScanFragWatchdogIsolation; testSystemRestart -n Bug24664. Notes: - In 7.6 Partial LCP: - The LCP Fragment Scan Watchdog was changed to cover more of the LCP process, including non-scan periods. - An undocumented, hard-coded two-minute time limit on each actual fragment scan was implemented. This means that the user-supplied limit is ignored when a fragment scan takes > 2 minutes. This behavioural change is left as-is. The two-minute hard limit does not affect e.g. testNodeRestart -n LCPScanFragWatchdogDisable, as the scan stall error injection occurs before that timing mechanism starts. - testSystemRestart -n Bug24664 was using error insert 10040 to resume the stalled LCP. This is no longer necessary; clearing the error is sufficient. Additionally, error insert 10040 has been reused for a different purpose, so the overall effect was incorrect. Change-Id: I28f6462c101016a50b74bde2375176424428b9fe
1 parent 266f712 commit e66a42f

File tree

3 files changed

+9
-9
lines changed

3 files changed

+9
-9
lines changed

storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32662,7 +32662,7 @@ Dblqh::handle_check_system_scans(Signal *signal)
3266232662
if (i == ZLCP_CHECK_INDEX)
3266332663
{
3266432664
jam();
32665-
g_eventLogger->info("LCP Scan have stalled for %u seconds, last"
32665+
g_eventLogger->info("LCP Scan has stalled for %u seconds, "
3266632666
"last seen on line %u, check_lcp_stop_count: %u",
3266732667
time_stalled,
3266832668
c_check_scanptr_save_line[i],
@@ -32671,7 +32671,7 @@ Dblqh::handle_check_system_scans(Signal *signal)
3267132671
else if (i == ZBACKUP_CHECK_INDEX)
3267232672
{
3267332673
jam();
32674-
g_eventLogger->info("Backup Scan have stalled for %u seconds, "
32674+
g_eventLogger->info("Backup Scan has stalled for %u seconds, "
3267532675
"last seen on line %u, check_lcp_stop_count: %u",
3267632676
time_stalled,
3267732677
c_check_scanptr_save_line[i],
@@ -32680,7 +32680,7 @@ Dblqh::handle_check_system_scans(Signal *signal)
3268032680
else if (i == ZCOPY_FRAGREQ_CHECK_INDEX)
3268132681
{
3268232682
jam();
32683-
g_eventLogger->info("COPY_FRAGREQ Scan have stalled for %u seconds,"
32683+
g_eventLogger->info("COPY_FRAGREQ Scan has stalled for %u seconds,"
3268432684
"last seen on line %u, check_lcp_stop_count: %u",
3268532685
time_stalled,
3268632686
c_check_scanptr_save_line[i],

storage/ndb/test/ndbapi/testNodeRestart.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6587,7 +6587,7 @@ runTestScanFragWatchdog(NDBT_Context* ctx, NDBT_Step* step)
65876587
break;
65886588
}
65896589

6590-
if (restarter.insertErrorInNode(victim, 10039) != 0) /* Cause LCP/backup frag scan to halt */
6590+
if (restarter.insertErrorInNode(victim, 10055) != 0) /* Cause LCP/backup frag scan to halt */
65916591
{
65926592
g_err << "Error insert failed." << endl;
65936593
break;
@@ -6798,7 +6798,7 @@ runTestScanFragWatchdogDisable(NDBT_Context* ctx, NDBT_Step* step)
67986798

67996799
g_err << "Injecting fault in node " << victim;
68006800
g_err << " to suspend LCP frag scan..." << endl;
6801-
if (restarter.insertErrorInNode(victim, 10039) != 0)
6801+
if (restarter.insertErrorInNode(victim, 10055) != 0)
68026802
{
68036803
g_err << "Error insert failed." << endl;
68046804
break;
@@ -6862,7 +6862,7 @@ runTestScanFragWatchdogDisable(NDBT_Context* ctx, NDBT_Step* step)
68626862
} while (0);
68636863

68646864
// Insert error code to resume LCP in case node halted
6865-
if (restarter.insertErrorInNode(victim, 10040) != 0)
6865+
if (restarter.insertErrorInNode(victim, 0) != 0)
68666866
{
68676867
g_err << "Test cleanup failed: failed to resume LCP." << endl;
68686868
}

storage/ndb/test/ndbapi/testSystemRestart.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (c) 2003, 2021, Oracle and/or its affiliates.
2+
Copyright (c) 2003, 2023, Oracle and/or its affiliates.
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License, version 2.0,
@@ -2153,7 +2153,7 @@ runBug24664(NDBT_Context* ctx, NDBT_Step* step)
21532153
return NDBT_FAILED;
21542154
}
21552155

2156-
restarter.insertErrorInAllNodes(10039); // Hang LCP
2156+
restarter.insertErrorInAllNodes(10055); // Hang LCP
21572157
CHECK(restarter.dumpStateAllNodes(dump, 1) == 0);
21582158
while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
21592159
event.type != NDB_LE_LocalCheckpointStarted);
@@ -2163,7 +2163,7 @@ runBug24664(NDBT_Context* ctx, NDBT_Step* step)
21632163
return NDBT_FAILED;
21642164
}
21652165

2166-
restarter.insertErrorInAllNodes(10040); // Resume LCP
2166+
restarter.insertErrorInAllNodes(0); // Resume LCP
21672167
while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
21682168
event.type != NDB_LE_LocalCheckpointCompleted);
21692169

0 commit comments

Comments
 (0)