Skip to content

Commit c34ea6b

Browse files
committed
Bug#32042393: SETTING TO INACTIVE AN UPGRADED EMPTY UNDO
TABLESPACE STATE DOES NOT CHANGE THE SPACE TO EMPTY

After the commit for Bug#31991688, it was found that an idle system may take a very long time to truncate an undo tablespace after it is SET INACTIVE. More precisely, it takes about 128 seconds before the undo tablespace is finally truncated.

There are three main tasks for the function trx_purge():
1) Process the undo logs and apply changes to the data files. (May be multiple threads.)
2) Clean up the history list by freeing old undo logs and rollback segments.
3) Truncate undo tablespaces that have grown too big or are SET INACTIVE explicitly.

Bug#31991688 made sure that steps #2 & #3 are not done too often. Concentrating this effort keeps the purge lag from growing too large. By default, trx_purge() does step #1 128 times before attempting steps #2 & #3, which are called the 'truncate' steps. This is controlled by the setting innodb_purge_rseg_truncate_frequency.

On an idle system, trx_purge() is called once per second if it has nothing to do in step #1. After 128 seconds, it will finally do step #2 (truncating the undo logs and rollback segments, which reduces the history list to zero) and step #3 (truncating any undo tablespaces that need it).

The function that the purge coordinator thread uses to make these repeated calls to trx_purge() is called srv_do_purge(). When trx_purge() returns having done nothing, srv_do_purge() returns to srv_purge_coordinator_thread(), which will put the purge thread to sleep. It is woken up again once per second by the master thread in srv_master_do_idle_tasks(), if not sooner by any of several other threads and activities. This is how an idle system can wait 128 seconds before the truncate steps are done and an undo tablespace that was SET INACTIVE can finally become 'empty'.
The solution in this patch is to modify srv_do_purge() so that if trx_purge() did nothing and there is an undo space that was explicitly set to inactive, it will immediately call trx_purge() again with do_truncate=true so that steps #2 and #3 will be done. This does not affect the effort by Bug#31991688 to keep the purge lag from growing too big on sysbench UPDATE NO_KEY. With this change, the purge lag has to be zero and there must be a pending explicit undo space truncate before this extra call to trx_purge() is made.

Approved by Sunny in RB#25311
1 parent 924dbff commit c34ea6b

File tree

3 files changed

+50
-20
lines changed

3 files changed

+50
-20
lines changed

storage/innobase/include/trx0purge.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -720,6 +720,31 @@ class Tablespaces {
720720
return (nullptr);
721721
}
722722

723+
/** Find the first undo space that is marked inactive explicitly.
724+
@param[in,out] num_active May be nullptr. If not null, it is incremented
725+
(not reset) for each active space seen before the
726+
scan stops; it is a full count only if nullptr is returned.
727+
@return the first inactive_explicit undo::Tablespace, or nullptr if none */
728+
Tablespace *find_first_inactive_explicit(size_t *num_active) {
729+
ut_ad(own_latch());
730+
731+
if (m_spaces.empty()) {
732+
return (nullptr);
733+
}
734+
735+
for (auto undo_space : m_spaces) {
736+
if (undo_space->is_inactive_explicit()) {
737+
return (undo_space);  // early exit: *num_active is only a partial count
738+
}
739+
740+
if (num_active != nullptr && undo_space->is_active()) {
741+
(*num_active)++;
742+
}
743+
}
744+
745+
return (nullptr);
746+
}
747+
723748
#ifdef UNIV_DEBUG
724749
/** Determine if this thread owns a lock on m_latch. */
725750
bool own_latch() {

storage/innobase/srv/srv0srv.cc

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2964,16 +2964,16 @@ void srv_worker_thread() {
29642964
}
29652965

29662966
/** Do the actual purge operation.
2967-
@return length of history list before the last purge batch. */
2968-
static ulint srv_do_purge(
2969-
ulint *n_total_purged) /*!< in/out: total pages purged */
2970-
{
2967+
@param[in,out] n_total_purged Total pages purged in this call
2968+
@return length of history list before the last purge batch. */
2969+
static ulint srv_do_purge(ulint *n_total_purged) {
29712970
ulint n_pages_purged;
29722971

29732972
static ulint count = 0;
29742973
static ulint n_use_threads = 0;
29752974
static uint64_t rseg_history_len = 0;
29762975
ulint old_activity_count = srv_get_activity_count();
2976+
bool need_explicit_truncate = false;
29772977

29782978
const auto n_threads = srv_threads.m_purge_workers_n;
29792979

@@ -3020,18 +3020,25 @@ static ulint srv_do_purge(
30203020
break;
30213021
}
30223022

3023-
bool do_truncate =
3024-
(srv_shutdown_state.load() == SRV_SHUTDOWN_PURGE
3025-
? true
3026-
: (++count % srv_purge_rseg_truncate_frequency) == 0);
3023+
bool do_truncate = need_explicit_truncate ||
3024+
srv_shutdown_state.load() == SRV_SHUTDOWN_PURGE ||
3025+
(++count % srv_purge_rseg_truncate_frequency) == 0;
30273026

30283027
n_pages_purged =
30293028
trx_purge(n_use_threads, srv_purge_batch_size, do_truncate);
30303029

30313030
*n_total_purged += n_pages_purged;
30323031

3033-
} while (!srv_purge_should_exit(n_pages_purged) && n_pages_purged > 0 &&
3034-
purge_sys->state == PURGE_STATE_RUN);
3032+
need_explicit_truncate = (n_pages_purged == 0);
3033+
if (need_explicit_truncate) {
3034+
undo::spaces->s_lock();
3035+
need_explicit_truncate =
3036+
(undo::spaces->find_first_inactive_explicit(nullptr) != nullptr);
3037+
undo::spaces->s_unlock();
3038+
}
3039+
} while (purge_sys->state == PURGE_STATE_RUN &&
3040+
(n_pages_purged > 0 || need_explicit_truncate) &&
3041+
!srv_purge_should_exit(n_pages_purged));
30353042

30363043
return rseg_history_len;
30373044
}

storage/innobase/trx/trx0purge.cc

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1221,16 +1221,14 @@ static bool trx_purge_mark_undo_for_truncate(size_t truncate_count) {
12211221
/* In order to implicitly select an undo space to truncate, we need
12221222
at least 2 active UNDO tablespaces. As long as there is one undo
12231223
tablespace active the server will continue to operate. */
1224-
ulint num_active = 0;
1224+
size_t num_active = 0;
12251225

12261226
/* Look for any undo space that is inactive explicitly. */
1227-
for (auto undo_ts : undo::spaces->m_spaces) {
1228-
if (undo_ts->is_inactive_explicit()) {
1229-
undo_trunc->mark(undo_ts);
1230-
undo::spaces->s_unlock();
1231-
return (true);
1232-
}
1233-
num_active += (undo_ts->is_active() ? 1 : 0);
1227+
auto undo_ts = undo::spaces->find_first_inactive_explicit(&num_active);
1228+
if (undo_ts != nullptr) {
1229+
undo_trunc->mark(undo_ts);
1230+
undo::spaces->s_unlock();
1231+
return (true);
12341232
}
12351233

12361234
undo::spaces->s_unlock();
@@ -2007,12 +2005,12 @@ static MY_ATTRIBUTE((warn_unused_result))
20072005

20082006
if (!purge_sys->next_stored) {
20092007
DBUG_PRINT("ib_purge", ("no logs left in the history list"));
2010-
return (nullptr);
2008+
return nullptr;
20112009
}
20122010
}
20132011

20142012
if (purge_sys->iter.trx_no >= purge_sys->view.low_limit_no()) {
2015-
return (nullptr);
2013+
return nullptr;
20162014
}
20172015

20182016
/* fprintf(stderr, "Thread %lu purging trx %llu undo record %llu\n",

0 commit comments

Comments
 (0)