Skip to content

Commit 3f1f7f4

Browse files
committed
Bug #28936888 XCOM'S ACCEPTOR_LEARNER_TASK USES-AFTER-FREE A REFERENCE TO A SERVER
Problem ======================================================================== Running the GCS tests with ASAN occasionally reports a use-after-free of the server reference that the acceptor_learner_task uses. Here is an excerpt of ASAN's output: ==43936==ERROR: AddressSanitizer: heap-use-after-free on address 0x63100021c840 at pc 0x000000530ff8 bp 0x7fc0427e8530 sp 0x7fc0427e8520 WRITE of size 8 at 0x63100021c840 thread T3 #0 0x530ff7 in server_detected /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:962 #1 0x533814 in buffered_read_bytes /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:1249 #2 0x5481af in buffered_read_msg /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:1399 #3 0x51e171 in acceptor_learner_task /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:4690 #4 0x562357 in task_loop /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/task.c:1140 #5 0x5003b2 in xcom_taskmain2 /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:1324 #6 0x6a278a in Gcs_xcom_proxy_impl::xcom_init(unsigned short, node_address*) /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_proxy.cc:164 #7 0x59b3c1 in xcom_taskmain_startup /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_control_interface.cc:107 #8 0x7fc04a2e4dd4 in start_thread (/lib64/libpthread.so.0+0x7dd4) #9 0x7fc047ff2bfc in __clone (/lib64/libc.so.6+0xfebfc) 0x63100021c840 is located 64 bytes inside of 65688-byte region [0x63100021c800,0x63100022c898) freed by thread T3 here: #0 0x7fc04a5d7508 in __interceptor_free (/lib64/libasan.so.4+0xde508) #1 0x52cf86 in freesrv /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:836 #2 0x52ea78 in srv_unref 
/home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:868 #3 0x524c30 in reply_handler_task /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:4914 #4 0x562357 in task_loop /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/task.c:1140 #5 0x5003b2 in xcom_taskmain2 /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:1324 #6 0x6a278a in Gcs_xcom_proxy_impl::xcom_init(unsigned short, node_address*) /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_proxy.cc:164 #7 0x59b3c1 in xcom_taskmain_startup /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_control_interface.cc:107 #8 0x7fc04a2e4dd4 in start_thread (/lib64/libpthread.so.0+0x7dd4) previously allocated by thread T3 here: #0 0x7fc04a5d7a88 in __interceptor_calloc (/lib64/libasan.so.4+0xdea88) #1 0x543604 in mksrv /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:721 #2 0x543b4c in addsrv /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:755 #3 0x54af61 in update_servers /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c:1747 #4 0x501082 in site_install_action /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:1572 #5 0x55447c in import_config /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/site_def.c:486 #6 0x506dfc in handle_x_snapshot /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:5257 #7 0x50c444 in xcom_fsm /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:5325 #8 0x516c36 in dispatch_op /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:4510 #9 0x521997 in acceptor_learner_task 
/home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:4772 #10 0x562357 in task_loop /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/task.c:1140 #11 0x5003b2 in xcom_taskmain2 /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c:1324 #12 0x6a278a in Gcs_xcom_proxy_impl::xcom_init(unsigned short, node_address*) /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_proxy.cc:164 #13 0x59b3c1 in xcom_taskmain_startup /home/tvale/mysql/plugin/group_replication/libmysqlgcs/src/bindings/xcom/gcs_xcom_control_interface.cc:107 #14 0x7fc04a2e4dd4 in start_thread (/lib64/libpthread.so.0+0x7dd4) Analysis ======================================================================== The server structure is reference counted by the associated sender_task and reply_handler_task. When they finish, they unreference the server, which leads to its memory being freed. However, the acceptor_learner_task keeps a "naked" reference to the server structure. Under the right ordering of operations, i.e. when the sender_task and reply_handler_task terminate after the acceptor_learner_task acquires, but before it uses, the reference to the server structure, the acceptor_learner_task accesses the server structure after it has been freed. Solution ======================================================================== Let the acceptor_learner_task also reference count the server structure so it is not freed while still in use. Reviewed-by: André Negrão <[email protected]> Reviewed-by: Venkatesh Venugopal <[email protected]> RB: 21209
1 parent 6f2927c commit 3f1f7f4

File tree

2 files changed

+15
-0
lines changed

2 files changed

+15
-0
lines changed

rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_base.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4006,7 +4006,11 @@ int acceptor_learner_task(task_arg arg)
40064006
common to both the sender_task, reply_handler_task, and the ac‐
40074007
ceptor_learner_task.
40084008
*/
4009+
// Allow the previous server reference to be freed.
4010+
if (ep->srv) srv_unref(ep->srv);
40094011
ep->srv = get_server(site, ep->p->from);
4012+
// Prevent the new server reference from being freed.
4013+
if (ep->srv) srv_ref(ep->srv);
40104014
ep->p->refcnt = 1; /* Refcnt from other end is void here */
40114015
MAY_DBG(FN;
40124016
NDBG(ep->rfd.fd, d); NDBG(task_now(), f);
@@ -4120,6 +4124,8 @@ int acceptor_learner_task(task_arg arg)
41204124
if (ep->buf)
41214125
X_FREE(ep->buf);
41224126
free(ep->in_buf);
4127+
// Allow the server reference to be freed.
4128+
if (ep->srv) srv_unref(ep->srv);
41234129

41244130
TASK_END;
41254131
}

rapid/plugin/group_replication/libmysqlgcs/src/bindings/xcom/xcom/xcom_transport.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -747,6 +747,12 @@ mksrv(char *srv, xcom_port port)
747747
s->reply_handler = task_new(reply_handler_task, void_arg(s), "reply_handler_task", XCOM_THREAD_DEBUG);
748748
}
749749
reset_srv_buf(&s->out_buf);
750+
/*
751+
Keep the server from being freed if the acceptor_learner_task calls
752+
srv_unref on the server before the {local_,}server_task and
753+
reply_handler_task begin.
754+
*/
755+
srv_ref(s);
750756
return s;
751757
}
752758

@@ -872,6 +878,9 @@ static void shut_srv(server *s)
872878
task_terminate(s->sender);
873879
if (s->reply_handler)
874880
task_terminate(s->reply_handler);
881+
882+
// Allow the server to be freed. This unref pairs with the ref from mksrv.
883+
srv_unref(s);
875884
}
876885

877886

0 commit comments

Comments
 (0)