Skip to content

Commit 8b97eba

Browse files
committed
MDEV-21674 purge_sys.stop() fails to wait for purge workers to complete
Since commit 5e62b6a (MDEV-16264), purge_sys_t::stop() no longer waited for all purge activity to stop. This caused problems on FLUSH TABLES...FOR EXPORT because of purge running concurrently with the buffer pool flush. The assertion at the end of buf_flush_dirty_pages() could fail. The fix, implemented by Vladislav Vaintroub, aims to eliminate race conditions when stopping or resuming purge: waitable_task::disable(): Wait for the task to complete, then replace the task callback function with noop. waitable_task::enable(): Restore the original task callback function after disable(). purge_sys_t::stop(): Invoke purge_coordinator_task.disable(). purge_sys_t::resume(): Invoke purge_coordinator_task.enable(). purge_sys_t::running(): Add const qualifier, and clarify the comment. The purge coordinator task will remain active as long as any purge worker task is active. purge_worker_callback(): Assert purge_sys.running(). srv_purge_wakeup(): Merge with the only caller purge_sys_t::resume(). purge_coordinator_task: Use static linkage.
1 parent cd3bdc0 commit 8b97eba

File tree

6 files changed

+103
-92
lines changed

6 files changed

+103
-92
lines changed

storage/innobase/include/srv0srv.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -818,9 +818,6 @@ void srv_error_monitor_task(void*);
818818
ulint srv_get_task_queue_length();
819819
#endif
820820

821-
/** Wakeup the purge threads. */
822-
void srv_purge_wakeup();
823-
824821
/** Shut down the purge threads. */
825822
void srv_purge_shutdown();
826823

storage/innobase/include/trx0purge.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*****************************************************************************
22
33
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
4-
Copyright (c) 2017, 2019, MariaDB Corporation.
4+
Copyright (c) 2017, 2020, MariaDB Corporation.
55
66
This program is free software; you can redistribute it and/or modify it under
77
the terms of the GNU General Public License as published by the Free Software
@@ -250,8 +250,8 @@ class purge_sys_t
250250
m_enabled.store(false, std::memory_order_relaxed);
251251
}
252252

253-
/** @return whether the purge coordinator thread is active */
254-
bool running();
253+
/** @return whether the purge tasks are active */
254+
bool running() const;
255255
/** Stop purge during FLUSH TABLES FOR EXPORT */
256256
void stop();
257257
/** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */

storage/innobase/srv/srv0srv.cc

Lines changed: 65 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
44
Copyright (c) 2008, 2009 Google Inc.
55
Copyright (c) 2009, Percona Inc.
6-
Copyright (c) 2013, 2019, MariaDB Corporation.
6+
Copyright (c) 2013, 2020, MariaDB Corporation.
77
88
Portions of this file contain modifications contributed and copyrighted by
99
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -589,10 +589,6 @@ struct purge_coordinator_state
589589
};
590590

591591
static purge_coordinator_state purge_state;
592-
extern tpool::waitable_task purge_coordinator_task;
593-
594-
/** @return whether the purge coordinator thread is active */
595-
bool purge_sys_t::running() { return purge_coordinator_task.is_running(); }
596592

597593
/** threadpool timer for srv_error_monitor_task(). */
598594
std::unique_ptr<tpool::timer> srv_error_monitor_timer;
@@ -1590,9 +1586,8 @@ static tpool::task_group purge_task_group;
15901586
tpool::waitable_task purge_worker_task(purge_worker_callback, nullptr,
15911587
&purge_task_group);
15921588
static tpool::task_group purge_coordinator_task_group(1);
1593-
tpool::waitable_task purge_coordinator_task(purge_coordinator_callback,
1594-
nullptr,
1595-
&purge_coordinator_task_group);
1589+
static tpool::waitable_task purge_coordinator_task
1590+
(purge_coordinator_callback, nullptr, &purge_coordinator_task_group);
15961591

15971592
static tpool::timer *purge_coordinator_timer;
15981593

@@ -1611,6 +1606,66 @@ srv_wake_purge_thread_if_not_active()
16111606
}
16121607
}
16131608

1609+
/** @return whether the purge tasks are active */
1610+
bool purge_sys_t::running() const
1611+
{
1612+
return purge_coordinator_task.is_running();
1613+
}
1614+
1615+
/** Stop purge during FLUSH TABLES FOR EXPORT */
1616+
void purge_sys_t::stop()
1617+
{
1618+
rw_lock_x_lock(&latch);
1619+
1620+
if (!enabled())
1621+
{
1622+
/* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. */
1623+
ut_ad(!srv_undo_sources);
1624+
rw_lock_x_unlock(&latch);
1625+
return;
1626+
}
1627+
1628+
ut_ad(srv_n_purge_threads > 0);
1629+
1630+
const auto paused= m_paused++;
1631+
1632+
rw_lock_x_unlock(&latch);
1633+
1634+
if (!paused)
1635+
{
1636+
ib::info() << "Stopping purge";
1637+
MONITOR_ATOMIC_INC(MONITOR_PURGE_STOP_COUNT);
1638+
purge_coordinator_task.disable();
1639+
}
1640+
}
1641+
1642+
/** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */
1643+
void purge_sys_t::resume()
1644+
{
1645+
if (!enabled())
1646+
{
1647+
/* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. */
1648+
ut_ad(!srv_undo_sources);
1649+
return;
1650+
}
1651+
ut_ad(!srv_read_only_mode);
1652+
ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
1653+
ut_ad(!sync_check_iterate(sync_check()));
1654+
purge_coordinator_task.enable();
1655+
rw_lock_x_lock(&latch);
1656+
int32_t paused= m_paused--;
1657+
ut_a(paused);
1658+
1659+
if (paused == 1)
1660+
{
1661+
ib::info() << "Resuming purge";
1662+
purge_state.m_running = 0;
1663+
srv_wake_purge_thread_if_not_active();
1664+
MONITOR_ATOMIC_INC(MONITOR_PURGE_RESUME_COUNT);
1665+
}
1666+
rw_lock_x_unlock(&latch);
1667+
}
1668+
16141669
/** Wake up the master thread if it is suspended or being suspended. */
16151670
void
16161671
srv_wake_master_thread()
@@ -2182,7 +2237,8 @@ static void purge_worker_callback(void*)
21822237
ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
21832238
void *ctx;
21842239
THD *thd= acquire_thd(&ctx);
2185-
while (srv_task_execute()) {}
2240+
while (srv_task_execute())
2241+
ut_ad(purge_sys.running());
21862242
release_thd(thd,ctx);
21872243
}
21882244

@@ -2287,19 +2343,6 @@ ulint srv_get_task_queue_length()
22872343
}
22882344
#endif
22892345

2290-
/** Wake up the purge coordinator. */
2291-
void
2292-
srv_purge_wakeup()
2293-
{
2294-
ut_ad(!srv_read_only_mode);
2295-
if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
2296-
return;
2297-
}
2298-
ut_a(purge_sys.enabled() && !purge_sys.paused());
2299-
purge_state.m_running = 0;
2300-
srv_wake_purge_thread_if_not_active();
2301-
}
2302-
23032346
/** Shut down the purge threads. */
23042347
void srv_purge_shutdown()
23052348
{

storage/innobase/trx/trx0purge.cc

Lines changed: 1 addition & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*****************************************************************************
22
33
Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
4-
Copyright (c) 2017, 2019, MariaDB Corporation.
4+
Copyright (c) 2017, 2020, MariaDB Corporation.
55
66
This program is free software; you can redistribute it and/or modify it under
77
the terms of the GNU General Public License as published by the Free Software
@@ -1308,60 +1308,3 @@ ulint trx_purge(ulint n_tasks, bool truncate)
13081308

13091309
return(n_pages_handled);
13101310
}
1311-
1312-
extern tpool::waitable_task purge_coordinator_task;
1313-
1314-
/** Stop purge during FLUSH TABLES FOR EXPORT */
1315-
void purge_sys_t::stop()
1316-
{
1317-
rw_lock_x_lock(&latch);
1318-
1319-
if (!enabled())
1320-
{
1321-
/* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. */
1322-
ut_ad(!srv_undo_sources);
1323-
rw_lock_x_unlock(&latch);
1324-
return;
1325-
}
1326-
1327-
ut_ad(srv_n_purge_threads > 0);
1328-
1329-
if (m_paused++ == 0)
1330-
{
1331-
rw_lock_x_unlock(&latch);
1332-
ib::info() << "Stopping purge";
1333-
MONITOR_ATOMIC_INC(MONITOR_PURGE_STOP_COUNT);
1334-
return;
1335-
}
1336-
1337-
rw_lock_x_unlock(&latch);
1338-
1339-
if (running())
1340-
{
1341-
ib::info() << "Waiting for purge to stop";
1342-
purge_coordinator_task.wait();
1343-
}
1344-
}
1345-
1346-
/** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */
1347-
void purge_sys_t::resume()
1348-
{
1349-
if (!enabled())
1350-
{
1351-
/* Shutdown must have been initiated during FLUSH TABLES FOR EXPORT. */
1352-
ut_ad(!srv_undo_sources);
1353-
return;
1354-
}
1355-
ut_ad(!sync_check_iterate(sync_check()));
1356-
rw_lock_x_lock(&latch);
1357-
int32_t paused= m_paused--;
1358-
ut_a(paused);
1359-
1360-
if (paused == 1)
1361-
{
1362-
ib::info() << "Resuming purge";
1363-
srv_purge_wakeup();
1364-
MONITOR_ATOMIC_INC(MONITOR_PURGE_RESUME_COUNT);
1365-
}
1366-
rw_lock_x_unlock(&latch);
1367-
}

tpool/task.cc

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright(C) 2019 MariaDB Corporation.
1+
/* Copyright (C) 2019, 2020, MariaDB Corporation.
22
33
This program is free software; you can redistribute itand /or modify
44
it under the terms of the GNU General Public License as published by
@@ -57,7 +57,7 @@ void execute_after_task_callback()
5757

5858
/* Task that provide wait() operation. */
5959
waitable_task::waitable_task(callback_func func, void* arg, task_group* group) :
60-
task(func,arg, group),m_mtx(),m_cv(),m_ref_count(),m_waiter_count(){}
60+
task(func,arg, group),m_mtx(),m_cv(),m_ref_count(),m_waiter_count(),m_original_func(){}
6161

6262
void waitable_task::add_ref()
6363
{
@@ -72,13 +72,37 @@ void execute_after_task_callback()
7272
if (!m_ref_count && m_waiter_count)
7373
m_cv.notify_all();
7474
}
75-
void waitable_task::wait()
75+
void waitable_task::wait(std::unique_lock<std::mutex>& lk)
7676
{
77-
std::unique_lock<std::mutex> lk(m_mtx);
7877
m_waiter_count++;
7978
while (m_ref_count)
8079
m_cv.wait(lk);
8180
m_waiter_count--;
8281
}
82+
void waitable_task::wait()
83+
{
84+
std::unique_lock<std::mutex> lk(m_mtx);
85+
wait(lk);
86+
}
8387

84-
}
88+
static void noop(void*)
89+
{
90+
}
91+
void waitable_task::disable()
92+
{
93+
std::unique_lock<std::mutex> lk(m_mtx);
94+
if (m_func == noop)
95+
return;
96+
wait(lk);
97+
m_original_func = m_func;
98+
m_func = noop;
99+
}
100+
void waitable_task::enable()
101+
{
102+
std::unique_lock<std::mutex> lk(m_mtx);
103+
if(m_func != noop)
104+
return;
105+
wait(lk);
106+
m_func = m_original_func;
107+
}
108+
}

tpool/tpool.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright(C) 2019 MariaDB
1+
/* Copyright (C) 2019, 2020, MariaDB Corporation.
22
33
This program is free software; you can redistribute itand /or modify
44
it under the terms of the GNU General Public License as published by
@@ -96,13 +96,17 @@ class waitable_task :public task
9696
std::condition_variable m_cv;
9797
int m_ref_count;
9898
int m_waiter_count;
99+
callback_func m_original_func;
100+
void wait(std::unique_lock<std::mutex>&lk);
99101
public:
100102
waitable_task(callback_func func, void* arg, task_group* group = nullptr);
101103
void add_ref() override;
102104
void release() override;
103105
TPOOL_SUPPRESS_TSAN bool is_running() { return get_ref_count() > 0; }
104106
TPOOL_SUPPRESS_TSAN int get_ref_count() {return m_ref_count;}
105107
void wait();
108+
void disable();
109+
void enable();
106110
virtual ~waitable_task() {};
107111
};
108112
enum class aio_opcode

0 commit comments

Comments
 (0)