Skip to content

Commit 7f5791a

Browse files
committed
CXX-103 CXX-334 kill StaticObserver and enable RSMW to be stopped/started
1 parent e13b3e3 commit 7f5791a

File tree

9 files changed

+111
-130
lines changed

9 files changed

+111
-130
lines changed

src/SConscript.client

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,6 @@ clientSourceBasic = [
154154
'mongo/util/text.cpp',
155155
'mongo/util/time_support.cpp',
156156
'mongo/util/timer.cpp',
157-
'mongo/util/util.cpp',
158157
'third_party/murmurhash3/MurmurHash3.cpp',
159158
]
160159

src/mongo/client/dbclient_rs_test.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,14 @@ namespace {
6767
class BasicRS: public mongo::unittest::Test {
6868
protected:
6969
void setUp() {
70-
ReplicaSetMonitor::cleanup();
70+
ReplicaSetMonitor::initialize();
7171
_replSet.reset(new MockReplicaSet("test", 2));
7272
ConnectionString::setConnectionHook(
7373
mongo::MockConnRegistry::get()->getConnStrHook());
7474
}
7575

7676
void tearDown() {
77-
ReplicaSetMonitor::cleanup();
77+
ReplicaSetMonitor::shutdown();
7878
_replSet.reset();
7979
}
8080

@@ -150,7 +150,7 @@ namespace {
150150
class AllNodesDown: public mongo::unittest::Test {
151151
protected:
152152
void setUp() {
153-
ReplicaSetMonitor::cleanup();
153+
ReplicaSetMonitor::initialize();
154154
_replSet.reset(new MockReplicaSet("test", 2));
155155
ConnectionString::setConnectionHook(
156156
mongo::MockConnRegistry::get()->getConnStrHook());
@@ -163,7 +163,7 @@ namespace {
163163
}
164164

165165
void tearDown() {
166-
ReplicaSetMonitor::cleanup();
166+
ReplicaSetMonitor::shutdown();
167167
_replSet.reset();
168168
}
169169

@@ -226,15 +226,15 @@ namespace {
226226
class PrimaryDown: public mongo::unittest::Test {
227227
protected:
228228
void setUp() {
229-
ReplicaSetMonitor::cleanup();
229+
ReplicaSetMonitor::initialize();
230230
_replSet.reset(new MockReplicaSet("test", 2));
231231
ConnectionString::setConnectionHook(
232232
mongo::MockConnRegistry::get()->getConnStrHook());
233233
_replSet->kill(_replSet->getPrimary());
234234
}
235235

236236
void tearDown() {
237-
ReplicaSetMonitor::cleanup();
237+
ReplicaSetMonitor::shutdown();
238238
_replSet.reset();
239239
}
240240

@@ -311,7 +311,7 @@ namespace {
311311
class SecondaryDown: public mongo::unittest::Test {
312312
protected:
313313
void setUp() {
314-
ReplicaSetMonitor::cleanup();
314+
ReplicaSetMonitor::initialize();
315315
_replSet.reset(new MockReplicaSet("test", 2));
316316
ConnectionString::setConnectionHook(
317317
mongo::MockConnRegistry::get()->getConnStrHook());
@@ -320,7 +320,7 @@ namespace {
320320
}
321321

322322
void tearDown() {
323-
ReplicaSetMonitor::cleanup();
323+
ReplicaSetMonitor::shutdown();
324324
_replSet.reset();
325325

326326
}
@@ -406,7 +406,7 @@ namespace {
406406

407407
// This shuts down the background RSMWatcher thread and prevents it from running. These
408408
// tests depend on controlling when the RSMs are updated.
409-
ReplicaSetMonitor::cleanup();
409+
ReplicaSetMonitor::initialize();
410410

411411
_replSet.reset(new MockReplicaSet("test", 5));
412412
_originalConnectionHook = ConnectionString::getConnectionHook();
@@ -477,7 +477,7 @@ namespace {
477477
ReplicaSetMonitor::useDeterministicHostSelection = false;
478478

479479
ConnectionString::setConnectionHook(_originalConnectionHook);
480-
ReplicaSetMonitor::cleanup();
480+
ReplicaSetMonitor::shutdown();
481481
_replSet.reset();
482482

483483
}

src/mongo/client/init.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ namespace client {
8888
if (!result.isOK())
8989
return result;
9090

91+
result = ReplicaSetMonitor::initialize();
92+
if (!result.isOK()) {
93+
return result;
94+
}
95+
9196
return Status::OK();
9297
}
9398
else if (initStatus == 1) {
@@ -108,7 +113,10 @@ namespace client {
108113

109114
if (initStatus == 1) {
110115

111-
ReplicaSetMonitor::cleanup();
116+
Status result = ReplicaSetMonitor::shutdown(Options::current().autoShutdownGracePeriodMillis());
117+
if (!result.isOK()) {
118+
return result;
119+
}
112120
shutdownNetworking();
113121
return Status::OK();
114122
}

src/mongo/client/replica_set_monitor.cpp

Lines changed: 57 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,14 @@
2323
#include <limits>
2424

2525
#include <boost/make_shared.hpp>
26+
#include <boost/scoped_ptr.hpp>
2627
#include <boost/thread.hpp>
2728
#include <boost/thread/condition.hpp>
2829
#include <boost/thread/mutex.hpp>
2930

3031
#include "mongo/client/options.h"
3132
#include "mongo/client/private/options.h"
3233
#include "mongo/client/replica_set_monitor_internal.h"
33-
#include "mongo/util/concurrency/mutex.h" // for StaticObserver
3434
#include "mongo/util/background.h"
3535
#include "mongo/util/debug_util.h"
3636
#include "mongo/util/log.h"
@@ -66,29 +66,27 @@ namespace {
6666

6767
const double socketTimeoutSecs = 5;
6868

69-
/* Replica Set Monitor shared state:
70-
* If a program (such as one built with the C++ driver) exits (by either calling exit()
71-
* or by returning from main()), static objects will be destroyed in the reverse order
72-
* of their creation (within each translation unit (source code file)). This makes it
73-
* vital that the order be explicitly controlled within the source file so that destroyed
74-
* objects never reference objects that have been destroyed earlier.
69+
/* Replica Set Monitor global state
7570
*
76-
* The order chosen below is intended to allow safe destruction in reverse order from
77-
* construction order:
78-
* setsLock -- mutex protecting _seedServers and _sets, destroyed last
71+
* watcherLifetimeLock -- mutex held during creation/destruction of
72+
* replicaSetMonitorWatcher
73+
* replicaSetMonitorWatcher -- background job to check Replica Set members
74+
* setsLock -- mutex protecting seedServers and sets
7975
* seedServers -- list (map) of servers
8076
* sets -- list (map) of ReplicaSetMonitors
81-
* replicaSetMonitorWatcher -- background job to check Replica Set members
82-
* staticObserver -- sentinel to detect process termination
83-
*
84-
* Related to:
85-
* SERVER-8891 -- Simple client fail with segmentation fault in mongoclient library
8677
*
8778
* Mutex locking order:
88-
* Don't lock setsLock while holding any SetState::mutex. It is however safe to grab a
89-
* SetState::mutex without holder setsLock, but then you can't grab setsLock until you
90-
* release the SetState::mutex.
79+
* watcherLock should be acquired first when acquiring it and any other lock.
80+
* Don't lock setsLock while holding any SetState::mutex.
81+
* It is however safe to grab a SetState::mutex without holding setsLock, but
82+
* then you can't grab setsLock until you release the SetState::mutex.
9183
*/
84+
85+
class ReplicaSetMonitorWatcher;
86+
87+
boost::mutex watcherLifetimeLock;
88+
boost::scoped_ptr<ReplicaSetMonitorWatcher> replicaSetMonitorWatcher;
89+
9290
boost::mutex setsLock;
9391
StringMap<set<HostAndPort> > seedServers;
9492
StringMap<ReplicaSetMonitorPtr> sets;
@@ -105,9 +103,6 @@ namespace {
105103
~ReplicaSetMonitorWatcher() {
106104
stop();
107105

108-
// We relying on the fact that if the monitor was rerun again, wait will not hang
109-
// because _destroyingStatics will make the run method exit immediately.
110-
dassert(StaticObserver::_destroyingStatics);
111106
if (running()) {
112107
wait();
113108
}
@@ -139,16 +134,7 @@ namespace {
139134
void run() {
140135
log() << "starting"; // includes thread name in output
141136

142-
// Added only for patching timing problems in test. Remove after tests
143-
// are fixed - see 392b933598668768bf12b1e41ad444aa3548d970.
144-
// Should not be needed after SERVER-7533 gets implemented and tests start
145-
// using it.
146-
if (!StaticObserver::_destroyingStatics) {
147-
boost::unique_lock<boost::mutex> sl( _monitorMutex );
148-
_stopRequestedCV.timed_wait(sl, boost::posix_time::seconds(10));
149-
}
150-
151-
while ( !StaticObserver::_destroyingStatics ) {
137+
while ( true ) {
152138
{
153139
boost::lock_guard<boost::mutex> sl( _monitorMutex );
154140
if (_stopRequested) {
@@ -207,9 +193,7 @@ namespace {
207193

208194
boost::condition_variable _stopRequestedCV;
209195
bool _stopRequested;
210-
} replicaSetMonitorWatcher;
211-
212-
StaticObserver staticObserver;
196+
};
213197

214198
//
215199
// Helpers for stl algorithms
@@ -356,7 +340,11 @@ namespace {
356340
if ( ! m )
357341
m = boost::make_shared<ReplicaSetMonitor>( name , servers );
358342

359-
replicaSetMonitorWatcher.safeGo();
343+
// Don't need to hold the lifetime lock for safeGo as
344+
// 1) we assume the monitor is created as the contract of this class is such that initialize()
345+
// must have been called.
346+
// 2) replicaSetMonitorWatcher synchronizes safeGo internally using the _monitorMutex
347+
replicaSetMonitorWatcher->safeGo();
360348
}
361349

362350
ReplicaSetMonitorPtr ReplicaSetMonitor::get(const string& name,
@@ -374,7 +362,9 @@ namespace {
374362
ReplicaSetMonitorPtr& m = sets[name];
375363
invariant( !m );
376364
m.reset( new ReplicaSetMonitor( name, j->second ) );
377-
replicaSetMonitorWatcher.safeGo();
365+
// see above comment in createIfNeeded for why we don't need the
366+
// watcherLifetimeLock
367+
replicaSetMonitorWatcher->safeGo();
378368
return m;
379369
}
380370
}
@@ -444,14 +434,40 @@ namespace {
444434
hosts.done();
445435
}
446436

447-
void ReplicaSetMonitor::cleanup() {
437+
// Users shouldn't need to call this more than once, but our tests do.
438+
// Note that this doesn't actually start the watcher, it just creates it.
439+
// The watcher is lazily started when we start monitoring our first replica
440+
// set, so we don't have to pay the cost of the extra thread unless needed.
441+
Status ReplicaSetMonitor::initialize() {
442+
boost::lock_guard<boost::mutex> lock(watcherLifetimeLock);
443+
if (replicaSetMonitorWatcher) {
444+
return Status(ErrorCodes::IllegalOperation,
445+
"ReplicaSetMonitorWatcher has already been initialized");
446+
}
447+
replicaSetMonitorWatcher.reset(new ReplicaSetMonitorWatcher());
448+
return Status::OK();
449+
}
450+
451+
Status ReplicaSetMonitor::shutdown(int gracePeriodMillis) {
452+
boost::lock_guard<boost::mutex> lock(watcherLifetimeLock);
453+
if (!replicaSetMonitorWatcher) {
454+
return Status(ErrorCodes::IllegalOperation,
455+
"ReplicaSetMonitorWatcher has not been initialized");
456+
}
448457
// Call cancel first, in case the RSMW was never started.
449-
replicaSetMonitorWatcher.cancel();
450-
replicaSetMonitorWatcher.stop();
451-
replicaSetMonitorWatcher.wait();
452-
boost::lock_guard<boost::mutex> lock(setsLock);
458+
replicaSetMonitorWatcher->cancel();
459+
replicaSetMonitorWatcher->stop();
460+
bool success = replicaSetMonitorWatcher->wait(gracePeriodMillis);
461+
if (!success) {
462+
return Status(ErrorCodes::InternalError,
463+
"Timed out waiting for ReplicaSetMonitorWatcher to shutdown");
464+
}
465+
replicaSetMonitorWatcher.reset();
466+
boost::lock_guard<boost::mutex> lockSets(setsLock);
453467
sets = StringMap<ReplicaSetMonitorPtr>();
454468
seedServers = StringMap<set<HostAndPort> >();
469+
470+
return Status::OK();
455471
}
456472

457473
Refresher::Refresher(const SetStatePtr& setState)

src/mongo/client/replica_set_monitor.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,12 +158,25 @@ namespace mongo {
158158
*/
159159
static void MONGO_CLIENT_FUNC setConfigChangeHook(ConfigChangeHook hook);
160160

161+
/**
162+
* Starts the ReplicaSetMonitorWatcher. You shouldn't have to call this in production code
163+
* as it is called in init.cpp. However, it may be useful to pair calls to initalize
164+
* and cleanup in tests. The behavior of other methods in this class is undefined if initialize()
165+
* has not been called.
166+
*/
167+
static Status MONGO_CLIENT_FUNC initialize();
168+
161169
/**
162170
* Permanently stops all monitoring on replica sets and clears all cached information
163171
* as well. As a consequence, NEVER call this if you have other threads that have a
164-
* DBClientReplicaSet instance.
172+
* DBClientReplicaSet instance. After this is called, the behavior of other methods in this
173+
* class is undefined until a subsequent call to initialize.
174+
*
175+
* The gracePeriodMillis parameter determines the maximum amount of time to wait. A value
176+
* of 0 (the default) indicates no timeout. If a nonzero timeout is specified, a return value
177+
* of Status::OK() indicates that shutdown completed successfully in the allotted time.
165178
*/
166-
static void MONGO_CLIENT_FUNC cleanup();
179+
static Status MONGO_CLIENT_FUNC shutdown(int gracePeriodMillis = 0);
167180

168181
/**
169182
* If a ReplicaSetMonitor has been refreshed more than this many times in a row without

src/mongo/dbtests/replica_set_monitor_test.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1377,11 +1377,12 @@ namespace mongo_test {
13771377
_originalConnectionHook = ConnectionString::getConnectionHook();
13781378
ConnectionString::setConnectionHook(
13791379
mongo::MockConnRegistry::get()->getConnStrHook());
1380+
ReplicaSetMonitor::initialize();
13801381
}
13811382

13821383
void tearDown() {
13831384
ConnectionString::setConnectionHook(_originalConnectionHook);
1384-
ReplicaSetMonitor::cleanup();
1385+
ReplicaSetMonitor::shutdown();
13851386
_replSet.reset();
13861387
}
13871388

@@ -1429,6 +1430,7 @@ namespace mongo_test {
14291430
const string replSetName(replSet.getSetName());
14301431
set<HostAndPort> seedList;
14311432
seedList.insert(HostAndPort(replSet.getPrimary()));
1433+
ReplicaSetMonitor::initialize();
14321434
ReplicaSetMonitor::createIfNeeded(replSetName, seedList);
14331435

14341436
const MockReplicaSet::ReplConfigMap origConfig = replSet.getReplConfig();
@@ -1463,7 +1465,7 @@ namespace mongo_test {
14631465
replMonitor->startOrContinueRefresh().refreshAll();
14641466
}
14651467

1466-
ReplicaSetMonitor::cleanup();
1468+
ReplicaSetMonitor::shutdown();
14671469
ConnectionString::setConnectionHook(originalConnHook);
14681470
}
14691471

@@ -1498,12 +1500,13 @@ namespace mongo_test {
14981500
}
14991501

15001502
_replSet->setConfig(config);
1503+
ReplicaSetMonitor::initialize();
15011504

15021505
}
15031506

15041507
void tearDown() {
15051508
ConnectionString::setConnectionHook(_originalConnectionHook);
1506-
ReplicaSetMonitor::cleanup();
1509+
ReplicaSetMonitor::shutdown();
15071510
_replSet.reset();
15081511
}
15091512

src/mongo/util/background.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
#include <boost/thread/thread.hpp>
2828

2929
#include "mongo/stdx/functional.h"
30-
#include "mongo/util/concurrency/mutex.h"
3130
#include "mongo/util/concurrency/thread_name.h"
3231
#include "mongo/util/debug_util.h"
3332
#include "mongo/util/log.h"
@@ -130,6 +129,20 @@ namespace mongo {
130129
return Status::OK();
131130
}
132131

132+
namespace {
133+
inline boost::xtime incxtimemillis( long long s ) {
134+
boost::xtime xt;
135+
boost::xtime_get(&xt, MONGO_BOOST_TIME_UTC);
136+
xt.sec += (int)( s / 1000 );
137+
xt.nsec += (int)(( s % 1000 ) * 1000000);
138+
if ( xt.nsec >= 1000000000 ) {
139+
xt.nsec -= 1000000000;
140+
xt.sec++;
141+
}
142+
return xt;
143+
}
144+
} // namespace
145+
133146
bool BackgroundJob::wait( unsigned msTimeOut ) {
134147
verify( !_selfDelete ); // you cannot call wait on a self-deleting job
135148
boost::unique_lock<boost::mutex> l( _status->mutex );

0 commit comments

Comments
 (0)