2323#include < limits>
2424
2525#include < boost/make_shared.hpp>
26+ #include < boost/scoped_ptr.hpp>
2627#include < boost/thread.hpp>
2728#include < boost/thread/condition.hpp>
2829#include < boost/thread/mutex.hpp>
2930
3031#include " mongo/client/options.h"
3132#include " mongo/client/private/options.h"
3233#include " mongo/client/replica_set_monitor_internal.h"
33- #include " mongo/util/concurrency/mutex.h" // for StaticObserver
3434#include " mongo/util/background.h"
3535#include " mongo/util/debug_util.h"
3636#include " mongo/util/log.h"
@@ -66,29 +66,27 @@ namespace {
6666
6767 const double socketTimeoutSecs = 5 ;
6868
69- /* Replica Set Monitor shared state:
70- * If a program (such as one built with the C++ driver) exits (by either calling exit()
71- * or by returning from main()), static objects will be destroyed in the reverse order
72- * of their creation (within each translation unit (source code file)). This makes it
73- * vital that the order be explicitly controlled within the source file so that destroyed
74- * objects never reference objects that have been destroyed earlier.
69+ /* Replica Set Monitor global state
7570 *
76- * The order chosen below is intended to allow safe destruction in reverse order from
77- * construction order:
78- * setsLock -- mutex protecting _seedServers and _sets, destroyed last
71+ * watcherLifetimeLock -- mutex held during creation/destruction of
72+ * replicaSetMonitorWatcher
73+ * replicaSetMonitorWatcher -- background job to check Replica Set members
74+ * setsLock -- mutex protecting seedServers and sets
7975 * seedServers -- list (map) of servers
8076 * sets -- list (map) of ReplicaSetMonitors
81- * replicaSetMonitorWatcher -- background job to check Replica Set members
82- * staticObserver -- sentinel to detect process termination
83- *
84- * Related to:
85- * SERVER-8891 -- Simple client fail with segmentation fault in mongoclient library
8677 *
8778 * Mutex locking order:
88- * Don't lock setsLock while holding any SetState::mutex. It is however safe to grab a
89- * SetState::mutex without holder setsLock, but then you can't grab setsLock until you
90- * release the SetState::mutex.
79+ * watcherLifetimeLock should be acquired first when acquiring it and any other lock.
80+ * Don't lock setsLock while holding any SetState::mutex.
81+ * It is however safe to grab a SetState::mutex without holding setsLock, but
82+ * then you can't grab setsLock until you release the SetState::mutex.
9183 */
84+
85+ class ReplicaSetMonitorWatcher ;
86+
87+ boost::mutex watcherLifetimeLock;
88+ boost::scoped_ptr<ReplicaSetMonitorWatcher> replicaSetMonitorWatcher;
89+
9290 boost::mutex setsLock;
9391 StringMap<set<HostAndPort> > seedServers;
9492 StringMap<ReplicaSetMonitorPtr> sets;
@@ -105,9 +103,6 @@ namespace {
105103 ~ReplicaSetMonitorWatcher () {
106104 stop ();
107105
108- // We relying on the fact that if the monitor was rerun again, wait will not hang
109- // because _destroyingStatics will make the run method exit immediately.
110- dassert (StaticObserver::_destroyingStatics);
111106 if (running ()) {
112107 wait ();
113108 }
@@ -139,16 +134,7 @@ namespace {
139134 void run () {
140135 log () << " starting" ; // includes thread name in output
141136
142- // Added only for patching timing problems in test. Remove after tests
143- // are fixed - see 392b933598668768bf12b1e41ad444aa3548d970.
144- // Should not be needed after SERVER-7533 gets implemented and tests start
145- // using it.
146- if (!StaticObserver::_destroyingStatics) {
147- boost::unique_lock<boost::mutex> sl ( _monitorMutex );
148- _stopRequestedCV.timed_wait (sl, boost::posix_time::seconds (10 ));
149- }
150-
151- while ( !StaticObserver::_destroyingStatics ) {
137+ while ( true ) {
152138 {
153139 boost::lock_guard<boost::mutex> sl ( _monitorMutex );
154140 if (_stopRequested) {
@@ -207,9 +193,7 @@ namespace {
207193
208194 boost::condition_variable _stopRequestedCV;
209195 bool _stopRequested;
210- } replicaSetMonitorWatcher;
211-
212- StaticObserver staticObserver;
196+ };
213197
214198 //
215199 // Helpers for stl algorithms
@@ -356,7 +340,11 @@ namespace {
356340 if ( ! m )
357341 m = boost::make_shared<ReplicaSetMonitor>( name , servers );
358342
359- replicaSetMonitorWatcher.safeGo ();
343+ // Don't need to hold the lifetime lock for safeGo as
344+ // 1) we assume the monitor is created as the contract of this class is such that initialize()
345+ // must have been called.
346+ // 2) replicaSetMonitorWatcher synchronizes safeGo internally using the _monitorMutex
347+ replicaSetMonitorWatcher->safeGo ();
360348 }
361349
362350 ReplicaSetMonitorPtr ReplicaSetMonitor::get (const string& name,
@@ -374,7 +362,9 @@ namespace {
374362 ReplicaSetMonitorPtr& m = sets[name];
375363 invariant ( !m );
376364 m.reset ( new ReplicaSetMonitor ( name, j->second ) );
377- replicaSetMonitorWatcher.safeGo ();
365+ // see above comment in createIfNeeded for why we don't need the
366+ // watcherLifetimeLock
367+ replicaSetMonitorWatcher->safeGo ();
378368 return m;
379369 }
380370 }
@@ -444,14 +434,40 @@ namespace {
444434 hosts.done ();
445435 }
446436
447- void ReplicaSetMonitor::cleanup () {
437+ // Users shouldn't need to call this more than once, but our tests do.
438+ // Note that this doesn't actually start the watcher, it just creates it.
439+ // The watcher is lazily started when we start monitoring our first replica
440+ // set, so we don't have to pay the cost of the extra thread unless needed.
441+ Status ReplicaSetMonitor::initialize () {
442+ boost::lock_guard<boost::mutex> lock (watcherLifetimeLock);
443+ if (replicaSetMonitorWatcher) {
444+ return Status (ErrorCodes::IllegalOperation,
445+ " ReplicaSetMonitorWatcher has already been initialized" );
446+ }
447+ replicaSetMonitorWatcher.reset (new ReplicaSetMonitorWatcher ());
448+ return Status::OK ();
449+ }
450+
451+ Status ReplicaSetMonitor::shutdown (int gracePeriodMillis) {
452+ boost::lock_guard<boost::mutex> lock (watcherLifetimeLock);
453+ if (!replicaSetMonitorWatcher) {
454+ return Status (ErrorCodes::IllegalOperation,
455+ " ReplicaSetMonitorWatcher has not been initialized" );
456+ }
448457 // Call cancel first, in case the RSMW was never started.
449- replicaSetMonitorWatcher.cancel ();
450- replicaSetMonitorWatcher.stop ();
451- replicaSetMonitorWatcher.wait ();
452- boost::lock_guard<boost::mutex> lock (setsLock);
458+ replicaSetMonitorWatcher->cancel ();
459+ replicaSetMonitorWatcher->stop ();
460+ bool success = replicaSetMonitorWatcher->wait (gracePeriodMillis);
461+ if (!success) {
462+ return Status (ErrorCodes::InternalError,
463+ " Timed out waiting for ReplicaSetMonitorWatcher to shutdown" );
464+ }
465+ replicaSetMonitorWatcher.reset ();
466+ boost::lock_guard<boost::mutex> lockSets (setsLock);
453467 sets = StringMap<ReplicaSetMonitorPtr>();
454468 seedServers = StringMap<set<HostAndPort> >();
469+
470+ return Status::OK ();
455471 }
456472
457473 Refresher::Refresher (const SetStatePtr& setState)
0 commit comments