Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public static void setupCluster() throws Exception {
configureCluster(2)
.addConfig(
"config", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
.withShutdownTimeoutIsError(false)
.configure();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ public class MiniSolrCloudCluster {
private final JettyConfig jettyConfig;
private final String solrXml;
private final boolean trackJettyMetrics;
private final boolean shutdownTimeoutIsError;

private final AtomicInteger nodeIds = new AtomicInteger();
private final Map<String, CloudSolrClient> solrClientByCollection = new ConcurrentHashMap<>();
Expand Down Expand Up @@ -242,7 +243,8 @@ public MiniSolrCloudCluster(
zkTestServer,
securityJson,
false,
formatZkServer);
formatZkServer,
true);
}

/**
Expand All @@ -257,6 +259,7 @@ public MiniSolrCloudCluster(
* @param zkTestServer ZkTestServer to use. If null, one will be created
* @param securityJson A string representation of security.json file (optional).
* @param trackJettyMetrics supply jetties with metrics registry
* @param shutdownTimeoutIsError whether timeout during shutdown is an error (default true)
* @throws Exception if there was an error starting the cluster
*/
MiniSolrCloudCluster(
Expand All @@ -267,14 +270,16 @@ public MiniSolrCloudCluster(
ZkTestServer zkTestServer,
Optional<String> securityJson,
boolean trackJettyMetrics,
boolean formatZkServer)
boolean formatZkServer,
boolean shutdownTimeoutIsError)
throws Exception {

Objects.requireNonNull(securityJson);
this.baseDir = Objects.requireNonNull(baseDir);
this.jettyConfig = Objects.requireNonNull(jettyConfig);
this.solrXml = solrXml == null ? DEFAULT_CLOUD_SOLR_XML : solrXml;
this.trackJettyMetrics = trackJettyMetrics;
this.shutdownTimeoutIsError = shutdownTimeoutIsError;

log.info("Starting cluster of {} servers in {}", numServers, baseDir);

Expand Down Expand Up @@ -670,11 +675,26 @@ public void shutdown() throws Exception {
for (final JettySolrRunner jetty : jettys) {
shutdowns.add(() -> stopJettySolrRunner(jetty));
}
jettys.clear();

final ExecutorService executorCloser =
ExecutorUtil.newMDCAwareCachedThreadPool(new SolrNamedThreadFactory("jetty-closer"));
Collection<Future<JettySolrRunner>> futures = executorCloser.invokeAll(shutdowns);

// Use a 60 second timeout to prevent indefinite hangs during shutdown, especially when cores
// are in a bad state (e.g., after tragic events). This is 2x Jetty's internal timeout.
List<Future<JettySolrRunner>> futures;
try {
futures = executorCloser.invokeAll(shutdowns, 60, TimeUnit.SECONDS);
} catch (InterruptedException e) {
log.warn("Interrupted while shutting down jettys", e);
Thread.currentThread().interrupt();
executorCloser.shutdownNow();
throw e;
}

ExecutorUtil.shutdownAndAwaitTermination(executorCloser);

jettys.clear();

Exception shutdownError =
checkForExceptions("Error shutting down MiniSolrCloudCluster", futures);
if (shutdownError != null) {
Expand Down Expand Up @@ -773,9 +793,16 @@ private Exception checkForExceptions(String message, Collection<Future<JettySolr
try {
future.get();
} catch (ExecutionException e) {
log.error(message, e);
parsed.addSuppressed(e.getCause());
ok = false;
// Check if this is a TimeoutException from Jetty's internal shutdown timeout
if (e.getCause() instanceof TimeoutException && !shutdownTimeoutIsError) {
log.warn(
"Jetty shutdown task timed out (likely from Jetty Server.doStop()), but configured to not treat as error",
e);
} else {
log.error(message, e);
parsed.addSuppressed(e.getCause());
ok = false;
}
} catch (InterruptedException e) {
log.error(message, e);
Thread.interrupted();
Expand Down Expand Up @@ -1032,6 +1059,7 @@ public static class Builder {
EnvUtils.getPropertyAsBool("solr.cloud.overseer.enabled", true);
private boolean formatZkServer = true;
private boolean disableTraceIdGeneration = false;
private boolean shutdownTimeoutIsError = true;

/**
* Create a builder
Expand Down Expand Up @@ -1150,6 +1178,18 @@ public Builder formatZkServer(boolean formatZkServer) {
return this;
}

/**
 * Sets whether a timeout while shutting down the cluster counts as an error.
 *
 * @param shutdownTimeoutIsError {@code true} (the default) to have a shutdown timeout reported as
 *     an error; {@code false} to have it logged as a warning and otherwise not treated as an
 *     error
 * @return this {@linkplain Builder}, so that calls can be chained
 */
public Builder withShutdownTimeoutIsError(boolean shutdownTimeoutIsError) {
  final Builder self = this;
  self.shutdownTimeoutIsError = shutdownTimeoutIsError;
  return self;
}

/**
* Configure and run the {@link MiniSolrCloudCluster}
*
Expand Down Expand Up @@ -1182,7 +1222,8 @@ public MiniSolrCloudCluster build() throws Exception {
null,
securityJson,
trackJettyMetrics,
formatZkServer);
formatZkServer,
shutdownTimeoutIsError);
for (Config config : configs) {
cluster.uploadConfigSet(config.path, config.name);
}
Expand Down
Loading