Skip to content

Commit 2f9eef8

Browse files
Rakesh Venkatesh (author) committed: code refactor
1 parent 417aa67 commit 2f9eef8

File tree

6 files changed: 71 additions (+71) and 106 deletions (-106)

engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1791,7 +1791,7 @@ public void processHostAdded(long hostId) {
17911791
public void processConnect(final Host host, final StartupCommand cmd, final boolean forRebalance) {
17921792
if (cmd instanceof StartupRoutingCommand) {
17931793
if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) {
1794-
Map<String, String> params = new HashMap<String, String>();
1794+
Map<String, String> params = new HashMap<>();
17951795
params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString()));
17961796
params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString()));
17971797

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHABase.java

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -32,11 +32,11 @@ public class KVMHABase {
3232
private static final Logger s_logger = Logger.getLogger(KVMHABase.class);
3333
private long _timeout = 60000; /* 1 minutes */
3434
protected static String s_heartBeatPath;
35-
protected static long _heartBeatUpdateTimeout = 60000;
36-
protected static long _heartBeatUpdateFreq = 60000;
37-
protected static long _heartBeatUpdateMaxRetries = 5;
38-
protected static long _heartBeatUpdateRetrySleep = 10000;
39-
protected static HeartBeatAction _heartBeatFailureAction = HeartBeatAction.HARDRESET;
35+
protected static long s_heartBeatUpdateTimeout = 60000;
36+
protected static long s_heartBeatUpdateFreq = 60000;
37+
protected static long s_heartBeatUpdateMaxRetries = 5;
38+
protected static long s_heartBeatUpdateRetrySleep = 10000;
39+
protected static HeartBeatAction s_heartBeatFailureAction = HeartBeatAction.HARDRESET;
4040

4141
public static enum PoolType {
4242
PrimaryStorage, SecondaryStorage

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ public Boolean checkingHeartBeat() {
5555
cmd.add("-m", pool._mountDestPath);
5656
cmd.add("-h", hostIp);
5757
cmd.add("-r");
58-
cmd.add("-t", String.valueOf(_heartBeatUpdateFreq / 1000));
58+
cmd.add("-t", String.valueOf(s_heartBeatUpdateFreq / 1000));
5959
OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser();
6060
String result = cmd.execute(parser);
6161
String parsedLine = parser.getLine();

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java

Lines changed: 43 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -43,10 +43,10 @@ public class KVMHAMonitor extends KVMHABase implements Runnable {
4343
private final Map<String, NfsStoragePool> storagePool = new ConcurrentHashMap<>();
4444
private Set<String> removedPools = new HashSet<>();
4545
private final boolean rebootHostAndAlertManagementOnHeartbeatTimeout;
46-
private final Map<String, CheckPoolThread> _storagePoolCheckThreads = new HashMap<String, CheckPoolThread>();
47-
private final Map<String, String> _storagePoolCheckStatus = new HashMap<String, String>();
48-
private final static String STATUS_RUNNING = "Running";
49-
private final static String STATUS_TERMINATED = "Terminated";
46+
private final Map<String, CheckPoolThread> storagePoolCheckThreads = new HashMap<>();
47+
private final Map<String, String> storagePoolCheckStatus = new HashMap<>();
48+
private static final String STATUS_RUNNING = "Running";
49+
private static final String STATUS_TERMINATED = "Terminated";
5050

5151
private final String hostPrivateIp;
5252

@@ -57,7 +57,7 @@ public KVMHAMonitor(NfsStoragePool pool, String host, String scriptPath) {
5757
hostPrivateIp = host;
5858
configureHeartBeatPath(scriptPath);
5959

60-
_heartBeatUpdateTimeout = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.HEARTBEAT_UPDATE_TIMEOUT);
60+
s_heartBeatUpdateTimeout = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.HEARTBEAT_UPDATE_TIMEOUT);
6161
rebootHostAndAlertManagementOnHeartbeatTimeout = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.REBOOT_HOST_AND_ALERT_MANAGEMENT_ON_HEARTBEAT_TIMEOUT);
6262
}
6363

@@ -72,16 +72,16 @@ public static synchronized void configureHeartBeatParams(Long heartBeatUpdateMax
7272
s_logger.debug(String.format("Configuring heartbeat params: max retries = %s, retry sleep = %s, timeout = %s, action = %s",
7373
heartBeatUpdateMaxTries, heartBeatUpdateRetrySleep, heartBeatUpdateTimeout, heartBeatFailureAction));
7474
if (heartBeatUpdateMaxTries != null) {
75-
KVMHABase._heartBeatUpdateMaxRetries = heartBeatUpdateMaxTries;
75+
KVMHABase.s_heartBeatUpdateMaxRetries = heartBeatUpdateMaxTries;
7676
}
7777
if (heartBeatUpdateRetrySleep != null) {
78-
KVMHABase._heartBeatUpdateRetrySleep = heartBeatUpdateRetrySleep;
78+
KVMHABase.s_heartBeatUpdateRetrySleep = heartBeatUpdateRetrySleep;
7979
}
8080
if (heartBeatUpdateTimeout != null) {
81-
KVMHABase._heartBeatUpdateTimeout = heartBeatUpdateTimeout;
81+
KVMHABase.s_heartBeatUpdateTimeout = heartBeatUpdateTimeout;
8282
}
8383
if (heartBeatFailureAction != null) {
84-
KVMHABase._heartBeatFailureAction = heartBeatFailureAction;
84+
KVMHABase.s_heartBeatFailureAction = heartBeatFailureAction;
8585
}
8686
}
8787

@@ -128,13 +128,13 @@ public void runInContext() {
128128
private void check() {
129129
if (! storagePool.containsKey(primaryStoragePool._poolUUID)) {
130130
s_logger.info("Removing check on storage pool as it has been removed: " + primaryStoragePool._poolUUID);
131-
_storagePoolCheckStatus.remove(primaryStoragePool._poolUUID);
132-
_storagePoolCheckThreads.remove(primaryStoragePool._poolUUID);
131+
storagePoolCheckStatus.remove(primaryStoragePool._poolUUID);
132+
storagePoolCheckThreads.remove(primaryStoragePool._poolUUID);
133133
Thread.currentThread().interrupt();
134134
return;
135135
}
136136

137-
if (_storagePoolCheckStatus.containsKey(primaryStoragePool._poolUUID)) {
137+
if (storagePoolCheckStatus.containsKey(primaryStoragePool._poolUUID)) {
138138
s_logger.info("Ignoring check on storage pool: " + primaryStoragePool._poolUUID);
139139
return;
140140
}
@@ -143,67 +143,58 @@ private void check() {
143143

144144
String result = null;
145145
// Try multiple times, but sleep in between tries to ensure it isn't a short lived transient error
146-
for (int i = 1; i <= _heartBeatUpdateMaxRetries; i++) {
147-
s_logger.info(String.format("Trying to write heartbeat to pool %s %s of %s times", primaryStoragePool._mountDestPath, i, _heartBeatUpdateMaxRetries));
146+
for (int i = 1; i <= s_heartBeatUpdateMaxRetries; i++) {
147+
s_logger.info(String.format("Trying to write heartbeat to pool %s %s of %s times", primaryStoragePool._mountDestPath, i, s_heartBeatUpdateMaxRetries));
148148
Script cmd = createHeartBeatCommand(primaryStoragePool, hostPrivateIp, true);
149149
result = cmd.execute();
150150
s_logger.debug(String.format("The command (%s), to the pool [%s], has the result [%s].", cmd.toString(), primaryStoragePool._poolUUID, result));
151151
if (result != null) {
152-
s_logger.warn(String.format("Write heartbeat for pool [%s] failed: %s; try: %s of %s.", primaryStoragePool._poolUUID, result, i, _heartBeatUpdateMaxRetries));
153-
_storagePoolCheckStatus.put(primaryStoragePool._poolUUID, STATUS_RUNNING);
154-
if (i < _heartBeatUpdateMaxRetries) {
155-
while(true) {
156-
try {
157-
Thread.currentThread().sleep(_heartBeatUpdateRetrySleep);
158-
break;
159-
} catch (InterruptedException e) {
160-
s_logger.debug("[ignored] interupted between heartbeat retries with error message: " + e.getMessage());
161-
}
152+
s_logger.warn(String.format("Write heartbeat for pool [%s] failed: %s; try: %s of %s.", primaryStoragePool._poolUUID, result, i, s_heartBeatUpdateMaxRetries));
153+
storagePoolCheckStatus.put(primaryStoragePool._poolUUID, STATUS_RUNNING);
154+
if (i < s_heartBeatUpdateMaxRetries) {
155+
try {
156+
Thread.sleep(s_heartBeatUpdateRetrySleep);
157+
break;
158+
} catch (InterruptedException e) {
159+
s_logger.debug("[ignored] interrupted between heartbeat retries with error message: " + e.getMessage());
162160
}
163161
}
164162
} else {
165-
_storagePoolCheckStatus.remove(primaryStoragePool._poolUUID);
163+
storagePoolCheckStatus.remove(primaryStoragePool._poolUUID);
166164
break;
167165
}
168166
}
169167

170168
if (result != null) {
171-
// Perform action if can't write to heartbeat file.
169+
// Perform action if it can't write to heartbeat file.
172170
// This will raise an alert on the mgmt server
173-
s_logger.warn("write heartbeat failed: " + result);
174-
if (HeartBeatAction.NOACTION.equals(s_heartBeatFailureAction)) {
175-
s_logger.warn("No action will be performed on storage pool: " + primaryStoragePool._poolUUID);
176-
_storagePoolCheckStatus.remove(primaryStoragePool._poolUUID);
177-
return true;
178-
}
171+
s_logger.warn(String.format("write heartbeat for pool [%s] failed: %s", primaryStoragePool._poolUUID, result));
179172

180173
performAction(primaryStoragePool);
181-
_storagePoolCheckStatus.put(primaryStoragePool._poolUUID, STATUS_TERMINATED);
182-
s_logger.debug("End performing action " + _heartBeatFailureAction + " on storage pool: " + primaryStoragePool._poolUUID);
174+
storagePoolCheckStatus.put(primaryStoragePool._poolUUID, STATUS_TERMINATED);
175+
s_logger.debug("End performing action " + s_heartBeatFailureAction + " on storage pool: " + primaryStoragePool._poolUUID);
183176
return;
184177
}
185178

186179
s_logger.debug("End checking on storage pool " + primaryStoragePool._poolUUID);
187180
}
188181

189182
private void performAction(NfsStoragePool primaryStoragePool) {
190-
s_logger.warn("Performing action " + _heartBeatFailureAction + " on storage pool: " + primaryStoragePool._poolUUID);
191-
183+
s_logger.warn("Performing action " + s_heartBeatFailureAction + " on storage pool: " + primaryStoragePool._poolUUID);
192184
Script cmd = createHeartBeatCommand(primaryStoragePool, null, false);
193-
// give priority to action defined in agent.properties file
194-
if (rebootHostAndAlertManagementOnHeartbeatTimeout) {
195-
s_logger.warn(String.format("Write heartbeat for pool [%s] failed; stopping cloudstack-agent.", primaryStoragePool._poolUUID));
196-
cmd.execute();
185+
186+
if (HeartBeatAction.NOACTION.equals(s_heartBeatFailureAction)) {
187+
s_logger.warn("No action will be performed on storage pool: " + primaryStoragePool._poolUUID);
188+
storagePoolCheckStatus.remove(primaryStoragePool._poolUUID);
197189
return;
198190
}
199191

200-
if (HeartBeatAction.DESTROYVMS.equals(_heartBeatFailureAction)
201-
|| HeartBeatAction.HARDRESET.equals(_heartBeatFailureAction)) {
202-
String destroyvmsCmd = "ps aux | grep '" + LibvirtVMDef.MANUFACTURER_APACHE + "' | grep -v ' grep '";
203-
if (HeartBeatAction.DESTROYVMS.equals(_heartBeatFailureAction)) {
204-
destroyvmsCmd += " | grep " + primaryStoragePool._mountDestPath;
205-
}
206-
destroyvmsCmd += " | awk '{print $14}' | tr '\n' ','";
192+
if (HeartBeatAction.DESTROYVMS.equals(s_heartBeatFailureAction)) {
193+
String destroyvmsCmd = "ps aux | grep '" + LibvirtVMDef.MANUFACTURER_APACHE +
194+
"' | grep -v ' grep '" + " | grep " + primaryStoragePool._mountDestPath +
195+
" | awk '{print $14}' | tr '\n' ','";
196+
197+
// display the vm names which are going to be destroyed
207198
String destroyvms = Script.runSimpleBashScript(destroyvmsCmd);
208199
if (destroyvms != null) {
209200
s_logger.warn("The following vms will be destroyed: " + destroyvms);
@@ -213,20 +204,18 @@ private void performAction(NfsStoragePool primaryStoragePool) {
213204
}
214205

215206
// take action according to global setting
216-
cmd.add(_heartBeatFailureAction.getFlag());
207+
cmd.add(s_heartBeatFailureAction.getFlag());
217208
cmd.execute();
218209
}
219210

220211
private Script createHeartBeatCommand(NfsStoragePool primaryStoragePool, String hostPrivateIp, boolean hostValidation) {
221-
Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
212+
Script cmd = new Script(s_heartBeatPath, s_heartBeatUpdateTimeout, s_logger);
222213
cmd.add("-i", primaryStoragePool._poolIp);
223214
cmd.add("-p", primaryStoragePool._poolMountSourcePath);
224215
cmd.add("-m", primaryStoragePool._mountDestPath);
225216

226217
if (hostValidation) {
227218
cmd.add("-h", hostPrivateIp);
228-
} else {
229-
cmd.add("-c");
230219
}
231220

232221
return cmd;
@@ -268,7 +257,7 @@ private boolean checkPoolValidity(String uuid) {
268257
protected void runInContext() {
269258
synchronized (storagePool) {
270259
for (String uuid : storagePool.keySet()) {
271-
if (_storagePoolCheckThreads.containsKey(uuid)) {
260+
if (storagePoolCheckThreads.containsKey(uuid)) {
272261
s_logger.trace("Ignoring check on storage pool as there is already a thread: " + uuid);
273262
continue;
274263
}
@@ -278,30 +267,12 @@ protected void runInContext() {
278267
uuid, primaryStoragePool._poolIp, primaryStoragePool._poolMountSourcePath, primaryStoragePool._mountDestPath));
279268

280269
CheckPoolThread checkPoolThread = new CheckPoolThread(primaryStoragePool);
281-
_storagePoolCheckThreads.put(uuid, checkPoolThread);
270+
storagePoolCheckThreads.put(uuid, checkPoolThread);
282271
checkPoolThread.runInContext();
283272
} else {
284273
removedPools.add(uuid);
285274
}
286275
}
287-
288-
if (! _storagePoolCheckStatus.isEmpty()) {
289-
boolean isAllTerminated = true;
290-
for (Map.Entry<String, String> entry : _storagePoolCheckStatus.entrySet()) {
291-
String status= entry.getValue();
292-
s_logger.debug(String.format("State of check thread for pool %s is %s", entry.getKey(), status));
293-
if (!status.equals(STATUS_TERMINATED)) {
294-
isAllTerminated = false;
295-
}
296-
}
297-
if (isAllTerminated) {
298-
s_logger.debug("All heartbeat check threads on pools with issues are terminated, stopping cloudstack-agent");
299-
Script cmd = new Script("/bin/systemctl", s_logger);
300-
cmd.add("stop");
301-
cmd.add("cloudstack-agent");
302-
cmd.execute();
303-
}
304-
}
305276
}
306277

307278
if (!removedPools.isEmpty()) {
@@ -316,7 +287,7 @@ protected void runInContext() {
316287
@Override
317288
public void run() {
318289
ScheduledExecutorService haMonitor = Executors.newSingleThreadScheduledExecutor();
319-
haMonitor.scheduleAtFixedRate(new Monitor(), 0, _heartBeatUpdateFreq, TimeUnit.SECONDS);
290+
haMonitor.scheduleAtFixedRate(new Monitor(), 0, s_heartBeatUpdateFreq, TimeUnit.MILLISECONDS);
320291
}
321292

322293
}

plugins/hypervisors/kvm/src/main/java/org/apache/cloudstack/kvm/ha/KVMHAProvider.java

Lines changed: 20 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -38,9 +38,26 @@
3838

3939
import javax.inject.Inject;
4040
import java.security.InvalidParameterException;
41+
import java.util.HashMap;
42+
import java.util.Map;
43+
import java.util.Optional;
4144

4245
public final class KVMHAProvider extends HAAbstractHostProvider implements HAProvider<Host>, Configurable {
43-
private final static Logger LOG = Logger.getLogger(KVMHAProvider.class);
46+
private static final Logger LOG = Logger.getLogger(KVMHAProvider.class);
47+
private static final Map<HAProviderConfig, ConfigKey<?>> KVM_HA_CONFIG_MAP = new HashMap<>();
48+
49+
static {
50+
KVM_HA_CONFIG_MAP.put(HAProviderConfig.HealthCheckTimeout, KVMHAConfig.KvmHAHealthCheckTimeout);
51+
KVM_HA_CONFIG_MAP.put(HAProviderConfig.ActivityCheckTimeout, KVMHAConfig.KvmHAActivityCheckTimeout);
52+
KVM_HA_CONFIG_MAP.put(HAProviderConfig.MaxActivityCheckInterval, KVMHAConfig.KvmHAActivityCheckInterval);
53+
KVM_HA_CONFIG_MAP.put(HAProviderConfig.MaxActivityChecks, KVMHAConfig.KvmHAActivityCheckMaxAttempts);
54+
KVM_HA_CONFIG_MAP.put(HAProviderConfig.ActivityCheckFailureRatio, KVMHAConfig.KvmHAActivityCheckFailureThreshold);
55+
KVM_HA_CONFIG_MAP.put(HAProviderConfig.RecoveryWaitTimeout, KVMHAConfig.KvmHARecoverWaitPeriod);
56+
KVM_HA_CONFIG_MAP.put(HAProviderConfig.RecoveryTimeout, KVMHAConfig.KvmHARecoverTimeout);
57+
KVM_HA_CONFIG_MAP.put(HAProviderConfig.FenceTimeout, KVMHAConfig.KvmHAFenceTimeout);
58+
KVM_HA_CONFIG_MAP.put(HAProviderConfig.MaxRecoveryAttempts, KVMHAConfig.KvmHARecoverAttemptThreshold);
59+
KVM_HA_CONFIG_MAP.put(HAProviderConfig.MaxDegradedWaitTimeout, KVMHAConfig.KvmHADegradedMaxPeriod);
60+
}
4461

4562
@Inject
4663
protected KVMHostActivityChecker hostActivityChecker;
@@ -108,30 +125,8 @@ public HAResource.ResourceSubType resourceSubType() {
108125
@Override
109126
public Object getConfigValue(final HAProviderConfig name, final Host host) {
110127
final Long clusterId = host.getClusterId();
111-
switch (name) {
112-
case HealthCheckTimeout:
113-
return KVMHAConfig.KvmHAHealthCheckTimeout.valueIn(clusterId);
114-
case ActivityCheckTimeout:
115-
return KVMHAConfig.KvmHAActivityCheckTimeout.valueIn(clusterId);
116-
case MaxActivityCheckInterval:
117-
return KVMHAConfig.KvmHAActivityCheckInterval.valueIn(clusterId);
118-
case MaxActivityChecks:
119-
return KVMHAConfig.KvmHAActivityCheckMaxAttempts.valueIn(clusterId);
120-
case ActivityCheckFailureRatio:
121-
return KVMHAConfig.KvmHAActivityCheckFailureThreshold.valueIn(clusterId);
122-
case RecoveryWaitTimeout:
123-
return KVMHAConfig.KvmHARecoverWaitPeriod.valueIn(clusterId);
124-
case RecoveryTimeout:
125-
return KVMHAConfig.KvmHARecoverTimeout.valueIn(clusterId);
126-
case FenceTimeout:
127-
return KVMHAConfig.KvmHAFenceTimeout.valueIn(clusterId);
128-
case MaxRecoveryAttempts:
129-
return KVMHAConfig.KvmHARecoverAttemptThreshold.valueIn(clusterId);
130-
case MaxDegradedWaitTimeout:
131-
return KVMHAConfig.KvmHADegradedMaxPeriod.valueIn(clusterId);
132-
default:
133-
throw new InvalidParameterException("Unknown HAProviderConfig " + name.toString());
134-
}
128+
return Optional.ofNullable(KVM_HA_CONFIG_MAP.get(name).valueIn(clusterId))
129+
.orElseThrow(() -> new InvalidParameterException("Unknown HAProviderConfig " + name.toString()));
135130
}
136131

137132
@Override

0 commit comments

Comments
 (0)