Skip to content

Commit 95ef292

Browse files
kvm: honor migrate.wait and abort vm migration job (#5388)
* kvm: honor migrate.wait and abort vm migration job * kvm: propagate migratewait to all cloudstack agents on kvm hosts * update #5388 * update #5388: display error msg
1 parent e6058b0 commit 95ef292

File tree

5 files changed

+55
-3
lines changed

5 files changed

+55
-3
lines changed

agent/conf/agent.properties

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,12 @@ domr.scripts.dir=scripts/network/domr/kvm
9797
# migration will finish quickly. Less than 1 means disabled.
9898
#vm.migrate.pauseafter=0
9999

100+
# Time (in seconds) to wait for a VM migration to finish. Less than 1 means disabled.
101+
# If the VM migration has not finished within this time, the migration job will be cancelled via libvirt.
102+
# This value is set by the CloudStack management server when the CloudStack agent connects.
103+
# To change it, update the global setting 'migratewait' on the management server (default value: 3600).
104+
#vm.migrate.wait=0
105+
100106
# Agent hooks is the way to override default agent behavior to extend the functionality without excessive coding
101107
# for a custom deployment. The first hook promoted is libvirt-vm-xml-transformer which allows provider to modify
102108
# VM XML specification before send to libvirt. Hooks are implemented in Groovy and must be implemented in the way

engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import javax.inject.Inject;
3939
import javax.naming.ConfigurationException;
4040

41+
import com.cloud.configuration.Config;
4142
import com.cloud.utils.NumbersUtil;
4243
import org.apache.cloudstack.agent.lb.IndirectAgentLB;
4344
import org.apache.cloudstack.ca.CAManager;
@@ -1758,7 +1759,8 @@ public void processConnect(final Host host, final StartupCommand cmd, final bool
17581759
if (cmd instanceof StartupRoutingCommand) {
17591760
if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) {
17601761
Map<String, String> params = new HashMap<String, String>();
1761-
params.put("router.aggregation.command.each.timeout", _configDao.getValue("router.aggregation.command.each.timeout"));
1762+
params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString()));
1763+
params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString()));
17621764

17631765
try {
17641766
SetHostParamsCommand cmds = new SetHostParamsCommand(params);

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import javax.xml.parsers.DocumentBuilderFactory;
4747
import javax.xml.parsers.ParserConfigurationException;
4848

49+
import com.cloud.configuration.Config;
4950
import org.apache.cloudstack.storage.configdrive.ConfigDrive;
5051
import org.apache.cloudstack.storage.to.PrimaryDataStoreTO;
5152
import org.apache.cloudstack.storage.to.TemplateObjectTO;
@@ -356,6 +357,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
356357
protected int _migrateSpeed;
357358
protected int _migrateDowntime;
358359
protected int _migratePauseAfter;
360+
protected int _migrateWait;
359361
protected boolean _diskActivityCheckEnabled;
360362
protected RollingMaintenanceExecutor rollingMaintenanceExecutor;
361363
protected long _diskActivityCheckFileSizeMin = 10485760; // 10MB
@@ -540,6 +542,10 @@ public int getMigratePauseAfter() {
540542
return _migratePauseAfter;
541543
}
542544

545+
public int getMigrateWait() {
546+
return _migrateWait;
547+
}
548+
543549
public int getMigrateSpeed() {
544550
return _migrateSpeed;
545551
}
@@ -1228,6 +1234,9 @@ public boolean configure(final String name, final Map<String, Object> params) th
12281234
value = (String) params.get("vm.migrate.pauseafter");
12291235
_migratePauseAfter = NumbersUtil.parseInt(value, -1);
12301236

1237+
value = (String) params.get("vm.migrate.wait");
1238+
_migrateWait = NumbersUtil.parseInt(value, -1);
1239+
12311240
configureAgentHooks(params);
12321241

12331242
value = (String)params.get("vm.migrate.speed");
@@ -1291,6 +1300,13 @@ public boolean configureHostParams(final Map<String, String> params) {
12911300
storage.persist("router.aggregation.command.each.timeout", String.valueOf(longValue));
12921301
}
12931302

1303+
if (params.get(Config.MigrateWait.toString()) != null) {
1304+
String value = (String)params.get(Config.MigrateWait.toString());
1305+
Integer intValue = NumbersUtil.parseInt(value, -1);
1306+
storage.persist("vm.migrate.wait", String.valueOf(intValue));
1307+
_migrateWait = intValue;
1308+
}
1309+
12941310
return true;
12951311
}
12961312

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import org.apache.log4j.Logger;
5252
import org.libvirt.Connect;
5353
import org.libvirt.Domain;
54+
import org.libvirt.DomainJobInfo;
5455
import org.libvirt.DomainInfo.DomainState;
5556
import org.libvirt.LibvirtException;
5657
import org.libvirt.StorageVol;
@@ -219,6 +220,29 @@ Use VIR_DOMAIN_XML_SECURE (value = 1) prior to v1.0.0.
219220
s_logger.info("Waiting for migration of " + vmName + " to complete, waited " + sleeptime + "ms");
220221
}
221222

223+
// abort the vm migration if the job is executed more than vm.migrate.wait
224+
final int migrateWait = libvirtComputingResource.getMigrateWait();
225+
if (migrateWait > 0 && sleeptime > migrateWait * 1000) {
226+
DomainState state = null;
227+
try {
228+
state = dm.getInfo().state;
229+
} catch (final LibvirtException e) {
230+
s_logger.info("Couldn't get VM domain state after " + sleeptime + "ms: " + e.getMessage());
231+
}
232+
if (state != null && state == DomainState.VIR_DOMAIN_RUNNING) {
233+
try {
234+
DomainJobInfo job = dm.getJobInfo();
235+
s_logger.info("Aborting " + vmName + " domain job: " + job);
236+
dm.abortJob();
237+
result = String.format("Migration of VM %s was cancelled by cloudstack due to time out after %d seconds", vmName, migrateWait);
238+
s_logger.debug(result);
239+
break;
240+
} catch (final LibvirtException e) {
241+
s_logger.info("Failed to abort the vm migration job of vm " + vmName + " : " + e.getMessage());
242+
}
243+
}
244+
}
245+
222246
// pause vm if we meet the vm.migrate.pauseafter threshold and not already paused
223247
final int migratePauseAfter = libvirtComputingResource.getMigratePauseAfter();
224248
if (migratePauseAfter > 0 && sleeptime > migratePauseAfter) {
@@ -262,7 +286,9 @@ Use VIR_DOMAIN_XML_SECURE (value = 1) prior to v1.0.0.
262286
| TransformerException
263287
| URISyntaxException e) {
264288
s_logger.debug(String.format("%s : %s", e.getClass().getSimpleName(), e.getMessage()));
265-
result = "Exception during migrate: " + e.getMessage();
289+
if (result == null) {
290+
result = "Exception during migrate: " + e.getMessage();
291+
}
266292
} finally {
267293
try {
268294
if (dm != null && result != null) {

server/src/main/java/com/cloud/configuration/ConfigurationManagerImpl.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -543,9 +543,11 @@ public void onPublishMessage(String serderAddress, String subject, Object args)
543543
if (globalSettingUpdated.equals(ApiServiceConfiguration.ManagementServerAddresses.key()) ||
544544
globalSettingUpdated.equals(IndirectAgentLBServiceImpl.IndirectAgentLBAlgorithm.key())) {
545545
_indirectAgentLB.propagateMSListToAgents();
546-
} else if (globalSettingUpdated.equals(Config.RouterAggregationCommandEachTimeout.toString())) {
546+
} else if (globalSettingUpdated.equals(Config.RouterAggregationCommandEachTimeout.toString())
547+
|| globalSettingUpdated.equals(Config.MigrateWait.toString())) {
547548
Map<String, String> params = new HashMap<String, String>();
548549
params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString()));
550+
params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString()));
549551
_agentManager.propagateChangeToAgents(params);
550552
}
551553
}

0 commit comments

Comments
 (0)