Skip to content

Commit 798ff6a

Browse files
author
Harikrishna Patnala
committed
CLOUDSTACK-9112: deployVM thread holds the global lock on network for too long, causing delays; also some improvements in the planner
Some VM deployments fail when multiple VMs are deployed at the same time, mainly because NetworkModel code iterates over all the VLANs in the pod. This causes each deployVM thread to hold the global lock on Network longer, which causes delays. The delay in turn causes more threads to choose the same host and fail, since capacity is no longer available on that host. The following changes reduce delays during VM deployment, and thereby the deployment failures seen when multiple VMs are launched at once: (1) In the planner, remove the clusters that do not contain a host with a tag matching the service offering's host tag; this saves iterations over clusters that don't have a matching tagged host. (2) In NetworkModel, do not query the VLANs for the pod inside the loop; also optimize the logic that queries the IPv4/IPv6 address. (3) In DeploymentPlanningManagerImpl, do not process the affinity group if the plan has a hostId provided.
1 parent 850c07c commit 798ff6a

File tree

5 files changed

+100
-48
lines changed

5 files changed

+100
-48
lines changed

engine/schema/src/com/cloud/host/dao/HostDao.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,5 +98,7 @@ public interface HostDao extends GenericDao<HostVO, Long>, StateDao<Status, Stat
9898

9999
HostVO findByPublicIp(String publicIp);
100100

101+
List<Long> listClustersByHostTag(String hostTagOnOffering);
102+
101103
List<HostVO> listByType(Type type);
102104
}

engine/schema/src/com/cloud/host/dao/HostDaoImpl.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
7777
private static final Logger status_logger = Logger.getLogger(Status.class);
7878
private static final Logger state_logger = Logger.getLogger(ResourceState.class);
7979

80+
private static final String LIST_CLUSTERID_FOR_HOST_TAG = "select distinct cluster_id from host join host_tags on host.id = host_tags.host_id and host_tags.tag = ?";
81+
8082
protected SearchBuilder<HostVO> TypePodDcStatusSearch;
8183

8284
protected SearchBuilder<HostVO> IdStatusSearch;
@@ -1129,6 +1131,29 @@ public List<Long> listAllHosts(long zoneId) {
11291131
return customSearch(sc, null);
11301132
}
11311133

1134+
/**
 * Lists the ids of the clusters that contain at least one host carrying the given host tag.
 * Runs the LIST_CLUSTERID_FOR_HOST_TAG query on the current transaction, with the tag bound
 * as its only parameter.
 *
 * @param hostTagOnOffering host tag to match against host_tags.tag
 * @return the distinct cluster ids returned by the query; an empty list when no row matches
 * @throws CloudRuntimeException if the query fails with an SQLException or any other Throwable
 */
@Override
1135+
public List<Long> listClustersByHostTag(String hostTagOnOffering) {
1136+
TransactionLegacy txn = TransactionLegacy.currentTxn();
1137+
PreparedStatement pstmt = null;
1138+
List<Long> result = new ArrayList<Long>();
1139+
StringBuilder sql = new StringBuilder(LIST_CLUSTERID_FOR_HOST_TAG);
1140+
// collect the distinct cluster ids of the hosts that carry the requested tag
1141+
// NOTE(review): rs.getLong() yields 0 for a SQL NULL cluster_id - confirm tagged hosts always belong to a cluster
1142+
try {
1143+
pstmt = txn.prepareAutoCloseStatement(sql.toString());
1144+
pstmt.setString(1, hostTagOnOffering);
1145+
ResultSet rs = pstmt.executeQuery();
1146+
while (rs.next()) {
1147+
result.add(rs.getLong(1));
1148+
}
1149+
return result;
1150+
} catch (SQLException e) {
1151+
throw new CloudRuntimeException("DB Exception on: " + sql, e);
1152+
} catch (Throwable e) {
1153+
throw new CloudRuntimeException("Caught: " + sql, e);
1154+
}
1155+
}
1156+
11321157
@Override
11331158
public List<HostVO> listAllHostsByType(Host.Type type) {
11341159
SearchCriteria<HostVO> sc = TypeSearch.create();

server/src/com/cloud/deploy/DeploymentPlanningManagerImpl.java

Lines changed: 41 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -249,50 +249,16 @@ public void setAffinityGroupProcessors(List<AffinityGroupProcessor> affinityProc
249249
public DeployDestination planDeployment(VirtualMachineProfile vmProfile, DeploymentPlan plan, ExcludeList avoids, DeploymentPlanner planner)
250250
throws InsufficientServerCapacityException, AffinityConflictException {
251251

252-
// call affinitygroup chain
252+
ServiceOffering offering = vmProfile.getServiceOffering();
253+
int cpu_requested = offering.getCpu() * offering.getSpeed();
254+
long ram_requested = offering.getRamSize() * 1024L * 1024L;
253255
VirtualMachine vm = vmProfile.getVirtualMachine();
254-
long vmGroupCount = _affinityGroupVMMapDao.countAffinityGroupsForVm(vm.getId());
255256
DataCenter dc = _dcDao.findById(vm.getDataCenterId());
256257

257-
if (vmGroupCount > 0) {
258-
for (AffinityGroupProcessor processor : _affinityProcessors) {
259-
processor.process(vmProfile, plan, avoids);
260-
}
261-
}
262258

263259
if (vm.getType() == VirtualMachine.Type.User || vm.getType() == VirtualMachine.Type.DomainRouter) {
264260
checkForNonDedicatedResources(vmProfile, dc, avoids);
265261
}
266-
if (s_logger.isDebugEnabled()) {
267-
s_logger.debug("Deploy avoids pods: " + avoids.getPodsToAvoid() + ", clusters: " + avoids.getClustersToAvoid() + ", hosts: " + avoids.getHostsToAvoid());
268-
}
269-
270-
// call planners
271-
//DataCenter dc = _dcDao.findById(vm.getDataCenterId());
272-
// check if datacenter is in avoid set
273-
if (avoids.shouldAvoid(dc)) {
274-
if (s_logger.isDebugEnabled()) {
275-
s_logger.debug("DataCenter id = '" + dc.getId() + "' provided is in avoid set, DeploymentPlanner cannot allocate the VM, returning.");
276-
}
277-
return null;
278-
}
279-
280-
ServiceOffering offering = vmProfile.getServiceOffering();
281-
if(planner == null){
282-
String plannerName = offering.getDeploymentPlanner();
283-
if (plannerName == null) {
284-
if (vm.getHypervisorType() == HypervisorType.BareMetal) {
285-
plannerName = "BareMetalPlanner";
286-
} else {
287-
plannerName = _configDao.getValue(Config.VmDeploymentPlanner.key());
288-
}
289-
}
290-
planner = getDeploymentPlannerByName(plannerName);
291-
}
292-
293-
int cpu_requested = offering.getCpu() * offering.getSpeed();
294-
long ram_requested = offering.getRamSize() * 1024L * 1024L;
295-
296262
if (s_logger.isDebugEnabled()) {
297263
s_logger.debug("DeploymentPlanner allocation algorithm: " + planner);
298264

@@ -364,6 +330,44 @@ public DeployDestination planDeployment(VirtualMachineProfile vmProfile, Deploym
364330
return null;
365331
}
366332

333+
// call affinitygroup chain
334+
long vmGroupCount = _affinityGroupVMMapDao.countAffinityGroupsForVm(vm.getId());
335+
336+
if (vmGroupCount > 0) {
337+
for (AffinityGroupProcessor processor : _affinityProcessors) {
338+
processor.process(vmProfile, plan, avoids);
339+
}
340+
}
341+
342+
if (vm.getType() == VirtualMachine.Type.User) {
343+
checkForNonDedicatedResources(vmProfile, dc, avoids);
344+
}
345+
if (s_logger.isDebugEnabled()) {
346+
s_logger.debug("Deploy avoids pods: " + avoids.getPodsToAvoid() + ", clusters: " + avoids.getClustersToAvoid() + ", hosts: " + avoids.getHostsToAvoid());
347+
}
348+
349+
// call planners
350+
// DataCenter dc = _dcDao.findById(vm.getDataCenterId());
351+
// check if datacenter is in avoid set
352+
if (avoids.shouldAvoid(dc)) {
353+
if (s_logger.isDebugEnabled()) {
354+
s_logger.debug("DataCenter id = '" + dc.getId() + "' provided is in avoid set, DeploymentPlanner cannot allocate the VM, returning.");
355+
}
356+
return null;
357+
}
358+
359+
if (planner == null) {
360+
String plannerName = offering.getDeploymentPlanner();
361+
if (plannerName == null) {
362+
if (vm.getHypervisorType() == HypervisorType.BareMetal) {
363+
plannerName = "BareMetalPlanner";
364+
} else {
365+
plannerName = _configDao.getValue(Config.VmDeploymentPlanner.key());
366+
}
367+
}
368+
planner = getDeploymentPlannerByName(plannerName);
369+
}
370+
367371
if (vm.getLastHostId() != null && haVmTag == null) {
368372
s_logger.debug("This VM has last host_id specified, trying to choose the same host: " + vm.getLastHostId());
369373

server/src/com/cloud/deploy/FirstFitPlanner.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,10 @@ private List<Long> scanClustersForDestinationInZoneOrPod(long id, boolean isZone
393393
}
394394

395395
removeClustersCrossingThreshold(prioritizedClusterIds, avoid, vmProfile, plan);
396+
String hostTagOnOffering = offering.getHostTag();
397+
if (hostTagOnOffering != null) {
398+
removeClustersWithoutMatchingTag(prioritizedClusterIds, hostTagOnOffering);
399+
}
396400

397401
} else {
398402
if (s_logger.isDebugEnabled()) {
@@ -520,6 +524,18 @@ protected Pair<List<Long>, Map<Long, Double>> listPodsByCapacity(long zoneId, in
520524

521525
}
522526

527+
/**
 * Narrows the candidate cluster list, in place, to the clusters that
 * hostDao.listClustersByHostTag returns for the given host tag.
 *
 * @param clusterListForVmAllocation candidate cluster ids; modified by this call
 * @param hostTagOnOffering host tag taken from the service offering
 */
private void removeClustersWithoutMatchingTag(List<Long> clusterListForVmAllocation, String hostTagOnOffering) {
528+
529+
List<Long> matchingClusters = hostDao.listClustersByHostTag(hostTagOnOffering);
530+
531+
// retainAll mutates the caller's list: every cluster id not in matchingClusters is dropped
clusterListForVmAllocation.retainAll(matchingClusters);
532+
533+
if (s_logger.isDebugEnabled()) {
534+
s_logger.debug("The clusterId list for the given offering tag: " + clusterListForVmAllocation);
535+
}
536+
537+
}
538+
523539
private boolean isRootAdmin(VirtualMachineProfile vmProfile) {
524540
if (vmProfile != null) {
525541
if (vmProfile.getOwner() != null) {

server/src/com/cloud/network/NetworkModelImpl.java

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2217,24 +2217,29 @@ public String getStartIpv6Address(long networkId) {
22172217
@Override
22182218
public NicVO getPlaceholderNicForRouter(Network network, Long podId) {
22192219
List<NicVO> nics = _nicDao.listPlaceholderNicsByNetworkIdAndVmType(network.getId(), VirtualMachine.Type.DomainRouter);
2220+
List<? extends Vlan> vlans = new ArrayList<VlanVO>();
2221+
if (podId != null) {
2222+
vlans = _vlanDao.listVlansForPod(podId);
2223+
}
22202224
for (NicVO nic : nics) {
22212225
if (nic.getReserver() == null && (nic.getIPv4Address() != null || nic.getIPv6Address() != null)) {
22222226
if (podId == null) {
22232227
return nic;
22242228
} else {
2229+
IpAddress ip = null;
2230+
UserIpv6AddressVO ipv6 = null;
2231+
2232+
if (nic.getIPv4Address() != null) {
2233+
ip = _ipAddressDao.findByIpAndSourceNetworkId(network.getId(), nic.getIPv4Address());
2234+
} else {
2235+
ipv6 = _ipv6Dao.findByNetworkIdAndIp(network.getId(), nic.getIPv6Address());
2236+
}
22252237
//return nic only when its ip address belong to the pod range (for the Basic zone case)
2226-
List<? extends Vlan> vlans = _vlanDao.listVlansForPod(podId);
22272238
for (Vlan vlan : vlans) {
2228-
if (nic.getIPv4Address() != null) {
2229-
IpAddress ip = _ipAddressDao.findByIpAndSourceNetworkId(network.getId(), nic.getIPv4Address());
2230-
if (ip != null && ip.getVlanId() == vlan.getId()) {
2231-
return nic;
2232-
}
2233-
} else {
2234-
UserIpv6AddressVO ipv6 = _ipv6Dao.findByNetworkIdAndIp(network.getId(), nic.getIPv6Address());
2235-
if (ipv6 != null && ipv6.getVlanId() == vlan.getId()) {
2236-
return nic;
2237-
}
2239+
if (ip != null && ip.getVlanId() == vlan.getId()) {
2240+
return nic;
2241+
} else if (ipv6 != null && ipv6.getVlanId() == vlan.getId()) {
2242+
return nic;
22382243
}
22392244
}
22402245
}

0 commit comments

Comments
 (0)