Skip to content

Commit 8394e19

Browse files
author
Harikrishna Patnala
committed
Deploy VM failing frequently due to capacity calculation not synchronized
Changes: - In Planner, remove the clusters that do not contain a host with a matching service offering tag. This saves iterations over clusters that don't have a matching tagged host. - In NetworkModel, do not query the VLANs for the pod within the loop; also optimize the logic that queries the IPv4/IPv6 address. - In DeploymentPlanningManagerImpl, do not process the affinity group if the plan has a hostId provided.
1 parent c67d1da commit 8394e19

File tree

7 files changed

+133
-48
lines changed

7 files changed

+133
-48
lines changed

engine/schema/src/com/cloud/host/dao/HostDao.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,4 +90,6 @@ public interface HostDao extends GenericDao<HostVO, Long>, StateDao<Status, Stat
9090
List<Long> listAllHosts(long zoneId);
9191

9292
HostVO findByPublicIp(String publicIp);
93+
94+
/**
 * Lists the ids of the clusters that contain at least one host carrying the given host tag.
 *
 * @param hostTagOnOffering the host tag to look up (as configured on a service offering)
 * @return the distinct cluster ids of hosts tagged with {@code hostTagOnOffering}
 */
List<Long> listClustersByHostTag(String hostTagOnOffering);
9395
}

engine/schema/src/com/cloud/host/dao/HostDaoImpl.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
7575
private static final Logger status_logger = Logger.getLogger(Status.class);
7676
private static final Logger state_logger = Logger.getLogger(ResourceState.class);
7777

78+
// Distinct cluster ids of all hosts that have a host_tags row equal to the bound tag (single '?' parameter).
private static final String LIST_CLUSTERID_FOR_HOST_TAG = "select distinct cluster_id from host join host_tags on host.id = host_tags.host_id and host_tags.tag = ?";
79+
7880
protected SearchBuilder<HostVO> TypePodDcStatusSearch;
7981

8082
protected SearchBuilder<HostVO> IdStatusSearch;
@@ -1087,4 +1089,27 @@ public List<Long> listAllHosts(long zoneId) {
10871089
sc.addAnd("dataCenterId", SearchCriteria.Op.EQ, zoneId);
10881090
return customSearch(sc, null);
10891091
}
1092+
1093+
@Override
1094+
public List<Long> listClustersByHostTag(String hostTagOnOffering) {
1095+
TransactionLegacy txn = TransactionLegacy.currentTxn();
1096+
PreparedStatement pstmt = null;
1097+
List<Long> result = new ArrayList<Long>();
1098+
StringBuilder sql = new StringBuilder(LIST_CLUSTERID_FOR_HOST_TAG);
1099+
// during listing the clusters that cross the threshold
1100+
// we need to check with disabled thresholds of each cluster if not defined at cluster consider the global value
1101+
try {
1102+
pstmt = txn.prepareAutoCloseStatement(sql.toString());
1103+
pstmt.setString(1, hostTagOnOffering);
1104+
ResultSet rs = pstmt.executeQuery();
1105+
while (rs.next()) {
1106+
result.add(rs.getLong(1));
1107+
}
1108+
return result;
1109+
} catch (SQLException e) {
1110+
throw new CloudRuntimeException("DB Exception on: " + sql, e);
1111+
} catch (Throwable e) {
1112+
throw new CloudRuntimeException("Caught: " + sql, e);
1113+
}
1114+
}
10901115
}

engine/schema/src/org/apache/cloudstack/affinity/dao/AffinityGroupVMMapDao.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,6 @@ public interface AffinityGroupVMMapDao extends GenericDao<AffinityGroupVMMapVO,
4545
void updateMap(Long vmId, List<Long> affinityGroupIds);
4646

4747
List<Long> listAffinityGroupIdsByVmId(long instanceId);
48+
49+
/**
 * Lists the host ids of the running VMs that are mapped to the given affinity group.
 *
 * @param affinityGroupId id of the affinity group whose VMs are inspected
 * @return one host id per matching VM; the same host id may appear more than once
 */
List<Long> listHostIdsForAffinityGroupVM(long affinityGroupId);
4850
}

engine/schema/src/org/apache/cloudstack/affinity/dao/AffinityGroupVMMapDaoImpl.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,16 @@
1616
// under the License.
1717
package org.apache.cloudstack.affinity.dao;
1818

19+
import java.sql.PreparedStatement;
20+
import java.sql.ResultSet;
21+
import java.sql.SQLException;
22+
import java.util.ArrayList;
1923
import java.util.List;
2024

2125
import javax.annotation.PostConstruct;
2226
import javax.inject.Inject;
2327

28+
import com.cloud.utils.exception.CloudRuntimeException;
2429
import org.apache.cloudstack.affinity.AffinityGroupVMMapVO;
2530
import org.apache.cloudstack.affinity.AffinityGroupVO;
2631

@@ -43,6 +48,8 @@ public class AffinityGroupVMMapDaoImpl extends GenericDaoBase<AffinityGroupVMMap
4348
private SearchBuilder<AffinityGroupVMMapVO> ListByVmIdType;
4449
private GenericSearchBuilder<AffinityGroupVMMapVO, Long> ListAffinityGroupIdByVm;
4550

51+
// Host ids of VMs in state 'Running' that are mapped to the bound affinity group id (single '?' parameter).
private static final String LIST_HOSTID_FOR_GROUP_VMS = "select v.host_id from vm_instance v inner join affinity_group_vm_map a on v.id = a.instance_id and v.state = 'Running' and a.affinity_group_id = ?";
52+
4653
@Inject
4754
protected AffinityGroupDao _affinityGroupDao;
4855

@@ -170,4 +177,28 @@ public void updateMap(Long vmId, List<Long> affinityGroupIds) {
170177
txn.commit();
171178

172179
}
180+
181+
@Override
182+
public List<Long> listHostIdsForAffinityGroupVM(long affinityGroupId){
183+
TransactionLegacy txn = TransactionLegacy.currentTxn();
184+
PreparedStatement pstmt = null;
185+
List<Long> result = new ArrayList<Long>();
186+
StringBuilder sql = new StringBuilder(LIST_HOSTID_FOR_GROUP_VMS);
187+
// during listing the clusters that cross the threshold
188+
// we need to check with disabled thresholds of each cluster if not defined at cluster consider the global value
189+
try {
190+
pstmt = txn.prepareAutoCloseStatement(sql.toString());
191+
pstmt.setLong(1, affinityGroupId);
192+
193+
ResultSet rs = pstmt.executeQuery();
194+
while (rs.next()) {
195+
result.add(rs.getLong(1));
196+
}
197+
return result;
198+
} catch (SQLException e) {
199+
throw new CloudRuntimeException("DB Exception on: " + sql, e);
200+
} catch (Throwable e) {
201+
throw new CloudRuntimeException("Caught: " + sql, e);
202+
}
203+
}
173204
}

server/src/com/cloud/deploy/DeploymentPlanningManagerImpl.java

Lines changed: 41 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -249,50 +249,16 @@ public void setAffinityGroupProcessors(List<AffinityGroupProcessor> affinityProc
249249
public DeployDestination planDeployment(VirtualMachineProfile vmProfile, DeploymentPlan plan, ExcludeList avoids, DeploymentPlanner planner)
250250
throws InsufficientServerCapacityException, AffinityConflictException {
251251

252-
// call affinitygroup chain
252+
ServiceOffering offering = vmProfile.getServiceOffering();
253+
int cpu_requested = offering.getCpu() * offering.getSpeed();
254+
long ram_requested = offering.getRamSize() * 1024L * 1024L;
253255
VirtualMachine vm = vmProfile.getVirtualMachine();
254-
long vmGroupCount = _affinityGroupVMMapDao.countAffinityGroupsForVm(vm.getId());
255256
DataCenter dc = _dcDao.findById(vm.getDataCenterId());
256257

257-
if (vmGroupCount > 0) {
258-
for (AffinityGroupProcessor processor : _affinityProcessors) {
259-
processor.process(vmProfile, plan, avoids);
260-
}
261-
}
262258

263259
if (vm.getType() == VirtualMachine.Type.User || vm.getType() == VirtualMachine.Type.DomainRouter) {
264260
checkForNonDedicatedResources(vmProfile, dc, avoids);
265261
}
266-
if (s_logger.isDebugEnabled()) {
267-
s_logger.debug("Deploy avoids pods: " + avoids.getPodsToAvoid() + ", clusters: " + avoids.getClustersToAvoid() + ", hosts: " + avoids.getHostsToAvoid());
268-
}
269-
270-
// call planners
271-
//DataCenter dc = _dcDao.findById(vm.getDataCenterId());
272-
// check if datacenter is in avoid set
273-
if (avoids.shouldAvoid(dc)) {
274-
if (s_logger.isDebugEnabled()) {
275-
s_logger.debug("DataCenter id = '" + dc.getId() + "' provided is in avoid set, DeploymentPlanner cannot allocate the VM, returning.");
276-
}
277-
return null;
278-
}
279-
280-
ServiceOffering offering = vmProfile.getServiceOffering();
281-
if(planner == null){
282-
String plannerName = offering.getDeploymentPlanner();
283-
if (plannerName == null) {
284-
if (vm.getHypervisorType() == HypervisorType.BareMetal) {
285-
plannerName = "BareMetalPlanner";
286-
} else {
287-
plannerName = _configDao.getValue(Config.VmDeploymentPlanner.key());
288-
}
289-
}
290-
planner = getDeploymentPlannerByName(plannerName);
291-
}
292-
293-
int cpu_requested = offering.getCpu() * offering.getSpeed();
294-
long ram_requested = offering.getRamSize() * 1024L * 1024L;
295-
296262
if (s_logger.isDebugEnabled()) {
297263
s_logger.debug("DeploymentPlanner allocation algorithm: " + planner);
298264

@@ -364,6 +330,44 @@ public DeployDestination planDeployment(VirtualMachineProfile vmProfile, Deploym
364330
return null;
365331
}
366332

333+
// call affinitygroup chain
334+
long vmGroupCount = _affinityGroupVMMapDao.countAffinityGroupsForVm(vm.getId());
335+
336+
if (vmGroupCount > 0) {
337+
for (AffinityGroupProcessor processor : _affinityProcessors) {
338+
processor.process(vmProfile, plan, avoids);
339+
}
340+
}
341+
342+
if (vm.getType() == VirtualMachine.Type.User) {
343+
checkForNonDedicatedResources(vmProfile, dc, avoids);
344+
}
345+
if (s_logger.isDebugEnabled()) {
346+
s_logger.debug("Deploy avoids pods: " + avoids.getPodsToAvoid() + ", clusters: " + avoids.getClustersToAvoid() + ", hosts: " + avoids.getHostsToAvoid());
347+
}
348+
349+
// call planners
350+
// DataCenter dc = _dcDao.findById(vm.getDataCenterId());
351+
// check if datacenter is in avoid set
352+
if (avoids.shouldAvoid(dc)) {
353+
if (s_logger.isDebugEnabled()) {
354+
s_logger.debug("DataCenter id = '" + dc.getId() + "' provided is in avoid set, DeploymentPlanner cannot allocate the VM, returning.");
355+
}
356+
return null;
357+
}
358+
359+
if (planner == null) {
360+
String plannerName = offering.getDeploymentPlanner();
361+
if (plannerName == null) {
362+
if (vm.getHypervisorType() == HypervisorType.BareMetal) {
363+
plannerName = "BareMetalPlanner";
364+
} else {
365+
plannerName = _configDao.getValue(Config.VmDeploymentPlanner.key());
366+
}
367+
}
368+
planner = getDeploymentPlannerByName(plannerName);
369+
}
370+
367371
if (vm.getLastHostId() != null && haVmTag == null) {
368372
s_logger.debug("This VM has last host_id specified, trying to choose the same host: " + vm.getLastHostId());
369373

server/src/com/cloud/deploy/FirstFitPlanner.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,10 @@ private List<Long> scanClustersForDestinationInZoneOrPod(long id, boolean isZone
379379
}
380380

381381
removeClustersCrossingThreshold(prioritizedClusterIds, avoid, vmProfile, plan);
382+
String hostTagOnOffering = offering.getHostTag();
383+
if (hostTagOnOffering != null) {
384+
removeClustersWithoutMatchingTag(prioritizedClusterIds, hostTagOnOffering);
385+
}
382386

383387
} else {
384388
if (s_logger.isDebugEnabled()) {
@@ -506,6 +510,18 @@ protected Pair<List<Long>, Map<Long, Double>> listPodsByCapacity(long zoneId, in
506510

507511
}
508512

513+
private void removeClustersWithoutMatchingTag(List<Long> clusterListForVmAllocation, String hostTagOnOffering) {
514+
515+
List<Long> matchingClusters = hostDao.listClustersByHostTag(hostTagOnOffering);
516+
517+
clusterListForVmAllocation.retainAll(matchingClusters);
518+
519+
if (s_logger.isDebugEnabled()) {
520+
s_logger.debug("The clusterId list for the given offering tag: " + clusterListForVmAllocation);
521+
}
522+
523+
}
524+
509525
private boolean isRootAdmin(VirtualMachineProfile vmProfile) {
510526
if (vmProfile != null) {
511527
if (vmProfile.getOwner() != null) {

server/src/com/cloud/network/NetworkModelImpl.java

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2191,24 +2191,29 @@ public String getStartIpv6Address(long networkId) {
21912191
@Override
21922192
public NicVO getPlaceholderNicForRouter(Network network, Long podId) {
21932193
List<NicVO> nics = _nicDao.listPlaceholderNicsByNetworkIdAndVmType(network.getId(), VirtualMachine.Type.DomainRouter);
2194+
List<? extends Vlan> vlans = new ArrayList<VlanVO>();
2195+
if (podId != null) {
2196+
vlans = _vlanDao.listVlansForPod(podId);
2197+
}
21942198
for (NicVO nic : nics) {
21952199
if (nic.getReserver() == null && (nic.getIPv4Address() != null || nic.getIPv6Address() != null)) {
21962200
if (podId == null) {
21972201
return nic;
21982202
} else {
2203+
IpAddress ip = null;
2204+
UserIpv6AddressVO ipv6 = null;
2205+
2206+
if (nic.getIPv4Address() != null) {
2207+
ip = _ipAddressDao.findByIpAndSourceNetworkId(network.getId(), nic.getIPv4Address());
2208+
} else {
2209+
ipv6 = _ipv6Dao.findByNetworkIdAndIp(network.getId(), nic.getIPv6Address());
2210+
}
21992211
//return nic only when its ip address belong to the pod range (for the Basic zone case)
2200-
List<? extends Vlan> vlans = _vlanDao.listVlansForPod(podId);
22012212
for (Vlan vlan : vlans) {
2202-
if (nic.getIPv4Address() != null) {
2203-
IpAddress ip = _ipAddressDao.findByIpAndSourceNetworkId(network.getId(), nic.getIPv4Address());
2204-
if (ip != null && ip.getVlanId() == vlan.getId()) {
2205-
return nic;
2206-
}
2207-
} else {
2208-
UserIpv6AddressVO ipv6 = _ipv6Dao.findByNetworkIdAndIp(network.getId(), nic.getIPv6Address());
2209-
if (ipv6 != null && ipv6.getVlanId() == vlan.getId()) {
2210-
return nic;
2211-
}
2213+
if (ip != null && ip.getVlanId() == vlan.getId()) {
2214+
return nic;
2215+
} else if (ipv6 != null && ipv6.getVlanId() == vlan.getId()) {
2216+
return nic;
22122217
}
22132218
}
22142219
}

0 commit comments

Comments
 (0)