|
@@ -534,7 +534,7 @@ public class ProjectApplicationService {
|
|
String currentNodeName = "";
|
|
String currentNodeName = "";
|
|
NodeEntity currentNodeEntity = null;
|
|
NodeEntity currentNodeEntity = null;
|
|
int currentCount = Integer.MAX_VALUE;
|
|
int currentCount = Integer.MAX_VALUE;
|
|
- log.info("各节点已经预定的任务个数为:" + nodeListToCount);
|
|
|
|
|
|
+ log.info("各节点已经预定的任务个数为:{}", nodeListToCount);
|
|
for (NodeEntity nodeEntity : nodeListToCount) {
|
|
for (NodeEntity nodeEntity : nodeListToCount) {
|
|
int tempCount = nodeEntity.getCount();
|
|
int tempCount = nodeEntity.getCount();
|
|
String tempNodeName = nodeEntity.getNodeName();
|
|
String tempNodeName = nodeEntity.getNodeName();
|
|
@@ -557,7 +557,7 @@ public class ProjectApplicationService {
|
|
remainderNodeMap.put(currentNodeName, cpuOrder);
|
|
remainderNodeMap.put(currentNodeName, cpuOrder);
|
|
}
|
|
}
|
|
// 只有准备启动(即 currentCount == 0)的时候才指定 cpu 编号
|
|
// 只有准备启动(即 currentCount == 0)的时候才指定 cpu 编号
|
|
- log.info("创建任务 " + taskId + " 的 yaml:是否使用 gpu (0是1否)" + isChoiceGpu + ",当前节点已创建 yaml 个数为:" + currentCount + ",当前节点名称为:" + currentNodeName + ",当前 cpu 编号为:" + cpuOrder + ",镜像名:" + algorithmDockerImage);
|
|
|
|
|
|
+ log.info("创建任务 {} 的 yaml:是否使用 gpu (0是1否){},当前节点已创建 yaml 个数为:{},当前节点名称为:{},当前 cpu 编号为:{},镜像名:{}", taskId, isChoiceGpu, currentCount, currentNodeName, cpuOrder, algorithmDockerImage);
|
|
String yamlRedisKey = projectDomainService.createTempYaml(projectId, vehicleConfigId, modelType, algorithmDockerImage, currentNodeName, partition, offset, isChoiceGpu, cpuOrder);
|
|
String yamlRedisKey = projectDomainService.createTempYaml(projectId, vehicleConfigId, modelType, algorithmDockerImage, currentNodeName, partition, offset, isChoiceGpu, cpuOrder);
|
|
if (currentCount == 0) {
|
|
if (currentCount == 0) {
|
|
yamlToRunRedisKeyList.add(yamlRedisKey);
|
|
yamlToRunRedisKeyList.add(yamlRedisKey);
|
|
@@ -584,9 +584,9 @@ public class ProjectApplicationService {
|
|
}
|
|
}
|
|
//1 获取剩余并行度和即将使用的各node的并行度
|
|
//1 获取剩余并行度和即将使用的各node的并行度
|
|
Map<String, Integer> remainderNodeMap = projectDomainService.getRemainderNodeMap(isChoiceGpu);
|
|
Map<String, Integer> remainderNodeMap = projectDomainService.getRemainderNodeMap(isChoiceGpu);
|
|
- log.info("剩余并行度为:" + remainderNodeMap);
|
|
|
|
|
|
+ log.info("剩余并行度为:{}", remainderNodeMap);
|
|
Map<String, Integer> nodeMapToUse = projectDomainService.getNodeMapToUse(isChoiceGpu, expandParallelism);
|
|
Map<String, Integer> nodeMapToUse = projectDomainService.getNodeMapToUse(isChoiceGpu, expandParallelism);
|
|
- log.info("即将使用的并行度为:" + nodeMapToUse);
|
|
|
|
|
|
+ log.info("即将使用的并行度为:{}", nodeMapToUse);
|
|
//2 将指定 node 的并行度减少
|
|
//2 将指定 node 的并行度减少
|
|
nodeMapToUse.keySet().forEach(nodeName -> projectDomainService.decrementParallelism(isChoiceGpu, nodeName, nodeMapToUse.get(nodeName)));
|
|
nodeMapToUse.keySet().forEach(nodeName -> projectDomainService.decrementParallelism(isChoiceGpu, nodeName, nodeMapToUse.get(nodeName)));
|
|
//3 获取还未运行的任务 ("project:" + projectId + ":node:" + nodeName + ":yaml")
|
|
//3 获取还未运行的任务 ("project:" + projectId + ":node:" + nodeName + ":yaml")
|