root 2 жил өмнө
parent
commit
6564cd1911

+ 8 - 3
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/consumer/ProjectConsumer.java

@@ -556,13 +556,19 @@ public class ProjectConsumer {
                     currentNodeTO = nodeTO;
                 }
             }
+            if (currentNodeTO == null) {
+                String errorMessage = "parseProject() 挑选节点失败!";
+                log.info(errorMessage);
+                throw new RuntimeException(errorMessage);
+            }
             currentNodeTO.setCount(currentNodeTO.getCount() + 1);
             // 获取 cpu 编号
             int cpuOrder = nodeMap0.get(currentNodeName) - 1;
             nodeMap0.put(currentNodeName, cpuOrder);
 
             log.info("parseProject() 项目 " + projectId + " 准备创建 yaml:是否使用 gpu " + isChoiceGpu + ",当前节点名称为:" + currentNodeName + ",当前节点已创建 yaml 个数为:" + currentCount + ",当前 cpu 编号为:" + cpuOrder);
-            String tempYaml = projectManager.createTempYaml(projectId, vehicleConfigId, modelType, algorithmDockerImage, currentNodeName, partition, offset, isChoiceGpu, cpuOrder);
+            // 只有准备启动(即 currentCount == 0)的时候才指定 cpu 编号
+            String tempYaml = projectManager.createTempYaml(projectId, vehicleConfigId, modelType, algorithmDockerImage, currentNodeName, partition, offset, isChoiceGpu, currentCount, cpuOrder);
             if (currentCount == 0) {
                 log.info("parseProject() 加入到启动列表 " + tempYaml);
                 yamlListToRun.add(tempYaml);
@@ -570,8 +576,7 @@ public class ProjectConsumer {
             messageNumber++;
         }
         TimeUnit.SECONDS.sleep(6);
-        log.info("parseProject() 项目 " + projectId + " 共发送了 " + messageNumber + " 条消息。");
-        log.info("parseProject() 项目 " + projectId + " 准备首先启动 " + yamlListToRun);
+        log.info("parseProject() 项目 " + projectId + " 共发送了 " + messageNumber + " 条消息。 准备首先启动 " + yamlListToRun);
         for (String yaml : yamlListToRun) {
             projectUtil.createPod2(yaml);
         }

+ 17 - 5
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/manager/ProjectManager.java

@@ -50,6 +50,7 @@ public class ProjectManager {
                                  int kafkaPartition,
                                  long kafkaOffset,
                                  String isChoiceGpu,
+                                 int currentCount,
                                  int cpuOrder
 
     ) {
@@ -74,18 +75,24 @@ public class ProjectManager {
             String replace12 = replace11.replace("namespace-name", kubernetesConfiguration.getNamespace()); // pod 名称包括 projectId 和 随机字符串
             String replace13 = replace12.replace("node-name", nodeName);     // 指定 pod 运行节点
 
-            String replace14 = replace13.replace("cpu-order", "\"" + cpuOrder + "\"");     // 指定 cpu 编号
+            String replace14;
+            if (currentCount == 0) {
+                replace14 = replace13.replace("cpu-order", "\"" + cpuOrder + "\"");     // 指定 cpu 编号
+            } else {
+                replace14 = replace13;
+            }
 
-            String finalYaml = null;
+            String finalYaml;
             if (DictConstants.USE_GPU.equals(isChoiceGpu)) {
                 log.info("项目 " + projectId + " 使用 gpu 生成视频");
                 String replace15 = replace14.replace("vtd-image", kubernetesConfiguration.getImageVtdGpu());
                 finalYaml = replace15.replace("vtd-command", kubernetesConfiguration.getCommandVtdGpu());
-            }
-            if (DictConstants.NOT_USE_GPU.equals(isChoiceGpu)) {
+            } else if (DictConstants.NOT_USE_GPU.equals(isChoiceGpu)) {
                 log.info("项目 " + projectId + " 不使用 gpu 生成视频");
                 String replace15 = replace14.replace("vtd-image", kubernetesConfiguration.getImageVtdNogpu());
                 finalYaml = replace15.replace("vtd-command", kubernetesConfiguration.getCommandVtdNogpu());
+            } else {
+                throw new RuntimeException("createTempYaml() 是否使用 gpu:" + isChoiceGpu);
             }
             log.info("保存项目 " + projectId + " 的 yaml 文件:" + podYamlDirectory + podYaml);
             FileUtil.writeStringToLocalFile(finalYaml, podYamlDirectory + podYaml);
@@ -115,7 +122,12 @@ public class ProjectManager {
             String replace17 = replace16.replace("namespace-name", kubernetesConfiguration.getNamespace()); // pod 名称包括 projectId 和 随机字符串
             String replace18 = replace17.replace("node-name", nodeName);     // 指定 pod 运行节点
 
-            String replace19 = replace18.replace("cpu-order", "\"" + cpuOrder + "\"");     // 指定 cpu 编号
+            String replace19;
+            if (currentCount == 0) {
+                replace19 = replace18.replace("cpu-order", "\"" + cpuOrder + "\"");     // 指定 cpu 编号
+            } else {
+                replace19 = replace18;
+            }
 
             String finalYaml;
             if (DictConstants.USE_GPU.equals(isChoiceGpu)) {