root 2 年之前
父节点
当前提交
e2ad9c80ec

+ 0 - 2
api-common/src/main/java/api/common/pojo/dto/ProjectMessageDTO.java

@@ -33,7 +33,5 @@ public class ProjectMessageDTO {
     private Integer currentParallelism;// 当前正在使用的并行度
     private Integer currentParallelism;// 当前正在使用的并行度
     private Integer taskTotal;// 任务总数量
     private Integer taskTotal;// 任务总数量
     private Integer taskCompleted;// 任务已完成数量
     private Integer taskCompleted;// 任务已完成数量
-    //* -------------------------------- Comment --------------------------------
-    private String isChoiceGpu; //0是1否
 
 
 }
 }

+ 17 - 17
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/consumer/ProjectConsumer.java

@@ -32,8 +32,6 @@ import javax.annotation.Resource;
 import java.util.*;
 import java.util.*;
 import java.util.concurrent.CopyOnWriteArrayList;
 import java.util.concurrent.CopyOnWriteArrayList;
 import java.util.concurrent.FutureTask;
 import java.util.concurrent.FutureTask;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicReference;
 
 
 @Component
 @Component
 @Slf4j
 @Slf4j
@@ -102,8 +100,6 @@ public class ProjectConsumer {
         String initialProjectJson = projectRecord.value();
         String initialProjectJson = projectRecord.value();
         ProjectMessageDTO projectMessageDTO = JsonUtil.jsonToBean(initialProjectJson, ProjectMessageDTO.class);
         ProjectMessageDTO projectMessageDTO = JsonUtil.jsonToBean(initialProjectJson, ProjectMessageDTO.class);
         String projectId = projectMessageDTO.getProjectId();        // 手动执行项目 id 或 自动执行子项目 id
         String projectId = projectMessageDTO.getProjectId();        // 手动执行项目 id 或 自动执行子项目 id
-        ProjectPO projectPO = projectUtil.getProjectByProjectId(projectId);
-        projectMessageDTO.setIsChoiceGpu(projectPO.getIsChoiceGpu());
         FutureTask<Integer> createTaskAndFixDataFutureTask = new FutureTask<>(() -> {
         FutureTask<Integer> createTaskAndFixDataFutureTask = new FutureTask<>(() -> {
             createTaskAndFixData(projectRecord);
             createTaskAndFixData(projectRecord);
             return 1024;
             return 1024;
@@ -387,9 +383,12 @@ public class ProjectConsumer {
     public void parseProject(ProjectMessageDTO projectMessageDTO, String projectWaitingKey, String projectRunningKey,
     public void parseProject(ProjectMessageDTO projectMessageDTO, String projectWaitingKey, String projectRunningKey,
                              String userId) {
                              String userId) {
         String projectId = projectMessageDTO.getProjectId();    // 项目 id
         String projectId = projectMessageDTO.getProjectId();    // 项目 id
-        String isChoiceGpu = projectMessageDTO.getIsChoiceGpu();
+        ProjectPO projectPO = projectUtil.getProjectByProjectId(projectId);
+        log.info("项目 " + projectId + " 信息为:" + projectPO);
+        String isChoiceGpu = projectPO.getIsChoiceGpu();
         // 项目类型
         // 项目类型
         int currentParallelism = projectMessageDTO.getCurrentParallelism();   // 当前并行度
         int currentParallelism = projectMessageDTO.getCurrentParallelism();   // 当前并行度
+
         // 场景测试包 id
         // 场景测试包 id
         // 结果视频的时长
         // 结果视频的时长
         // 模型配置 id
         // 模型配置 id
@@ -427,9 +426,10 @@ public class ProjectConsumer {
         String algorithmDockerImage = algorithmMapper.selectDockerImageById(algorithmId);
         String algorithmDockerImage = algorithmMapper.selectDockerImageById(algorithmId);
         // -------------------------------- 4 发送任务消息 --------------------------------
         // -------------------------------- 4 发送任务消息 --------------------------------
         List<NodeTO> nodeListToCount = projectUtil.getNodeListToCount(nodeMap);
         List<NodeTO> nodeListToCount = projectUtil.getNodeListToCount(nodeMap);
+        log.info("项目 " + projectId + " 运行在:" + nodeListToCount);
         int messageNumber = 0;
         int messageNumber = 0;
         ApacheKafkaUtil.createTopic(kafkaAdminClient, projectId, realCurrentParallelism, (short) 1);   // 创建主题
         ApacheKafkaUtil.createTopic(kafkaAdminClient, projectId, realCurrentParallelism, (short) 1);   // 创建主题
-        Thread.sleep(5000);
+//        Thread.sleep(5000);
         // 需要即时启动的任务(并行度的大小)
         // 需要即时启动的任务(并行度的大小)
         CopyOnWriteArrayList<String> yamlListToRun = new CopyOnWriteArrayList<>();
         CopyOnWriteArrayList<String> yamlListToRun = new CopyOnWriteArrayList<>();
         for (String taskJsonPath : taskJsonList) {
         for (String taskJsonPath : taskJsonList) {
@@ -452,27 +452,27 @@ public class ProjectConsumer {
                     + partition + " 偏移量为:" + offset + " 消息体为:" + taskJson);
                     + partition + " 偏移量为:" + offset + " 消息体为:" + taskJson);
             //4-6 发送成功过的同时创建 pod.yaml 文件并把文件地址存到 redis
             //4-6 发送成功过的同时创建 pod.yaml 文件并把文件地址存到 redis
             // 选一个count 最少的 node
             // 选一个count 最少的 node
-            AtomicReference<String> currentNodeName = new AtomicReference<>("");
-            AtomicInteger currentCount = new AtomicInteger(Integer.MAX_VALUE);
+            String currentNodeName = "";
+            int currentCount = Integer.MAX_VALUE;
             for (NodeTO nodeTO : nodeListToCount) {
             for (NodeTO nodeTO : nodeListToCount) {
                 int tempCount = nodeTO.getCount();
                 int tempCount = nodeTO.getCount();
                 String tempNodeName = nodeTO.getNodeName();
                 String tempNodeName = nodeTO.getNodeName();
-                if (tempCount < currentCount.get()) {
-                    currentCount.set(tempCount);
-                    currentNodeName.set(tempNodeName);
+                if (tempCount < currentCount) {
+                    currentCount = tempCount;
+                    currentNodeName = tempNodeName;
                     nodeTO.setCount(tempCount + 1);
                     nodeTO.setCount(tempCount + 1);
                 }
                 }
             }
             }
-            String currentNodeNameValue = currentNodeName.get();
-            int currentCountValue = currentCount.get();
-            String tempYaml = projectManager.createTempYaml(projectId, algorithmDockerImage
-                    , currentNodeNameValue, partition, offset,isChoiceGpu);
-            if (currentCountValue == 0) {
+
+            log.info("项目 " + projectId + " 准备创建 yaml:是否使用 gpu " + isChoiceGpu + ",当前节点名称为:" + currentNodeName + ",当前节点已创建 yaml 个数为:" + currentCount);
+            String tempYaml = projectManager.createTempYaml(projectId, algorithmDockerImage, currentNodeName, partition, offset, isChoiceGpu);
+            if (currentCount == 0) {
+                log.info("加入到启动列表 " + tempYaml);
                 yamlListToRun.add(tempYaml);
                 yamlListToRun.add(tempYaml);
             }
             }
             messageNumber++;
             messageNumber++;
         }
         }
-        Thread.sleep(5000);
+        Thread.sleep(5);
         log.info("项目 " + projectId + " 共发送了 " + messageNumber + " 条消息。");
         log.info("项目 " + projectId + " 共发送了 " + messageNumber + " 条消息。");
         log.info("项目 " + projectId + " 准备首先启动 " + yamlListToRun);
         log.info("项目 " + projectId + " 准备首先启动 " + yamlListToRun);
         for (String yaml : yamlListToRun) {
         for (String yaml : yamlListToRun) {

+ 4 - 4
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/manager/ProjectManager.java

@@ -53,19 +53,19 @@ public class ProjectManager {
         String replace6 = replace5.replace("pod-name", podName); // pod 名称包括 projectId 和 随机字符串
         String replace6 = replace5.replace("pod-name", podName); // pod 名称包括 projectId 和 随机字符串
         String replace7 = replace6.replace("namespace-name", kubernetesConfiguration.getNamespace()); // pod 名称包括 projectId 和 随机字符串
         String replace7 = replace6.replace("namespace-name", kubernetesConfiguration.getNamespace()); // pod 名称包括 projectId 和 随机字符串
         String replace8 = replace7.replace("node-name", nodeName);     // 指定 pod 运行节点
         String replace8 = replace7.replace("node-name", nodeName);     // 指定 pod 运行节点
+
         String finalYaml = null;
         String finalYaml = null;
         if (DictConstants.USE_GPU.equals(isChoiceGpu)) {
         if (DictConstants.USE_GPU.equals(isChoiceGpu)) {
+            log.info("项目 " + projectId + " 使用 gpu 生成视频");
             String replace9 = replace8.replace("vtd-image", kubernetesConfiguration.getVtdImageUseGpu());
             String replace9 = replace8.replace("vtd-image", kubernetesConfiguration.getVtdImageUseGpu());
             finalYaml = replace9.replace("vtd-command", kubernetesConfiguration.getVtdCommandUseGpu());
             finalYaml = replace9.replace("vtd-command", kubernetesConfiguration.getVtdCommandUseGpu());
         }
         }
         if (DictConstants.NOT_USE_GPU.equals(isChoiceGpu)) {
         if (DictConstants.NOT_USE_GPU.equals(isChoiceGpu)) {
+            log.info("项目 " + projectId + " 不使用 gpu 生成视频");
             String replace9 = replace8.replace("vtd-image", kubernetesConfiguration.getVtdImageNotUseGpu());
             String replace9 = replace8.replace("vtd-image", kubernetesConfiguration.getVtdImageNotUseGpu());
             finalYaml = replace9.replace("vtd-command", kubernetesConfiguration.getVtdCommandNotUseGpu());
             finalYaml = replace9.replace("vtd-command", kubernetesConfiguration.getVtdCommandNotUseGpu());
         }
         }
-        if (finalYaml == null) {
-            throw new RuntimeException("生成 yaml 错误。");
-        }
-//        log.info("ProjectService--createPod 在节点 " + nodeName + " 开始执行 pod:" + tempPodString);
+        log.info("保存项目 " + projectId + " 的 yaml 文件:" + finalYaml);
         FileUtil.writeStringToLocalFile(finalYaml, podYamlDirectory + podYaml);
         FileUtil.writeStringToLocalFile(finalYaml, podYamlDirectory + podYaml);
 //        log.info("ProjectService--createPod 在节点 " + nodeName + " 开始执行 pod。");
 //        log.info("ProjectService--createPod 在节点 " + nodeName + " 开始执行 pod。");
 //        projectUtil.createPod(nodeName, podName, tempPodString);
 //        projectUtil.createPod(nodeName, podName, tempPodString);

+ 1 - 7
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/manager/TaskManager.java

@@ -150,13 +150,7 @@ public class TaskManager {
                 } else if ("PendingAnalysis".equals(state)) {
                 } else if ("PendingAnalysis".equals(state)) {
                     taskMapper.updateSuccessStateWithStopTime(taskId, state, TimeUtil.getNowForMysql());
                     taskMapper.updateSuccessStateWithStopTime(taskId, state, TimeUtil.getNowForMysql());
                     // 查询项目是否使用 gpu 生成视频(0是1否)
                     // 查询项目是否使用 gpu 生成视频(0是1否)
-                    String isChoiceGpu = "0";
-                    if (DictConstants.PROJECT_TYPE_MANUAL.equals(projectType)) {
-                        isChoiceGpu = manualProjectMapper.selectIsChoiceGpuById(projectId);
-                    }
-                    if (DictConstants.PROJECT_TYPE_AUTO_SUB.equals(projectType)) {
-                        isChoiceGpu = autoSubProjectMapper.selectIsChoiceGpuById(projectId);
-                    }
+                    String isChoiceGpu = projectUtil.getProjectByProjectId(projectId).getIsChoiceGpu();
                     log.info("项目 " + projectId + " 是否需要生成 gpu 视频:" + isChoiceGpu);
                     log.info("项目 " + projectId + " 是否需要生成 gpu 视频:" + isChoiceGpu);
                     if ("1".equals(isChoiceGpu)) {
                     if ("1".equals(isChoiceGpu)) {
                         FutureTask<ResponseBodyVO<String>> videoTask = new FutureTask<>(() -> videoService.generateVideo(projectId, projectType, maxSimulationTime, taskId));
                         FutureTask<ResponseBodyVO<String>> videoTask = new FutureTask<>(() -> videoService.generateVideo(projectId, projectType, maxSimulationTime, taskId));

+ 2 - 0
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/mapper/AutoSubProjectMapper.java

@@ -17,12 +17,14 @@ public interface AutoSubProjectMapper {
             @Result(column = "create_user_id", property = "createUserId", jdbcType = JdbcType.VARCHAR),
             @Result(column = "create_user_id", property = "createUserId", jdbcType = JdbcType.VARCHAR),
             @Result(column = "parallelism", property = "parallelism", jdbcType = JdbcType.VARCHAR),
             @Result(column = "parallelism", property = "parallelism", jdbcType = JdbcType.VARCHAR),
             @Result(column = "project_type", property = "projectType", jdbcType = JdbcType.VARCHAR),
             @Result(column = "project_type", property = "projectType", jdbcType = JdbcType.VARCHAR),
+            @Result(column = "is_choice_gpu", property = "isChoiceGpu", jdbcType = JdbcType.VARCHAR),
             @Result(column = "max_simulation_time", property = "maxSimulationTime", jdbcType = JdbcType.VARCHAR)
             @Result(column = "max_simulation_time", property = "maxSimulationTime", jdbcType = JdbcType.VARCHAR)
     })
     })
     @Select("select sas.id,\n" +
     @Select("select sas.id,\n" +
             "       sap.scene,\n" +
             "       sap.scene,\n" +
             "       sas.create_user_id,\n" +
             "       sas.create_user_id,\n" +
             "       '2' project_type,\n" +
             "       '2' project_type,\n" +
+            "       sap.is_choice_gpu,\n" +
             "       sap.max_simulation_time\n" +
             "       sap.max_simulation_time\n" +
             "from simulation_automatic_subproject sas\n" +
             "from simulation_automatic_subproject sas\n" +
             "         left join simulation_automatic_project sap on sas.parent_id = sap.id\n" +
             "         left join simulation_automatic_project sap on sas.parent_id = sap.id\n" +

+ 0 - 41
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/service/ProjectService.java

@@ -500,48 +500,7 @@ public class ProjectService {
         return dockerImage;
         return dockerImage;
     }
     }
 
 
-    /**
-     * 运行 pod
-     *
-     * @param projectId            项目id
-     * @param nodeMap              并行度
-     * @param algorithmDockerImage 算法镜像
-     */
-    @SneakyThrows
-    public void createPod(String projectId, Map<String, Integer> nodeMap, String algorithmDockerImage) {
-        String podString = FileUtil.read(new File(podTemplateYaml));
-        String replace0 = podString.replace("vtd-container", "vtd-" + projectId);
-        String replace1 = replace0.replace("vtd-image", kubernetesConfiguration.getVtdImage());
-        String replace2 = replace1.replace("algorithm-container", "algorithm-" + projectId);
-        String replace3 = replace2.replace("algorithm-image", algorithmDockerImage);
-        String podTemplateStringOfProject = replace3.replace("kafkaTopic", projectId);     // 消息主题名称为 projectId
-        String podTemplateFileNameOfProject = "project-" + projectId + ".yaml";     // 模板文件名称
-        FileUtil.writeStringToLocalFile(podTemplateStringOfProject, podYamlDirectory + podTemplateFileNameOfProject);
-
-        nodeMap.forEach((nodeName, parallelism) -> {
-            for (int i = 0; i < parallelism; i++) {
-                String podName = projectUtil.getRandomPodName(projectId);   // 生成 podName
-                String tempReplace4 = podTemplateStringOfProject.replace("pod-name", podName); // pod 名称包括 projectId 和 随机字符串
-                String tempReplace5 = tempReplace4.replace("namespace-name", kubernetesConfiguration.getNamespace()); // pod 名称包括 projectId 和 随机字符串
-                String tempPodString = tempReplace5.replace("node-name", nodeName);     // 指定 pod 运行节点
-//                log.info("ProjectService--createPod 在节点 " + nodeName + " 开始执行 pod:" + tempPodString);
-                log.info("ProjectService--createPod 在节点 " + nodeName + " 开始执行 pod。");
-                projectUtil.createPod(nodeName, podName, tempPodString);
-            }
-        });
-    }
 
 
-    /**
-     * 运行 pod
-     *
-     * @param projectId            项目id
-     * @param nodeMap              并行度
-     * @param algorithmDockerImage 算法镜像
-     */
-    @SneakyThrows
-    public void createPod2(String projectId, Map<String, Integer> nodeMap, String algorithmDockerImage) {
-
-    }
 
 
 
 
     /**
     /**

+ 1 - 0
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/util/ProjectUtil.java

@@ -246,6 +246,7 @@ public class ProjectUtil {
     }
     }
 
 
 
 
+
     /**
     /**
      * 获取正在运行的项目的并行度总和
      * 获取正在运行的项目的并行度总和
      *
      *

+ 12 - 21
simulation-resource-scheduler/src/main/resources/kubernetes/template/pod/pod-template.yaml

@@ -22,8 +22,8 @@ spec:
   containers:
   containers:
     - name: vtd-container
     - name: vtd-container
       image: vtd-image
       image: vtd-image
-      imagePullPolicy: IfNotPresent
-      command: [ "/Controller/VTDController", "/Controller/config/docker_cloud_carsim.ini", "kafkaTopic" ]
+      imagePullPolicy: Always
+      command: [ "/Controller/VTDController", "vtd-command", "kafkaTopic" ]
       env:
       env:
         - name: PodName
         - name: PodName
           valueFrom:
           valueFrom:
@@ -32,15 +32,15 @@ spec:
         - name: LM_LICENSE_FILE
         - name: LM_LICENSE_FILE
           value: 27500@172.14.1.103
           value: 27500@172.14.1.103
         - name: KAFKA_IP
         - name: KAFKA_IP
-          value: 172.17.0.188:9092
+          value: 172.17.0.184:9092
         - name: MINIO_IP
         - name: MINIO_IP
           value: 172.17.0.184:9000
           value: 172.17.0.184:9000
         - name: SIMULATION_CLOUD_IP
         - name: SIMULATION_CLOUD_IP
-          value: 172.17.0.192
+          value: 172.17.0.185
         - name: KAFKA_PARTITION
         - name: KAFKA_PARTITION
-          value: kafkaPartition
+          value: kafka-partition
         - name: KAFKA_OFFSET
         - name: KAFKA_OFFSET
-          value: kafkaOffset
+          value: kafka-offset
       volumeMounts:
       volumeMounts:
         - name: nvidia0
         - name: nvidia0
           mountPath: /dev/nvidia0
           mountPath: /dev/nvidia0
@@ -50,27 +50,18 @@ spec:
         privileged: true
         privileged: true
       resources:
       resources:
         limits:
         limits:
-          cpu: "1.5"
+          cpu: "4"
         requests:
         requests:
-          cpu: "0.5"
+          cpu: "4"
     - name: algorithm-container
     - name: algorithm-container
       image: algorithm-image
       image: algorithm-image
-      imagePullPolicy: IfNotPresent
+      imagePullPolicy: Always
       command: [ "/bin/sh", "-c", "/run.sh; touch /tmp/hello.txt;while true;do /bin/echo $(date +%T) >> /tmp/hello.txt; sleep 5; done;" ]
       command: [ "/bin/sh", "-c", "/run.sh; touch /tmp/hello.txt;while true;do /bin/echo $(date +%T) >> /tmp/hello.txt; sleep 5; done;" ]
       resources:
       resources:
         limits:
         limits:
-          cpu: "2.5"
+          cpu: "3"
         requests:
         requests:
-          cpu: "1.5"
-    - name: dynamics
-      image: carsim:v1
-      imagePullPolicy: Never
-      command: [ "/root/VTD_CarSim", "/root/result/simfile.sim", "/root/libcarsim.so" ]
-      resources:
-        limits:
-          cpu: "1.0"
-        requests:
-          cpu: "0.5"
+          cpu: "3"
   restartPolicy: Never
   restartPolicy: Never
   volumes:
   volumes:
     - name: nvidia0
     - name: nvidia0
@@ -78,4 +69,4 @@ spec:
         path: /dev/nvidia0
         path: /dev/nvidia0
     - name: nvidiactl
     - name: nvidiactl
       hostPath:
       hostPath:
-        path: /dev/nvidiactl
+        path: /dev/nvidiactl