Procházet zdrojové kódy

标准化测试扩充

LingxinMeng před 2 roky
rodič
revize
1e39079986

+ 10 - 6
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/application/service/ProjectService.java

@@ -411,7 +411,7 @@ public class ProjectService {
                     } else {
                         expandParallelism = waitingParallelism;
                     }
-                    expand(projectId, isChoiceGpu, parallelism, expandParallelism, expandParallelism == waitingParallelism);
+                    expand(clusterUserId, modelType, projectId, isChoiceGpu, parallelism, expandParallelism, expandParallelism == waitingParallelism);
                 }
             } else if (DictConstants.MODEL_TYPE_CARSIM.equals(modelType)) {
                 usingDynamicLicenseNumber = projectDomainService.getUsingLicenseNumber(clusterUserId, DictConstants.LICENSE_TYPE_DYNAMIC);
@@ -426,7 +426,7 @@ public class ProjectService {
                     } else {
                         expandParallelism = waitingParallelism;
                     }
-                    expand(projectId, isChoiceGpu, parallelism, expandParallelism, expandParallelism == waitingParallelism);
+                    expand(clusterUserId, modelType, projectId, isChoiceGpu, parallelism, expandParallelism, expandParallelism == waitingParallelism);
                 }
             } else {
                 throw new RuntimeException("未知模型类型:" + modelType);
@@ -505,7 +505,7 @@ public class ProjectService {
         List<NodeEntity> nodeListToCount = projectDomainService.getNodeListToCount(nodeMap);
         int messageNumber = 0;
         KafkaUtil.createTopic(kafkaAdminClient, projectId, finalParallelism, (short) 1);   // 创建主题
-        TimeUnit.SECONDS.sleep(10);
+        TimeUnit.SECONDS.sleep(3);
         // 需要即时启动的任务(并行度的大小)
         ArrayList<String> yamlToRunRedisKeyList = new ArrayList<>();
         for (String taskJsonPath : taskJsonList) {
@@ -557,7 +557,7 @@ public class ProjectService {
             }
             messageNumber++;
         }
-        TimeUnit.SECONDS.sleep(10);
+        TimeUnit.SECONDS.sleep(3);
         log.debug("项目 " + projectId + " 共发送了 " + messageNumber + " 条消息,准备首先启动 " + yamlToRunRedisKeyList);
         for (String redisKey : yamlToRunRedisKeyList) {
             projectDomainService.createPodBegin(projectId, redisKey);
@@ -570,8 +570,11 @@ public class ProjectService {
 
 
     @Synchronized
-    public void expand(String projectId, String isChoiceGpu, int totalParallelism, int expandParallelism, boolean isDone) {
+    public void expand(String clusterUserId, String modelType, String projectId, String isChoiceGpu, int totalParallelism, int expandParallelism, boolean isDone) {
         log.info("扩充项目 {} {} 个并行度", projectId, expandParallelism);
+        if (!DictConstants.SYSTEM_USER_ID.equals(clusterUserId)) {
+            projectDomainService.useLicense(clusterUserId, modelType, expandParallelism);
+        }
         //1 获取剩余并行度和即将使用的各node的并行度
         Map<String, Integer> remainderNodeMap = projectDomainService.getRemainderNodeMap(isChoiceGpu);
         log.info("剩余并行度为:" + remainderNodeMap);
@@ -595,7 +598,8 @@ public class ProjectService {
                 // 修改全部yaml
                 nodeMapToUse.forEach((nodeNameAfter, parallelismToUse) -> {
                     log.info("修改yaml执行节点 {} -> {}:{}", nodeNameBefore, nodeNameAfter, yamlPathCacheKeySetGroupByNodeName);
-                    int shareNum = yamlCount / totalParallelism * parallelismToUse;
+                    int shareNum = (int) Math.ceil(yamlCount * 1.0 / totalParallelism * parallelismToUse);
+                    log.info("需要分享的yaml个数为:{}", shareNum);
                     for (int i = 0; i < shareNum; i++) {
                         log.info("将yaml按比例均分给节点 {}", nodeNameAfter);
                         final String yamlPathCacheKeyBefore = yamlPathCacheKeySetGroupByNodeName.get(i);

+ 1 - 1
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/application/service/TaskService.java

@@ -128,7 +128,7 @@ public class TaskService {
 //                    videoFeignClient.generateVideo(generateVideoKey, nodeName, projectId, projectType, maxSimulationTime, taskId);
                         log.info("任务 {} 使用 CPU 生成视频开始>>>>>>>", taskId);
                         while (true) {
-                            TimeUnit.SECONDS.sleep(1);
+                            TimeUnit.SECONDS.sleep(3);
                             final String generateVideoValue = customRedisClient.get(generateVideoKey);
                             if (DictConstants.YES.equals(generateVideoValue)) {
                                 customRedisClient.delete(generateVideoKey);

+ 4 - 4
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/domain/service/ProjectDomainService.java

@@ -231,7 +231,7 @@ public class ProjectDomainService {
             // 先删除 redis key
             KubernetesUtil.deletePod(apiClient, kubernetesConfiguration.getNamespace(), podName);
             log.info("等待 pod " + podName + " 的资源释放完成。");
-            TimeUnit.SECONDS.sleep(7);
+            TimeUnit.SECONDS.sleep(3);
         } catch (ApiException apiException) {
             log.info("pod " + podName + " 已删除。");
         } catch (Exception e) {
@@ -269,7 +269,7 @@ public class ProjectDomainService {
         final Set<String> yamlPathCacheKeySet = RedisUtil.getKeySetByPrefix(stringRedisTemplate, "project:" + projectId + ":node:" + nodeName + ":yaml");
         if (CollectionUtil.isEmpty(yamlPathCacheKeySet)) {
             // 如果当前节点没有下一个yaml,则返回一个并行度。
-            log.info("节点 " + nodeName + " 已经执行完被分配的项目 " + projectId + " 的所有 pod。");
+            log.info("节点 {} 已经没有项目 {} 的等待执行的 yaml。", nodeName, projectId);
             incrementOneParallelism(isChoiceGpu, nodeName);
             releaseLicense(getClusterUserIdByProjectUserId(projectUserId), getModelTypeByProjectIdAndProjectType(projectId, projectType), 1);
         } else {
@@ -853,7 +853,7 @@ public class ProjectDomainService {
         log.debug("访问仿真云平台项目详情接口:" + customConfiguration.getProjectDetailsUri() + ",请求头为:" + headers + ",请求体为:" + params + "结果为:" + result);
         ResponseBodyVO<?> responseBodyVO = JsonUtil.jsonToBean(result, ResponseBodyVO.class);
         String projectDetailsVOJson = JsonUtil.beanToJson(responseBodyVO.getInfo());
-        TimeUnit.SECONDS.sleep(5);
+        TimeUnit.SECONDS.sleep(3);
         if (DictConstants.PROJECT_TYPE_MANUAL.equals(projectType)) {
             manualProjectMapper.updateProjectDetails(projectId, projectDetailsVOJson);
         } else if (DictConstants.PROJECT_TYPE_AUTO_SUB.equals(projectType)) {
@@ -882,7 +882,7 @@ public class ProjectDomainService {
         log.debug("访问仿真云平台项目报告接口:" + customConfiguration.getProjectReportUri() + ",请求头为:" + headers + ",请求体为:" + params + "结果为:" + result);
         ResponseBodyVO<?> responseBodyVO = JsonUtil.jsonToBean(result, ResponseBodyVO.class);
         String projectReportVOJson = JsonUtil.beanToJson(responseBodyVO.getInfo());
-        TimeUnit.SECONDS.sleep(5);
+        TimeUnit.SECONDS.sleep(3);
         if (DictConstants.PROJECT_TYPE_MANUAL.equals(projectType)) {
             manualProjectMapper.updateProjectReport(projectId, projectReportVOJson);
         } else if (DictConstants.PROJECT_TYPE_AUTO_SUB.equals(projectType)) {

+ 1 - 1
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/domain/service/TaskDomainService.java

@@ -94,7 +94,7 @@ public class TaskDomainService {
             return;
         }
         String packageId = projectEntity.getScenePackageId();  // 场景测试包 id,指标的rootId
-        TimeUnit.SECONDS.sleep(10); // 先等一下数据库更新
+        TimeUnit.SECONDS.sleep(3); // 先等一下数据库更新
         List<TaskEntity> taskList = taskMapper.selectTaskListByProjectId(projectId);  // 所有任务信息
         if (CollectionUtil.isEmpty(taskList)) {
             log.error("项目 {} 下没有查询到任务!", projectId);

+ 1 - 1
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/infrastructure/persistence/kafka/KafkaUtil.java

@@ -43,7 +43,7 @@ public class KafkaUtil {
     public static void deleteTopic(Admin admin, String... topics) {
         admin.deleteTopics(Arrays.asList(topics));
         log.info("删除主题:" + Arrays.toString(topics));
-        TimeUnit.SECONDS.sleep(5);
+        TimeUnit.SECONDS.sleep(3);
     }
 
 

+ 2 - 2
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/infrastructure/persistence/kubernetes/KubernetesUtil.java

@@ -236,7 +236,7 @@ public class KubernetesUtil {
     public static void deletePod2(ApiClient apiClient, String namespaceName, String podName) throws InterruptedException, ApiException {
         deletePodSync(apiClient, namespaceName, podName);
         log.info("等待 pod " + podName + " 的资源释放完成。");
-        TimeUnit.SECONDS.sleep(7);
+        TimeUnit.SECONDS.sleep(3);
         log.info("pod " + podName + " 资源释放完成。");
     }
 
@@ -254,7 +254,7 @@ public class KubernetesUtil {
         coreV1Api.deleteNamespacedPod(podName, namespaceName, null, null, 0, null, null, null);
         // 检查是否删除完毕
         while (true) {
-            TimeUnit.SECONDS.sleep(7);
+            TimeUnit.SECONDS.sleep(3);
             List<String> podNameList = getPod(apiClient, namespaceName);
             if (!podNameList.contains(podName)) {
                 return;

+ 1 - 1
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/infrastructure/scheduler/ProjectScheduler.java

@@ -25,7 +25,7 @@ public class ProjectScheduler {
     /**
      * 调度项目启动
      */
-    @Scheduled(fixedDelay = 10000)
+    @Scheduled(fixedDelay = 1000)
     public void dispatchProject() {
         List<ProjectWaitQueueEntity> projectWaitQueue = projectDomainService.getWaitQueue();
         if (CollectionUtil.isNotEmpty(projectWaitQueue)) {

+ 1 - 1
simulation-resource-server/src/main/java/com/css/simulation/resource/util/SceneUtil.java

@@ -61,7 +61,7 @@ public class SceneUtil {
         } catch (Exception e) {
             log.error("泛化接口服务崩溃,重启。");
             LinuxUtil.execute("./service-manager.sh restart", "/root/");
-            TimeUnit.SECONDS.sleep(10);
+            TimeUnit.SECONDS.sleep(3);
             post = HttpUtil.post(HttpUtil.getHttpClient(), requestConfig, generalUrl, null, map);
         }
         log.info("调用泛化接口:" + generalUrl + ",响应结果为:" + post);