martin před 3 roky
rodič
revize
307a87d64e

+ 9 - 0
api-common/src/main/java/api/common/pojo/constants/DictConstants.java

@@ -83,4 +83,13 @@ public class DictConstants {
     public static final String REPORT_LEVEL_M = "M";
     public static final String REPORT_LEVEL_P = "P";
 
+
+    //报告评测等级
+    public static final String TASK_ERROR_REASON_1 = "任务执行超时!";
+    public static final String TASK_ERROR_REASON_2 = "未知错误!";
+    public static final String TASK_ERROR_REASON_3 = "容器终止!";
+    public static final String TASK_ERROR_REASON_4 = "未知状态!";
+    public static final String TASK_ERROR_REASON_5 = "打分出错!";
+
+
 }

+ 2 - 2
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/consumer/ManualProjectConsumer.java

@@ -188,7 +188,7 @@ public class ManualProjectConsumer {
                     taskPO.setIsDeleted("0");
                     taskMapper.insert(taskPO);
                     // 心跳信息存在緩存中
-                    redisTemplate.opsForValue().set(manualProjectTopic + ":" + projectId + ":" + taskId, TimeUtil.getNowString());
+                    redisTemplate.opsForValue().set(manualProjectTopic + ":tick:" + projectId + ":" + taskId, TimeUtil.getNowString());
                     // 组装 task 消息
                     TaskTO taskTO = TaskTO.builder()
                             .info(InfoTO.builder()
@@ -266,7 +266,7 @@ public class ManualProjectConsumer {
                     String lastTargetId = taskMapper.selectLastTargetIdById(taskId);
                     taskMapper.updateStateById(DictConstants.TASK_PENDING, taskId);
                     // 心跳信息存在緩存中
-                    redisTemplate.opsForValue().set(manualProjectTopic + ":" + projectId + ":" + taskId, TimeUtil.getNowString());
+                    redisTemplate.opsForValue().set(manualProjectTopic + ":tick:" + projectId + ":" + taskId, TimeUtil.getNowString());
                     // 组装 task 消息
                     TaskTO taskTO = TaskTO.builder()
                             .info(InfoTO.builder()

+ 6 - 2
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/mapper/TaskMapper.java

@@ -56,10 +56,14 @@ public interface TaskMapper {
     void updateSuccessStateWithStopTime(@Param("id") String id, @Param("runState") String runState, @Param("runStopTime") Timestamp runStopTime);
 
     @Update("update simulation_manual_project_task\n" +
-            "set run_state = #{runState},run_end_time = #{runStopTime},run_result='Failed',score = 0\n" +
+            "set run_state = #{runState},run_end_time = #{runStopTime},run_result='Failed',score = 0,target_evaluate = #{targetEvaluate}\n" +
             "where id = #{id}" +
             " and score is null")
-    void updateFailStateWithStopTime(@Param("id") String id, @Param("runState") String runState, @Param("runStopTime") Timestamp runStopTime);
+    void updateFailStateWithStopTime(@Param("id") String id,
+                                     @Param("runState") String runState,
+                                     @Param("runStopTime") Timestamp runStopTime,
+                                     @Param("targetEvaluate") String targetEvaluate
+                                     );
 
     @Insert("insert into simulation_manual_project_task(id, p_id, scene_id, scene_name, scene_type, last_targer_id,\n" +
             "                                           run_state, run_result_file_path, create_time, create_user_id, modify_time,\n" +

+ 2 - 2
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/scheduler/TickScheduler.java

@@ -58,7 +58,7 @@ public class TickScheduler {
             for (TaskPO task : executingTaskList) {
                 String taskId = task.getId();
                 String projectId = task.getPId();
-                String s = redisTemplate.opsForValue().get(manualProjectTopic + ":" + projectId + ":" + taskId);
+                String s = redisTemplate.opsForValue().get(manualProjectTopic + ":tick:" + projectId + ":" + taskId);
 //                Optional.ofNullable(s).orElseThrow(() -> new RuntimeException("项目 " + projectId + " 下的任务 " + taskId + " 的心跳查询失败"));
                 assert s != null;
                 long tickTime = Long.parseLong(s);
@@ -74,7 +74,7 @@ public class TickScheduler {
                                 + ",并执行删除 pod 命令:" + podDeleteCommand);
                         SshUtil.execute(session, podDeleteCommand);
 //            taskManager.updateFailStateWithStopTime(taskId, state, TimeUtil.getNowForMysql()); // 如果任务 abort 代表项目失败
-                        taskMapper.updateFailStateWithStopTime(taskId, DictConstants.TASK_ABORTED, TimeUtil.getNowForMysql());
+                        taskMapper.updateFailStateWithStopTime(taskId, DictConstants.TASK_ABORTED, TimeUtil.getNowForMysql(),DictConstants.TASK_ERROR_REASON_1);
                         redisTemplate.delete("podName:" + taskId);
                     }
                 }

+ 32 - 23
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/service/TaskService.java

@@ -120,30 +120,38 @@ public class TaskService {
                     + ",并执行删除 pod 命令:" + podDeleteCommand);
             SshUtil.execute(session, podDeleteCommand);
 //            taskManager.updateFailStateWithStopTime(taskId, state, TimeUtil.getNowForMysql()); // 如果任务 abort 代表项目失败
-            taskMapper.updateFailStateWithStopTime(taskId, state, TimeUtil.getNowForMysql());
             redisTemplate.delete("podName:" + taskId);
             //result-path-minio: /project/manual-project/
-//            String projectId = taskMapper.selectProjectIdById(taskId);
-//            String minioPathOfErrorLog = resultPathMinio + projectId + "/" + taskId + "error.log";
-//            String errorString = MinioUtil.downloadToString(minioClient, bucketName, minioPathOfErrorLog);
-//            String[] lines = errorString.split("\n");
-//            AtomicReference<String> errorMessage = new AtomicReference<>("");
-//            for (String line : lines) {
-//                if (line.startsWith("Original Error")) {
-//                    errorMessage.set(errorMessage.get() + line + "\n");
-//                }
-//
-//                if (line.startsWith("Possible Cause")) {
-//                    errorMessage.set(errorMessage.get() + line);
-//                    break;
-//                }
-//            }
-//            String errorMessageString = errorMessage.get();
+            String projectId = taskMapper.selectProjectIdById(taskId);
+            String minioPathOfErrorLog = resultPathMinio + projectId + "/" + taskId + "error.log";
+            boolean objectExist = MinioUtil.isObjectExist(minioClient, bucketName, minioPathOfErrorLog);
+            String targetEvaluate;
+            if (objectExist) {
+                String errorString = MinioUtil.downloadToString(minioClient, bucketName, minioPathOfErrorLog);
+                String[] lines = errorString.split("\n");
+                StringBuilder errorMessage = new StringBuilder();
+                for (String line : lines) {
+                    if (line.startsWith("Original Error")) {
+                        errorMessage.append(line).append("\n");
+                    }
+
+                    if (line.startsWith("Possible Cause")) {
+                        errorMessage.append(line);
+                        break;
+                    }
+                }
+                targetEvaluate = errorMessage.toString();
+            } else {
+                targetEvaluate = DictConstants.TASK_ERROR_REASON_2;
+            }
+
+
+            taskMapper.updateFailStateWithStopTime(taskId, state, TimeUtil.getNowForMysql(), targetEvaluate);
         } else if ("Terminated".equals(state)) {
             log.info("TaskService--state 修改任务 " + taskId + "的状态为 Terminated,pod 名称为:" + podName
                     + ",并执行删除 pod 命令:" + podDeleteCommand);
             SshUtil.execute(session, podDeleteCommand);
-            taskMapper.updateFailStateWithStopTime(taskId, state, TimeUtil.getNowForMysql());
+            taskMapper.updateFailStateWithStopTime(taskId, state, TimeUtil.getNowForMysql(), DictConstants.TASK_ERROR_REASON_3);
             redisTemplate.delete("podName:" + taskId);
         } else if ("PendingAnalysis".equals(state)) {
             log.info("TaskService--state 修改任务 " + taskId + "的状态为 PendingAnalysis,pod 名称为:" + podName
@@ -153,7 +161,7 @@ public class TaskService {
             redisTemplate.delete("podName:" + taskId);
         } else {
             log.error("TaskService--state 出现了未知状态:" + state);
-            taskMapper.updateFailStateWithStopTime(taskId, state, TimeUtil.getNowForMysql());
+            taskMapper.updateFailStateWithStopTime(taskId, state, TimeUtil.getNowForMysql(), DictConstants.TASK_ERROR_REASON_4);
             redisTemplate.delete("podName:" + taskId);
         }
         ProjectPO projectPO = projectMapper.selectById(taskId);
@@ -186,7 +194,8 @@ public class TaskService {
         SshUtil.execute(sessionKafka, topicDeleteCommand);
         SshUtil.stop(clientKafka, sessionKafka);
         List<TaskPO> taskList = taskMapper.selectTaskListByProjectId(projectId);  // 所有任务信息
-        log.info("TaskService--state 共有 " + taskList.size() + "个任务!");
+        int taskNumber = taskList.size();
+        log.info("TaskService--state 共有 " + taskNumber + "个任务!");
         taskIndexMapper.deleteFirstByProjectId(projectId);
         taskIndexMapper.deleteLastByProjectId(projectId);
         // -------------------------------- 查询叶子指标 --------------------------------
@@ -211,7 +220,7 @@ public class TaskService {
             FileUtil.writeInputStreamToLocalFile(IoUtil.stringToInputStream(ruleDetails), ruleFilePath);
 
             List<TaskPO> taskListOfLeafIndex = taskList.stream()
-                    .filter(task -> indexId.equals(task.getLastTargetId()) && DictConstants.TASK_ANALYSIS.equals(task.getRunState()))
+                    .filter(task -> indexId.equals(task.getLastTargetId()))
                     .collect(Collectors.toList());
 
             log.info("TaskService--state 叶子节点 " + indexId + " 包括 " + taskListOfLeafIndex.size() + " 个成功运行结束任务!");
@@ -247,7 +256,7 @@ public class TaskService {
                         }
                     } catch (Exception e) {
                         task2.setRunState(DictConstants.TASK_ABORTED);
-                        taskMapper.updateFailStateWithStopTime(task2Id, DictConstants.TASK_ABORTED, TimeUtil.getNowForMysql());
+                        taskMapper.updateFailStateWithStopTime(task2Id, DictConstants.TASK_ABORTED, TimeUtil.getNowForMysql(), DictConstants.TASK_ERROR_REASON_5);
                         throw new RuntimeException(e.getMessage());
                     }
                     ScoreTO score = null;
@@ -279,7 +288,7 @@ public class TaskService {
 
             // 计算不合格的任务数(不到100分就是不合格,执行失败的不算)
             long notStandardSceneNumber = taskListOfLeafIndex.stream()
-                            .filter(task -> task.getScore() < 100)
+                    .filter(task -> task.getScore() < 100)
                     .count();
 
             // 计算总分

+ 8 - 0
simulation-resource-scheduler/src/main/java/com/css/simulation/resource/scheduler/util/MinioUtil.java

@@ -21,6 +21,14 @@ public class MinioUtil {
         return minioClient.bucketExists(BucketExistsArgs.builder().bucket(bucketName).build());
     }
 
+    /**
+     * 判断 object 是否存在
+     */
+    public static boolean isObjectExist(MinioClient minioClient, String bucket, String object) throws ServerException, InsufficientDataException, ErrorResponseException, IOException, NoSuchAlgorithmException, InvalidKeyException, InvalidResponseException, XmlParserException, io.minio.errors.InternalException {
+        StatObjectResponse objectStat = minioClient.statObject(StatObjectArgs.builder().bucket(bucket).object(object).build());
+        return objectStat != null;
+    }
+
     /**
      * 创建 bucket
      */

+ 1 - 1
simulation-resource-scheduler/src/main/resources/kubernetes/template/job/job-template.yaml

@@ -38,7 +38,7 @@ spec:
         - name: algorithm-container
           image: algorithm-image
           imagePullPolicy: Never
-          command: [ "/bin/sh", "-c", "/AEB/start_docker.sh; touch /tmp/hello.txt;while true;do /bin/echo $(date +%T) >> /tmp/hello.txt; sleep 5; done;" ]
+          command: [ "/bin/sh", "-c", "run.sh; touch /tmp/hello.txt;while true;do /bin/echo $(date +%T) >> /tmp/hello.txt; sleep 5; done;" ]
       restartPolicy: Always
       volumes:
         - name: nvidia0