|
@@ -81,18 +81,20 @@ public class TaskManager {
|
|
|
@Resource
|
|
|
ApiClient apiClient;
|
|
|
|
|
|
+ public static final String SIMULATION_NAMESPACE = "default";
|
|
|
+
|
|
|
@SneakyThrows
|
|
|
@Transactional
|
|
|
public boolean isProjectCompleted(PrefixTO redisPrefix, String projectId, String taskId, String state, String podName) {
|
|
|
- if ("Running".equals(state)) {
|
|
|
+ if ("Running".equals(state)) { // 运行中的 pod 无需删除
|
|
|
// 将运行中的任务的 pod 名称放入 redis
|
|
|
stringRedisTemplate.opsForValue().set(redisPrefix.getTaskPodKey(), podName);
|
|
|
taskTick(taskId); // 刷新一下心跳
|
|
|
log.info("TaskManager--state 修改任务 " + taskId + " 的状态为 " + state + ",pod 名称为:" + podName);
|
|
|
taskMapper.updateStateWithStartTime(taskId, state, TimeUtil.getNowForMysql());
|
|
|
return false;
|
|
|
- } else {
|
|
|
-// String podDeleteCommand = "kubectl delete pod " + podName;
|
|
|
+ } else { // 出错和结束的 pod 都直接删除
|
|
|
+ KubernetesUtil.deletePod(apiClient, SIMULATION_NAMESPACE, podName);
|
|
|
log.info("TaskManager--state 修改任务 " + taskId + "的状态为 " + state + ",pod 名称为:" + podName + ",并删除 pod。");
|
|
|
if ("Aborted".equals(state)) {
|
|
|
if (retry(projectId, taskId, redisPrefix.getTaskRetryKey(), redisPrefix.getTaskMessageKey())) {
|
|
@@ -128,15 +130,7 @@ public class TaskManager {
|
|
|
taskMapper.updateFailStateWithStopTime(taskId, state, TimeUtil.getNowForMysql(), DictConstants.TASK_ERROR_REASON_3);
|
|
|
} else if ("PendingAnalysis".equals(state)) {
|
|
|
taskMapper.updateSuccessStateWithStopTime(taskId, state, TimeUtil.getNowForMysql());
|
|
|
- } else {
|
|
|
- if (retry(projectId, taskId, redisPrefix.getTaskRetryKey(), redisPrefix.getTaskMessageKey())) {
|
|
|
- taskMapper.updateStateById(DictConstants.TASK_RUNNING, taskId);
|
|
|
- return false;
|
|
|
- }
|
|
|
- taskMapper.updateFailStateWithStopTime(taskId, state, TimeUtil.getNowForMysql(), DictConstants.TASK_ERROR_REASON_4);
|
|
|
}
|
|
|
-// SshUtil.execute(session, podDeleteCommand);
|
|
|
- KubernetesUtil.deletePod(apiClient, "default", podName);
|
|
|
}
|
|
|
int taskNum = taskMapper.selectTaskNumByProjectId(projectId);
|
|
|
int endTaskNum = taskMapper.selectEndTaskNumByProjectId(projectId); // 查询已结束的任务 'Aborted', 'PendingAnalysis', 'Terminated'
|
|
@@ -144,7 +138,6 @@ public class TaskManager {
|
|
|
log.info("TaskManager--isProjectCompleted 项目 " + projectId + " 完成进度为:" + endTaskNum + "/" + taskNum);
|
|
|
// 已结束任务数等于所有任务数量,才会准备打分;否则退出。
|
|
|
return taskNum == endTaskNum;
|
|
|
-
|
|
|
}
|
|
|
|
|
|
public boolean retry(String projectId, String taskId, String taskRetryKey, String taskMessageKey) {
|
|
@@ -153,8 +146,8 @@ public class TaskManager {
|
|
|
//1 首先查看任务是否重试过 3 次
|
|
|
String retryString = stringRedisTemplate.opsForValue().get(taskRetryKey);
|
|
|
int retry = Integer.parseInt(Objects.requireNonNull(retryString));
|
|
|
- //2 如果重试次数没有超过 3 次,则重试
|
|
|
- if (retry > 3) {
|
|
|
+ //2 如果重试次数超过 3 次,则不再重试
|
|
|
+ if (retry >= 3) {
|
|
|
return false;
|
|
|
}
|
|
|
String taskJson = stringRedisTemplate.opsForValue().get(taskMessageKey);
|