|
@@ -12,10 +12,12 @@ import com.css.simulation.resource.scheduler.pojo.po.ProjectPO;
|
|
|
import com.css.simulation.resource.scheduler.pojo.po.TaskPO;
|
|
|
import com.css.simulation.resource.scheduler.pojo.to.PrefixTO;
|
|
|
import com.css.simulation.resource.scheduler.pojo.to.ScoreTO;
|
|
|
+import com.css.simulation.resource.scheduler.util.KubernetesUtil;
|
|
|
import com.css.simulation.resource.scheduler.util.MinioUtil;
|
|
|
import com.css.simulation.resource.scheduler.util.ProjectUtil;
|
|
|
import com.fasterxml.jackson.databind.JsonNode;
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
+import io.kubernetes.client.openapi.ApiClient;
|
|
|
import io.minio.MinioClient;
|
|
|
import lombok.SneakyThrows;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
@@ -76,20 +78,22 @@ public class TaskManager {
|
|
|
RequestConfig requestConfig;
|
|
|
@Autowired
|
|
|
ProjectUtil projectUtil;
|
|
|
+ @Autowired
|
|
|
+ ApiClient apiClient;
|
|
|
|
|
|
@SneakyThrows
|
|
|
@Transactional
|
|
|
- public boolean isProjectCompleted(PrefixTO redisPrefix, String projectId, String taskId, String state, String podName, ClientSession session) {
|
|
|
+ public boolean isProjectCompleted(PrefixTO redisPrefix, String projectId, String taskId, String state, String podName) {
|
|
|
if ("Running".equals(state)) {
|
|
|
// 将运行中的任务的 pod 名称放入 redis
|
|
|
stringRedisTemplate.opsForValue().set(redisPrefix.getTaskPodKey(), podName);
|
|
|
taskTick(taskId); // 刷新一下心跳
|
|
|
- log.info("TaskManager--state 修改任务 " + taskId + "的状态为 Running,pod 名称为:" + podName);
|
|
|
+ log.info("TaskManager--state 修改任务 " + taskId + " 的状态为 " + state + ",pod 名称为:" + podName);
|
|
|
taskMapper.updateStateWithStartTime(taskId, state, TimeUtil.getNowForMysql());
|
|
|
return false;
|
|
|
} else {
|
|
|
- String podDeleteCommand = "kubectl delete pod " + podName;
|
|
|
- log.info("TaskManager--state 修改任务 " + taskId + "的状态为:" + state + ",pod 名称为:" + podName + ",并执行删除 pod 命令:" + podDeleteCommand);
|
|
|
+// String podDeleteCommand = "kubectl delete pod " + podName;
|
|
|
+ log.info("TaskManager--state 修改任务 " + taskId + "的状态为 " + state + ",pod 名称为:" + podName + ",并删除 pod。");
|
|
|
if ("Aborted".equals(state)) {
|
|
|
if (retry(projectId, taskId, redisPrefix.getTaskRetryKey(), redisPrefix.getTaskMessageKey())) {
|
|
|
taskMapper.updateStateById(DictConstants.TASK_RUNNING, taskId);
|
|
@@ -131,7 +135,8 @@ public class TaskManager {
|
|
|
}
|
|
|
taskMapper.updateFailStateWithStopTime(taskId, state, TimeUtil.getNowForMysql(), DictConstants.TASK_ERROR_REASON_4);
|
|
|
}
|
|
|
- SshUtil.execute(session, podDeleteCommand);
|
|
|
+// SshUtil.execute(session, podDeleteCommand);
|
|
|
+ KubernetesUtil.deletePod(apiClient, "default", podName);
|
|
|
}
|
|
|
int taskNum = taskMapper.selectTaskNumByProjectId(projectId);
|
|
|
int endTaskNum = taskMapper.selectEndTaskNumByProjectId(projectId); // 查询已结束的任务 'Aborted', 'PendingAnalysis', 'Terminated'
|
|
@@ -143,35 +148,26 @@ public class TaskManager {
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * Resends a task's stored message to Kafka unless its retry counter is exhausted.
|
|
|
+ * <p>
|
|
|
+ * Reads the counter stored in Redis under {@code taskRetryKey}. If it is greater than 3 nothing
|
|
|
+ * is sent. Otherwise the counter is incremented and written back, and the message stored under
|
|
|
+ * {@code taskMessageKey} is sent to the Kafka topic named {@code projectId}.
|
|
|
+ *
|
|
|
+ * @param projectId project id; also used as the Kafka topic name
|
|
|
+ * @param taskId id of the task being retried (only used in log output)
|
|
|
+ * @param taskRetryKey Redis key holding the retry counter as a decimal string
|
|
|
+ * @param taskMessageKey Redis key holding the task message to resend
|
|
|
+ * @return {@code true} if the message was handed to the Kafka template; {@code false} if the
|
|
|
+ * counter exceeded 3 or any exception was thrown (e.g. missing or non-numeric counter)
|
|
|
+ */
|
|
|
public boolean retry(String projectId, String taskId, String taskRetryKey, String taskMessageKey) {
|
|
|
- log.info("TaskService--retry 重试操作收到的参数为:projectId=" + projectId + ",taskId=" + taskId);
|
|
|
- //1 首先查看任务是否重试过 3 次
|
|
|
- String retryString = stringRedisTemplate.opsForValue().get(taskRetryKey);
|
|
|
- int retry = Integer.parseInt(Objects.requireNonNull(retryString));
|
|
|
- //2 如果重试次数没有超过 3 次,则重试
|
|
|
- if (retry > 3) {
|
|
|
+ try {
|
|
|
+ log.info("TaskManager--retry 重试操作收到的参数为:projectId=" + projectId + ",taskId=" + taskId);
|
|
|
+ // 1. Read how many times this task has already been retried (counter kept in Redis).
|
|
|
+ String retryString = stringRedisTemplate.opsForValue().get(taskRetryKey);
|
|
|
+ int retry = Integer.parseInt(Objects.requireNonNull(retryString));
|
|
|
+ // 2. Give up once the counter exceeds 3; otherwise resend.
|
|
|
+ // NOTE(review): with "> 3" a counter that starts at 0 allows four resends - confirm the intended limit.
|
|
|
+ if (retry > 3) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ String taskJson = stringRedisTemplate.opsForValue().get(taskMessageKey);
|
|
|
+ retry++;
|
|
|
+ log.info("TaskManager--retry 重试项目 " + projectId + " 的任务 " + taskId + ",重试次数为:" + retry + ",重新发送的消息为:" + taskJson);
|
|
|
+ stringRedisTemplate.opsForValue().set(taskRetryKey, retry + "");
|
|
|
+ // The send completes asynchronously, so a failed delivery never reaches the catch block below;
|
|
|
+ // log it from the failure callback instead of dropping it silently.
|
|
|
+ kafkaTemplate.send(projectId, taskJson).addCallback(
|
|
|
+ success -> { },
|
|
|
+ failure -> log.error("TaskManager--retry 发送消息失败:projectId=" + projectId + ",taskId=" + taskId, failure));
|
|
|
+ return true;
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("TaskManager--retry 重试操作报错:", e);
|
|
|
return false;
|
|
|
}
|
|
|
- String taskJson = stringRedisTemplate.opsForValue().get(taskMessageKey);
|
|
|
- retry++;
|
|
|
- log.info("TaskService--retry 重试项目 " + projectId + " 的任务 " + taskId + ",重试次数为:" + retry + ",重新发送的消息为:" + taskJson);
|
|
|
- stringRedisTemplate.opsForValue().set(taskRetryKey, retry + "");
|
|
|
- kafkaTemplate.send(projectId, taskJson).addCallback(success -> {
|
|
|
- // 消息发送到的topic
|
|
|
- assert success != null;
|
|
|
- String topic = success.getRecordMetadata().topic();
|
|
|
- // 消息发送到的分区
|
|
|
- int partition = success.getRecordMetadata().partition();
|
|
|
- // 消息在分区内的offset
|
|
|
- long offset = success.getRecordMetadata().offset();
|
|
|
- log.info("------- ProjectConsumer 发送消息成功:\n"
|
|
|
- + "主题 topic 为:" + topic + "\n"
|
|
|
- + "分区 partition 为:" + partition + "\n"
|
|
|
- + "偏移量为:" + offset + "\n"
|
|
|
- + "消息体为:" + taskJson);
|
|
|
- }, failure -> {
|
|
|
- log.error("------- 发送消息失败:" + failure.getMessage());
|
|
|
- });
|
|
|
- return true;
|
|
|
+
|
|
|
}
|
|
|
|
|
|
public void prepareScore(String projectRunningKey) {
|