|
@@ -1,13 +1,22 @@
|
|
|
package com.css.simulation.resource.scheduler.scheduler;
|
|
|
|
|
|
+import api.common.pojo.constants.DictConstants;
|
|
|
+import api.common.util.*;
|
|
|
import com.css.simulation.resource.scheduler.mapper.ProjectMapper;
|
|
|
import com.css.simulation.resource.scheduler.mapper.TaskMapper;
|
|
|
+import com.css.simulation.resource.scheduler.pojo.po.TaskPO;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.apache.sshd.client.SshClient;
|
|
|
+import org.apache.sshd.client.session.ClientSession;
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
|
import org.springframework.data.redis.core.StringRedisTemplate;
|
|
|
+import org.springframework.scheduling.annotation.Scheduled;
|
|
|
import org.springframework.stereotype.Component;
|
|
|
|
|
|
+import java.io.IOException;
|
|
|
+import java.util.List;
|
|
|
+
|
|
|
@Component
|
|
|
@Slf4j
|
|
|
public class TickScheduler {
|
|
@@ -24,94 +33,94 @@ public class TickScheduler {
|
|
|
@Value("${scheduler.manual-project.job-yaml}")
|
|
|
String jobYaml;
|
|
|
|
|
|
-// @Scheduled(fixedDelay = 2000)
|
|
|
-// public void tick() {
|
|
|
-//
|
|
|
-// List<TaskPO> executingTaskList = taskMapper.selectExecuting();
|
|
|
-// if (CollectionUtil.isEmpty(executingTaskList)) {
|
|
|
-// return;
|
|
|
-// }
|
|
|
-//// log.info("------- TickScheduler 查询出所有执行中的任务('Running'):" + executingTaskList);
|
|
|
-// //2 根据 key 查出任务的心跳时间
|
|
|
-// executingTaskList.forEach(task -> {
|
|
|
-// String taskId = task.getId();
|
|
|
-// String projectId = task.getPId();
|
|
|
-// try {
|
|
|
-// String s = redisTemplate.opsForValue().get(manualProjectTopic + ":" + projectId + ":" + taskId);
|
|
|
-//// Optional.ofNullable(s).orElseThrow(() -> new RuntimeException("项目 " + projectId + " 下的任务 " + taskId + " 的心跳查询失败"));
|
|
|
-// assert s != null;
|
|
|
-// long tickTime = Long.parseLong(s);
|
|
|
-// long maxSimulationTime = task.getMaxSimulationTime() * 1000;
|
|
|
-// long now = TimeUtil.getNow();
|
|
|
-// long difference = now - tickTime;
|
|
|
-//// log.info("------- TickScheduler 任务" + taskId + "心跳时间为:" + tickTime + "最大仿真时间为:" + tickTime + "时间差为:" + difference);
|
|
|
-// if (difference > maxSimulationTime) {
|
|
|
-//// log.info("------- TickScheduler 任务" + taskId + "已超时,状态修改为:" + DictConstants.TASK_ABORTED);
|
|
|
-// taskMapper.updateState(taskId, DictConstants.TASK_ABORTED);
|
|
|
-// }
|
|
|
-// } catch (Exception e) {
|
|
|
-// throw new RuntimeException(e.getMessage());
|
|
|
-// }
|
|
|
-//
|
|
|
-// });
|
|
|
-//
|
|
|
-// }
|
|
|
-//
|
|
|
-//
|
|
|
-// /**
|
|
|
-// * 检查如果有 job 在运行但是 pod 全部关闭的情况,此时需要重启一下 job
|
|
|
-// */
|
|
|
-// @Scheduled(fixedDelay = 60 * 1000)
|
|
|
-// public void checkProject() throws IOException {
|
|
|
-// SshClient client = SshUtil.getClient();
|
|
|
-// ClientSession session = SshUtil.getSession(client, "182.92.203.182", "root", "CICV2022test");
|
|
|
-//
|
|
|
-// //1 查询出正在运行中的 project
|
|
|
-// List<String> projectIdList = projectMapper.selectIdByState("20");
|
|
|
-// log.info("TickScheduler-------checkProject 查询出正在运行中的 project" + projectIdList);
|
|
|
-// //2 根据 projectId 获取 pod
|
|
|
-// projectIdList.forEach(projectId -> {
|
|
|
-//
|
|
|
-// String key = manualProjectTopic + ":" + projectId + ":check";
|
|
|
-// String nowString = TimeUtil.getNowString();
|
|
|
-//
|
|
|
-// try {
|
|
|
-// String lastNowString = redisTemplate.opsForValue().get(manualProjectTopic + ":" + projectId + ":check");
|
|
|
-// String podList = SshUtil.execute(session, "kubectl get pod | grep project-" + projectId);
|
|
|
-// int taskNumber = StringUtil.countSubString(podList, "project");
|
|
|
-// if (StringUtil.isEmpty(lastNowString) && taskNumber == 0) {
|
|
|
-// redisTemplate.opsForValue().set(key, nowString);
|
|
|
-// }
|
|
|
-//
|
|
|
-// if (StringUtil.isNotEmpty(lastNowString) && taskNumber == 0) {
|
|
|
-// // 判断两次是否超过2分钟
|
|
|
-// //3 如果 pod 为空,则重启 job
|
|
|
-// long lastNow = Long.parseLong(lastNowString);
|
|
|
-// long now = Long.parseLong(nowString);
|
|
|
-// if (now - lastNow > 2L * 60 * 1000) {
|
|
|
-// LinuxUtil.execute("kubectl delete job project-" + projectId);
|
|
|
-// while (true) {
|
|
|
-// log.info("TickScheduler-------checkProject 准备重启项目 " + projectId);
|
|
|
-// Thread.sleep(10000);
|
|
|
-// String podList2 = SshUtil.execute(session, "kubectl get pod | grep project-" + projectId);
|
|
|
-// log.info("TickScheduler-------checkProject 项目 " + projectId + " 剩余的 pod 信息为:" + podList2);
|
|
|
-// int taskNumber2 = StringUtil.countSubString(podList2, "project");
|
|
|
-// if (taskNumber2 == 0) {
|
|
|
-// break;
|
|
|
-// }
|
|
|
-// }
|
|
|
-// log.info("TickScheduler-------checkProject 重新执行项目" + projectId);
|
|
|
-// String jobTemplateYamlPathTarget = jobYaml + "project-" + projectId + ".yaml";
|
|
|
-// LinuxUtil.execute("kubectl apply -f " + jobTemplateYamlPathTarget);
|
|
|
-// }
|
|
|
-// }
|
|
|
-// } catch (IOException | InterruptedException e) {
|
|
|
-// e.printStackTrace();
|
|
|
-// }
|
|
|
-// });
|
|
|
-//
|
|
|
-// session.close();
|
|
|
-// client.stop();
|
|
|
-//
|
|
|
-// }
|
|
|
+ @Scheduled(fixedDelay = 60 * 1000)
|
|
|
+ public void tick() {
|
|
|
+
|
|
|
+ List<TaskPO> executingTaskList = taskMapper.selectExecuting();
|
|
|
+ if (CollectionUtil.isEmpty(executingTaskList)) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+// log.info("------- TickScheduler 查询出所有执行中的任务('Running'):" + executingTaskList);
|
|
|
+ //2 根据 key 查出任务的心跳时间
|
|
|
+ executingTaskList.forEach(task -> {
|
|
|
+ String taskId = task.getId();
|
|
|
+ String projectId = task.getPId();
|
|
|
+ try {
|
|
|
+ String s = redisTemplate.opsForValue().get(manualProjectTopic + ":" + projectId + ":" + taskId);
|
|
|
+// Optional.ofNullable(s).orElseThrow(() -> new RuntimeException("项目 " + projectId + " 下的任务 " + taskId + " 的心跳查询失败"));
|
|
|
+ assert s != null;
|
|
|
+ long tickTime = Long.parseLong(s);
|
|
|
+ long maxSimulationTime = task.getMaxSimulationTime() * 1000;
|
|
|
+ long now = TimeUtil.getNow();
|
|
|
+ long difference = now - tickTime;
|
|
|
+// log.info("------- TickScheduler 任务" + taskId + "心跳时间为:" + tickTime + "最大仿真时间为:" + tickTime + "时间差为:" + difference);
|
|
|
+ if (difference > maxSimulationTime) {
|
|
|
+// log.info("------- TickScheduler 任务" + taskId + "已超时,状态修改为:" + DictConstants.TASK_ABORTED);
|
|
|
+ taskMapper.updateState(taskId, DictConstants.TASK_ABORTED);
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ throw new RuntimeException(e.getMessage());
|
|
|
+ }
|
|
|
+
|
|
|
+ });
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 检查如果有 job 在运行但是 pod 全部关闭的情况,此时需要重启一下 job
|
|
|
+ */
|
|
|
+ @Scheduled(fixedDelay = 60 * 1000)
|
|
|
+ public void checkProject() throws IOException {
|
|
|
+ SshClient client = SshUtil.getClient();
|
|
|
+ ClientSession session = SshUtil.getSession(client, "182.92.203.182", "root", "CICV2022test");
|
|
|
+
|
|
|
+ //1 查询出正在运行中的 project
|
|
|
+ List<String> projectIdList = projectMapper.selectIdByState("20");
|
|
|
+ log.info("TickScheduler-------checkProject 查询出正在运行中的 project" + projectIdList);
|
|
|
+ //2 根据 projectId 获取 pod
|
|
|
+ projectIdList.forEach(projectId -> {
|
|
|
+
|
|
|
+ String key = manualProjectTopic + ":" + projectId + ":check";
|
|
|
+ String nowString = TimeUtil.getNowString();
|
|
|
+
|
|
|
+ try {
|
|
|
+ String lastNowString = redisTemplate.opsForValue().get(manualProjectTopic + ":" + projectId + ":check");
|
|
|
+ String podList = SshUtil.execute(session, "kubectl get pod | grep project-" + projectId);
|
|
|
+ int taskNumber = StringUtil.countSubString(podList, "project");
|
|
|
+ if (StringUtil.isEmpty(lastNowString) && taskNumber == 0) {
|
|
|
+ redisTemplate.opsForValue().set(key, nowString);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (StringUtil.isNotEmpty(lastNowString) && taskNumber == 0) {
|
|
|
+ // 判断两次是否超过2分钟
|
|
|
+ //3 如果 pod 为空,则重启 job
|
|
|
+ long lastNow = Long.parseLong(lastNowString);
|
|
|
+ long now = Long.parseLong(nowString);
|
|
|
+ if (now - lastNow > 2L * 60 * 1000) {
|
|
|
+ LinuxUtil.execute("kubectl delete job project-" + projectId);
|
|
|
+ Thread.sleep(30000);
|
|
|
+ while (true) {
|
|
|
+ log.info("TickScheduler-------checkProject 准备重启项目 " + projectId);
|
|
|
+ String podList2 = SshUtil.execute(session, "kubectl get pod | grep project-" + projectId);
|
|
|
+ log.info("TickScheduler-------checkProject 项目 " + projectId + " 剩余的 pod 信息为:" + podList2);
|
|
|
+ int taskNumber2 = StringUtil.countSubString(podList2, "project");
|
|
|
+ if (taskNumber2 == 0) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ log.info("TickScheduler-------checkProject 重新执行项目" + projectId);
|
|
|
+ String jobTemplateYamlPathTarget = jobYaml + "project-" + projectId + ".yaml";
|
|
|
+ LinuxUtil.execute("kubectl apply -f " + jobTemplateYamlPathTarget);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (IOException | InterruptedException e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ session.close();
|
|
|
+ client.stop();
|
|
|
+
|
|
|
+ }
|
|
|
}
|