|
@@ -1,55 +1,58 @@
|
|
|
-package com.css.simulation.resource.scheduler.scheduler;
|
|
|
-
|
|
|
-import api.common.pojo.constants.DictConstants;
|
|
|
-import api.common.util.*;
|
|
|
-import com.css.simulation.resource.scheduler.mapper.ProjectMapper;
|
|
|
-import com.css.simulation.resource.scheduler.mapper.TaskMapper;
|
|
|
-import com.css.simulation.resource.scheduler.pojo.po.TaskPO;
|
|
|
-import com.css.simulation.resource.scheduler.service.TaskService;
|
|
|
-import lombok.extern.slf4j.Slf4j;
|
|
|
-import org.apache.sshd.client.SshClient;
|
|
|
-import org.apache.sshd.client.session.ClientSession;
|
|
|
-import org.springframework.beans.factory.annotation.Autowired;
|
|
|
-import org.springframework.beans.factory.annotation.Value;
|
|
|
-import org.springframework.data.redis.core.StringRedisTemplate;
|
|
|
-import org.springframework.scheduling.annotation.Scheduled;
|
|
|
-import org.springframework.stereotype.Component;
|
|
|
-
|
|
|
-import java.io.IOException;
|
|
|
-import java.util.ArrayList;
|
|
|
-import java.util.List;
|
|
|
-
|
|
|
-@Component
|
|
|
-@Slf4j
|
|
|
-public class TickScheduler {
|
|
|
-
|
|
|
- @Value("${scheduler.manual-project.topic}")
|
|
|
- String manualProjectTopic;
|
|
|
- @Autowired
|
|
|
- StringRedisTemplate redisTemplate;
|
|
|
-
|
|
|
- @Autowired
|
|
|
- TaskService taskService;
|
|
|
-
|
|
|
- @Autowired
|
|
|
- TaskMapper taskMapper;
|
|
|
- @Autowired
|
|
|
- ProjectMapper projectMapper;
|
|
|
- @Value("${scheduler.manual-project.job-yaml}")
|
|
|
- String jobYaml;
|
|
|
-
|
|
|
- @Value("${scheduler.score.hostname}")
|
|
|
- String hostnameScore;
|
|
|
- @Value("${scheduler.score.username}")
|
|
|
- String usernameScore;
|
|
|
- @Value("${scheduler.score.password}")
|
|
|
- String passwordScore;
|
|
|
-
|
|
|
-
|
|
|
+//package com.css.simulation.resource.scheduler.scheduler;
|
|
|
+//
|
|
|
+//import api.common.pojo.constants.DictConstants;
|
|
|
+//import api.common.util.*;
|
|
|
+//import com.css.simulation.resource.scheduler.mapper.ProjectMapper;
|
|
|
+//import com.css.simulation.resource.scheduler.mapper.TaskMapper;
|
|
|
+//import com.css.simulation.resource.scheduler.pojo.po.TaskPO;
|
|
|
+//import com.css.simulation.resource.scheduler.service.TaskService;
|
|
|
+//import lombok.extern.slf4j.Slf4j;
|
|
|
+//import org.apache.sshd.client.SshClient;
|
|
|
+//import org.apache.sshd.client.session.ClientSession;
|
|
|
+//import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+//import org.springframework.beans.factory.annotation.Value;
|
|
|
+//import org.springframework.data.redis.core.StringRedisTemplate;
|
|
|
+//import org.springframework.scheduling.annotation.Scheduled;
|
|
|
+//import org.springframework.stereotype.Component;
|
|
|
+//
|
|
|
+//import java.io.IOException;
|
|
|
+//import java.util.ArrayList;
|
|
|
+//import java.util.List;
|
|
|
+//import java.util.Set;
|
|
|
+//
|
|
|
+//@Component
|
|
|
+//@Slf4j
|
|
|
+//public class TickScheduler {
|
|
|
+//
|
|
|
+// @Value("${scheduler.manual-project.topic}")
|
|
|
+// String manualProjectTopic;
|
|
|
+// @Autowired
|
|
|
+// StringRedisTemplate redisTemplate;
|
|
|
+//
|
|
|
+// @Autowired
|
|
|
+// TaskService taskService;
|
|
|
+//
|
|
|
+// @Autowired
|
|
|
+// TaskMapper taskMapper;
|
|
|
+// @Autowired
|
|
|
+// ProjectMapper projectMapper;
|
|
|
+// @Value("${scheduler.manual-project.job-yaml}")
|
|
|
+// String jobYaml;
|
|
|
+//
|
|
|
+// @Value("${scheduler.score.hostname}")
|
|
|
+// String hostnameScore;
|
|
|
+// @Value("${scheduler.score.username}")
|
|
|
+// String usernameScore;
|
|
|
+// @Value("${scheduler.score.password}")
|
|
|
+// String passwordScore;
|
|
|
+//
|
|
|
+//
|
|
|
// @Scheduled(fixedDelay = 60 * 1000)
|
|
|
// public void retry() throws IOException {
|
|
|
//
|
|
|
// //1 从 redis 获取 手动运行项目的 key 列表
|
|
|
+// Set<String> keys = redisTemplate.keys("manualProject:*");
|
|
|
+//
|
|
|
// //2 根据 key 列表从 redis 获取 pod 列表
|
|
|
// //3 通过 kubernetes 获取 pod 列表
|
|
|
// //4 比对 redis 中的 pod 列表 和 kubernetes 中的 pod 列表,如果有 redis 中存在但 kubernetes 中不存在则准备重试
|
|
@@ -57,102 +60,102 @@ public class TickScheduler {
|
|
|
// //4-2 查看重试次数是否为 3
|
|
|
// //4-3 如果重试次数小于 3 则从 redis 获取 message 并重新发送给 kafka
|
|
|
// }
|
|
|
-
|
|
|
- @Scheduled(fixedDelay = 60 * 1000)
|
|
|
- public void tick() throws IOException {
|
|
|
-
|
|
|
- SshClient client = SshUtil.getClient();
|
|
|
- ClientSession session = SshUtil.getSession(client, hostnameScore, usernameScore, passwordScore);
|
|
|
-
|
|
|
- ArrayList<TaskPO> executingTaskList = taskMapper.selectExecuting();
|
|
|
- if (CollectionUtil.isEmpty(executingTaskList)) {
|
|
|
- // log.info("------- TickScheduler 查询出所有执行中的任务('Running'):" + executingTaskList);
|
|
|
- //2 根据 key 查出任务的心跳时间
|
|
|
- for (TaskPO task : executingTaskList) {
|
|
|
- String taskId = task.getId();
|
|
|
- String projectId = task.getPId();
|
|
|
- String s = redisTemplate.opsForValue().get(manualProjectTopic + ":" + projectId + ":" + taskId + ":tick");
|
|
|
-// Optional.ofNullable(s).orElseThrow(() -> new RuntimeException("项目 " + projectId + " 下的任务 " + taskId + " 的心跳查询失败"));
|
|
|
- assert s != null;
|
|
|
- long tickTime = Long.parseLong(s);
|
|
|
- long timeout = 2 * 60 * 1000L;
|
|
|
- long now = TimeUtil.getNow();
|
|
|
- long difference = now - tickTime;
|
|
|
-// log.info("------- TickScheduler 任务" + taskId + "心跳时间为:" + tickTime + "最大仿真时间为:" + tickTime + "时间差为:" + difference);
|
|
|
- if (difference > timeout) {
|
|
|
- String podName = redisTemplate.opsForValue().get(manualProjectTopic + ":" + projectId + ":" + taskId + ":pod");
|
|
|
- String podDeleteCommand = "kubectl delete pod " + podName;
|
|
|
- if (podName != null) {
|
|
|
- log.info("TickScheduler--tick 修改任务 " + taskId + "已超时,状态修改为 Aborted,pod 名称为:" + podName
|
|
|
- + ",并执行删除 pod 命令:" + podDeleteCommand);
|
|
|
- SshUtil.execute(session, podDeleteCommand);
|
|
|
-// taskManager.updateFailStateWithStopTime(taskId, state, TimeUtil.getNowForMysql()); // 如果任务 abort 代表项目失败
|
|
|
- taskMapper.updateFailStateWithStopTime(taskId, DictConstants.TASK_ABORTED, TimeUtil.getNowForMysql(), DictConstants.TASK_ERROR_REASON_1);
|
|
|
- redisTemplate.delete("podName:" + taskId);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- session.close();
|
|
|
- client.stop();
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
- /**
|
|
|
- * 检查如果有 job 在运行但是 pod 全部关闭的情况,此时需要重启一下 job
|
|
|
- */
|
|
|
- @Scheduled(fixedDelay = 30 * 1000)
|
|
|
- public void checkProject() throws IOException {
|
|
|
- SshClient client = SshUtil.getClient();
|
|
|
- ClientSession session = SshUtil.getSession(client, hostnameScore, usernameScore, passwordScore);
|
|
|
-
|
|
|
- //1 查询出正在运行中的 project
|
|
|
- List<String> projectIdList = projectMapper.selectIdByState("20");
|
|
|
- log.info("TickScheduler-------checkProject 查询出正在运行中的 project" + projectIdList);
|
|
|
- //2 根据 projectId 获取 pod
|
|
|
- projectIdList.forEach(projectId -> {
|
|
|
- try {
|
|
|
- String key = manualProjectTopic + ":" + projectId + ":check";
|
|
|
- String lastNowString = redisTemplate.opsForValue().get(manualProjectTopic + ":" + projectId + ":check");
|
|
|
- String podList = SshUtil.execute(session, "kubectl get pod | grep project-" + projectId);
|
|
|
- log.info("TickScheduler-------checkProject 项目 " + projectId + " 正在运行的 pod 为:\n" + podList);
|
|
|
- int taskNumber = StringUtil.countSubString(podList, "project");
|
|
|
- if (StringUtil.isEmpty(lastNowString) && taskNumber == 0) {
|
|
|
- redisTemplate.opsForValue().set(key, TimeUtil.getNowString());
|
|
|
- }
|
|
|
-
|
|
|
- if (StringUtil.isNotEmpty(lastNowString) && taskNumber == 0) {
|
|
|
- // 判断两次是否超过2分钟
|
|
|
- //3 如果 pod 为空,则重启 job
|
|
|
- long lastNow = Long.parseLong(lastNowString);
|
|
|
- long now = Long.parseLong(TimeUtil.getNowString());
|
|
|
-
|
|
|
- if (now - lastNow > (long) 120 * 1000) {
|
|
|
- redisTemplate.opsForValue().set(key, TimeUtil.getNowString());
|
|
|
- SshUtil.execute(session, "kubectl delete job project-" + projectId);
|
|
|
- Thread.sleep(15000);
|
|
|
- while (true) {
|
|
|
- log.info("TickScheduler-------checkProject 准备重启项目 " + projectId);
|
|
|
- String podList2 = SshUtil.execute(session, "kubectl get pod | grep project-" + projectId);
|
|
|
- log.info("TickScheduler-------checkProject 项目 " + projectId + " 剩余的 pod 信息为:\n" + podList2);
|
|
|
- int taskNumber2 = StringUtil.countSubString(podList2, "project");
|
|
|
- if (taskNumber2 == 0) {
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- Thread.sleep(15000);
|
|
|
- log.info("TickScheduler-------checkProject 重新执行项目" + projectId);
|
|
|
- String jobTemplateYamlPathTarget = jobYaml + "project-" + projectId + ".yaml";
|
|
|
- SshUtil.execute(session, "kubectl apply -f " + jobTemplateYamlPathTarget);
|
|
|
- }
|
|
|
- }
|
|
|
- } catch (IOException | InterruptedException e) {
|
|
|
- e.printStackTrace();
|
|
|
- }
|
|
|
- });
|
|
|
-
|
|
|
- session.close();
|
|
|
- client.stop();
|
|
|
-
|
|
|
- }
|
|
|
-}
|
|
|
+//
|
|
|
+// @Scheduled(fixedDelay = 60 * 1000)
|
|
|
+// public void tick() throws IOException {
|
|
|
+//
|
|
|
+// SshClient client = SshUtil.getClient();
|
|
|
+// ClientSession session = SshUtil.getSession(client, hostnameScore, usernameScore, passwordScore);
|
|
|
+//
|
|
|
+// ArrayList<TaskPO> executingTaskList = taskMapper.selectExecuting();
|
|
|
+// if (CollectionUtil.isEmpty(executingTaskList)) {
|
|
|
+// // log.info("------- TickScheduler 查询出所有执行中的任务('Running'):" + executingTaskList);
|
|
|
+// //2 根据 key 查出任务的心跳时间
|
|
|
+// for (TaskPO task : executingTaskList) {
|
|
|
+// String taskId = task.getId();
|
|
|
+// String projectId = task.getPId();
|
|
|
+// String s = redisTemplate.opsForValue().get(manualProjectTopic + ":" + projectId + ":" + taskId + ":tick");
|
|
|
+//// Optional.ofNullable(s).orElseThrow(() -> new RuntimeException("项目 " + projectId + " 下的任务 " + taskId + " 的心跳查询失败"));
|
|
|
+// assert s != null;
|
|
|
+// long tickTime = Long.parseLong(s);
|
|
|
+// long timeout = 2 * 60 * 1000L;
|
|
|
+// long now = TimeUtil.getNow();
|
|
|
+// long difference = now - tickTime;
|
|
|
+//// log.info("------- TickScheduler 任务" + taskId + "心跳时间为:" + tickTime + "最大仿真时间为:" + tickTime + "时间差为:" + difference);
|
|
|
+// if (difference > timeout) {
|
|
|
+// String podName = redisTemplate.opsForValue().get(manualProjectTopic + ":" + projectId + ":" + taskId + ":pod");
|
|
|
+// String podDeleteCommand = "kubectl delete pod " + podName;
|
|
|
+// if (podName != null) {
|
|
|
+// log.info("TickScheduler--tick 修改任务 " + taskId + "已超时,状态修改为 Aborted,pod 名称为:" + podName
|
|
|
+// + ",并执行删除 pod 命令:" + podDeleteCommand);
|
|
|
+// SshUtil.execute(session, podDeleteCommand);
|
|
|
+//// taskManager.updateFailStateWithStopTime(taskId, state, TimeUtil.getNowForMysql()); // 如果任务 abort 代表项目失败
|
|
|
+// taskMapper.updateFailStateWithStopTime(taskId, DictConstants.TASK_ABORTED, TimeUtil.getNowForMysql(), DictConstants.TASK_ERROR_REASON_1);
|
|
|
+// redisTemplate.delete("podName:" + taskId);
|
|
|
+// }
|
|
|
+// }
|
|
|
+// }
|
|
|
+// }
|
|
|
+// session.close();
|
|
|
+// client.stop();
|
|
|
+// }
|
|
|
+//
|
|
|
+//
|
|
|
+// /**
|
|
|
+// * 检查如果有 job 在运行但是 pod 全部关闭的情况,此时需要重启一下 job
|
|
|
+// */
|
|
|
+// @Scheduled(fixedDelay = 30 * 1000)
|
|
|
+// public void checkProject() throws IOException {
|
|
|
+// SshClient client = SshUtil.getClient();
|
|
|
+// ClientSession session = SshUtil.getSession(client, hostnameScore, usernameScore, passwordScore);
|
|
|
+//
|
|
|
+// //1 查询出正在运行中的 project
|
|
|
+// List<String> projectIdList = projectMapper.selectIdByState("20");
|
|
|
+// log.info("TickScheduler-------checkProject 查询出正在运行中的 project" + projectIdList);
|
|
|
+// //2 根据 projectId 获取 pod
|
|
|
+// projectIdList.forEach(projectId -> {
|
|
|
+// try {
|
|
|
+// String key = manualProjectTopic + ":" + projectId + ":check";
|
|
|
+// String lastNowString = redisTemplate.opsForValue().get(manualProjectTopic + ":" + projectId + ":check");
|
|
|
+// String podList = SshUtil.execute(session, "kubectl get pod | grep project-" + projectId);
|
|
|
+// log.info("TickScheduler-------checkProject 项目 " + projectId + " 正在运行的 pod 为:\n" + podList);
|
|
|
+// int taskNumber = StringUtil.countSubString(podList, "project");
|
|
|
+// if (StringUtil.isEmpty(lastNowString) && taskNumber == 0) {
|
|
|
+// redisTemplate.opsForValue().set(key, TimeUtil.getNowString());
|
|
|
+// }
|
|
|
+//
|
|
|
+// if (StringUtil.isNotEmpty(lastNowString) && taskNumber == 0) {
|
|
|
+// // 判断两次是否超过2分钟
|
|
|
+// //3 如果 pod 为空,则重启 job
|
|
|
+// long lastNow = Long.parseLong(lastNowString);
|
|
|
+// long now = Long.parseLong(TimeUtil.getNowString());
|
|
|
+//
|
|
|
+// if (now - lastNow > (long) 120 * 1000) {
|
|
|
+// redisTemplate.opsForValue().set(key, TimeUtil.getNowString());
|
|
|
+// SshUtil.execute(session, "kubectl delete job project-" + projectId);
|
|
|
+// Thread.sleep(15000);
|
|
|
+// while (true) {
|
|
|
+// log.info("TickScheduler-------checkProject 准备重启项目 " + projectId);
|
|
|
+// String podList2 = SshUtil.execute(session, "kubectl get pod | grep project-" + projectId);
|
|
|
+// log.info("TickScheduler-------checkProject 项目 " + projectId + " 剩余的 pod 信息为:\n" + podList2);
|
|
|
+// int taskNumber2 = StringUtil.countSubString(podList2, "project");
|
|
|
+// if (taskNumber2 == 0) {
|
|
|
+// break;
|
|
|
+// }
|
|
|
+// }
|
|
|
+// Thread.sleep(15000);
|
|
|
+// log.info("TickScheduler-------checkProject 重新执行项目" + projectId);
|
|
|
+// String jobTemplateYamlPathTarget = jobYaml + "project-" + projectId + ".yaml";
|
|
|
+// SshUtil.execute(session, "kubectl apply -f " + jobTemplateYamlPathTarget);
|
|
|
+// }
|
|
|
+// }
|
|
|
+// } catch (IOException | InterruptedException e) {
|
|
|
+// e.printStackTrace();
|
|
|
+// }
|
|
|
+// });
|
|
|
+//
|
|
|
+// session.close();
|
|
|
+// client.stop();
|
|
|
+//
|
|
|
+// }
|
|
|
+//}
|