|
@@ -21,7 +21,6 @@ import org.springframework.scheduling.annotation.Scheduled;
|
|
import org.springframework.stereotype.Component;
|
|
import org.springframework.stereotype.Component;
|
|
|
|
|
|
import java.io.IOException;
|
|
import java.io.IOException;
|
|
-import java.util.ArrayList;
|
|
|
|
import java.util.List;
|
|
import java.util.List;
|
|
import java.util.Objects;
|
|
import java.util.Objects;
|
|
|
|
|
|
@@ -66,9 +65,10 @@ public class ProjectScheduler {
|
|
|
|
|
|
SshClient client = SshUtil.getClient();
|
|
SshClient client = SshUtil.getClient();
|
|
ClientSession session = SshUtil.getSession(client, hostname, username, password);
|
|
ClientSession session = SshUtil.getSession(client, hostname, username, password);
|
|
- ArrayList<TaskPO> executingTaskList = taskMapper.selectExecuting();
|
|
|
|
|
|
+ List<TaskPO> executingTaskList = taskMapper.selectExecuting();
|
|
|
|
+
|
|
|
|
+ log.info("ProjectScheduler--timeout 正在运行的任务有:" + executingTaskList);
|
|
if (CollectionUtil.isEmpty(executingTaskList)) {
|
|
if (CollectionUtil.isEmpty(executingTaskList)) {
|
|
- //2 根据 key 查出任务的心跳时间
|
|
|
|
executingTaskList.forEach(task -> {
|
|
executingTaskList.forEach(task -> {
|
|
String taskId = task.getId();
|
|
String taskId = task.getId();
|
|
String projectId = task.getPId();
|
|
String projectId = task.getPId();
|
|
@@ -83,6 +83,7 @@ public class ProjectScheduler {
|
|
session.close();
|
|
session.close();
|
|
client.stop();
|
|
client.stop();
|
|
}
|
|
}
|
|
|
|
+
|
|
/**
|
|
/**
|
|
* 解决 pod 莫名全部关闭但是 job 还在的问题
|
|
* 解决 pod 莫名全部关闭但是 job 还在的问题
|
|
* 检查如果有 job 在运行但是 pod 全部关闭的情况,此时需要重启一下 job
|
|
* 检查如果有 job 在运行但是 pod 全部关闭的情况,此时需要重启一下 job
|
|
@@ -98,15 +99,15 @@ public class ProjectScheduler {
|
|
//2 根据 projectId 获取 pod
|
|
//2 根据 projectId 获取 pod
|
|
projectIdList.forEach(projectId -> {
|
|
projectIdList.forEach(projectId -> {
|
|
try {
|
|
try {
|
|
- String key = manualProjectTopic + ":" + projectId + ":check";
|
|
|
|
- String lastNowString = redisTemplate.opsForValue().get(manualProjectTopic + ":" + projectId + ":check");
|
|
|
|
|
|
+
|
|
|
|
+ String checkKey = manualProjectTopic + ":" + projectId + ":check";
|
|
|
|
+ String lastNowString = redisTemplate.opsForValue().get(checkKey);
|
|
String podList = SshUtil.execute(session, "kubectl get pod | grep project-" + projectId);
|
|
String podList = SshUtil.execute(session, "kubectl get pod | grep project-" + projectId);
|
|
log.info("ProjectScheduler-------checkProject 项目 " + projectId + " 正在运行的 pod 为:\n" + podList);
|
|
log.info("ProjectScheduler-------checkProject 项目 " + projectId + " 正在运行的 pod 为:\n" + podList);
|
|
int taskNumber = StringUtil.countSubString(podList, "project");
|
|
int taskNumber = StringUtil.countSubString(podList, "project");
|
|
if (StringUtil.isEmpty(lastNowString) && taskNumber == 0) {
|
|
if (StringUtil.isEmpty(lastNowString) && taskNumber == 0) {
|
|
- redisTemplate.opsForValue().set(key, TimeUtil.getNowString());
|
|
|
|
|
|
+ redisTemplate.opsForValue().set(checkKey, TimeUtil.getNowString());
|
|
}
|
|
}
|
|
-
|
|
|
|
if (StringUtil.isNotEmpty(lastNowString) && taskNumber == 0) {
|
|
if (StringUtil.isNotEmpty(lastNowString) && taskNumber == 0) {
|
|
// 判断两次是否超过2分钟
|
|
// 判断两次是否超过2分钟
|
|
//3 如果 pod 为空,则重启 job
|
|
//3 如果 pod 为空,则重启 job
|
|
@@ -114,7 +115,7 @@ public class ProjectScheduler {
|
|
long now = Long.parseLong(TimeUtil.getNowString());
|
|
long now = Long.parseLong(TimeUtil.getNowString());
|
|
|
|
|
|
if (now - lastNow > (long) 120 * 1000) {
|
|
if (now - lastNow > (long) 120 * 1000) {
|
|
- redisTemplate.opsForValue().set(key, TimeUtil.getNowString());
|
|
|
|
|
|
+ redisTemplate.opsForValue().set(checkKey, TimeUtil.getNowString());
|
|
SshUtil.execute(session, "kubectl delete job project-" + projectId);
|
|
SshUtil.execute(session, "kubectl delete job project-" + projectId);
|
|
Thread.sleep(15000);
|
|
Thread.sleep(15000);
|
|
while (true) {
|
|
while (true) {
|