|
@@ -89,6 +89,7 @@ public class TaskManager {
|
|
@SneakyThrows
|
|
@SneakyThrows
|
|
@Transactional
|
|
@Transactional
|
|
public boolean isProjectCompleted(PrefixTO redisPrefix, String projectId, String taskId, String state, String podName) {
|
|
public boolean isProjectCompleted(PrefixTO redisPrefix, String projectId, String taskId, String state, String podName) {
|
|
|
|
+ String nodeName = projectUtil.getNodeNameOfPod(podName);
|
|
if ("Running".equals(state)) { // 运行中的 pod 无需删除
|
|
if ("Running".equals(state)) { // 运行中的 pod 无需删除
|
|
// 将运行中的任务的 pod 名称放入 redis
|
|
// 将运行中的任务的 pod 名称放入 redis
|
|
stringRedisTemplate.opsForValue().set(redisPrefix.getTaskPodKey(), podName);
|
|
stringRedisTemplate.opsForValue().set(redisPrefix.getTaskPodKey(), podName);
|
|
@@ -96,11 +97,10 @@ public class TaskManager {
|
|
log.info("TaskManager--state 修改任务 " + taskId + " 的状态为 " + state + ",pod 名称为:" + podName);
|
|
log.info("TaskManager--state 修改任务 " + taskId + " 的状态为 " + state + ",pod 名称为:" + podName);
|
|
taskMapper.updateStateWithStartTime(taskId, state, TimeUtil.getNowForMysql());
|
|
taskMapper.updateStateWithStartTime(taskId, state, TimeUtil.getNowForMysql());
|
|
return false;
|
|
return false;
|
|
- } else { // 结束的 pod 都直接删除
|
|
|
|
|
|
+ } else { // 结束的 pod 都直接删除,并判断项目是否完成
|
|
// -------------------------------- 处理状态 --------------------------------
|
|
// -------------------------------- 处理状态 --------------------------------
|
|
//TODO 暂时不用重试操作
|
|
//TODO 暂时不用重试操作
|
|
try {
|
|
try {
|
|
- KubernetesUtil.deletePod(apiClient, kubernetesNamespace, podName);
|
|
|
|
log.info("TaskManager--state 修改任务 " + taskId + "的状态为 " + state + ",pod 名称为:" + podName + ",并删除 pod。");
|
|
log.info("TaskManager--state 修改任务 " + taskId + "的状态为 " + state + ",pod 名称为:" + podName + ",并删除 pod。");
|
|
if ("Aborted".equals(state)) {
|
|
if ("Aborted".equals(state)) {
|
|
String minioPathOfErrorLog = resultPathMinio + projectId + "/" + taskId + "error.log";
|
|
String minioPathOfErrorLog = resultPathMinio + projectId + "/" + taskId + "error.log";
|
|
@@ -129,21 +129,21 @@ public class TaskManager {
|
|
} else if ("PendingAnalysis".equals(state)) {
|
|
} else if ("PendingAnalysis".equals(state)) {
|
|
taskMapper.updateSuccessStateWithStopTime(taskId, state, TimeUtil.getNowForMysql());
|
|
taskMapper.updateSuccessStateWithStopTime(taskId, state, TimeUtil.getNowForMysql());
|
|
}
|
|
}
|
|
- } catch (io.kubernetes.client.openapi.ApiException apiException) {
|
|
|
|
- log.error("TaskManager--isCompleted pod " + podName + " 已经被手动删除,该项目可能已经失败或删除。");
|
|
|
|
- return false;
|
|
|
|
- }
|
|
|
|
- // -------------------------------- 判断项目是否结束 --------------------------------
|
|
|
|
- ProjectMessageDTO projectMessageDTO = JsonUtil.jsonToBean(stringRedisTemplate.opsForValue().get(redisPrefix.getProjectRunningKey()), ProjectMessageDTO.class);
|
|
|
|
- int taskTotal = projectMessageDTO.getTaskTotal();
|
|
|
|
- int taskCompleted = projectMessageDTO.getTaskCompleted();
|
|
|
|
- log.info("TaskManager--isProjectCompleted 项目 " + projectId + " 完成进度为:" + (taskCompleted + 1) + "/" + taskTotal);
|
|
|
|
- if (taskCompleted + 1 == taskTotal) {
|
|
|
|
- return true;
|
|
|
|
- } else {
|
|
|
|
- projectMessageDTO.setTaskCompleted(taskCompleted + 1);
|
|
|
|
- stringRedisTemplate.opsForValue().set(redisPrefix.getProjectRunningKey(), JsonUtil.beanToJson(projectMessageDTO));
|
|
|
|
- createNextPod(projectId, podName); // 项目没有完成则启动下一个 pod
|
|
|
|
|
|
+ // -------------------------------- 判断项目是否结束 --------------------------------
|
|
|
|
+ ProjectMessageDTO projectMessageDTO = JsonUtil.jsonToBean(stringRedisTemplate.opsForValue().get(redisPrefix.getProjectRunningKey()), ProjectMessageDTO.class);
|
|
|
|
+ int taskTotal = projectMessageDTO.getTaskTotal();
|
|
|
|
+ int taskCompleted = projectMessageDTO.getTaskCompleted();
|
|
|
|
+ log.info("TaskManager--isProjectCompleted 项目 " + projectId + " 完成进度为:" + (taskCompleted + 1) + "/" + taskTotal);
|
|
|
|
+ if (taskCompleted + 1 == taskTotal) {
|
|
|
|
+ return true;
|
|
|
|
+ } else {
|
|
|
|
+ projectMessageDTO.setTaskCompleted(taskCompleted + 1);
|
|
|
|
+ stringRedisTemplate.opsForValue().set(redisPrefix.getProjectRunningKey(), JsonUtil.beanToJson(projectMessageDTO));
|
|
|
|
+ createNextPod(projectId, nodeName, podName); // project not finished: delete the previous pod and start the next one (argument order follows createNextPod(projectId, nodeName, lastPodName))
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
+ } catch (Exception exception) {
|
|
|
|
+ log.error("TaskManager--isCompleted pod " + podName + " 已经被手动删除,该项目可能已经失败或删除。", exception);
|
|
return false;
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -152,17 +152,21 @@ public class TaskManager {
|
|
/**
|
|
/**
|
|
* 更改一个名字继续启动
|
|
* 更改一个名字继续启动
|
|
*
|
|
*
|
|
- * @param projectId 项目 id
|
|
|
|
- * @param lastPodName 项目名称
|
|
|
|
|
|
+ * @param projectId 项目 id
|
|
|
|
+ * @param nodeName 运行 pod 的节点名称
|
|
|
|
+ * @param lastPodName name of the previous pod; it is deleted and its YAML file is reused as the template for the next pod
|
|
*/
|
|
*/
|
|
@SneakyThrows
|
|
@SneakyThrows
|
|
- public void createNextPod(String projectId, String lastPodName) {
|
|
|
|
|
|
+ public void createNextPod(String projectId, String nodeName, String lastPodName) {
|
|
|
|
+ //1 删除上一个 pod 和 redis 键值对
|
|
|
|
+ KubernetesUtil.deletePod(apiClient, kubernetesNamespace, lastPodName);
|
|
|
|
+ projectUtil.deleteNodeNameOfPod(lastPodName); // remove the redis entry that records which node ran the previous pod
|
|
String lastPodString = FileUtil.read(podYamlDirectory + lastPodName + ".yaml");
|
|
String lastPodString = FileUtil.read(podYamlDirectory + lastPodName + ".yaml");
|
|
String nextPodName = "project-" + projectId + "-" + StringUtil.getRandomUUID();
|
|
String nextPodName = "project-" + projectId + "-" + StringUtil.getRandomUUID();
|
|
String nextPodString = lastPodString.replace(lastPodName, nextPodName); // pod 名称包括 projectId 和 随机字符串
|
|
String nextPodString = lastPodString.replace(lastPodName, nextPodName); // pod 名称包括 projectId 和 随机字符串
|
|
String nextPodFileName = nextPodName + ".yaml"; // 实际执行 pod 的文件名称
|
|
String nextPodFileName = nextPodName + ".yaml"; // 实际执行 pod 的文件名称
|
|
log.info("TaskManager--createNextPod 创建项目 " + projectId + " 的下一个 pod。");
|
|
log.info("TaskManager--createNextPod 创建项目 " + projectId + " 的下一个 pod。");
|
|
- projectUtil.createPod(nextPodString, nextPodFileName);
|
|
|
|
|
|
+ projectUtil.createPod(nodeName, nextPodString, nextPodFileName);
|
|
}
|
|
}
|
|
|
|
|
|
public void prepareScore(String projectRunningKey) {
|
|
public void prepareScore(String projectRunningKey) {
|
|
@@ -436,21 +440,18 @@ public class TaskManager {
|
|
// SshUtil.stop(clientKafka, sessionKafka);
|
|
// SshUtil.stop(clientKafka, sessionKafka);
|
|
|
|
|
|
|
|
|
|
- Map<String, Integer> nodeMap = projectUtil.getNodeMap();
|
|
|
|
- List<String> podList = KubernetesUtil.getPodByPrefix(apiClient, kubernetesNamespace, "project-" + projectId);
|
|
|
|
- for (String tempPodName : podList) {
|
|
|
|
- // 删除该 project 下的所有 pod
|
|
|
|
- KubernetesUtil.deletePod(apiClient, kubernetesNamespace, tempPodName);
|
|
|
|
- // 归还并行度
|
|
|
|
- String tempNodeName = stringRedisTemplate.opsForValue().get("pod:" + tempPodName + ":node");
|
|
|
|
- stringRedisTemplate.delete("pod:" + tempPodName + ":node");
|
|
|
|
- int restParallelism = nodeMap.get(tempNodeName);
|
|
|
|
- nodeMap.put(tempNodeName, restParallelism + 1);
|
|
|
|
|
|
+ // 归还并行度
|
|
|
|
+ Set<String> nodeOfPodKeySet = stringRedisTemplate.keys("pod:project-" + projectId + "*"); // KEYS takes a glob pattern; without the trailing * only the exact key would match
|
|
|
|
+ for (String nodeOfPodKey : nodeOfPodKeySet) {
|
|
|
|
+ String podName = nodeOfPodKey.split(":")[1];
|
|
|
|
+ String nodeName = projectUtil.getNodeNameOfPod(podName);
|
|
|
|
+ // 删除 pod
|
|
|
|
+ KubernetesUtil.deletePod(apiClient, kubernetesNamespace, podName);
|
|
|
|
+ // 删除 redis key
|
|
|
|
+ projectUtil.deleteNodeNameOfPod(podName);
|
|
|
|
+ // 节点并行度加一
|
|
|
|
+ projectUtil.addOneParallelismToNode(nodeName);
|
|
}
|
|
}
|
|
- nodeMap.forEach((tempNodeName, tempParallelism) -> {
|
|
|
|
- String restParallelismKey = "node:" + tempNodeName + ":parallelism";
|
|
|
|
- stringRedisTemplate.opsForValue().set(restParallelismKey, tempParallelism + "");
|
|
|
|
- });
|
|
|
|
|
|
|
|
// 删除 redis 中的键值对
|
|
// 删除 redis 中的键值对
|
|
Set<String> keys = stringRedisTemplate.keys(redisPrefix.getProjectRunningKey() + "*");
|
|
Set<String> keys = stringRedisTemplate.keys(redisPrefix.getProjectRunningKey() + "*");
|