|
@@ -89,6 +89,7 @@ public class TaskManager {
|
|
|
@SneakyThrows
|
|
|
@Transactional
|
|
|
public boolean isProjectCompleted(PrefixTO redisPrefix, String projectId, String taskId, String state, String podName) {
|
|
|
+ String nodeName = projectUtil.getNodeNameOfPod(podName);
|
|
|
if ("Running".equals(state)) {
|
|
|
|
|
|
stringRedisTemplate.opsForValue().set(redisPrefix.getTaskPodKey(), podName);
|
|
@@ -96,11 +97,10 @@ public class TaskManager {
|
|
|
log.info("TaskManager--state 修改任务 " + taskId + " 的状态为 " + state + ",pod 名称为:" + podName);
|
|
|
taskMapper.updateStateWithStartTime(taskId, state, TimeUtil.getNowForMysql());
|
|
|
return false;
|
|
|
- } else {
|
|
|
+ } else {
|
|
|
|
|
|
|
|
|
try {
|
|
|
- KubernetesUtil.deletePod(apiClient, kubernetesNamespace, podName);
|
|
|
log.info("TaskManager--state 修改任务 " + taskId + "的状态为 " + state + ",pod 名称为:" + podName + ",并删除 pod。");
|
|
|
if ("Aborted".equals(state)) {
|
|
|
String minioPathOfErrorLog = resultPathMinio + projectId + "/" + taskId + "error.log";
|
|
@@ -129,21 +129,21 @@ public class TaskManager {
|
|
|
} else if ("PendingAnalysis".equals(state)) {
|
|
|
taskMapper.updateSuccessStateWithStopTime(taskId, state, TimeUtil.getNowForMysql());
|
|
|
}
|
|
|
- } catch (io.kubernetes.client.openapi.ApiException apiException) {
|
|
|
- log.error("TaskManager--isCompleted pod " + podName + " 已经被手动删除,该项目可能已经失败或删除。");
|
|
|
- return false;
|
|
|
- }
|
|
|
-
|
|
|
- ProjectMessageDTO projectMessageDTO = JsonUtil.jsonToBean(stringRedisTemplate.opsForValue().get(redisPrefix.getProjectRunningKey()), ProjectMessageDTO.class);
|
|
|
- int taskTotal = projectMessageDTO.getTaskTotal();
|
|
|
- int taskCompleted = projectMessageDTO.getTaskCompleted();
|
|
|
- log.info("TaskManager--isProjectCompleted 项目 " + projectId + " 完成进度为:" + (taskCompleted + 1) + "/" + taskTotal);
|
|
|
- if (taskCompleted + 1 == taskTotal) {
|
|
|
- return true;
|
|
|
- } else {
|
|
|
- projectMessageDTO.setTaskCompleted(taskCompleted + 1);
|
|
|
- stringRedisTemplate.opsForValue().set(redisPrefix.getProjectRunningKey(), JsonUtil.beanToJson(projectMessageDTO));
|
|
|
- createNextPod(projectId, podName);
|
|
|
+
|
|
|
+ ProjectMessageDTO projectMessageDTO = JsonUtil.jsonToBean(stringRedisTemplate.opsForValue().get(redisPrefix.getProjectRunningKey()), ProjectMessageDTO.class);
|
|
|
+ int taskTotal = projectMessageDTO.getTaskTotal();
|
|
|
+ int taskCompleted = projectMessageDTO.getTaskCompleted();
|
|
|
+ log.info("TaskManager--isProjectCompleted 项目 " + projectId + " 完成进度为:" + (taskCompleted + 1) + "/" + taskTotal);
|
|
|
+ if (taskCompleted + 1 == taskTotal) {
|
|
|
+ return true;
|
|
|
+ } else {
|
|
|
+ projectMessageDTO.setTaskCompleted(taskCompleted + 1);
|
|
|
+ stringRedisTemplate.opsForValue().set(redisPrefix.getProjectRunningKey(), JsonUtil.beanToJson(projectMessageDTO));
|
|
|
+ createNextPod(nodeName, projectId, podName);
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ } catch (Exception exception) {
|
|
|
+ log.error("TaskManager--isCompleted pod " + podName + " 已经被手动删除,该项目可能已经失败或删除。", exception);
|
|
|
return false;
|
|
|
}
|
|
|
}
|
|
@@ -152,17 +152,21 @@ public class TaskManager {
|
|
|
|
|
|
* 更改一个名字继续启动
|
|
|
*
|
|
|
- * @param projectId 项目 id
|
|
|
- * @param lastPodName 项目名称
|
|
|
+ * @param projectId 项目 id
|
|
|
+ * @param nodeName 运行 pod 的节点名称
|
|
|
+ * @param lastPodName 项目名称
|
|
|
*/
|
|
|
@SneakyThrows
|
|
|
- public void createNextPod(String projectId, String lastPodName) {
|
|
|
+ public void createNextPod(String projectId, String nodeName, String lastPodName) {
|
|
|
+
|
|
|
+ KubernetesUtil.deletePod(apiClient, kubernetesNamespace, lastPodName);
|
|
|
+ stringRedisTemplate.delete(projectUtil.getNodeNameOfPod(lastPodName));
|
|
|
String lastPodString = FileUtil.read(podYamlDirectory + lastPodName + ".yaml");
|
|
|
String nextPodName = "project-" + projectId + "-" + StringUtil.getRandomUUID();
|
|
|
String nextPodString = lastPodString.replace(lastPodName, nextPodName);
|
|
|
String nextPodFileName = nextPodName + ".yaml";
|
|
|
log.info("TaskManager--createNextPod 创建项目 " + projectId + " 的下一个 pod。");
|
|
|
- projectUtil.createPod(nextPodString, nextPodFileName);
|
|
|
+ projectUtil.createPod(nodeName, nextPodString, nextPodFileName);
|
|
|
}
|
|
|
|
|
|
public void prepareScore(String projectRunningKey) {
|
|
@@ -436,21 +440,18 @@ public class TaskManager {
|
|
|
|
|
|
|
|
|
|
|
|
- Map<String, Integer> nodeMap = projectUtil.getNodeMap();
|
|
|
- List<String> podList = KubernetesUtil.getPodByPrefix(apiClient, kubernetesNamespace, "project-" + projectId);
|
|
|
- for (String tempPodName : podList) {
|
|
|
-
|
|
|
- KubernetesUtil.deletePod(apiClient, kubernetesNamespace, tempPodName);
|
|
|
-
|
|
|
- String tempNodeName = stringRedisTemplate.opsForValue().get("pod:" + tempPodName + ":node");
|
|
|
- stringRedisTemplate.delete("pod:" + tempPodName + ":node");
|
|
|
- int restParallelism = nodeMap.get(tempNodeName);
|
|
|
- nodeMap.put(tempNodeName, restParallelism + 1);
|
|
|
+
|
|
|
+ Set<String> nodeOfPodKeySet = stringRedisTemplate.keys("pod:project-" + projectId);
|
|
|
+ for (String nodeOfPodKey : nodeOfPodKeySet) {
|
|
|
+ String podName = nodeOfPodKey.split(":")[1];
|
|
|
+ String nodeName = projectUtil.getNodeNameOfPod(podName);
|
|
|
+
|
|
|
+ KubernetesUtil.deletePod(apiClient, kubernetesNamespace, podName);
|
|
|
+
|
|
|
+ projectUtil.deleteNodeNameOfPod(podName);
|
|
|
+
|
|
|
+ projectUtil.addOneParallelismToNode(nodeName);
|
|
|
}
|
|
|
- nodeMap.forEach((tempNodeName, tempParallelism) -> {
|
|
|
- String restParallelismKey = "node:" + tempNodeName + ":parallelism";
|
|
|
- stringRedisTemplate.opsForValue().set(restParallelismKey, tempParallelism + "");
|
|
|
- });
|
|
|
|
|
|
|
|
|
Set<String> keys = stringRedisTemplate.keys(redisPrefix.getProjectRunningKey() + "*");
|