|
@@ -56,6 +56,10 @@ public class ProjectUtil {
|
|
@Resource
|
|
@Resource
|
|
private CustomRedisClient customRedisClient;
|
|
private CustomRedisClient customRedisClient;
|
|
|
|
|
|
|
|
+ public String getIsChoiceGpuByProjectId(String projectId) {
|
|
|
|
+ return getProjectByProjectId(projectId).getIsChoiceGpu();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
|
|
@SneakyThrows
|
|
@SneakyThrows
|
|
public void deleteYamlByProjectId(String projectId) {
|
|
public void deleteYamlByProjectId(String projectId) {
|
|
@@ -133,6 +137,7 @@ public class ProjectUtil {
|
|
*/
|
|
*/
|
|
@SneakyThrows
|
|
@SneakyThrows
|
|
public void createNextPod(String userId, String projectId, String projectType, String nodeName, String lastPodName) {
|
|
public void createNextPod(String userId, String projectId, String projectType, String nodeName, String lastPodName) {
|
|
|
|
+ final String isChoiceGpu = getIsChoiceGpuByProjectId(projectId);
|
|
log.info("删除上一个 pod:projectId={},nodeName={},lastPodName={}", projectId, nodeName, lastPodName);
|
|
log.info("删除上一个 pod:projectId={},nodeName={},lastPodName={}", projectId, nodeName, lastPodName);
|
|
String cpuOrderString = stringRedisTemplate.opsForValue().get("project:" + projectId + ":pod:" + lastPodName + ":cpu");
|
|
String cpuOrderString = stringRedisTemplate.opsForValue().get("project:" + projectId + ":pod:" + lastPodName + ":cpu");
|
|
deletePod(lastPodName);
|
|
deletePod(lastPodName);
|
|
@@ -142,7 +147,7 @@ public class ProjectUtil {
|
|
if (CollectionUtil.isEmpty(yamlPathCacheKeySet)) {
|
|
if (CollectionUtil.isEmpty(yamlPathCacheKeySet)) {
|
|
// 如果当前节点没有下一个yaml,则返回一个并行度。
|
|
// 如果当前节点没有下一个yaml,则返回一个并行度。
|
|
log.info("节点 " + nodeName + " 已经执行完被分配的项目 " + projectId + " 的所有 pod。");
|
|
log.info("节点 " + nodeName + " 已经执行完被分配的项目 " + projectId + " 的所有 pod。");
|
|
- incrementOneParallelismOfGpuNode(nodeName);
|
|
|
|
|
|
+ incrementOneParallelism(isChoiceGpu, nodeName);
|
|
releaseLicense(userId, getModelTypeByProjectIdAndProjectType(projectId, projectType), 1);
|
|
releaseLicense(userId, getModelTypeByProjectIdAndProjectType(projectId, projectType), 1);
|
|
} else {
|
|
} else {
|
|
final String yamlPathCacheKey = new ArrayList<>(yamlPathCacheKeySet).get(0);
|
|
final String yamlPathCacheKey = new ArrayList<>(yamlPathCacheKeySet).get(0);
|
|
@@ -549,29 +554,53 @@ public class ProjectUtil {
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
- public void incrementOneParallelismOfGpuNode(String nodeName) {
|
|
|
|
- incrementParallelismOfGpuNode(nodeName, 1L);
|
|
|
|
|
|
+ public void incrementOneParallelism(String isChoiceGpu, String nodeName) {
|
|
|
|
+ incrementParallelism(isChoiceGpu, nodeName, 1L);
|
|
}
|
|
}
|
|
|
|
|
|
- public void incrementParallelismOfGpuNode(String nodeName, long number) {
|
|
|
|
- //1 先检查缓存中的并行度是否超过,超过了就不加缓存的并行度了,常用于测试
|
|
|
|
- String key = "gpu-node:" + nodeName + ":parallelism";
|
|
|
|
- final int currentRestParallelism = Integer.parseInt(customRedisClient.get(key));
|
|
|
|
- final List<NodeModel> nodeList = kubernetesConfiguration.getGpuNodeList();
|
|
|
|
- nodeList.forEach(node -> {
|
|
|
|
- if (nodeName.equals(node.getHostname())) {
|
|
|
|
- if (currentRestParallelism + 1 < node.getParallelism()) {
|
|
|
|
- customRedisClient.increment(key, number);
|
|
|
|
|
|
+ public void incrementParallelism(String isChoiceGpu, String nodeName, long number) {
|
|
|
|
+ if (DictConstants.USE_GPU.equals(isChoiceGpu)) {
|
|
|
|
+ //1 先检查缓存中的并行度是否超过,超过了就不加缓存的并行度了,常用于测试
|
|
|
|
+ String key = "gpu-node:" + nodeName + ":parallelism";
|
|
|
|
+ final int currentRestParallelism = Integer.parseInt(customRedisClient.get(key));
|
|
|
|
+ final List<NodeModel> nodeList = kubernetesConfiguration.getGpuNodeList();
|
|
|
|
+ nodeList.forEach(node -> {
|
|
|
|
+ if (nodeName.equals(node.getHostname())) {
|
|
|
|
+ if (currentRestParallelism + 1 < node.getParallelism()) {
|
|
|
|
+ customRedisClient.increment(key, number);
|
|
|
|
+ }
|
|
}
|
|
}
|
|
- }
|
|
|
|
- });
|
|
|
|
- log.info("归还节点 {} 的 {} 个 GPU 并行度。", nodeName, number);
|
|
|
|
|
|
+ });
|
|
|
|
+ log.info("归还 GPU 节点 {} 的 {} 个并行度。", nodeName, number);
|
|
|
|
+ } else if (DictConstants.USE_CPU.equals(isChoiceGpu)) {
|
|
|
|
+ //1 先检查缓存中的并行度是否超过,超过了就不加缓存的并行度了,常用于测试
|
|
|
|
+ String key = "cpu-node:" + nodeName + ":parallelism";
|
|
|
|
+ final int currentRestParallelism = Integer.parseInt(customRedisClient.get(key));
|
|
|
|
+ final List<NodeModel> nodeList = kubernetesConfiguration.getCpuNodeList();
|
|
|
|
+ nodeList.forEach(node -> {
|
|
|
|
+ if (nodeName.equals(node.getHostname())) {
|
|
|
|
+ if (currentRestParallelism + 1 < node.getParallelism()) {
|
|
|
|
+ customRedisClient.increment(key, number);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ });
|
|
|
|
+ log.info("归还 CPU 节点 {} 的 {} 个并行度。", nodeName, number);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+
|
|
}
|
|
}
|
|
|
|
|
|
- public void decrementParallelismOfGpuNode(String nodeName, long number) {
|
|
|
|
- String key = "gpu-node:" + nodeName + ":parallelism";
|
|
|
|
- customRedisClient.decrement(key, number);
|
|
|
|
- log.info("获取节点 {} 的 {} 个 GPU 并行度。", nodeName, number);
|
|
|
|
|
|
+ public void decrementParallelism(String isChoiceGpu, String nodeName, long number) {
|
|
|
|
+ if (DictConstants.USE_GPU.equals(isChoiceGpu)) {
|
|
|
|
+ String key = "gpu-node:" + nodeName + ":parallelism";
|
|
|
|
+ customRedisClient.decrement(key, number);
|
|
|
|
+ log.info("获取节点 {} 的 {} 个 GPU 并行度。", nodeName, number);
|
|
|
|
+ } else if (DictConstants.USE_CPU.equals(isChoiceGpu)) {
|
|
|
|
+ String key = "cpu-node:" + nodeName + ":parallelism";
|
|
|
|
+ customRedisClient.decrement(key, number);
|
|
|
|
+ log.info("获取节点 {} 的 {} 个 CPU 并行度。", nodeName, number);
|
|
|
|
+ }
|
|
|
|
+
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|