LingxinMeng 1 year ago
parent
commit
bbb0e2deab

+ 1 - 0
amd64/dispatch_server/package/domain/comm_with_redis.go

@@ -26,6 +26,7 @@ func CanRunCluster() (bool, infra.GpuNode, error) {
 	if err != nil {
 	if err != nil {
 		return false, infra.GpuNode{}, errors.New("获取集群并行度列表 " + global.KeyGpuNodeList + " 失败,错误信息为:" + util.ToString(err))
 		return false, infra.GpuNode{}, errors.New("获取集群并行度列表 " + global.KeyGpuNodeList + " 失败,错误信息为:" + util.ToString(err))
 	}
 	}
+	infra.GlobalLogger.Infof("集群并行度列表为:%v", util.ToString(gpuNodeJsons))
 	var maxNode infra.GpuNode
 	var maxNode infra.GpuNode
 	var maxParallelism int64
 	var maxParallelism int64
 	var can bool
 	var can bool

+ 4 - 0
amd64/dispatch_server/package/service/run_task.go

@@ -76,6 +76,7 @@ func RunWaitingCluster() {
 		}
 		}
 		var firstTaskCache entity.TaskCache
 		var firstTaskCache entity.TaskCache
 		if can {
 		if can {
+			infra.GlobalLogger.Infof("节点 %v 有剩余并行度。", gpuNode)
 			// 取出但不移除
 			// 取出但不移除
 			firstTaskCacheJson, err := infra.GlobalRedisClient.LIndex(global.KeyTaskQueueWaitingCluster, 0).Result()
 			firstTaskCacheJson, err := infra.GlobalRedisClient.LIndex(global.KeyTaskQueueWaitingCluster, 0).Result()
 			if err != nil {
 			if err != nil {
@@ -92,6 +93,9 @@ func RunWaitingCluster() {
 				infra.GlobalLogger.Error(err)
 				infra.GlobalLogger.Error(err)
 				continue
 				continue
 			}
 			}
+		} else {
+			infra.GlobalLogger.Infof("集群没有剩余并行度。")
+			continue
 		}
 		}
 		global.GpuNodeListMutex.Unlock()
 		global.GpuNodeListMutex.Unlock()
 		// 获取项目ID
 		// 获取项目ID