package main import ( commonConfig "cicv-data-closedloop/aarch64/pjibot_guide/common/config" commonService "cicv-data-closedloop/aarch64/pjibot_guide/common/service" "cicv-data-closedloop/aarch64/pjibot_guide/common/variable" "cicv-data-closedloop/aarch64/pjibot_guide/control/pkg" "cicv-data-closedloop/common/config/c_log" "cicv-data-closedloop/common/entity" "cicv-data-closedloop/common/util" "fmt" "net/rpc" "os" "runtime" "time" ) var ( applicationName = "pji-control" localStatus = "idle" cloudStatus = "NONE" lastLocalStatus = "idle" lastCloudStatus = "NONE" localTurnLength = 1 // s,本地状态刷新时间 cloudTurnLength = 60 // s,云端状态刷新时间 renewTurnLength = 3 // s,续约状态刷新时间 waitStopLength = 1 // min,停止master前等待时间 launchedFlag = false renewedFlag = false renewTimer *time.Timer // 续约定时器 RenewDur = 5 // min, 续约时间 maxRetryCount = 10 // 查询配置最大重试次数 ) func init() { runtime.GOMAXPROCS(1) // 初始化日志配置 c_log.InitLog(variable.LogDir, applicationName) // 初始化本地配置文件(第1处配置,在本地文件) commonConfig.InitLocalConfig(variable.LocalConfigPath) // 初始化Oss连接信息 commonConfig.InitOssConfig() // 初始化业务逻辑配置信息,配置文件在oss上(第2处配置,在oss文件) commonConfig.InitCloudConfig() // 初始化rpc客户端,用于杀死旧的采集程序 // 初始化websocket配置 commonConfig.InitWebsocketConfig() } func initRenew() { c_log.GlobalLogger.Info("启动定时器 - 开始。") if renewTimer != nil { renewTimer.Stop() } renewedFlag = true renewTimer = time.AfterFunc(time.Duration(RenewDur)*time.Minute, func() { renewedFlag = false }) c_log.GlobalLogger.Infof("定时时间【%v】分钟 - 成功。", RenewDur) } func renew() { for { time.Sleep(time.Duration(renewTurnLength) * time.Second) if localStatus == "running" && launchedFlag && !renewedFlag { // 设备处于运行状态,数采程序已启动,且尚未续约 c_log.GlobalLogger.Info("设备仍处于运行状态,续约 - 开始。") if renewTimer != nil { renewTimer.Stop() } renewedFlag = true renewTimer = time.AfterFunc(time.Duration(RenewDur)*time.Minute, func() { renewedFlag = false }) c_log.GlobalLogger.Infof("续约时间【%v】分钟 - 成功。", RenewDur) } } } func startMasterNode() { c_log.GlobalLogger.Info("获取数据闭环平台最新配置。") if commonConfig.CheckPlatformConfigStatus(maxRetryCount) { c_log.GlobalLogger.Info("查询到数据闭环平台有配置任务。") commonConfig.InitPlatformConfig() if _, err := util.ExecuteWithPath(commonConfig.LocalConfig.RestartCmd.Dir, commonConfig.LocalConfig.RestartCmd.Name, commonConfig.LocalConfig.RestartCmd.Args...); err != nil { c_log.GlobalLogger.Info("启动新程序失败,【path】=", commonConfig.LocalConfig.RestartCmd.Dir, "【cmd】=", commonConfig.LocalConfig.RestartCmd.Name, commonConfig.LocalConfig.RestartCmd.Args, ":", err) os.Exit(-1) } c_log.GlobalLogger.Info("启动任务,本地执行启动命令:【path】=", commonConfig.LocalConfig.RestartCmd.Dir, "【cmd】=", commonConfig.LocalConfig.RestartCmd.Name, commonConfig.LocalConfig.RestartCmd.Args) initRenew() launchedFlag = true c_log.GlobalLogger.Info("数采程序启动 - 成功。") } else { c_log.GlobalLogger.Error("查询到数据闭环平台没有配置任务,不启动数采程序。") } } func stopMasterNode() { // 发送rpc信号杀死采集程序 var killArgs commonService.KillSignal killArgs = commonService.KillSignal{NodeName: "master", DropUploadData: commonConfig.PlatformConfig.DropUploadData, Restart: false} c_log.GlobalLogger.Info("杀死任务,发送rpc结束信号:", killArgs) KillRpcClient, err := rpc.Dial("tcp", commonConfig.LocalConfig.Node.Ip+":"+commonConfig.CloudConfig.RpcPort) if err != nil { // 此处如果连接失败说明采集程序已经停止了 lastCloudStatus = "NONE" c_log.GlobalLogger.Error("采集程序已经停止:", err) return } reply := 0 if err = KillRpcClient.Call("KillService.Kill", killArgs, &reply); err != nil { c_log.GlobalLogger.Error("发送 rpc 请求到 master 报错:", err) } c_log.GlobalLogger.Info("结束任务后,将数据闭环平台配置置空。") commonConfig.PlatformConfig = commonConfig.PlatformConfigStruct{} if err = KillRpcClient.Close(); err != nil { // 不做处理 } launchedFlag = false c_log.GlobalLogger.Info("数采程序关闭 - 成功。") } func main() { // 更新本地任务状态 go pkg.GetLocalStatus(&localStatus, &lastLocalStatus, localTurnLength) // 更新云端任务状态 go pkg.GetCloudStatus(&cloudStatus, &lastCloudStatus, cloudTurnLength) // 定期检查本地任务状态,执行续约,避免短时间内多次启停 go renew() // 云端任务状态负责更新配置 go pkg.GetCloudConfig(&cloudStatus, &lastCloudStatus, cloudTurnLength) for { if launchedFlag { // 当前已启动master节点 time.Sleep(time.Duration(cloudTurnLength) * time.Second) } else { time.Sleep(time.Duration(localTurnLength) * time.Second) } fmt.Println("localStatus: ", localStatus, "lastLocalStatus: ", lastLocalStatus) fmt.Println("cloudStatus: ", cloudStatus, "lastCloudStatus: ", lastCloudStatus) // 综合判断 cloudStatus 和 localStatus // cloudStatus // UN_CHANGE 没有新的任务,无需更改 // CHANGE 有新的任务,需要杀死旧的数采任务并重启 // NONE 设备没有配置任务,需要杀死旧的数采任务 // localStatus // idle 空闲状态,此状态下不启动数采任务 // running 繁忙状态,此状态需要启动数采任务 // error 错误状态,此状态下不启动数采任务 // 本地任务状态负责启停master if localStatus == "running" { if !launchedFlag { // 目前未启动数采程序 c_log.GlobalLogger.Info("数采程序启动 - 开始。") startMasterNode() } } else if localStatus == "idle" { if !renewedFlag && launchedFlag && len(entity.TimeWindowConsumerQueue) == 0 { time.Sleep(time.Duration(waitStopLength) * time.Minute) c_log.GlobalLogger.Info("设备不在运行状态且没有待处理的数据,数采程序关闭 - 开始。") stopMasterNode() } } else if localStatus == "error" { c_log.GlobalLogger.Error("设备运行状态出错,停止数采程序。") stopMasterNode() } else { c_log.GlobalLogger.Error("未知的设备运行状态。【status】=", localStatus) } } }