data_quality.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. ##################################################################
  4. #
  5. # Copyright (c) 2023 CICV, Inc. All Rights Reserved
  6. #
  7. ##################################################################
  8. """
  9. @Authors: yangzihao(yangzihao@china-icv.cn)
  10. @Date: 2023/12/19
  11. @Last Modified: 2023/12/19
  12. @Summary: Data quality evaluation (frame-loss detection).
  13. """
  14. import os
  15. import pandas as pd
  16. from collections import Counter
  17. class DataQuality(object):
  18. def __init__(self, df):
  19. """
  20. TODO: 数据跳变
  21. """
  22. self.df = df
  23. self.frame_list = []
  24. self.frame_diff_list = []
  25. self.frame_diff_counter = {}
  26. self.total_frame_count = 0
  27. self.max_frame_number = 0
  28. self.frame_loss_count = 0
  29. self.frame_loss_rate = 0
  30. self.frame_loss_max = 0
  31. self.frame_loss_max_start = []
  32. # self.quality_detect()
  33. self.result = ""
  34. def quality_detect(self):
  35. self.frame_extract()
  36. if self.frame_list:
  37. self.cal_total_frame_count()
  38. self.cal_max_frame_number()
  39. self.cal_frame_diff()
  40. self.cal_frame_loss_count()
  41. self.cal_frame_loss_rate()
  42. self.cal_frame_loss_max()
  43. self.cal_frame_diff_counter()
  44. self.result_print()
  45. else:
  46. self.result = "No data in this file."
  47. print("No data in this file.")
  48. def frame_extract(self):
  49. self.df = self.df.dropna(subset="simFrame")
  50. self.frame_list = list(set(self.df['simFrame']))
  51. # self.frame_list = list(set(self.df['frame_ID']))
  52. self.frame_list.sort()
  53. def cal_frame_diff(self):
  54. maxx = -1
  55. for i in range(1, len(self.frame_list)):
  56. diff = self.frame_list[i] - self.frame_list[i - 1] - 1
  57. if diff > maxx and diff != 0:
  58. maxx = diff
  59. self.frame_loss_max_start = [self.frame_list[i - 1], self.frame_list[i]]
  60. self.frame_diff_list.append(int(diff))
  61. def cal_total_frame_count(self):
  62. self.total_frame_count = int(self.frame_list[-1] - self.frame_list[0])
  63. def cal_max_frame_number(self):
  64. self.max_frame_number = int(self.frame_list[-1])
  65. def cal_frame_loss_count(self):
  66. self.frame_loss_count = sum(self.frame_diff_list)
  67. def cal_frame_loss_rate(self):
  68. self.frame_loss_rate = round(self.frame_loss_count / self.frame_list[-1], 4)
  69. def cal_frame_loss_max(self):
  70. self.frame_loss_max = max(self.frame_diff_list) if self.frame_diff_list else 0
  71. def cal_frame_diff_counter(self):
  72. self.frame_diff_counter = dict(Counter(self.frame_diff_list))
  73. self.frame_diff_counter = dict(sorted(self.frame_diff_counter.items(), reverse=True))
  74. del self.frame_diff_counter[0] # 删除不丢帧次数统计
  75. def result_print(self):
  76. self.result += f"丢帧率: {self.frame_loss_rate* 100:.2f}%, "
  77. self.result += f"总帧数: {self.total_frame_count}, "
  78. self.result += f"丢帧数量: {self.frame_loss_count}, "
  79. # self.result += f"此文件最大帧数: {self.max_frame_number}, "
  80. self.result += f"最长丢帧数量: {self.frame_loss_max}, "
  81. self.result += f"最长丢帧时起始帧: {self.frame_loss_max_start}, "
  82. self.result += f"丢帧数及次数统计: {self.frame_diff_counter}."
  83. print("此文件总帧数:", self.total_frame_count)
  84. print("此文件最大帧数:", self.max_frame_number)
  85. print("此文件丢帧数量:", self.frame_loss_count)
  86. print("此文件丢帧率:", f"{self.frame_loss_rate * 100:.2f}%")
  87. print("此文件最长丢帧数量:", self.frame_loss_max)
  88. print("此文件最长丢帧时起始帧:", self.frame_loss_max_start)
  89. print("此文件丢帧数及次数统计:", self.frame_diff_counter)
  90. def get_all_files(path):
  91. file_names = []
  92. for root, dirs, files in os.walk(path):
  93. for file in files:
  94. file_names.append(os.path.join(root, file))
  95. return file_names
  96. def frame_loss_statistic(path):
  97. file_names = get_all_files(path)
  98. frame_loss_dict = dict()
  99. for file in file_names:
  100. file_name = file.split('\\')[-1]
  101. print(f"\n[{file_name}]")
  102. df = pd.read_csv(file, index_col=False)
  103. d_q = DataQuality(df)
  104. d_q.quality_detect()
  105. # frame_loss_dict[file_name] = d_q.result
  106. frame_loss_dict[file_name] = dict()
  107. frame_loss_dict[file_name]["result"] = d_q.result
  108. frame_loss_dict[file_name]["frame_loss_rate"] = d_q.frame_loss_rate
  109. return frame_loss_dict
  110. if __name__ == "__main__":
  111. # path = r"C:\Users\cicv\Desktop\ads_evaluate_V3.2.0\task_1218_\case_1218\data"
  112. path = r"./task_1228/case0322/data"
  113. # path = r"C:\Users\cicv\Desktop\ads_evaluate_V3.2.0\task_1221\data1\data"
  114. # path = r"C:\Users\cicv\Desktop\ADS_evaluate\ads_evaluation_development\adas_evaluation_V3.0\data\data_0920_zhikong_doubleCar"
  115. # path = r"C:\Users\cicv\Desktop\align\data2"
  116. #
  117. frame_loss_statistic(path)