#!/usr/bin/env python # -*- coding: utf-8 -*- ################################################################## # # Copyright (c) 2023 CICV, Inc. All Rights Reserved # ################################################################## """ @Authors: yangzihao(yangzihao@china-icv.cn) @Data: 2023/11/28 @Last Modified: 2023/11/28 @Summary: Evaluate multiple cases. """ import os import sys import pandas as pd import numpy as np import requests import json import pathlib import time import traceback import log from common import json2dict, score_grade, mileage_format, duration_format, string_concatenate, replace_key_with_value from score import cal_score_from_80 def calculate_ratios(grade_count_list): """ This function helps to calculate the ratios of the grade in grade_count_list. Arguments: grade_count_list: A list of the number of grades. Returns: ratios: A list of the ratios of the grade in grade_count_list. """ arr = np.array(grade_count_list) total = arr.sum() ratios = np.round(arr / total * 100).astype(int).tolist() # 确保四个比例的和为100 if sum(ratios) != 100: ratios[0] += 100 - sum(ratios) return ratios def grade_statistic(grade_list): """ This function statistic the number of grades in grade_list. Arguments: grade_list: A list of grades. Returns: grade_distribution: A dict of the distribution of 4 grades. """ a_grade_count = len([x for x in grade_list if x == "优秀"]) b_grade_count = len([x for x in grade_list if x == "良好"]) c_grade_count = len([x for x in grade_list if x == "一般"]) d_grade_count = len([x for x in grade_list if x == "较差"]) grade_count_list = [a_grade_count, b_grade_count, c_grade_count, d_grade_count] ratios = calculate_ratios(grade_count_list) grade_distribution = { "优秀": ratios[0], "良好": ratios[1], "一般": ratios[2], "较差": ratios[3] } return grade_distribution def custom_metric_param_list_parser(param_list): """ param_dict = { "paramA" [ { "kind": "-1", "optimal": "1", "multiple": ["0.5","5"], "spare1": null, "spare2": null } ] } """ kind_list = [] optimal_list = [] multiple_list = [] spare_list = [] # spare1_list = [] # spare2_list = [] for i in range(len(param_list)): kind_list.append(int(param_list[i]['kind'])) optimal_list.append(float(param_list[i]['optimal'])) multiple_list.append([float(x) for x in param_list[i]['multiple']]) spare_list.append([item["param"] for item in param_list[i]["spare"]]) # spare1_list.append(param_list[i]['spare1']) # spare2_list.append(param_list[i]['spare2']) result = { "kind": kind_list, "optimal": optimal_list, "multiple": multiple_list, "spare": spare_list, # "spare1": spare1_list, # "spare2": spare2_list } return result def custom_metric_param_parser(customMetricParam): result = {} for key, param_list in customMetricParam.items(): result[key] = custom_metric_param_list_parser(param_list) return result def custom_multi_case_statistic(custom_cases_dict, config, dimension): """ Args: custom_cases_dict: config: dimension: Returns: """ df_custom_cases = pd.DataFrame(custom_cases_dict).T.dropna(how='all') # common data bulitin_metric_list = config.builtinMetricList # config infos dimension_config = config.config[dimension] metric_list = dimension_config['metric'] type_list = dimension_config['type'] type_name_dict = dimension_config['typeName'] name_dict = dimension_config['name'] unit_dict = dimension_config['unit'] metric_dict = dimension_config['typeMetricDict'] dimension_name_dict = config.dimension_name # # custom metric data customMetricParam = dimension_config['customMetricParam'] # custom_data = custom_data custom_param_dict = custom_metric_param_parser(customMetricParam) weight_custom = float(dimension_config['weightDimension']) # optimal_dict = dimension_config['optimal'] # kind_dict = dimension_config['kind'] # multiple_dict = dimension_config['multiple'] report_dict = { "name": f"{dimension_name_dict[dimension]}", "weight": f"{weight_custom * 100}%", } # calculate score_custom and grade_custom score_custom_list = df_custom_cases['score'].values.tolist() score_custom = cal_score_from_80(score_custom_list) grade_custom = score_grade(score_custom) report_dict["score"] = score_custom report_dict["level"] = grade_custom report_dict["scoreList"] = score_custom_list # calculate grade_distribution grade_custom_list = df_custom_cases['level'].values.tolist() grade_custom_distribution = grade_statistic(grade_custom_list) report_dict["levelDistribution"] = grade_custom_distribution score_type_dict = {} value_list_dict = {} bad_count_dict = {} good_rate_dict = {} bad_rate_dict = {} type_details_dict = {} for type in type_list: type_dict = { "name": type_name_dict[type], } builtin_graph_dict = {} custom_graph_dict = {} df_custom_cases[type] = df_custom_cases["details"].apply(lambda x: x[type] if type in x.keys() else None) df_custom_cases1 = df_custom_cases.dropna(subset=[type]) type_cases_dict = df_custom_cases1[type].to_dict() df_type_cases = pd.DataFrame(type_cases_dict).T.dropna(how='all') # calculate score_type and grade_type score_type_list = df_type_cases['score'].values.tolist() score_type = round(np.mean(score_type_list), 2) grade_type = score_grade(score_type) type_dict["score"] = score_type type_dict["level"] = grade_type score_type_dict[type] = score_type # calculate grade_distribution grade_type_list = df_type_cases['level'].values.tolist() grade_type_distribution = grade_statistic(grade_type_list) type_dict["gradeDistribution"] = grade_type_distribution dfs = df_type_cases['indexes'].apply(lambda x: pd.DataFrame(x).T) df_type_indexes = pd.concat(dfs.tolist(), ignore_index=True) # functionACC description type_metric1_list = [] # good type_metric2_list = [] # not good type_metric3_list = [] # bad # indexes type_dict_indexes = {} for metric in metric_dict[type]: df_metric_indexes = df_type_indexes[df_type_indexes['name'] == f"{name_dict[metric]}"] kind_metric = custom_param_dict[metric]['kind'][0] optimal_metric = custom_param_dict[metric]['optimal'][0] multiple_metric0 = custom_param_dict[metric]['multiple'][0][0] multiple_metric1 = custom_param_dict[metric]['multiple'][0][1] if kind_metric == 1: # 越大越好 metric_value_list = df_metric_indexes['min'].astype(str).values.tolist() metric_value_list = [float(x) for x in metric_value_list if '-' not in x] avg_metric_value = round(np.mean(metric_value_list), 2) if metric_value_list else "-" max_metric_value = max(metric_value_list) if metric_value_list else "-" min_metric_value = max(metric_value_list) if metric_value_list else "-" if not optimal_metric: metric_bad_count = 0 else: metric_bad_count = len([x for x in metric_value_list if x < float(optimal_metric)]) metric_bad_rate = round(metric_bad_count / len(metric_value_list) * 100, 2) if metric_value_list else 0 metric_good_rate = round(100 - metric_bad_rate, 2) type_metric1_list.append(metric) if metric_bad_rate == 0 else type_metric2_list.append( metric) if avg_metric_value > float(optimal_metric) else type_metric3_list.append(metric) elif kind_metric == -1: # 越小越好 metric_value_list = df_metric_indexes['max'].astype(str).values.tolist() metric_value_list = [float(x) for x in metric_value_list if '-' not in x] avg_metric_value = round(np.mean(metric_value_list), 2) if metric_value_list else "-" max_metric_value = max(metric_value_list) if metric_value_list else "-" min_metric_value = max(metric_value_list) if metric_value_list else "-" if not optimal_metric: metric_bad_count = 0 else: metric_bad_count = len([x for x in metric_value_list if x > float(optimal_metric)]) metric_bad_rate = round(metric_bad_count / len(metric_value_list) * 100, 2) if metric_value_list else 0 metric_good_rate = round(100 - metric_bad_rate, 2) type_metric1_list.append(metric) if metric_bad_rate == 0 else type_metric2_list.append( metric) if avg_metric_value < float(optimal_metric) else type_metric3_list.append(metric) elif kind_metric == 0: metric_value_list = df_metric_indexes['avg'].astype(str).values.tolist() metric_value_list = [float(x) for x in metric_value_list if '-' not in x] avg_metric_value = round(np.mean(metric_value_list), 2) if metric_value_list else "-" max_metric_value = max(metric_value_list) if metric_value_list else "-" min_metric_value = max(metric_value_list) if metric_value_list else "-" if not optimal_metric: metric_bad_count = 0 else: metric_bad_count = len([x for x in metric_value_list if ( x > float(optimal_metric) * multiple_metric1 or x < float( optimal_metric) * multiple_metric0)]) metric_bad_rate = round(metric_bad_count / len(metric_value_list) * 100, 2) if metric_value_list else 0 metric_good_rate = round(100 - metric_bad_rate, 2) if not optimal_metric: type_metric1_list.append(metric) else: type_metric1_list.append(metric) if metric_bad_rate == 0 else type_metric2_list.append(metric) if ( float(optimal_metric) * multiple_metric1 > avg_metric_value > float( optimal_metric) * multiple_metric0) else type_metric3_list.append( metric) type_dict_indexes[metric] = { "name": f"{name_dict[metric]}", "average": avg_metric_value, "max": max_metric_value, "min": min_metric_value, } if not optimal_metric: type_dict_indexes[metric]["range"] = f"-" else: if kind_metric == -1: type_dict_indexes[metric]["range"] = f"[0, {optimal_metric}]" elif kind_metric == 1: type_dict_indexes[metric]["range"] = f"[{optimal_metric}, inf)" elif kind_metric == 0: type_dict_indexes[metric][ "range"] = f"[{float(optimal_metric) * multiple_metric0}, {float(optimal_metric) * multiple_metric1}]" value_list_dict[metric] = metric_value_list bad_count_dict[metric] = metric_bad_count good_rate_dict[metric] = metric_good_rate bad_rate_dict[metric] = metric_bad_rate type_dict["indexes"] = type_dict_indexes for metric in metric_dict[type]: metric_data = { "name": f"{name_dict[metric]}", "data": value_list_dict[metric], "markLine": [custom_param_dict[metric]['optimal'][0]] } custom_graph_dict[metric] = metric_data type_dict["builtin"] = builtin_graph_dict type_dict["custom"] = custom_graph_dict # description str_type_metric1 = '' str_type_metric2 = '' str_type_metric3 = '' if len(type_metric1_list) == len(metric_list): str_metric = string_concatenate(metric_list) type_description1 = f"{str_metric}指标均表现良好,平均值在合理范围内且不存在不合格用例。" elif len(type_metric3_list) == len(metric_list): str_metric = string_concatenate(metric_list) type_description1 = f"{str_metric}指标平均值在合理范围外,算法在大部分用例下均表现不佳,需重点优化。" else: if type_metric1_list: str_type1 = string_concatenate(type_metric1_list) str_type_metric1 += f"{str_type1}指标表现良好,平均值在合理范围内且不存在不合格用例;" for metric in type_metric2_list: str_type_metric2 += f"{name_dict[metric]}指标表现不佳,存在{bad_count_dict[metric]}个不合格用例,需要改进算法在这些用例中的表现;" if type_metric3_list: str_type3 = string_concatenate(type_metric3_list) str_type_metric3 += f"{str_type3}指标平均值在合理范围外,算法在大部分用例下均表现不佳,需重点优化。" type_description1 = '' type_description1 += (str_type_metric1 + '\n') if str_type_metric1 else '' type_description1 += (str_type_metric2 + '\n') if str_type_metric2 else '' type_description1 += str_type_metric3 type_description1 = type_description1[:-1] + "。" # type_description2 = "经计算可知," # for metric in metric_dict[type]: # type_description2 += f"{metric}指标位于合理区间的占比为{good_rate_dict[metric]}%," # type_description2 = type_description2[:-1] + "。" type_dict["description1"] = replace_key_with_value(type_description1, name_dict) # type_dict["description2"] = replace_key_with_value(type_description2, name_dict) type_details_dict[type] = type_dict report_dict["details"] = type_details_dict # custom description1 good_type_list = [] bad_type_list = [] for type in type_list: bad_type_list.append(type) if score_type_dict[type] < 80 else good_type_list.append(type) # generate custom_description1 good_type_list_count = len(score_custom_list) over_80_count = len([num for num in score_custom_list if num >= 80]) over_80_proportion = over_80_count / good_type_list_count below_80_count = good_type_list_count - over_80_count below_80_proportion = below_80_count / good_type_list_count below_60_count = len([num for num in score_custom_list if num < 60]) below_60_proportion = below_60_count / good_type_list_count if grade_custom == '优秀': custom_description1 = f'算法在本轮测试中的表现优秀;' elif grade_custom == '良好': custom_description1 = f'算法在本轮测试中的表现满足设计指标要求。其中有{over_80_count}个用例得分超过80分,占比为{over_80_proportion * 100:.2f}%;' elif grade_custom == '一般': str_bad_type = string_concatenate(bad_type_list) custom_description1 = f'未满足设计指标要求。其中有{below_80_count}个用例得分低于80分,占比为{below_80_proportion * 100:.2f}%,需优化算法在{str_bad_type}上的表现;' elif grade_custom == '较差': str_bad_type = string_concatenate(bad_type_list) custom_description1 = f'未满足设计指标要求。其中有{below_60_count}个用例得分低于60分,占比为{below_60_proportion * 100:.2f}%,需优化算法在{str_bad_type}上的表现;' # customcient description2 if not bad_type_list: custom_description2 = f"算法在{dimension_name_dict[dimension]}维度上的表现满足设计指标要求。" else: custom_description2 = f"算法在{str_bad_type}的指标需要重点优化。" report_dict["description1"] = replace_key_with_value(custom_description1, type_name_dict) report_dict["description2"] = replace_key_with_value(custom_description2, type_name_dict) return report_dict def safe_multi_case_statistic(safe_cases_dict, config): """ Args: safe_cases_dict: config: Returns: """ # report_dict = { # "name": "安全性", # "weight": f"{weight_safe * 100}%", # "score": score_safe, # "level": grade_safe, # "scoreList": score_safe_list, # "levelDistribution": grade_safe_distribution, # # "description1": safe_description1, # "description2": safe_description2, # "noObjectCar": False, # # "safeTime": time_dict, # "safeDistance": distance_dict, # "safeAcceleration": acceleration_dict, # "safeProbability": probability_dict # } df_safe_cases = pd.DataFrame(safe_cases_dict).T.dropna(how='all') # common data bulitin_metric_list = config.builtinMetricList # config infos dimension_config = config.config["safe"] metric_list = dimension_config['metric'] type_list = dimension_config['type'] type_name_dict = dimension_config['typeName'] name_dict = dimension_config['name'] unit_dict = dimension_config['unit'] metric_dict = dimension_config['typeMetricDict'] # # custom metric data customMetricParam = dimension_config['customMetricParam'] # custom_data = custom_data custom_param_dict = custom_metric_param_parser(customMetricParam) weight_safe = float(dimension_config['weightDimension']) optimal_dict = dimension_config['optimal'][0] kind_dict = dimension_config['kind'][0] multiple_dict = dimension_config['multiple'][0] report_dict = { "name": "安全性", "weight": f"{weight_safe * 100}%", "noObjectCar": False, } # calculate score_safe and grade_safe score_safe_list = df_safe_cases['score'].values.tolist() score_safe = cal_score_from_80(score_safe_list) grade_safe = score_grade(score_safe) report_dict["score"] = score_safe report_dict["level"] = grade_safe report_dict["scoreList"] = score_safe_list # calculate grade_distribution grade_safe_list = df_safe_cases['level'].values.tolist() grade_safe_distribution = grade_statistic(grade_safe_list) report_dict["levelDistribution"] = grade_safe_distribution score_type_dict = {} value_list_dict = {} bad_count_dict = {} good_rate_dict = {} bad_rate_dict = {} type_details_dict = {} for type in type_list: type_dict = { "name": type_name_dict[type], } builtin_graph_dict = {} custom_graph_dict = {} df_safe_cases[type] = df_safe_cases["details"].apply(lambda x: x[type] if type in x.keys() else None) df_safe_cases1 = df_safe_cases.dropna(subset=[type]) type_cases_dict = df_safe_cases1[type].to_dict() df_type_cases = pd.DataFrame(type_cases_dict).T.dropna(how='all') # calculate score_type and grade_type score_type_list = df_type_cases['score'].values.tolist() score_type = round(np.mean(score_type_list), 2) grade_type = score_grade(score_type) type_dict["score"] = score_type type_dict["level"] = grade_type score_type_dict[type] = score_type # calculate grade_distribution grade_type_list = df_type_cases['level'].values.tolist() grade_type_distribution = grade_statistic(grade_type_list) type_dict["gradeDistribution"] = grade_type_distribution # calculate type indexes # type_indexes_dict = df_type_cases['indexes'].explode().tolist() # df_type_indexes = pd.DataFrame(type_indexes_dict) dfs = df_type_cases['indexes'].apply(lambda x: pd.DataFrame(x).T) df_type_indexes = pd.concat(dfs.tolist(), ignore_index=True) # functionACC description type_metric1_list = [] # good type_metric2_list = [] # not good type_metric3_list = [] # bad # indexes type_dict_indexes = {} for metric in metric_dict[type]: df_metric_indexes = df_type_indexes[df_type_indexes['name'] == f"{name_dict[metric]}"] kind_metric = kind_dict[metric] if metric in bulitin_metric_list else custom_param_dict[metric]['kind'][0] optimal_metric = optimal_dict[metric] if metric in bulitin_metric_list else \ custom_param_dict[metric]['optimal'][0] multiple_metric0 = multiple_dict[metric][0] if metric in bulitin_metric_list else \ custom_param_dict[metric]['multiple'][0][0] multiple_metric1 = multiple_dict[metric][1] if metric in bulitin_metric_list else \ custom_param_dict[metric]['multiple'][0][1] if kind_metric == 1: # 越大越好 # metric_value_list = df_metric_indexes['min'].astype(float).values.tolist() metric_value_list = df_metric_indexes['extremum'].astype(str).values.tolist() metric_value_list = [float(x) for x in metric_value_list if '-' not in x] avg_metric_value = round(np.mean(metric_value_list), 2) if metric_value_list else "-" max_metric_value = max(metric_value_list) if metric_value_list else "-" min_metric_value = max(metric_value_list) if metric_value_list else "-" if not optimal_metric: metric_bad_count = 0 else: metric_bad_count = len([x for x in metric_value_list if x < float(optimal_metric)]) metric_bad_rate = round(metric_bad_count / len(metric_value_list) * 100, 2) if metric_value_list else 0 metric_good_rate = round(100 - metric_bad_rate, 2) type_metric1_list.append(metric) if metric_bad_rate == 0 else type_metric2_list.append( metric) if avg_metric_value > float(optimal_metric) else type_metric3_list.append(metric) elif kind_metric == -1: # 越小越好 # metric_value_list = df_metric_indexes['max'].astype(float).values.tolist() metric_value_list = df_metric_indexes['extremum'].astype(str).values.tolist() metric_value_list = [float(x) for x in metric_value_list if '-' not in x] avg_metric_value = round(np.mean(metric_value_list), 2) if metric_value_list else "-" max_metric_value = max(metric_value_list) if metric_value_list else "-" min_metric_value = max(metric_value_list) if metric_value_list else "-" if not optimal_metric: metric_bad_count = 0 else: metric_bad_count = len([x for x in metric_value_list if x > float(optimal_metric)]) metric_bad_rate = round(metric_bad_count / len(metric_value_list) * 100, 2) if metric_value_list else 0 metric_good_rate = round(100 - metric_bad_rate, 2) type_metric1_list.append(metric) if metric_bad_rate == 0 else type_metric2_list.append( metric) if avg_metric_value < float(optimal_metric) else type_metric3_list.append(metric) elif kind_metric == 0: # metric_value_list = df_metric_indexes['avg'].astype(float).values.tolist() metric_value_list = df_metric_indexes['extremum'].astype(str).values.tolist() metric_value_list = [float(x) for x in metric_value_list if '-' not in x] avg_metric_value = round(np.mean(metric_value_list), 2) if metric_value_list else "-" max_metric_value = max(metric_value_list) if metric_value_list else "-" min_metric_value = max(metric_value_list) if metric_value_list else "-" if not optimal_metric: metric_bad_count = 0 else: metric_bad_count = len([x for x in metric_value_list if ( x > float(optimal_metric) * multiple_metric1 or x < float(optimal_metric) * multiple_metric0)]) metric_bad_rate = round(metric_bad_count / len(metric_value_list) * 100, 2) if metric_value_list else 0 metric_good_rate = round(100 - metric_bad_rate, 2) if not optimal_metric: type_metric1_list.append(metric) else: type_metric1_list.append(metric) if metric_bad_rate == 0 else type_metric2_list.append( metric) if ( float(optimal_metric) * multiple_metric1 > avg_metric_value > float(optimal_metric) * multiple_metric0) else type_metric3_list.append(metric) type_dict_indexes[metric] = { "name": f"{name_dict[metric]}", "average": avg_metric_value, "max": max_metric_value, "min": min_metric_value, "rate": f"{metric_bad_rate}%" # "range": f"[0, {optimal_dict['followSpeedDeviation']}]", } if not optimal_metric: type_dict_indexes[metric]["range"] = f"-" else: if kind_metric == -1: type_dict_indexes[metric]["range"] = f"[0, {optimal_metric}]" elif kind_metric == 1: type_dict_indexes[metric]["range"] = f"[{optimal_metric}, inf)" elif kind_metric == 0: type_dict_indexes[metric][ "range"] = f"[{float(optimal_metric) * multiple_metric0}, {float(optimal_metric) * multiple_metric1}]" # else: # if custom_param_dict[metric]['kind'][0] == -1: # type_dict_indexes[metric][ # "range"] = f"[0, {custom_param_dict[metric]['optimal'][0]}]" # elif custom_param_dict[metric]['kind'][0] == 1: # type_dict_indexes[metric][ # "range"] = f"[{custom_param_dict[metric]['optimal'][0]}, inf)" # elif custom_param_dict[metric]['kind'][0] == 0: # type_dict_indexes[metric][ # "range"] = f"[{custom_param_dict[metric]['optimal'][0] * multiple_dict[metric][0]}, {custom_param_dict[metric]['optimal'][0] * custom_param_dict[metric]['multiple'][0][1]}]" value_list_dict[metric] = metric_value_list bad_count_dict[metric] = metric_bad_count good_rate_dict[metric] = metric_good_rate bad_rate_dict[metric] = metric_bad_rate type_dict["indexes"] = type_dict_indexes for metric in metric_dict[type]: metric_data = { "name": f"{name_dict[metric]}", "data": value_list_dict[metric], # "markLine": [optimal_dict[metric]] } if metric in bulitin_metric_list: metric_data["markLine"] = [optimal_dict[metric]] builtin_graph_dict[metric] = metric_data else: metric_data["markLine"] = [custom_param_dict[metric]['optimal'][0]] custom_graph_dict[metric] = metric_data type_dict["builtin"] = builtin_graph_dict type_dict["custom"] = custom_graph_dict # description str_type_metric1 = '' str_type_metric2 = '' str_type_metric3 = '' if len(type_metric1_list) == len(metric_list): str_metric = string_concatenate(metric_list) type_description1 = f"{str_metric}指标均表现良好,平均值在合理范围内且不存在不合格用例。" elif len(type_metric3_list) == len(metric_list): str_metric = string_concatenate(metric_list) type_description1 = f"{str_metric}指标平均值在合理范围外,算法在大部分用例下均表现不佳,需重点优化。" else: if type_metric1_list: str_type1 = string_concatenate(type_metric1_list) str_type_metric1 += f"{str_type1}指标表现良好,平均值在合理范围内且不存在不合格用例;" for metric in type_metric2_list: str_type_metric2 += f"{name_dict[metric]}指标表现不佳,存在{bad_count_dict[metric]}个不合格用例,需要改进算法在这些用例中的表现;" if type_metric3_list: str_type3 = string_concatenate(type_metric3_list) str_type_metric3 += f"{str_type3}指标平均值在合理范围外,算法在大部分用例下均表现不佳,需重点优化。" type_description1 = '' type_description1 += (str_type_metric1 + '\n') if str_type_metric1 else '' type_description1 += (str_type_metric2 + '\n') if str_type_metric2 else '' type_description1 += str_type_metric3 type_description1 = type_description1[:-1] + "。" type_description2 = "经计算可知," for metric in metric_dict[type]: type_description2 += f"{metric}指标位于合理区间的占比为{good_rate_dict[metric]}%," type_description2 = type_description2[:-1] + "。" type_dict["description1"] = replace_key_with_value(type_description1, name_dict) type_dict["description2"] = replace_key_with_value(type_description2, name_dict) type_details_dict[type] = type_dict report_dict["details"] = type_details_dict """ # ------------------------------ # safe summary dict """ safe_type_list = [] unsafe_type_list = [] for key, value in score_type_dict.items(): unsafe_type_list.append(key) if value < 80 else safe_type_list.append(key) # generate safe_description1 safe_list_count = len(score_safe_list) over_80_count = len([num for num in score_safe_list if num >= 80]) over_80_proportion = over_80_count / safe_list_count below_80_count = safe_list_count - over_80_count below_80_proportion = below_80_count / safe_list_count below_60_count = len([num for num in score_safe_list if num < 60]) below_60_proportion = below_60_count / safe_list_count if grade_safe == '优秀': safe_description1 = '车辆在本轮测试中无碰撞风险;' elif grade_safe == '良好': safe_description1 = f'算法在本轮测试中的表现满足设计指标要求。其中有{over_80_count}个用例得分超过80分,占比为{over_80_proportion * 100:.2f}%;' elif grade_safe == '一般': str_unsafe_type = string_concatenate(unsafe_type_list) safe_description1 = f'未满足设计指标要求。其中有{below_80_count}个用例得分低于80分,占比为{below_80_proportion * 100:.2f}%,需优化算法在{str_unsafe_type}上的表现;' elif grade_safe == '较差': str_unsafe_type = string_concatenate(unsafe_type_list) safe_description1 = f'未满足设计指标要求。其中有{below_60_count}个用例得分低于60分,占比为{below_60_proportion * 100:.2f}%,需优化算法在{str_unsafe_type}上的表现;' if not unsafe_type_list: safe_description2 = '算法在安全性维度上的表现满足设计指标要求。' else: str_unsafe_type = string_concatenate(unsafe_type_list) safe_description2 = f"安全性在{str_unsafe_type}上存在严重风险,需要重点优化。" report_dict["description1"] = replace_key_with_value(safe_description1, type_name_dict) report_dict["description2"] = replace_key_with_value(safe_description2, type_name_dict) # report_dict = { # "name": "安全性", # "weight": f"{weight_safe * 100}%", # "score": score_safe, # "level": grade_safe, # "scoreList": score_safe_list, # "levelDistribution": grade_safe_distribution, # # "description1": safe_description1, # "description2": safe_description2, # "noObjectCar": False, # # "safeTime": time_dict, # "safeDistance": distance_dict, # "safeAcceleration": acceleration_dict, # "safeProbability": probability_dict # } return report_dict import pandas as pd import numpy as np def process_dataframe(df, column): # 初始化value列为NaN df['value'] = np.nan # 遍历DataFrame的每一行 for index, row in df.iterrows(): avg_val, max_val, min_val = row['avg'], row['max'], row['min'] # 检查是否两个是'-',一个是数值 count_minus = sum(val == '-' for val in [avg_val, max_val, min_val]) if count_minus == 2: # 找出那个不是'-'的值,并赋值给value列 numeric_val = [val for val in [avg_val, max_val, min_val] if val != '-'][0] df.at[index, 'value'] = float(numeric_val) # 转换为浮点数并赋值 else: # 如果条件不满足,则将column列的值赋给value列 df.at[index, 'value'] = row[column] return df def assign_value_column(df, column, avg_col='avg', max_col='max', min_col='min'): # 判断三列中有几个是'-'(作为空值或缺失值的代表) df[['num_avg', 'num_max', 'num_min']] = df[[avg_col, max_col, min_col]].applymap( lambda x: 0 if isinstance(x, (int, float)) else 1 if x == '-' else None) # 计算每行中'-'的数量 df['num_hyphens'] = df[['num_avg', 'num_max', 'num_min']].sum(axis=1) # 判断是否满足条件:两个'-'和一个数值 df['is_valid_condition'] = (df['num_hyphens'] == 2) # df['is_valid_condition'] = (df['num_hyphens'] == 2) & df[[avg_col, max_col, min_col]].apply( # lambda row: sum(isinstance(val, (int, float)) for val in row) == 1, axis=1) # 创建一个新的value列,根据条件填充值 df['value'] = df.apply( lambda row: next((val for val in [row[avg_col], row[max_col], row[min_col]] if val != '-'), row[column]) if row['is_valid_condition'] else row[column], axis=1) # 删除辅助列 df.drop(['num_avg', 'num_max', 'num_min', 'num_hyphens', 'is_valid_condition'], axis=1, inplace=True) # 返回修改后的DataFrame return df # 示例使用 # 假设df是你的原始DataFrame,且包含avg, max, min和column列 # df = pd.DataFrame({...}) # 这里省略了具体的DataFrame创建代码 # result_df = assign_value_column(df) # print(result_df) def func_multi_case_statistic(func_cases_dict, config): """ Args: func_cases_dict: config: Returns: """ # report_dict = { # "name": "功能性", # "weight": f"{weight_dimension * 100}%", # "score": score_func, # "level": grade_func, # "scoreList": score_func_list, # "levelDistribution": grade_func_distribution, # "description1": func_description1, # "description2": func_description2, # "noObjectCar": False, # "functionACC": follow_dict, # "functionLKA": lane_dict # } df_func_cases = pd.DataFrame(func_cases_dict).T.dropna(how='all') # common data bulitin_metric_list = config.builtinMetricList # config infos dimension_config = config.config["function"] metric_list = dimension_config['metric'] type_list = dimension_config['type'] type_name_dict = dimension_config['typeName'] name_dict = dimension_config['name'] unit_dict = dimension_config['unit'] metric_dict = dimension_config['typeMetricDict'] # # custom metric data customMetricParam = dimension_config['customMetricParam'] custom_metric_list = list(customMetricParam.keys()) # custom_data = custom_data custom_param_dict = custom_metric_param_parser(customMetricParam) weight_dimension = float(dimension_config['weightDimension']) optimal_dict = dimension_config['optimal'] kind_dict = dimension_config['kind'] multiple_dict = dimension_config['multiple'] report_dict = { "name": "功能性", "weight": f"{weight_dimension * 100}%", "noObjectCar": False, } # calculate score_func and grade_func score_func_list = df_func_cases['score'].values.tolist() score_func = cal_score_from_80(score_func_list) grade_func = score_grade(score_func) report_dict["score"] = score_func report_dict["level"] = grade_func report_dict["scoreList"] = score_func_list # calculate grade_distribution grade_func_list = df_func_cases['level'].values.tolist() grade_func_distribution = grade_statistic(grade_func_list) report_dict["levelDistribution"] = grade_func_distribution score_type_dict = {} value_list_dict = {} bad_count_dict = {} good_rate_dict = {} bad_rate_dict = {} type_details_dict = {} for type in type_list: type_dict = { "name": type_name_dict[type], } builtin_graph_dict = {} custom_graph_dict = {} df_func_cases[type] = df_func_cases["details"].apply(lambda x: x[type] if type in x.keys() else None) df_func_cases1 = df_func_cases.dropna(subset=[type]) type_cases_dict = df_func_cases1[type].to_dict() df_type_cases = pd.DataFrame(type_cases_dict).T.dropna(how='all') # calculate score_type and grade_type score_type_list = df_type_cases['score'].values.tolist() score_type = round(np.mean(score_type_list), 2) grade_type = score_grade(score_type) type_dict["score"] = score_type type_dict["level"] = grade_type score_type_dict[type] = score_type # calculate grade_distribution grade_type_list = df_type_cases['level'].values.tolist() grade_type_distribution = grade_statistic(grade_type_list) type_dict["gradeDistribution"] = grade_type_distribution # calculate type indexes # type_indexes_dict = df_type_cases['indexes'].explode().tolist() # df_type_indexes = pd.DataFrame(type_indexes_dict) dfs = df_type_cases['indexes'].apply(lambda x: pd.DataFrame(x).T) df_type_indexes = pd.concat(dfs.tolist(), ignore_index=True) # functionACC description type_metric1_list = [] # good type_metric2_list = [] # not good type_metric3_list = [] # bad # indexes type_dict_indexes = {} for metric in metric_dict[type]: df_metric_indexes = df_type_indexes[df_type_indexes['name'] == f"{name_dict[metric]}"] kind_metric = kind_dict[metric] if metric in bulitin_metric_list else custom_param_dict[metric]['kind'][0] optimal_metric = optimal_dict[metric] if metric in bulitin_metric_list else \ custom_param_dict[metric]['optimal'][0] multiple_metric0 = multiple_dict[metric][0] if metric in bulitin_metric_list else \ custom_param_dict[metric]['multiple'][0][0] multiple_metric1 = multiple_dict[metric][1] if metric in bulitin_metric_list else \ custom_param_dict[metric]['multiple'][0][1] if kind_metric == 1: # 越大越好 # metric_value_list = df_metric_indexes['min'].astype(float).values.tolist() df_metric_indexes = assign_value_column(df_metric_indexes, "min") metric_value_list = df_metric_indexes['value'].astype(str).values.tolist() metric_value_list = [float(x) for x in metric_value_list if '-' not in x] metric_value_list = [x for x in metric_value_list if not np.isnan(x)] avg_metric_value = round(np.mean(metric_value_list), 2) if metric_value_list else "-" max_metric_value = max(metric_value_list) if metric_value_list else "-" min_metric_value = max(metric_value_list) if metric_value_list else "-" if not optimal_metric: metric_bad_count = 0 else: metric_bad_count = len([x for x in metric_value_list if x < float(optimal_metric)]) metric_bad_rate = round(metric_bad_count / len(metric_value_list) * 100, 2) if metric_value_list else 0 metric_good_rate = round(100 - metric_bad_rate, 2) type_metric1_list.append(metric) if metric_bad_rate == 0 else type_metric2_list.append( metric) if avg_metric_value > float(optimal_metric) else type_metric3_list.append(metric) elif kind_metric == -1: # 越小越好 # metric_value_list = df_metric_indexes['max'].astype(float).values.tolist() # if metric in ["centerDistanceExpectation", "centerDistanceStandardDeviation", "centerDistanceFrequency", # "centerDistanceRange"]: # index_name = 'avg' # elif metric in ["centerDistanceMin"]: # index_name = 'min' # else: # centerDistanceMax, laneDistance # index_name = 'max' df_metric_indexes = assign_value_column(df_metric_indexes, "max") metric_value_list = df_metric_indexes['value'].astype(str).values.tolist() # metric_value_list = df_metric_indexes[index_name].astype(str).values.tolist() metric_value_list = [float(x) for x in metric_value_list if '-' not in x] metric_value_list = [x for x in metric_value_list if not np.isnan(x)] avg_metric_value = round(np.mean(metric_value_list), 2) if metric_value_list else "-" max_metric_value = max(metric_value_list) if metric_value_list else "-" min_metric_value = max(metric_value_list) if metric_value_list else "-" if not optimal_metric: metric_bad_count = 0 else: metric_bad_count = len([x for x in metric_value_list if x > float(optimal_metric)]) metric_bad_rate = round(metric_bad_count / len(metric_value_list) * 100, 2) if metric_value_list else 0 metric_good_rate = round(100 - metric_bad_rate, 2) type_metric1_list.append(metric) if metric_bad_rate == 0 else type_metric2_list.append( metric) if avg_metric_value < float(optimal_metric) else type_metric3_list.append(metric) elif kind_metric == 0: # metric_value_list = df_metric_indexes['avg'].astype(float).values.tolist() # metric_value_list = df_metric_indexes['avg'].astype(str).values.tolist() df_metric_indexes = assign_value_column(df_metric_indexes, "avg") metric_value_list = df_metric_indexes['value'].astype(str).values.tolist() metric_value_list = [float(x) for x in metric_value_list if '-' not in x] metric_value_list = [x for x in metric_value_list if not np.isnan(x)] avg_metric_value = round(np.mean(metric_value_list), 2) if metric_value_list else "-" max_metric_value = max(metric_value_list) if metric_value_list else "-" min_metric_value = max(metric_value_list) if metric_value_list else "-" if not optimal_metric: metric_bad_count = 0 else: metric_bad_count = len([x for x in metric_value_list if ( x > float(optimal_metric) * multiple_metric1 or x < float(optimal_metric) * multiple_metric0)]) metric_bad_rate = round(metric_bad_count / len(metric_value_list) * 100, 2) if metric_value_list else 0 metric_good_rate = round(100 - metric_bad_rate, 2) if not optimal_metric: type_metric1_list.append(metric) else: type_metric1_list.append(metric) if metric_bad_rate == 0 else type_metric2_list.append( metric) if ( float(optimal_metric) * multiple_metric1 > avg_metric_value > float(optimal_metric) * multiple_metric0) else type_metric3_list.append(metric) type_dict_indexes[metric] = { "name": f"{name_dict[metric]}", "average": avg_metric_value, "max": max_metric_value, "min": min_metric_value, # "range": f"[0, {optimal_dict['followSpeedDeviation']}]", } if not optimal_metric: type_dict_indexes[metric]["range"] = f"-" else: if kind_metric == -1: type_dict_indexes[metric]["range"] = f"[0, {optimal_metric}]" elif kind_metric == 1: type_dict_indexes[metric]["range"] = f"[{optimal_metric}, inf)" elif kind_metric == 0: type_dict_indexes[metric][ "range"] = f"[{float(optimal_metric) * multiple_metric0}, {float(optimal_metric) * multiple_metric1}]" # else: # if custom_param_dict[metric]['kind'][0] == -1: # type_dict_indexes[metric][ # "range"] = f"[0, {custom_param_dict[metric]['optimal'][0]}]" # elif custom_param_dict[metric]['kind'][0] == 1: # type_dict_indexes[metric][ # "range"] = f"[{custom_param_dict[metric]['optimal'][0]}, inf)" # elif custom_param_dict[metric]['kind'][0] == 0: # type_dict_indexes[metric][ # "range"] = f"[{custom_param_dict[metric]['optimal'][0] * multiple_dict[metric][0]}, {custom_param_dict[metric]['optimal'][0] * custom_param_dict[metric]['multiple'][0][1]}]" value_list_dict[metric] = metric_value_list bad_count_dict[metric] = metric_bad_count good_rate_dict[metric] = metric_good_rate bad_rate_dict[metric] = metric_bad_rate type_dict["indexes"] = type_dict_indexes for metric in metric_dict[type]: metric_data = { "name": f"{name_dict[metric]}", "data": value_list_dict[metric], # "markLine": [optimal_dict[metric]] } if metric in bulitin_metric_list: metric_data["markLine"] = [optimal_dict[metric]] builtin_graph_dict[metric] = metric_data else: metric_data["markLine"] = [custom_param_dict[metric]['optimal'][0]] custom_graph_dict[metric] = metric_data type_dict["builtin"] = builtin_graph_dict type_dict["custom"] = custom_graph_dict # description str_type_metric1 = '' str_type_metric2 = '' str_type_metric3 = '' if len(type_metric1_list) == len(metric_list): str_metric = string_concatenate(metric_list) type_description1 = f"{str_metric}指标均表现良好,平均值在合理范围内且不存在不合格用例。" elif len(type_metric3_list) == len(metric_list): str_metric = string_concatenate(metric_list) type_description1 = f"{str_metric}指标平均值在合理范围外,算法在大部分用例下均表现不佳,需重点优化。" else: if type_metric1_list: str_type1 = string_concatenate(type_metric1_list) str_type_metric1 += f"{str_type1}指标表现良好,平均值在合理范围内且不存在不合格用例;" for metric in type_metric2_list: str_type_metric2 += f"{name_dict[metric]}指标表现不佳,存在{bad_count_dict[metric]}个不合格用例,需要改进算法在这些用例中的表现;" if type_metric3_list: str_type3 = string_concatenate(type_metric3_list) str_type_metric3 += f"{str_type3}指标平均值在合理范围外,算法在大部分用例下均表现不佳,需重点优化。" type_description1 = '' type_description1 += (str_type_metric1 + '\n') if str_type_metric1 else '' type_description1 += (str_type_metric2 + '\n') if str_type_metric2 else '' type_description1 += str_type_metric3 type_description1 = type_description1[:-1] + "。" type_description2 = "经计算可知," for metric in metric_dict[type]: type_description2 += f"{metric}指标位于合理区间的占比为{good_rate_dict[metric]}%," type_description2 = type_description2[:-1] + "。" type_dict["description1"] = replace_key_with_value(type_description1, name_dict) type_dict["description2"] = replace_key_with_value(type_description2, name_dict) type_details_dict[type] = type_dict report_dict["details"] = type_details_dict """ # ------------------------------ # func summary dict """ # generate func_description1 func_list_count = len(score_func_list) over_80_count = len([num for num in score_func_list if num >= 80]) over_80_proportion = over_80_count / func_list_count below_80_count = func_list_count - over_80_count below_80_proportion = below_80_count / func_list_count below_60_count = len([num for num in score_func_list if num < 60]) below_60_proportion = below_60_count / func_list_count func_type_list = [] unfunc_type_list = [] for key, value in score_type_dict.items(): unfunc_type_list.append(key) if value < 80 else func_type_list.append(key) if grade_func == '优秀': str_func_type = string_concatenate(func_type_list) func_description1 = f'算法在{str_func_type}上表现优秀;' elif grade_func == '良好': str_func_type = string_concatenate(func_type_list) func_description1 = f'算法在{str_func_type}上表现良好,满足设计指标要求。其中有{over_80_count}个用例得分超过80分,占比为{over_80_proportion * 100:.2f}%;' elif grade_func == '一般': str_unfunc_type = string_concatenate(unfunc_type_list) func_description1 = f'未满足设计指标要求。其中有{below_80_count}个用例得分低于80分,占比为{below_80_proportion * 100:.2f}%,需优化算法在{str_unfunc_type}上的表现;' elif grade_func == '较差': str_unfunc_type = string_concatenate(unfunc_type_list) func_description1 = f'未满足设计指标要求。其中有{below_60_count}个用例得分低于60分,占比为{below_60_proportion * 100:.2f}%,需优化算法在{str_unfunc_type}上的表现;' if not unfunc_type_list: str_func_type = string_concatenate(func_type_list) func_description2 = f'算法在{str_func_type}功能上的表现满足设计指标要求。' else: str_unfunc_type = string_concatenate(unfunc_type_list) func_description2 = f"算法在{str_unfunc_type}功能上需要重点优化。" report_dict["description1"] = replace_key_with_value(func_description1, type_name_dict) report_dict["description2"] = replace_key_with_value(func_description2, type_name_dict) # report_dict = { # "name": "功能性", # "weight": f"{weight_dimension * 100}%", # "score": score_func, # "level": grade_func, # "scoreList": score_func_list, # "levelDistribution": grade_func_distribution, # "description1": func_description1, # "description2": func_description2, # "noObjectCar": False, # "functionACC": follow_dict, # "functionLKA": lane_dict # } return report_dict def comp_multi_case_statistic(comp_cases_dict, config): """ Args: comp_cases_dict: config: Returns: """ # report_dict = { # "name": "合规性", # "weight": f"{weight_comp * 100}%", # "score": score_comp, # "level": grade_comp, # "scoreList": score_comp_list, # "levelDistribution": grade_comp_distribution, # "description1": comp_description1, # "description2": comp_description2, # "deductPoints": [deduct_1_dict, deduct_3_dict, deduct_6_dict, deduct_9_dict, deduct_12_dict] # } df_comp_cases = pd.DataFrame(comp_cases_dict).T.dropna(how='all') # common data bulitin_metric_list = config.builtinMetricList # config infos dimension_config = config.config["compliance"] metric_list = dimension_config['metric'] type_list = dimension_config['type'] type_name_dict = dimension_config['typeName'] name_dict = dimension_config['name'] unit_dict = dimension_config['unit'] metric_dict = dimension_config['typeMetricDict'] weight_comp = float(dimension_config['weightDimension']) report_dict = { "name": "合规性", "weight": f"{weight_comp * 100}%", } # calculate score_comp and grade_comp score_comp_list = df_comp_cases['score'].values.tolist() score_comp = cal_score_from_80(score_comp_list) grade_comp = score_grade(score_comp) report_dict["score"] = score_comp report_dict["level"] = grade_comp report_dict["scoreList"] = score_comp_list # calculate grade_distribution grade_comp_list = df_comp_cases['level'].values.tolist() grade_comp_distribution = grade_statistic(grade_comp_list) report_dict["levelDistribution"] = grade_comp_distribution deduct_cases_dict = df_comp_cases['details'].to_dict() df_deduct_cases = pd.DataFrame(deduct_cases_dict).T.dropna(how='all') score_type_dict = {} type_details_dict = {} # report_dict["deductPoints"] = [] for type in type_list: type_dict = { "name": type_name_dict[type], } type_cases_dict = df_deduct_cases[type].to_dict() df_type_cases = pd.DataFrame(type_cases_dict).T.dropna(how='all') # calculate score_type and grade_type score_type_list = df_type_cases['score'].values.tolist() score_type = round(np.mean(score_type_list), 2) grade_type = score_grade(score_type) type_dict["score"] = score_type type_dict["level"] = grade_type score_type_dict[type] = score_type dfs = df_type_cases['indexes'].apply(lambda x: pd.DataFrame(x).T) df_type_indexes = pd.concat(dfs.tolist(), ignore_index=True) type_dict_indexes = {} for metric in metric_dict[type]: df_metric_indexes = df_type_indexes[df_type_indexes['name'] == f"{name_dict[metric]}"] metric_times_list = df_metric_indexes['times'].astype(int).values.tolist() metric_times_count = sum(metric_times_list) type_dict_indexes[metric] = { "name": f"{name_dict[metric]}", "times": metric_times_count } type_dict["indexes"] = type_dict_indexes # report_dict[type] = type_dict type_details_dict[type] = type_dict report_dict["details"] = type_details_dict # get compliance description comp_list_count = len(score_comp_list) below_60_count = len([num for num in score_comp_list if num < 60]) below_60_proportion = below_60_count / comp_list_count if grade_comp == '优秀': comp_description1 = '车辆在本轮测试中无违反交通法规行为;' else: comp_description1 = f'未满足设计指标要求。其中有{below_60_count}个用例得分低于60分,占比为{below_60_proportion * 100:.2f}%,需优化算法在合规性上的表现;' comp_description2 = f'共有{comp_list_count}个用例,其中{below_60_count}个用例出现违规行为。' report_dict["description1"] = comp_description1 report_dict["description2"] = comp_description2 # report_dict = { # "name": "合规性", # "weight": f"{weight_comp * 100}%", # "score": score_comp, # "level": grade_comp, # "scoreList": score_comp_list, # "levelDistribution": grade_comp_distribution, # "description1": comp_description1, # "description2": comp_description2, # "deductPoints": [deduct_1_dict, deduct_3_dict, deduct_6_dict, deduct_9_dict, deduct_12_dict] # } return report_dict def comf_multi_case_statistic(comf_cases_dict, config): """ Args: comf_cases_dict: config: Returns: """ # report_dict = { # "name": "舒适性", # "weight": f"{weight_comfort * 100}%", # "score": score_comf, # "level": grade_comf, # "scoreList": score_comf_list, # "levelDistribution": grade_comf_distribution, # "description1": comf_description1, # "description2": comf_description2, # "description3": comf_description3, # "description4": comf_description4, # "indexes": [], # "zigzagData": zigzag_score_list, # "shakeData": shake_score_list, # "cadenceData": cadence_score_list, # "slamBrakeData": slam_brake_score_list, # "slamAccelData": slam_accel_score_list # } df_comf_cases = pd.DataFrame(comf_cases_dict).T.dropna(how='all') # common data bulitin_metric_list = config.builtinMetricList # config infos dimension_config = config.config["comfort"] metric_list = dimension_config['metric'] type_list = dimension_config['type'] type_name_dict = dimension_config['typeName'] name_dict = dimension_config['name'] unit_dict = dimension_config['unit'] metric_dict = dimension_config['typeMetricDict'] # # custom metric data customMetricParam = dimension_config['customMetricParam'] # custom_data = custom_data # custom_param_dict = custom_metric_param_parser(customMetricParam) weight_dimension = float(dimension_config['weightDimension']) # optimal_dict = dimension_config['optimal'] # kind_dict = dimension_config['kind'] # multiple_dict = dimension_config['multiple'] report_dict = { "name": "舒适性", "weight": f"{weight_dimension * 100}%", } # calculate score_comf and grade_comf score_comf_list = df_comf_cases['score'].values.tolist() score_comf = cal_score_from_80(score_comf_list) grade_comf = score_grade(score_comf) report_dict["score"] = score_comf report_dict["level"] = grade_comf report_dict["scoreList"] = score_comf_list # calculate grade_distribution grade_comf_list = df_comf_cases['level'].values.tolist() grade_comf_distribution = grade_statistic(grade_comf_list) report_dict["levelDistribution"] = grade_comf_distribution score_type_dict = {} value_list_dict = {} bad_count_dict = {} good_rate_dict = {} bad_rate_dict = {} type_details_dict = {} for type in type_list: type_dict = { "name": type_name_dict[type], } builtin_graph_dict = {} custom_graph_dict = {} df_comf_cases[type] = df_comf_cases["details"].apply(lambda x: x[type] if type in x.keys() else None) df_comf_cases1 = df_comf_cases.dropna(subset=[type]) type_cases_dict = df_comf_cases1[type].to_dict() df_type_cases = pd.DataFrame(type_cases_dict).T.dropna(how='all') # calculate score_type and grade_type score_type_list = df_type_cases['score'].values.tolist() score_type = round(np.mean(score_type_list), 2) grade_type = score_grade(score_type) type_dict["score"] = score_type type_dict["level"] = grade_type score_type_dict[type] = score_type # calculate grade_distribution grade_type_list = df_type_cases['level'].values.tolist() grade_type_distribution = grade_statistic(grade_type_list) type_dict["gradeDistribution"] = grade_type_distribution dfs = df_type_cases['indexes'].apply(lambda x: pd.DataFrame(x).T) df_type_indexes = pd.concat(dfs.tolist(), ignore_index=True) # functionACC description type_metric1_list = [] # good type_metric2_list = [] # not good type_metric3_list = [] # bad # indexes type_dict_indexes = {} for metric in metric_dict[type]: df_metric_indexes = df_type_indexes[df_type_indexes['name'] == f"{name_dict[metric]}"] metric_value_list = df_metric_indexes['score'].astype(float).values.tolist() avg_metric_value = round(np.mean(metric_value_list), 2) if metric_value_list else "-" max_metric_value = max(metric_value_list) if metric_value_list else "-" min_metric_value = min(metric_value_list) if metric_value_list else "-" metric_bad_count = len([x for x in metric_value_list if x < 80]) metric_bad_rate = round(metric_bad_count / len(metric_value_list) * 100, 2) if metric_value_list else 0 metric_good_rate = round(100 - metric_bad_rate, 2) # metric_number_list = df_metric_indexes['numberReal'].astype(int).values.tolist() metric_number_list = df_metric_indexes['numberReal'].astype(float).values.tolist() metric_duration_list = df_metric_indexes['durationReal'].astype(float).values.tolist() metric_strength_list = df_metric_indexes['strengthReal'].astype(float).values.tolist() type_metric1_list.append(metric) if avg_metric_value > 80 else type_metric2_list.append( metric) if min_metric_value < 80 else type_metric3_list.append(metric) type_dict_indexes[metric] = { "name": f"{name_dict[metric]}", "avgScore": avg_metric_value, "maxScore": max_metric_value, "minScore": min_metric_value, "avgNumber": f"{np.mean(metric_number_list):.2f}", "avgDuration": f"{np.mean(metric_duration_list):.2f}", "avgStrength": f"{np.mean(metric_strength_list):.2f}" } value_list_dict[metric] = metric_value_list bad_count_dict[metric] = metric_bad_count good_rate_dict[metric] = metric_good_rate bad_rate_dict[metric] = metric_bad_rate type_dict["indexes"] = type_dict_indexes for metric in metric_dict[type]: metric_data = { "name": f"{name_dict[metric]}", "data": value_list_dict[metric], # "markLine": [optimal_dict[metric]] } if metric in bulitin_metric_list: # metric_data["markLine"] = [optimal_dict[metric]] builtin_graph_dict[metric] = metric_data else: # metric_data["markLine"] = [custom_param_dict[metric]['optimal'][0]] custom_graph_dict[metric] = metric_data type_dict["builtin"] = builtin_graph_dict type_dict["custom"] = custom_graph_dict # description str_type_metric1 = '' str_type_metric2 = '' str_type_metric3 = '' if len(type_metric1_list) == len(metric_list): str_metric = string_concatenate(metric_list) type_description1 = f"{str_metric}指标最低分均超过设计指标要求,算法在{len(score_comf_list)}个用例中均表现良好。" elif len(type_metric3_list) == len(metric_list): str_metric = string_concatenate(metric_list) type_description1 = f"{str_metric}指标平均分低于设计指标要求,算法整体表现不佳,需要优化算法在这些指标上的表现;" else: if type_metric1_list: str_type1 = string_concatenate(type_metric1_list) str_type_metric1 += f"{str_type1}指标最低分超过设计指标要求,算法在{len(score_comf_list)}个用例中均表现良好;" for metric in type_metric2_list: str_type_metric2 += f"{name_dict[metric]}指标平均分超过设计指标要求,但是算法存在{bad_count_dict[metric]}个表现不佳用例,需要改进算法在这些用例中的表现;" if type_metric3_list: str_type3 = string_concatenate(type_metric3_list) str_type_metric3 += f"{str_type3}指标平均分低于设计指标要求,算法整体表现不佳,需要优化算法在这些指标上的表现;" type_description1 = '' type_description1 += (str_type_metric1 + '\n') if str_type_metric1 else '' type_description1 += (str_type_metric2 + '\n') if str_type_metric2 else '' type_description1 += str_type_metric3 type_description1 = type_description1[:-1] + "。" type_description2 = "经计算可知,算法" for metric in metric_dict[type]: type_description2 += f"在{metric}指标上表现良好的概率为{good_rate_dict[metric]}%," type_description2 = type_description2[:-1] + "。" type_dict["description1"] = replace_key_with_value(type_description1, name_dict) type_dict["description2"] = replace_key_with_value(type_description2, name_dict) type_details_dict[type] = type_dict report_dict["details"] = type_details_dict # calculate comfort description # str for comfort description1 comf_list = [] uncomf_list = [] for metric in metric_list: uncomf_list.append(metric) if bad_count_dict[metric] > 0 else comf_list.append(metric) # generate comf_description1 comf_list_count = len(score_comf_list) over_80_count = len([num for num in score_comf_list if num >= 80]) over_80_proportion = over_80_count / comf_list_count below_80_count = comf_list_count - over_80_count below_80_proportion = below_80_count / comf_list_count below_60_count = len([num for num in score_comf_list if num < 60]) below_60_proportion = below_60_count / comf_list_count if grade_comf == '优秀': comf_description1 = '乘客在本轮测试中体验舒适;' elif grade_comf == '良好': comf_description1 = f'算法在本轮测试中的表现满足设计指标要求。其中有{over_80_count}个用例得分超过80分,占比为{over_80_proportion * 100:.2f}%;' elif grade_comf == '一般': str_uncomf_metric = string_concatenate(uncomf_list) comf_description1 = f'未满足设计指标要求。其中有{below_80_count}个用例得分低于80分,占比为{below_80_proportion * 100:.2f}%,需优化算法在{str_uncomf_metric}上的表现;' elif grade_comf == '较差': str_uncomf_metric = string_concatenate(uncomf_list) comf_description1 = f'未满足设计指标要求。其中有{below_60_count}个用例得分低于60分,占比为{below_60_proportion * 100:.2f}%,需优化算法在{str_uncomf_metric}上的表现;' # str for comfort description2 control_type = [] if '画龙指标' in uncomf_list or '晃动指标' in uncomf_list: control_type.append('横向') if '顿挫指标' in uncomf_list or '急刹指标' in uncomf_list or '急加速指标' in uncomf_list in uncomf_list: control_type.append('纵向') str_control_type = '和'.join(control_type) if not control_type: comf_description2 = f"算法在舒适性维度上的表现满足设计指标要求" else: comf_description2 = f"算法应该优化对车辆的{str_control_type}控制,优化乘坐体验" report_dict["description1"] = replace_key_with_value(comf_description1, name_dict) report_dict["description2"] = replace_key_with_value(comf_description2, type_name_dict) # report_dict = { # "name": "舒适性", # "weight": f"{weight_comfort * 100}%", # "score": score_comf, # "level": grade_comf, # "scoreList": score_comf_list, # "levelDistribution": grade_comf_distribution, # "description1": comf_description1, # "description2": comf_description2, # "indexes": [], # "zigzagData": zigzag_score_list, # "shakeData": shake_score_list, # "cadenceData": cadence_score_list, # "slamBrakeData": slam_brake_score_list, # "slamAccelData": slam_accel_score_list # } return report_dict def effi_deviation_extra(str): if str[0] == '+': return str[1:].split("%")[0] else: return str.split("%")[0] def effi_multi_case_statistic(effi_cases_dict, config): """ Args: effi_cases_dict: config: Returns: """ # report_dict = { # "name": "高效性", # "weight": f"{weight_efficient * 100}%", # "score": score_effi, # "level": grade_effi, # "scoreList": score_effi_list, # "levelDistribution": grade_effi_distribution, # "description1": effi_description1, # "description2": effi_description2, # "description3": effi_description3, # "indexes": [] # } df_effi_cases = pd.DataFrame(effi_cases_dict).T.dropna(how='all') # common data bulitin_metric_list = config.builtinMetricList # config infos dimension_config = config.config["efficient"] metric_list = dimension_config['metric'] type_list = dimension_config['type'] type_name_dict = dimension_config['typeName'] name_dict = dimension_config['name'] unit_dict = dimension_config['unit'] metric_dict = dimension_config['typeMetricDict'] # # custom metric data customMetricParam = dimension_config['customMetricParam'] # custom_data = custom_data custom_param_dict = custom_metric_param_parser(customMetricParam) weight_efficient = float(dimension_config['weightDimension']) optimal_dict = dimension_config['optimal'] kind_dict = dimension_config['kind'] multiple_dict = dimension_config['multiple'] report_dict = { "name": "高效性", "weight": f"{weight_efficient * 100}%", } # calculate score_effi and grade_effi score_effi_list = df_effi_cases['score'].values.tolist() score_effi = cal_score_from_80(score_effi_list) grade_effi = score_grade(score_effi) report_dict["score"] = score_effi report_dict["level"] = grade_effi report_dict["scoreList"] = score_effi_list # calculate grade_distribution grade_effi_list = df_effi_cases['level'].values.tolist() grade_effi_distribution = grade_statistic(grade_effi_list) report_dict["levelDistribution"] = grade_effi_distribution score_type_dict = {} value_list_dict = {} bad_count_dict = {} good_rate_dict = {} bad_rate_dict = {} type_details_dict = {} for type in type_list: type_dict = { "name": type_name_dict[type], } builtin_graph_dict = {} custom_graph_dict = {} df_effi_cases[type] = df_effi_cases["details"].apply(lambda x: x[type] if type in x.keys() else None) df_effi_cases1 = df_effi_cases.dropna(subset=[type]) type_cases_dict = df_effi_cases1[type].to_dict() df_type_cases = pd.DataFrame(type_cases_dict).T.dropna(how='all') # calculate score_type and grade_type score_type_list = df_type_cases['score'].values.tolist() score_type = round(np.mean(score_type_list), 2) grade_type = score_grade(score_type) type_dict["score"] = score_type type_dict["level"] = grade_type score_type_dict[type] = score_type # calculate grade_distribution grade_type_list = df_type_cases['level'].values.tolist() grade_type_distribution = grade_statistic(grade_type_list) type_dict["gradeDistribution"] = grade_type_distribution # calculate type indexes # type_indexes_dict = df_type_cases['indexes'].explode().tolist() # df_type_indexes = pd.DataFrame(type_indexes_dict) dfs = df_type_cases['indexes'].apply(lambda x: pd.DataFrame(x).T) df_type_indexes = pd.concat(dfs.tolist(), ignore_index=True) # functionACC description type_metric1_list = [] # good type_metric2_list = [] # not good type_metric3_list = [] # bad # indexes type_dict_indexes = {} for metric in metric_dict[type]: df_metric_indexes = df_type_indexes[df_type_indexes['name'] == f"{name_dict[metric]}"] kind_metric = kind_dict[metric] if metric in bulitin_metric_list else custom_param_dict[metric]['kind'][0] optimal_metric = optimal_dict[metric] if metric in bulitin_metric_list else \ custom_param_dict[metric]['optimal'][0] multiple_metric0 = multiple_dict[metric][0] if metric in bulitin_metric_list else \ custom_param_dict[metric]['multiple'][0][0] multiple_metric1 = multiple_dict[metric][1] if metric in bulitin_metric_list else \ custom_param_dict[metric]['multiple'][0][1] if kind_metric == 1: # 越大越好 metric_value_list = df_metric_indexes['value'].astype(float).values.tolist() avg_metric_value = round(np.mean(metric_value_list), 2) if metric_value_list else "-" max_metric_value = max(metric_value_list) if metric_value_list else "-" min_metric_value = max(metric_value_list) if metric_value_list else "-" if not optimal_metric: metric_bad_count = 0 else: metric_bad_count = len([x for x in metric_value_list if x < float(optimal_metric)]) metric_bad_rate = round(metric_bad_count / len(metric_value_list) * 100, 2) if metric_value_list else 0 metric_good_rate = round(100 - metric_bad_rate, 2) type_metric1_list.append(metric) if metric_bad_rate == 0 else type_metric2_list.append( metric) if avg_metric_value > float(optimal_metric) else type_metric3_list.append(metric) elif kind_metric == -1: # 越小越好 metric_value_list = df_metric_indexes['value'].astype(float).values.tolist() avg_metric_value = round(np.mean(metric_value_list), 2) if metric_value_list else "-" max_metric_value = max(metric_value_list) if metric_value_list else "-" min_metric_value = max(metric_value_list) if metric_value_list else "-" if not optimal_metric: metric_bad_count = 0 else: metric_bad_count = len([x for x in metric_value_list if x > float(optimal_metric)]) metric_bad_rate = round(metric_bad_count / len(metric_value_list) * 100, 2) if metric_value_list else 0 metric_good_rate = round(100 - metric_bad_rate, 2) type_metric1_list.append(metric) if metric_bad_rate == 0 else type_metric2_list.append( metric) if avg_metric_value < float(optimal_metric) else type_metric3_list.append(metric) elif kind_metric == 0: metric_value_list = df_metric_indexes['value'].astype(float).values.tolist() avg_metric_value = round(np.mean(metric_value_list), 2) if metric_value_list else "-" max_metric_value = max(metric_value_list) if metric_value_list else "-" min_metric_value = max(metric_value_list) if metric_value_list else "-" if not optimal_metric: metric_bad_count = 0 else: metric_bad_count = len([x for x in metric_value_list if ( x > float(optimal_metric) * multiple_metric1 or x < float( optimal_metric) * multiple_metric0)]) metric_bad_rate = round(metric_bad_count / len(metric_value_list) * 100, 2) if metric_value_list else 0 metric_good_rate = round(100 - metric_bad_rate, 2) if not optimal_metric: type_metric1_list.append(metric) else: type_metric1_list.append(metric) if metric_bad_rate == 0 else type_metric2_list.append(metric) if ( float(optimal_metric) * multiple_metric1 > avg_metric_value > float( optimal_metric) * multiple_metric0) else type_metric3_list.append( metric) type_dict_indexes[metric] = { "name": f"{name_dict[metric]}", "average": avg_metric_value, "max": max_metric_value, "min": min_metric_value, "number": metric_bad_count } if not optimal_metric: type_dict_indexes[metric]["range"] = f"-" else: if kind_metric == -1: type_dict_indexes[metric]["range"] = f"[0, {optimal_metric}]" elif kind_metric == 1: type_dict_indexes[metric]["range"] = f"[{optimal_metric}, inf)" elif kind_metric == 0: type_dict_indexes[metric][ "range"] = f"[{float(optimal_metric) * multiple_metric0}, {float(optimal_metric) * multiple_metric1}]" value_list_dict[metric] = metric_value_list bad_count_dict[metric] = metric_bad_count good_rate_dict[metric] = metric_good_rate bad_rate_dict[metric] = metric_bad_rate type_dict["indexes"] = type_dict_indexes for metric in metric_dict[type]: metric_data = { "name": f"{name_dict[metric]}", "data": value_list_dict[metric], # "markLine": [optimal_dict[metric]] } if metric in bulitin_metric_list: metric_data["markLine"] = [optimal_dict[metric]] builtin_graph_dict[metric] = metric_data else: metric_data["markLine"] = [custom_param_dict[metric]['optimal'][0]] custom_graph_dict[metric] = metric_data type_dict["builtin"] = builtin_graph_dict type_dict["custom"] = custom_graph_dict # description str_type_metric1 = '' str_type_metric2 = '' str_type_metric3 = '' if len(type_metric1_list) == len(metric_list): str_metric = string_concatenate(metric_list) type_description1 = f"{str_metric}指标均表现良好,平均值在合理范围内且不存在不合格用例。" elif len(type_metric3_list) == len(metric_list): str_metric = string_concatenate(metric_list) type_description1 = f"{str_metric}指标平均值在合理范围外,算法在大部分用例下均表现不佳,需重点优化。" else: if type_metric1_list: str_type1 = string_concatenate(type_metric1_list) str_type_metric1 += f"{str_type1}指标表现良好,平均值在合理范围内且不存在不合格用例;" for metric in type_metric2_list: str_type_metric2 += f"{name_dict[metric]}指标表现不佳,存在{bad_count_dict[metric]}个不合格用例,需要改进算法在这些用例中的表现;" if type_metric3_list: str_type3 = string_concatenate(type_metric3_list) str_type_metric3 += f"{str_type3}指标平均值在合理范围外,算法在大部分用例下均表现不佳,需重点优化。" type_description1 = '' type_description1 += (str_type_metric1 + '\n') if str_type_metric1 else '' type_description1 += (str_type_metric2 + '\n') if str_type_metric2 else '' type_description1 += str_type_metric3 type_description1 = type_description1[:-1] + "。" # type_description2 = "经计算可知," # for metric in metric_dict[type]: # type_description2 += f"{metric}指标位于合理区间的占比为{good_rate_dict[metric]}%," # type_description2 = type_description2[:-1] + "。" type_dict["description1"] = replace_key_with_value(type_description1, name_dict) # type_dict["description2"] = replace_key_with_value(type_description2, name_dict) type_details_dict[type] = type_dict report_dict["details"] = type_details_dict # efficient description1 effi_list = [] ineffi_list = [] for metric in metric_list: ineffi_list.append(metric) if bad_count_dict[metric] > 0 else effi_list.append(metric) # generate effi_description1 effi_list_count = len(score_effi_list) over_80_count = len([num for num in score_effi_list if num >= 80]) over_80_proportion = over_80_count / effi_list_count below_80_count = effi_list_count - over_80_count below_80_proportion = below_80_count / effi_list_count below_60_count = len([num for num in score_effi_list if num < 60]) below_60_proportion = below_60_count / effi_list_count effi_description1 = '' if grade_effi == '优秀': effi_description1 = '车辆行驶效率高;' elif grade_effi == '良好': effi_description1 = f'算法在本轮测试中的表现满足设计指标要求。其中有{over_80_count}个用例得分超过80分,占比为{over_80_proportion * 100:.2f}%;' elif grade_effi == '一般': str_ineffi_type = string_concatenate(ineffi_list) effi_description1 = f'未满足设计指标要求。其中有{below_80_count}个用例得分低于80分,占比为{below_80_proportion * 100:.2f}%,需优化算法在{str_ineffi_type}上的表现;' elif grade_effi == '较差': str_ineffi_type = string_concatenate(ineffi_list) effi_description1 = f'未满足设计指标要求。其中有{below_60_count}个用例得分低于60分,占比为{below_60_proportion * 100:.2f}%,需优化算法在{str_ineffi_type}上的表现;' # efficient description2 if not ineffi_list: effi_description2 = "算法在高效性维度上的表现满足设计指标要求。" else: effi_description2 = "算法应该优化车辆的规划控制逻辑,提高算法的通行效率。" report_dict["description1"] = replace_key_with_value(effi_description1, type_name_dict) report_dict["description2"] = replace_key_with_value(effi_description2, type_name_dict) # report_dict = { # "name": "高效性", # "weight": f"{weight_efficient * 100}%", # "score": score_effi, # "level": grade_effi, # "scoreList": score_effi_list, # "levelDistribution": grade_effi_distribution, # "description1": effi_description1, # "description2": effi_description2, # "description3": effi_description3, # "indexes": [ # { # "name": "平均速度(km/h)", # "average": round(np.mean(avg_v_list), 2), # "max": max(avg_v_list), # "min": min(avg_v_list), # "range": "-", # "number": avg_v_bad_count # }, { # "name": "停车次数(次)", # "average": round(np.mean(stop_count_list), 2), # "max": max(stop_count_list), # "min": min(stop_count_list), # "range": f"[0, {optimal_dict['stopCount']}]", # "number": stop_count_bad_count # }, { # "name": "停车平均时长(s)", # "average": round(np.mean(stop_time_list), 2), # "max": max(stop_time_list), # "min": min(stop_time_list), # "range": f"[0, {optimal_dict['stopDuration']}]", # "number": stop_time_bad_count # } # ] # } return report_dict def inverse_mileage_format(mileage): if "公里" in mileage: num = float(mileage[:-2]) * 1000 elif "米" in mileage: num = float(mileage[:-1]) else: raise ValueError("输入的字符串格式不正确") return num def inverse_duration_format(duration): time_parts = duration.split("时") hour = int(time_parts[0]) if len(time_parts) > 1 else 0 time_parts = time_parts[-1].split("分") minute = int(time_parts[0]) if len(time_parts) > 1 else 0 if hour or minute: second = int(time_parts[-1][:-1]) if "秒" in time_parts[-1] else 0 else: second = float(time_parts[-1][:-1]) if "秒" in time_parts[-1] else 0 total_seconds = hour * 3600 + minute * 60 + second return total_seconds def multi_case_statistic(cases_dict, config): """ Args: cases_dict: config: Returns: """ logger = log.get_logger() cases_df = pd.DataFrame(cases_dict).T.dropna(how='all') case_number = cases_df.shape[0] # done_number = 0 # fail_number = 0 # sceneNumber = f"共测试{case_number}个用例,其中成功评价{done_number}个用例,有{fail_number}个用例评价失败" sceneNumber = f"共测试{case_number}个用例" cases_df['testMileage'] = cases_df['testMileage'].apply(inverse_mileage_format) cases_df['testDuration'] = cases_df['testDuration'].apply(inverse_duration_format) mileage_sum = mileage_format(cases_df['testMileage'].sum()) duration_sum = duration_format(cases_df['testDuration'].sum()) score_all = cal_score_from_80(cases_df['algorithmComprehensiveScore'].astype(float).values.tolist()) grade_all = score_grade(score_all) multi_dict = { 'sceneNumber': sceneNumber, 'testMileageSum': mileage_sum, 'testDurationSum': duration_sum, 'algorithmComprehensiveScore': score_all, 'algorithmLevel': grade_all, } dimension_details_dict = {} # metric check dimension_list = config.dimension_list dimension_name_dict = config.dimension_name bad_dimension_list = [] score_dimension_dict = {} for dimension in dimension_list: cases_df[dimension] = cases_df["details"].apply(lambda x: x[dimension] if dimension in x.keys() else None) dimension_cases_dict = cases_df[dimension].to_dict() if dimension in config.builtinDimensionList: dimension_dict = globals()[f"{dimension[:4]}_multi_case_statistic"](dimension_cases_dict, config) else: dimension_dict = custom_multi_case_statistic(dimension_cases_dict, config, dimension) pass score_dimension_dict[dimension] = dimension_dict['score'] if score_dimension_dict[dimension] < 80: bad_dimension_list.append(dimension) dimension_details_dict[dimension] = dimension_dict multi_dict["details"] = dimension_details_dict if not bad_dimension_list: algorithmResultDescription = '综上所述,算法在各个维度的表现俱佳。' else: str_bad_dimension = string_concatenate(bad_dimension_list) algorithmResultDescription = f'综上所述,建议算法优化在{str_bad_dimension}指标上的表现。' multi_dict['algorithmResultDescription'] = replace_key_with_value(algorithmResultDescription, dimension_name_dict) # multi_dict = { # 'sceneNumber': sceneNumber, # 'testMileageSum': mileage_sum, # 'testDurationSum': duration_sum, # 'algorithmComprehensiveScore': score_all, # 'algorithmResultDescription': algorithmResultDescription, # 'algorithmLevel': grade_all, # 'safe': report_dict, # 'function': func_dict, # 'compliance': report_dict, # 'comfort': report_dict, # 'efficient': report_dict # } return multi_dict def multi_report_post(multi_dict, task_name): """ This function generate the multiple cases report based on multi_dict. Arguments: multi_dict: A dict of single case scores and descriptions. task_files: A str of path of files, which says where the generated report is to be stored. Returns: None """ url_json = 'http://36.110.106.156:18081/report/generate' data_json = json.dumps(multi_dict) response = requests.post(url_json, data_json, headers={'Content-Type': 'application/json; charset=utf-8'}) runtime = time.strftime('%Y%m%d%H%M%S', time.localtime()) p = pathlib.Path(rf'C:\Users\cicv\Desktop\{task_name}_{runtime}.pdf') # p = pathlib.Path(rf'..\results\report\{algorithmName}_{runtime}.pdf') p.write_bytes(response.content) def multi_report_generate(task_path, cases_dict_json_path, config_path, resPath): logger = log.get_logger() cases_dict = json2dict(os.path.join(cases_dict_json_path, "cases_dict.json")) config = json2dict(os.path.join(config_path, "config.json")) if len(cases_dict) > 1: try: # multiple cases report statistic and generate multi_dict = multi_case_statistic(cases_dict, config) if resPath: with open(f'{resPath}/report.json', 'w', encoding='utf-8') as f: f.write(json.dumps(multi_dict, ensure_ascii=False)) with open(f'{task_path}/report.json', 'w', encoding='utf-8') as f: f.write(json.dumps(multi_dict, ensure_ascii=False)) except: traceback.print_exc() print(1) logger.error("MULTIPLE_CASES_EVAL: Evaluate multiple cases ERROR!") sys.exit(-1)