|
@@ -1061,131 +1061,6 @@ class FinalDataProcessor:
|
|
traceback.print_exc()
|
|
traceback.print_exc()
|
|
return False
|
|
return False
|
|
|
|
|
|
- # def _merge_optional_data(self, df_object: pd.DataFrame) -> pd.DataFrame:
|
|
|
|
- # """加载和合并可选数据"""
|
|
|
|
- # df_merged = df_object.copy()
|
|
|
|
-
|
|
|
|
- # # 检查并删除重复列的函数
|
|
|
|
- # def clean_duplicate_columns(df):
|
|
|
|
- # # 查找带有 _x 或 _y 后缀的列
|
|
|
|
- # duplicate_cols = []
|
|
|
|
- # base_cols = {}
|
|
|
|
-
|
|
|
|
- # for col in df.columns:
|
|
|
|
- # if col.endswith('_x') or col.endswith('_y'):
|
|
|
|
- # base_name = col[:-2] # 去掉后缀
|
|
|
|
- # if base_name not in base_cols:
|
|
|
|
- # base_cols[base_name] = []
|
|
|
|
- # base_cols[base_name].append(col)
|
|
|
|
-
|
|
|
|
- # # 对于每组重复列,检查数据是否相同,如果相同则只保留一个
|
|
|
|
- # for base_name, cols in base_cols.items():
|
|
|
|
- # if len(cols) > 1:
|
|
|
|
- # # 检查这些列的数据是否相同
|
|
|
|
- # is_identical = True
|
|
|
|
- # first_col = cols[0]
|
|
|
|
- # for col in cols[1:]:
|
|
|
|
- # if not df[first_col].equals(df[col]):
|
|
|
|
- # is_identical = False
|
|
|
|
- # break
|
|
|
|
-
|
|
|
|
- # if is_identical:
|
|
|
|
- # # 数据相同,保留第一列并重命名为基本名称
|
|
|
|
- # df = df.rename(columns={first_col: base_name})
|
|
|
|
- # # 删除其他重复列
|
|
|
|
- # for col in cols[1:]:
|
|
|
|
- # duplicate_cols.append(col)
|
|
|
|
- # print(f"列 {cols} 数据相同,保留为 {base_name}")
|
|
|
|
- # else:
|
|
|
|
- # print(f"列 {cols} 数据不同,保留所有列")
|
|
|
|
-
|
|
|
|
- # # 删除重复列
|
|
|
|
- # if duplicate_cols:
|
|
|
|
- # df = df.drop(columns=duplicate_cols)
|
|
|
|
- # print(f"删除了重复列: {duplicate_cols}")
|
|
|
|
-
|
|
|
|
- # return df
|
|
|
|
-
|
|
|
|
- # # --- 合并 EgoMap ---
|
|
|
|
- # egomap_path = self.output_dir / OUTPUT_CSV_EGOMAP
|
|
|
|
- # if egomap_path.exists() and egomap_path.stat().st_size > 0:
|
|
|
|
- # try:
|
|
|
|
- # df_ego = pd.read_csv(egomap_path, dtype={"simTime": float})
|
|
|
|
- # # 删除 simFrame 列,因为使用主数据的 simFrame
|
|
|
|
- # if 'simFrame' in df_ego.columns:
|
|
|
|
- # df_ego = df_ego.drop(columns=['simFrame'])
|
|
|
|
-
|
|
|
|
- # # 按时间和ID排序
|
|
|
|
- # df_ego.sort_values(['simTime', 'playerId'], inplace=True)
|
|
|
|
- # df_merged.sort_values(['simTime', 'playerId'], inplace=True)
|
|
|
|
-
|
|
|
|
- # # 使用 merge_asof 进行就近合并,不包括 simFrame
|
|
|
|
- # df_merged = pd.merge_asof(
|
|
|
|
- # df_merged,
|
|
|
|
- # df_ego,
|
|
|
|
- # on='simTime',
|
|
|
|
- # by='playerId',
|
|
|
|
- # direction='nearest',
|
|
|
|
- # tolerance=0.01 # 10ms tolerance
|
|
|
|
- # )
|
|
|
|
- # print("EgoMap data merged.")
|
|
|
|
- # except Exception as e:
|
|
|
|
- # print(f"Warning: Could not merge EgoMap data from {egomap_path}: {e}")
|
|
|
|
-
|
|
|
|
- # # --- Merge Function ---
|
|
|
|
- # function_path = self.output_dir / OUTPUT_CSV_FUNCTION
|
|
|
|
- # if function_path.exists() and function_path.stat().st_size > 0:
|
|
|
|
- # try:
|
|
|
|
- # df_function = pd.read_csv(function_path, dtype={"timestamp": float}, low_memory=False).drop_duplicates()
|
|
|
|
- # # 删除 simFrame 列
|
|
|
|
- # if 'simFrame' in df_function.columns:
|
|
|
|
- # df_function = df_function.drop(columns=['simFrame'])
|
|
|
|
-
|
|
|
|
- # if 'simTime' in df_function.columns:
|
|
|
|
- # df_function['simTime'] = df_function['simTime'].round(2)
|
|
|
|
- # df_function['time'] = df_function['simTime'].round(2).astype(float)
|
|
|
|
- # df_merged['time'] = df_merged['simTime'].round(2).astype(float)
|
|
|
|
-
|
|
|
|
- # common_cols = list(set(df_merged.columns) & set(df_function.columns) - {'time'})
|
|
|
|
- # df_function.drop(columns=common_cols, inplace=True, errors='ignore')
|
|
|
|
-
|
|
|
|
- # df_merged = pd.merge(df_merged, df_function, on=["time"], how="left")
|
|
|
|
- # df_merged.drop(columns=['time'], inplace=True)
|
|
|
|
- # print("Function data merged.")
|
|
|
|
- # else:
|
|
|
|
- # print("Warning: 'simTime' column not found in Function.csv. Cannot merge.")
|
|
|
|
- # except Exception as e:
|
|
|
|
- # print(f"Warning: Could not merge Function data from {function_path}: {e}")
|
|
|
|
- # else:
|
|
|
|
- # print("Function data not found or empty, skipping merge.")
|
|
|
|
-
|
|
|
|
- # # --- Merge OBU ---
|
|
|
|
- # obu_path = self.output_dir / OUTPUT_CSV_OBU
|
|
|
|
- # if obu_path.exists() and obu_path.stat().st_size > 0:
|
|
|
|
- # try:
|
|
|
|
- # df_obu = pd.read_csv(obu_path, dtype={"simTime": float}, low_memory=False).drop_duplicates()
|
|
|
|
- # # 删除 simFrame 列
|
|
|
|
- # if 'simFrame' in df_obu.columns:
|
|
|
|
- # df_obu = df_obu.drop(columns=['simFrame'])
|
|
|
|
-
|
|
|
|
- # df_obu['time'] = df_obu['simTime'].round(2).astype(float)
|
|
|
|
- # df_merged['time'] = df_merged['simTime'].round(2).astype(float)
|
|
|
|
-
|
|
|
|
- # common_cols = list(set(df_merged.columns) & set(df_obu.columns) - {'time'})
|
|
|
|
- # df_obu.drop(columns=common_cols, inplace=True, errors='ignore')
|
|
|
|
-
|
|
|
|
- # df_merged = pd.merge(df_merged, df_obu, on=["time"], how="left")
|
|
|
|
- # df_merged.drop(columns=['time'], inplace=True)
|
|
|
|
- # print("OBU data merged.")
|
|
|
|
- # except Exception as e:
|
|
|
|
- # print(f"Warning: Could not merge OBU data from {obu_path}: {e}")
|
|
|
|
- # else:
|
|
|
|
- # print("OBU data not found or empty, skipping merge.")
|
|
|
|
-
|
|
|
|
- # # 在所有合并完成后,清理重复列
|
|
|
|
- # df_merged = clean_duplicate_columns(df_merged)
|
|
|
|
-
|
|
|
|
- # return df_merged
|
|
|
|
def _merge_optional_data(self, df_object: pd.DataFrame) -> pd.DataFrame:
|
|
def _merge_optional_data(self, df_object: pd.DataFrame) -> pd.DataFrame:
|
|
"""加载和合并可选数据"""
|
|
"""加载和合并可选数据"""
|
|
df_merged = df_object.copy()
|
|
df_merged = df_object.copy()
|