|
@@ -0,0 +1,202 @@
|
|
|
|
+import pandas as pd
|
|
|
|
+from PIL import Image
|
|
|
|
+import csv
|
|
|
|
+from sklearn.cluster import KMeans
|
|
|
|
+import argparse
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+# def save_pgm_to_csv(pgm_file_path, csv_file_path):
|
|
|
|
+# # 读取 PGM 图片
|
|
|
|
+# with Image.open(pgm_file_path) as img:
|
|
|
|
+# # 获取图片尺寸
|
|
|
|
+# width, height = img.size
|
|
|
|
+#
|
|
|
|
+# # 创建 CSV 文件并写入像素值
|
|
|
|
+# with open(csv_file_path, 'w', newline='') as csvfile:
|
|
|
|
+# csvwriter = csv.writer(csvfile)
|
|
|
|
+# for y in range(height):
|
|
|
|
+# row = []
|
|
|
|
+# for x in range(width):
|
|
|
|
+# pixel = img.getpixel((x, y))[0]
|
|
|
|
+# row.append(pixel)
|
|
|
|
+# csvwriter.writerow(row)
|
|
|
|
def pgm_to_df(pgm_file_path):
    """Read a PGM image and return its pixel values as a DataFrame.

    The resulting frame has one row per image row and one column per image
    column, holding the grayscale value of each pixel.

    Args:
        pgm_file_path: Path to the PGM image file.

    Returns:
        pandas.DataFrame of shape (height, width) with integer pixel values.
    """
    with Image.open(pgm_file_path) as img:
        width, height = img.size

        # Read all pixels while the image is still open: accessing pixel
        # data after the `with` block exits fails on lazily-loaded images.
        data = []
        for y in range(height):
            row = []
            for x in range(width):
                pixel = img.getpixel((x, y))
                # Single-band images (PGM opens as mode "L" or "I") return a
                # scalar; multi-band images return a tuple. Use band 0 either way.
                if isinstance(pixel, tuple):
                    pixel = pixel[0]
                row.append(pixel)
            data.append(row)

    return pd.DataFrame(data)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def filter_rows(group):
    """Thin out points that are closer than 0.2 in pos_x to their predecessor.

    The group is sorted by 'pos_x'; each row's gap to the previous sorted row
    is computed with diff(), and rows with a gap below 0.2 are dropped. The
    first row's gap is filled with 1 so it is always retained.

    Args:
        group: DataFrame with a 'pos_x' column.

    Returns:
        The filtered DataFrame, sorted by 'pos_x'.
    """
    ordered = group.sort_values(by='pos_x')
    gaps = ordered['pos_x'].diff().fillna(1)
    return ordered[gaps >= 0.2]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def calculate_position(row, column, cell_size=0.05):
    """Convert a grid (row, column) index to the metric centre of that cell.

    Args:
        row: Grid row index (image y).
        column: Grid column index (image x).
        cell_size: Width of one grid cell in metres. Defaults to 0.05,
            which reproduces the original hard-coded 5 cm grid.

    Returns:
        Tuple (x, y): metric coordinates of the cell centre
        (index * cell_size + cell_size / 2 on each axis).
    """
    half = cell_size / 2
    x = column * cell_size + half
    y = row * cell_size + half
    return x, y
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def get_centered_positions(dataframe, threshold):
    """Return the metric centres of every grid cell whose value is below threshold.

    Cells are 0.05 m wide; a cell at (row, col) maps to the point
    (col * 0.05 + 0.025, row * 0.05 + 0.025), i.e. the cell centre.

    Args:
        dataframe: DataFrame of grid values (rows = image y, columns = image x).
        threshold: Cells strictly below this value are reported.

    Returns:
        List of (x, y) tuples in row-major cell order.
    """
    below = (dataframe < threshold).values
    rows, cols = below.nonzero()
    return [(c * 0.05 + 0.025, r * 0.05 + 0.025) for r, c in zip(rows, cols)]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def lies_within_range(x, y, centered_positions):
    """Report whether (x, y) falls inside any 0.05 m cell in centered_positions.

    Each entry of centered_positions is a cell centre; the cell extends
    +/- 0.025 around the centre on both axes (bounds inclusive).

    Args:
        x: Point x coordinate.
        y: Point y coordinate.
        centered_positions: Iterable of (cx, cy) cell centres.

    Returns:
        True if the point lies inside at least one cell, else False.
    """
    return any(
        (cx - 0.025) <= x <= (cx + 0.025) and (cy - 0.025) <= y <= (cy + 0.025)
        for cx, cy in centered_positions
    )
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def filter_df_with_positions(input_df, centered_positions):
    """Drop every row whose ('pos_x', 'pos_y') point lies inside a blocked cell.

    Args:
        input_df: DataFrame with 'pos_x' and 'pos_y' columns.
        centered_positions: Iterable of (cx, cy) cell centres, as produced by
            get_centered_positions.

    Returns:
        The rows of input_df for which lies_within_range is False.
    """
    inside = input_df.apply(
        lambda row: lies_within_range(row['pos_x'], row['pos_y'], centered_positions),
        axis=1,
    )
    return input_df[~inside]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def filter_csv_with_pandas(source_df, target_csv_filename, threshold):
    """Remove from a CSV every point that falls on a low-valued map cell.

    Finds all cells of source_df whose value is below threshold, reads the
    target CSV, drops the rows whose ('pos_x', 'pos_y') lie inside any such
    cell, and rewrites the same CSV file in place.

    Args:
        source_df: Grid-valued DataFrame (e.g. from pgm_to_df).
        target_csv_filename: Path of the CSV to filter; it is overwritten.
        threshold: Cells strictly below this value block points.
    """
    blocked_cells = get_centered_positions(source_df, threshold)
    records = pd.read_csv(target_csv_filename)
    kept = filter_df_with_positions(records, blocked_cells)
    # NOTE: overwrites the input file rather than writing a new one.
    kept.to_csv(target_csv_filename, index=False)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+# 使用 groupby 按 'Time' 分组,然后对每个组计算新的 pos_x, pos_y, length, width
|
|
|
|
def calculate_dims(group):
    """Collapse one 'Time' group of points into a single bounding-box row.

    Args:
        group: DataFrame with 'Time', 'pos_x' and 'pos_y' columns; all rows
            are assumed to share the same 'Time' value.

    Returns:
        A one-row DataFrame with columns 'Time', 'pos_x', 'pos_y' (centroid
        of the points), 'length' (x extent) and 'width' (y extent).
    """
    xs = group['pos_x']
    ys = group['pos_y']
    return pd.DataFrame({
        'Time': [group['Time'].iloc[0]],
        'pos_x': [xs.mean()],
        'pos_y': [ys.mean()],
        'length': [xs.max() - xs.min()],
        'width': [ys.max() - ys.min()],
    })
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def calculate_clusters(group, n_clusters=5, random_state=None):
    """Cluster one 'Time' group of points and return one bounding box per cluster.

    Runs KMeans on the ('pos_x', 'pos_y') coordinates and, for each cluster,
    emits the centroid plus the axis-aligned extent padded by one 0.05 m cell.

    Args:
        group: DataFrame with 'Time', 'pos_x' and 'pos_y' columns; all rows
            are assumed to share the same 'Time' value.
        n_clusters: Requested cluster count; capped at the number of points.
        random_state: Seed forwarded to KMeans. Defaults to None (the
            original, non-deterministic behaviour); pass an int to make the
            pipeline reproducible across runs.

    Returns:
        DataFrame with one row per cluster and columns
        'Time', 'pos_x', 'pos_y', 'length', 'width'.
    """
    coordinates = group[['pos_x', 'pos_y']].values

    # KMeans cannot produce more clusters than there are points.
    n_clusters = min(n_clusters, len(coordinates))

    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(coordinates)
    labels = kmeans.labels_

    rectangles = []
    for label in range(n_clusters):
        # Points assigned to this cluster.
        points = coordinates[labels == label]

        # Extent of the cluster, padded by one grid cell (0.05 m) so a
        # single-point cluster still has a non-zero box.
        min_x, max_x = points[:, 0].min(), points[:, 0].max()
        min_y, max_y = points[:, 1].min(), points[:, 1].max()
        length, width = max_x - min_x + 0.05, max_y - min_y + 0.05

        rectangles.append({
            'Time': group['Time'].iloc[0],
            'pos_x': points[:, 0].mean(),
            'pos_y': points[:, 1].mean(),
            'length': length,
            'width': width,
        })

    return pd.DataFrame(rectangles)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def keep_largest_rectangle(group):
    """Return the rectangle with the largest area within one 'Time' group.

    Area is length * width. The returned Series includes an extra 'area'
    entry (matching the previous output schema), but — unlike the previous
    implementation, which wrote an 'area' column into the caller's frame —
    the input DataFrame is no longer mutated in place.

    Args:
        group: DataFrame with 'length' and 'width' columns.

    Returns:
        The row (Series) of group with the maximum area, plus 'area'.
    """
    # assign() builds a copy, leaving the caller's DataFrame untouched.
    with_area = group.assign(area=group['length'] * group['width'])
    return with_area.loc[with_area['area'].idxmax()]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+# 主程序
|
|
|
|
# Main pipeline: read a PGM occupancy map, clean an obstacle CSV against it,
# cluster the remaining points per timestamp, and keep the largest cluster box.
# NOTE(review): the target CSV is rewritten in place several times; each stage
# reads the previous stage's output from disk, so statement order matters.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Process the CSV data.')
    parser.add_argument('pgm_path', type=str, help='Path to the PGM file')
    parser.add_argument('target_csv_filename', type=str, help='Path to the target CSV file')

    args = parser.parse_args()
    # Example inputs kept from development:
    # pgm_path = "/home/hancheng/hc_project/pujin/shiyanshi_newpgm_20240416.pgm"
    # source_csv_filename = '/home/hancheng/hc_project/pujin/pgmRead_demo.csv'
    pgm_df = pgm_to_df(args.pgm_path)
    # target_csv_filename = '/home/hancheng/Downloads/merged_obstacles.csv'
    # Pixel threshold for "free" map cells — presumably tied to the PGM's
    # occupancy encoding; TODO confirm where 89.25 comes from.
    threshold_value = 89.25

    # Load the obstacle CSV and shift coordinates into the map frame.
    # NOTE(review): 20.883 / 17.8439 look like the map origin offset in
    # metres — confirm against the map's metadata.
    posdata_obs = pd.read_csv(args.target_csv_filename)
    posdata_obs['pos_x'] = posdata_obs['pos_x'] - 20.883
    posdata_obs['pos_y'] = posdata_obs['pos_y'] - 17.8439
    # Per (Time, pos_y) group, drop points packed closer than 0.2 in pos_x.
    posdata_obs = posdata_obs.groupby(['Time', 'pos_y'], group_keys=False).apply(filter_rows)
    # Reset the index after groupby/apply.
    posdata_obs.reset_index(drop=True, inplace=True)
    # Persist the thinned data back to the same CSV.
    posdata_obs.to_csv(args.target_csv_filename, index=False)

    # Remove points that fall on map cells darker than the threshold
    # (rewrites the CSV in place again).
    filter_csv_with_pandas(pgm_df, args.target_csv_filename, threshold_value)

    df = pd.read_csv(args.target_csv_filename)
    # Cluster each timestamp's points and build one bounding box per cluster.
    cluster_df = df.groupby('Time', as_index=False).apply(calculate_clusters).reset_index(drop=True)

    # Keep only the largest-area box per timestamp.
    df_largest = cluster_df.groupby('Time').apply(keep_largest_rectangle)

    # Reset the index after groupby/apply.
    df_largest.reset_index(drop=True, inplace=True)

    # Final result overwrites the target CSV.
    df_largest.to_csv(args.target_csv_filename, index=False)
|