本文共 4724 字,大约阅读时间需要 15 分钟。
本文将介绍基于YOLOv9+SAM实现动态目标检测和分割的实践方法,重点提供详细步骤和代码示例。通过结合YOLOv9的高效检测能力和SAM的零样本分割优势,我们将在RF100 Construction-Safety-2数据集上构建自定义目标检测模型,展示其在自动驾驶、医学成像等多个领域的实际应用价值。
YOLOv9(You Only Look Once)是目标检测领域的一款高性能模型,基于可编程梯度信息(PGI)和通用高效层聚合网络(GELAN)设计,显著提升了检测精度和运行效率。YOLOv9在MS COCO数据集上的出色表现证明了其在实时目标检测中的优越性。
YOLOv9的主要特点包括:基于可编程梯度信息(PGI)与通用高效层聚合网络(GELAN)的架构设计,在保持实时推理速度的同时显著提升了检测精度与运行效率。
SAM(Segment Anything Model)是一种革命性的图像分割模型,基于迄今为止最大的Segment Anything 1-Billion(SA-1B)数据集,通过简单的提示驱动实现零样本分割。SAM的核心优势在于:
本文使用Roboflow提供的RF100施工数据集,特别是Construction-Safety-2子集,作为模型的训练和验证数据。RF100数据集旨在建立开源目标检测的标准化基准,强调数据集的通用性和可访问性,为AI研究者和应用开发人员提供了丰富的资源。
# 检测结果提取import cv2# 定义图像路径image_path = '/content/drive/MyDrive/data/image9.jpeg'# 读取图像获取尺寸image = cv2.imread(image_path)image_height, image_width, _ = image.shapedetections_path = '/content/yolov9/runs/detect/exp/labels/image9.txt'bboxes = []class_ids = []conf_scores = []# 从文件中读取检测结果with open(detections_path, 'r') as file: for line in file: components = line.split() class_id = int(components[0]) confidence = float(components[5]) cx, cy, w, h = [float(x) for x in components[1:5]] # 转换为图像坐标系 cx *= image_width cy *= image_height w *= image_width h *= image_height # 转换为边界框坐标 xmin = cx - w / 2 ymin = cy - h / 2 xmax = cx + w / 2 ymax = cy + h / 2 bboxes.append((xmin, ymin, xmax, ymax)) class_ids.append(class_id) conf_scores.append(confidence)# 初始化SAM模型from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictorsam_checkpoint = "/content/yolov9/sam_vit_h_4b8939.pth"model_type = "vit_h"sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)predictor = SamPredictor(sam)# 加载图像进行分割import cv2image = cv2.cvtColor(cv2.imread('/content/drive/MyDrive/data/image9.jpeg'), cv2.COLOR_BGR2RGB)predictor.set_image(image)# 可视化分割结果import matplotlib.patches as patchesfrom matplotlib import pyplot as pltimport numpy as npimport yamlwith open('/content/yolov9/data/coco.yaml', 'r') as file: coco_data = yaml.safe_load(file) class_names = coco_data['names']# 定义颜色映射color_map = {}for class_id in class_ids: color_map[class_id] = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)# 定义辅助函数def show_mask(mask, ax, color): h, w = mask.shape[-2:] mask_image = mask.reshape(h, w, 1) * np.array(color).reshape(1, 1, -1) ax.imshow(mask_image)def show_box(box, label, conf_score, color, ax): x0, y0 = box[0], box[1] w, h = box[2] - box[0], box[3] - box[1] rect = plt.Rectangle((x0, y0), w, h, edgecolor=color, facecolor='none', lw=2) ax.add_patch(rect) label_offset = 10 label_text = f'{label} {conf_score:.2f}' ax.text(x0, y0 - label_offset, label_text, color='black', 
fontsize=10, va='top', ha='left', bbox=dict(facecolor=color, alpha=0.7, edgecolor='none', boxstyle='square,pad=0.4'))plt.figure(figsize=(10, 10))ax = plt.gca()plt.imshow(image)# 展示分割结果for class_id, bbox in zip(class_ids, bboxes): class_name = class_names[class_id] color = color_map[class_id] input_box = np.array(bbox) masks, _, _ = predictor.predict( point_coords=None, point_labels=None, box=input_box, multimask_output=False ) show_mask(masks[0], ax, color=color) show_box(bbox, class_name, conf, color, ax)plt.axis('off')plt.show()# 生成最终图像aggregate_mask = np.zeros(image.shape[:2], dtype=np.uint8)for bbox in bboxes: input_box = np.array(bbox).reshape(1, 4) masks, _, _ = predictor.predict( point_coords=None, point_labels=None, box=input_box, multimask_output=False ) aggregate_mask = np.where(masks[0] > 0.5, 1, aggregate_mask)binary_mask = np.where(aggregate_mask == 1, 1, 0)white_background = np.ones_like(image) * 255new_image = white_background * (1 - binary_mask[..., np.newaxis]) + image * binary_mask[..., np.newaxis]plt.figure(figsize=(10, 10))plt.imshow(new_image.astype(np.uint8))plt.axis('off')plt.show() 通过以上步骤,我们成功实现了基于YOLOv9+SAM的动态目标检测与分割系统。该系统不仅在检测精度和分割粒度上表现优异,还具有强大的通用性和扩展性,适用于多个实际场景。
转载地址:http://resfk.baihongyu.com/