6. Gemini相机+yoloseg+foundationpose环境搭建及应用

张

张建站

2026/5/28 19:04:25

10分钟阅读

6. Gemini相机+yoloseg+foundationpose环境搭建及应用

内容主要是获取目标位姿步骤如下1.获取相机的数据然后通过相机内参将像素坐标转换为相机坐标2.yolov8的分割模型获取抓取物体的烟码3.把深度图的深度值和分割的彩图值结合构成3维数据4.foundationpose对3维数据找位姿。1.环境搭建项目用conda搭建环境。系统ubuntu22.04 ROS2 cuda12.4相机环境搭建参考https://blog.csdn.net/weixin_71719718/article/details/160549145?spm1011.2124.3001.6209。一、基础环境说明已安装ROS2 Humble、Miniconda显卡RTX 4060 Laptop系统 CUDA11.8nvcc核心原则所有 CUDA 相关依赖必须统一为 11.8 版本二、Conda 环境创建与基础配置# 1. 创建conda环境Python3.10 conda create -n ros2_tensorrt python3.10 -y conda activate ros2_tensorrt # 2. 基础pip升级 pip install --upgrade pip setuptools wheel ninja三、核心 CUDA/PyTorch 安装关键版本# 卸载冲突包 pip uninstall -y torch torchvision torchaudio triton # 安装指定版本CUDA12.4 匹配系统 python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 # 验证必须输出 12.4 python -c import torch; print(torch.version.cuda, torch.cuda.is_available())四、NumPy 降级修复兼容性问题pip install numpy1.24.4五、一键安装 YOLO 分割环境numpy1.24.0 opencv-python open3d pyyaml scikit-learn1.2.2 ultralytics8.2.100 numpy1.24.0分割代码如下import rclpy from rclpy.node import Node from sensor_msgs.msg import Image, CameraInfo import cv2 import numpy as np from ultralytics import YOLO def imgmsg_to_cv2(img_msg): # 修复这里np.16 → np.uint16 if img_msg.encoding 16UC1: return np.frombuffer(img_msg.data, dtypenp.uint16).reshape(img_msg.height, img_msg.width) img np.frombuffer(img_msg.data, dtypenp.uint8).reshape(img_msg.height, img_msg.width, -1) return img[..., ::-1] def save_point_cloud_to_ply(points_3d, colors, filenamephone_only_cloud.ply): with open(filename, w) as f: f.write(ply\n) f.write(format ascii 1.0\n) f.write(felement vertex {len(points_3d)}\n) f.write(property float x\n) f.write(property float y\n) f.write(property float z\n) f.write(property uchar red\n) f.write(property uchar green\n) f.write(property uchar blue\n) f.write(end_header\n) for (x, y, z), (r, g, b) in zip(points_3d, colors): f.write(f{x:.6f} {y:.6f} {z:.6f} {int(r)} {int(g)} {int(b)}\n) # 颜色分割1中间均值法最强去桌面 def color_segment_center_mean(points_3d, point_colors, crop_ratio0.25, thresh18): if len(points_3d) 0: return np.array([]), np.array([]) pts np.array(points_3d) cols np.array(point_colors, dtypenp.float32) x_min, x_max pts[:,0].min(), pts[:,0].max() y_min, y_max pts[:,1].min(), pts[:,1].max() dx (x_max - x_min) * crop_ratio dy (y_max - y_min) * crop_ratio center_mask (pts[:,0] x_mindx) (pts[:,0] x_max-dx) \ (pts[:,1] y_mindy) (pts[:,1] y_max-dy) if np.sum(center_mask) 10: return pts, cols center_rgb np.mean(cols[center_mask], axis0) diff np.linalg.norm(cols - center_rgb, axis1) keep diff thresh return pts[keep], cols[keep].astype(np.uint8) # 颜色分割2边缘差异法 def color_segment_edge_diff(points_3d, point_colors, edge_ratio0.2, thresh12): if len(points_3d) 0: return np.array([]), np.array([]) pts np.array(points_3d) cols np.array(point_colors, dtypenp.float32) x_min, x_max pts[:,0].min(), pts[:,0].max() y_min, y_max pts[:,1].min(), pts[:,1].max() dx (x_max - x_min) * edge_ratio dy (y_max - y_min) * edge_ratio edge_mask (pts[:,0] x_mindx) | (pts[:,0] x_max-dx) | \ (pts[:,1] y_mindy) | (pts[:,1] y_max-dy) if np.sum(edge_mask) 10: return pts, cols mean_rgb np.mean(cols, axis0) diff np.linalg.norm(cols - mean_rgb, axis1) keep diff thresh return pts[keep], cols[keep].astype(np.uint8) class CameraAlignmentNode(Node): def __init__(self): super().__init__(camera_phone_segment) self.color_sub self.create_subscription(Image, /camera/color/image_raw, self.color_callback, 10) self.depth_sub self.create_subscription(Image, /camera/depth/image_raw, self.depth_callback, 10) self.info_sub self.create_subscription(CameraInfo, /camera/color/camera_info, self.info_cb, 10) self.color None self.depth None self.K None self.model YOLO(best_v3.pt) def info_cb(self, msg): self.K np.array(msg.k).reshape(3,3) self.destroy_subscription(self.info_sub) def color_callback(self, msg): self.color imgmsg_to_cv2(msg) def depth_callback(self, msg): self.depth imgmsg_to_cv2(msg) if self.color is not None and self.K is not None: self.process() def process(self): h, w self.color.shape[:2] depth_aligned cv2.resize(self.depth, (w, h), interpolationcv2.INTER_NEAREST) draw_img self.color.copy() # ---------------------- 1. YOLO 分割 ---------------------- res self.model(self.color, conf0.6, verboseFalse) mask np.zeros((h,w), dtypenp.uint8) if res[0].masks is not None: for m in res[0].masks.data: m cv2.resize(m.cpu().numpy(), (w,h)) mask np.clip(mask m*255, 0,255).astype(np.uint8) # ---------------------- 2. 立刻补洞实心无洞 ---------------------- kernel_fill np.ones((7,7), np.uint8) mask cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel_fill, iterations2) # ---------------------- 3. 轻微膨胀不包含桌面 ---------------------- kernel np.ones((2,2), np.uint8) mask cv2.dilate(mask, kernel, iterations1) # ---------------------- 4. 只保留最大连通域只留手机 ---------------------- num_labels, labels_im cv2.connectedComponents(mask) if num_labels 1: max_component 1 max_count 0 for label in range(1, num_labels): count np.sum(labels_im label) if count max_count: max_count count max_component label mask (labels_im max_component).astype(np.uint8) * 255 # ---------------------- 5. 深度过滤彻底剔除桌面 ---------------------- final_mask mask.copy() depth_mask (depth_aligned 100) (depth_aligned 3000) final_mask[~depth_mask] 0 # ---------------------- 可视化 ---------------------- color_mask np.zeros_like(draw_img) color_mask[final_mask0] [255,0,255] draw_img cv2.addWeighted(draw_img, 1, color_mask, 0.4, 0) cv2.imshow(result, draw_img) cv2.imshow(mask, final_mask) # ---------------------- 生成点云 ---------------------- yy, xx np.where(final_mask 0) fx, fy, cx, cy self.K[0,0], self.K[1,1], self.K[0,2], self.K[1,2] points_3d [] point_colors [] for v, u in zip(yy, xx): z depth_aligned[v, u] x (u - cx) * z / fx y (v - cy) * z / fy r, g, b self.color[v, u] points_3d.append([x,y,z]) point_colors.append([r,g,b]) if len(points_3d) 50: cv2.waitKey(1) return # ---------------------- 二次颜色分割纯净手机 ---------------------- pts, cols color_segment_center_mean(points_3d, point_colors, thresh18) # pts, cols color_segment_edge_diff(points_3d, point_colors, thresh12) # ---------------------- 保存 ---------------------- if len(pts) 50: w np.max(pts[:,0]) - np.min(pts[:,0]) h np.max(pts[:,1]) - np.min(pts[:,1]) d np.max(pts[:,2]) - np.min(pts[:,2]) print(f手机尺寸{w:.3f}m {h:.3f}m {d:.3f}m) save_point_cloud_to_ply(pts, cols) cv2.waitKey(1) def main(argsNone): rclpy.init(argsargs) node CameraAlignmentNode() try: rclpy.spin(node) except KeyboardInterrupt: pass finally: cv2.destroyAllWindows() node.destroy_node() rclpy.shutdown() if __name__ __main__: main()2. foundationpose环境搭建一、克隆官方仓库用国内镜像克隆git clone https://gitcode.com/gh_mirrors/fo/FoundationPose.git mv FoundationPose ~/ros2_ws/二、安装FoundationPose 的依赖cd ~/ros2_ws/FoundationPose pip install -r requirements.txt三、安装PyTorch3Dcd ~/ros2_ws git clone https://github.com/facebookresearch/pytorch3d.git cd pytorch3d # 环境变量强制CUDA11.8 4线程编译 export CUDA_HOME/usr/local/cuda-11.8 export FORCE_CUDA1 export MAX_JOBS4 # 编译安装 pip install . --no-build-isolation # 验证 python -c import pytorch3d; print(✅ pytorch3d 安装成功)四、安装nvdiffrast打开这个链接浏览器会自动开始下载 nvdiffrast 压缩包 https://github.com/NVlabs/nvdiffrast/archive/refs/heads/master.zip回到终端# 1. 把下载好的 zip 包挪到当前目录你下载的文件一般在 ~/下载/ mv ~/下载/nvdiffrast-master.zip . # 2. 解压 unzip nvdiffrast-master.zip # 3. 把文件夹重命名成项目需要的名字 nvdiffrast mv nvdiffrast-master nvdiffrast切到nvdiffrast-main文件夹里cd ~/ros2_ws/FoundationPose/nvdiffrast-main ls修改setup.py文件成下面# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. # # NVIDIA CORPORATION and its licensors retain all intellectual property # and proprietary rights in and to this software, related documentation # and any modifications thereto. Any use, reproduction, disclosure or # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. import setuptools import os import torch from torch.utils.cpp_extension import BuildExtension, CUDAExtension # Print an error message if theres no PyTorch installed. #try: #from torch.utils.cpp_extension import BuildExtension, CUDAExtension #except ImportError: # This happens if the user runs pip install with default build isolation # OR if they simply dont have torch installed at all. #print(\n\n * * 70) #print(ERROR! Cannot compile nvdiffrast CUDA extension. Please ensure that:\n) #print(1. You have PyTorch installed) #print(2. You run pip install with --no-build-isolation flag) #print(* * 70 \n\n) #exit(1) setuptools.setup( ext_modules[ CUDAExtension( _nvdiffrast_c, sources[ csrc/common/antialias.cu, csrc/common/common.cpp, csrc/common/cudaraster/impl/Buffer.cpp, csrc/common/cudaraster/impl/CudaRaster.cpp, csrc/common/cudaraster/impl/RasterImpl.cpp, csrc/common/cudaraster/impl/RasterImpl_kernel.cu, csrc/common/interpolate.cu, csrc/common/rasterize.cu, csrc/common/texture.cpp, csrc/common/texture_kernel.cu, csrc/torch/torch_antialias.cpp, csrc/torch/torch_bindings.cpp, csrc/torch/torch_interpolate.cpp, csrc/torch/torch_rasterize.cpp, csrc/torch/torch_texture.cpp, ], extra_compile_args{ cxx: [-DNVDR_TORCH] # Disable warnings in torch headers. ([/wd4067, /wd4624, /wd4996] if os.name nt else []), nvcc: [-DNVDR_TORCH, -lineinfo], }, ) ], cmdclass{build_ext: BuildExtension}, )降级setuptoolsconda activate ros2_tensorrt # 降级到 81.x最后一个带 pkg_resources 的版本 pip install setuptools82 --force-reinstall 验证 pkg_resources python -c import pkg_resources; print(✅ pkg_resources OK)回去编译nvdiffrastcd ~/ros2_ws/FoundationPose/nvdiffrast-main rm -rf build dist *.egg-info python setup.py build_ext --inplace pip install . --no-build-isolation #最后测试 python -c import nvdiffrast; print(✅ nvdiffrast 安装成功)最后在 ~/ros2_ws/FoundationPose 下验证python -c from estimater import FoundationPose; print(✅ FoundationPose 导入成功可以跑6D位姿了)会看到✅ FoundationPose 导入成功可以跑6D位姿了3.编译编译如下cd ~/ros2_ws colcon build --packages-select camera_info_get source install/setup.bash ros2 run camera_info_get get_camera_info或者直接运行conda activate ros2_tensorrt cd ~/ros2_ws source install/setup.bash python -m camera_info_get.get_camera_info

导师认可的AI论文写作工具星级排名（2026 权威发布）

基于学术适配性、写作效率、功能完整性及用户反馈，以下是2026年主流AI论文写作工具的权威测评排名，按综合推荐指数由高至低排列，并详细标注各工具的核心优势与适用场景。🏆 第一梯队：全流程学术解决方案（★…...

2026/5/28 19:03:27 阅读更多 →

Windows 10系统优化终极指南：如何用自动化工具彻底清理臃肿软件

Windows 10系统优化终极指南：如何用自动化工具彻底清理臃肿软件【免费下载链接】Windows10Debloater Script to remove Windows 10 bloatware. 项目地址: https://gitcode.com/gh_mirrors/wi/Windows10Debloater Windows10Debloater是一款专为Windows 10设计…...

2026/5/28 18:59:01 阅读更多 →