ed_sensor_integration
src/kinect/segmenter.cpp
Go to the documentation of this file.
1 #include "ed/kinect/segmenter.h"
2 
4 #include <geolib/Shape.h>
5 
6 #include <rgbd/image.h>
7 #include <rgbd/view.h>
8 
9 #include <ed/world_model.h>
10 #include <ed/entity.h>
11 
12 #include <ros/console.h>
13 
14 // Clustering
15 #include <queue>
16 #include <ed/convex_hull_calc.h>
17 #include <sys/resource.h>
18 
19 #include <opencv2/ml.hpp>
20 #include <pcl/point_types.h>
21 #include <pcl/point_cloud.h>
22 #include <pcl/segmentation/extract_clusters.h>
23 
24 #include <Eigen/Dense>
25 
27 #include <bmm/bayesian_mixture_model.hpp>
28 
29 // ----------------------------------------------------------------------------------------------------
30 
32  : config_(config)
33 {
34 }
35 
36 // ----------------------------------------------------------------------------------------------------
37 
39 {
40 }
41 
42 // ----------------------------------------------------------------------------------------------------
43 
44 namespace
45 {
46 // Internal constants (tuning thresholds)
47 constexpr std::size_t MIN_FILTERED_POINTS = 10;
48 constexpr double MIN_RETENTION_RATIO = 0.10; // 10%
49 constexpr std::size_t MIN_CLUSTER_POINTS = 100;
50 
51 class DepthRenderer : public geo::RenderResult
52 {
53 
54 public:
55 
56  DepthRenderer(cv::Mat& z_buffer_)
57  : geo::RenderResult(z_buffer_.cols, z_buffer_.rows), z_buffer(z_buffer_)
58  {
59  }
60 
61  void renderPixel(int x, int y, float depth, int /*i_triangle*/)
62  {
63  float& old_depth = z_buffer.at<float>(y, x);
64  if (old_depth == 0 || depth < old_depth)
65  {
66  old_depth = depth;
67  }
68  }
69 
70  cv::Mat& z_buffer;
71 };
72 
73 }
74 
75 // ----------------------------------------------------------------------------------------------------
76 
77 void Segmenter::removeBackground(cv::Mat& depth_image, const ed::WorldModel& world, const geo::DepthCamera& cam,
78  const geo::Pose3D& sensor_pose, double background_padding)
79 {
80  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
81  // Render the world model as seen by the depth sensor
82 
83  cv::Mat depth_model(depth_image.rows, depth_image.cols, CV_32FC1, 0.0);
84 
85  DepthRenderer res(depth_model);
86  for(ed::WorldModel::const_iterator it = world.begin(); it != world.end(); ++it)
87  {
88  const ed::EntityConstPtr& e = *it;
89  if (!e->visual() || !e->has_pose())
90  continue;
91 
93  opt.setMesh(e->visual()->getMesh(), sensor_pose.inverse() * e->pose());
94  cam.render(opt, res);
95  }
96 
97  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
98  // Filter all points that can be associated with the rendered depth image
99 
100  unsigned int size = depth_image.rows * depth_image.cols;
101  for(unsigned int i = 0; i < size; ++i)
102  {
103  float& ds = depth_image.at<float>(i);
104  float dm = depth_model.at<float>(i);
105  if (dm > 0 && ds > 0 && ds > dm - background_padding)
106  ds = 0;
107  }
108 }
109 
110 // ----------------------------------------------------------------------------------------------------
111 
113 {
114 
115 public:
116 
117  MinMaxRenderer(int width, int height) : geo::RenderResult(width, height)
118  {
119  min_buffer = cv::Mat(height, width, CV_32FC1, 0.0);
120  max_buffer = cv::Mat(height, width, CV_32FC1, 0.0);
121  }
122 
123  void renderPixel(int x, int y, float depth, int /*i_triangle*/)
124  {
125  // TODO: now the renderer can only deal with convex meshes, which means
126  // that at each pixel there can only be one minimum and one maximum pixel
127  // There is an easy solution for concave meshes: determine which side
128  // the triangle points (away or to the camera) and test the pixel in the depth
129  // image to be on the correct side. ... etc ...
130 
131  float& d_min = min_buffer.at<float>(y, x);
132  float& d_max = max_buffer.at<float>(y, x);
133 
134  if (d_min == 0 || depth < d_min)
135  d_min = depth;
136 
137  d_max = std::max(d_max, depth);
138  }
139 
140  cv::Mat min_buffer;
141  cv::Mat max_buffer;
142 
143 };
144 
145 // ----------------------------------------------------------------------------------------------------
146 
148  const geo::Pose3D& shape_pose, cv::Mat& filtered_depth_image) const
149 {
150  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
151  // Render shape
152 
153  const cv::Mat& depth_image = image.getDepthImage();
154 
155  rgbd::View view(image, depth_image.cols);
156  const geo::DepthCamera& cam_model = view.getRasterizer();
157 
158  MinMaxRenderer res(depth_image.cols, depth_image.rows);
159 
160  geo::RenderOptions opt;
161  opt.setBackFaceCulling(false);
162  opt.setMesh(shape.getMesh(), shape_pose);
163 
164  cam_model.render(opt, res);
165 
166  filtered_depth_image = cv::Mat(depth_image.rows, depth_image.cols, CV_32FC1, 0.0);
167 
168  for(int i = 0; i < depth_image.cols * depth_image.rows; ++i)
169  {
170  float d = depth_image.at<float>(i);
171  if (d <= 0)
172  continue;
173 
174  float d_min = res.min_buffer.at<float>(i);
175  float d_max = res.max_buffer.at<float>(i);
176 
177  if (d_min > 0 && d_max > 0 && d >= d_min && d <= d_max)
178  filtered_depth_image.at<float>(i) = d;
179  }
180 }
181 
182 // ----------------------------------------------------------------------------------------------------
183 
184 cv::Mat Segmenter::preprocessRGBForSegmentation(const cv::Mat& rgb_image,
185  const cv::Mat& filtered_depth_image) const
186 {
187  cv::Mat masked_rgb = cv::Mat::zeros(rgb_image.size(), rgb_image.type());
188  for (int y = 0; y < rgb_image.rows; ++y) {
189  const float* depth_row = filtered_depth_image.ptr<float>(y); // fast
190  cv::Vec3b* out_row = masked_rgb.ptr<cv::Vec3b>(y);
191  const cv::Vec3b* rgb_row = rgb_image.ptr<cv::Vec3b>(y);
192  for (int x = 0; x < rgb_image.cols; ++x) {
193  if (depth_row[x] > 0.f) {
194  out_row[x] = rgb_row[x];
195  }
196  }
197  }
198  return masked_rgb;
199 }
200 // ----------------------------------------------------------------------------------------------------
201 
202 std::vector<cv::Mat> Segmenter::cluster(const cv::Mat& depth_image, const geo::DepthCamera& cam_model,
203  const geo::Pose3D& sensor_pose, std::vector<EntityUpdate>& clusters, const cv::Mat& rgb_image, bool logging)
204 {
205  int width = depth_image.cols;
206  int height = depth_image.rows;
207  ROS_DEBUG("Cluster with depth image of size %i, %i", width, height);
208 
210  ROS_DEBUG("Creating clusters");
211 
212  // Pre-allocate temporary storage (one per mask, avoid push_back races)
213  std::vector<EntityUpdate> temp_clusters(masks.size());
214  std::vector<bool> valid_cluster(masks.size(), false);
215 
216  // Parallel loop - each iteration is independent
217  #pragma omp parallel for schedule(dynamic)
218  for (size_t i = 0; i < masks.size(); ++i)
219  {
220  const cv::Mat& mask = masks[i];
221  EntityUpdate cluster; // local to this thread
222 
223  // Extract points from mask
224  for(int y = 0; y < mask.rows; ++y) {
225  for(int x = 0; x < mask.cols; ++x) {
226  if (mask.at<unsigned char>(y, x) > 0) {
227  unsigned int pixel_idx = y * width + x;
228  float d = depth_image.at<float>(pixel_idx);
229 
230  if (d > 0 && std::isfinite(d)) {
231  cluster.pixel_indices.push_back(pixel_idx);
232  cluster.points.push_back(cam_model.project2Dto3D(x, y) * d);
233  }
234  }
235  }
236  }
237 
238  // Skip small clusters (< 100 points)
239  if (cluster.pixel_indices.size() < MIN_CLUSTER_POINTS) {
240  continue; // valid_cluster[i] remains false
241  }
242 
243  // BMM point cloud denoising
244  GMMParams params;
245  config_.value("psi0", params.psi0);
246  config_.value("nu0", params.nu0);
247  config_.value("alpha", params.alpha);
248  config_.value("kappa0", params.kappa0);
249 
250 
251  MAPGMM gmm(2, cluster.points, params); // 2 components: object + outliers
252  gmm.fit(cluster.points, sensor_pose);
253  // Get component assignments and inlier component
254  std::vector<int> labels = gmm.get_labels();
255  int inlier_component = gmm.get_inlier_component();
256 
257  // Filter points
258  std::vector<geo::Vec3> filtered_points;
259  std::vector<geo::Vec3> outlier_points; // Only populate if needed
260 
261  for (size_t j = 0; j < labels.size(); j++)
262  {
263  if (labels[j] == inlier_component)
264  {
265  filtered_points.push_back(cluster.points[j]);
266  }
267  else if (logging)
268  {
269  outlier_points.push_back(cluster.points[j]);
270  }
271  }
272 
273  // Safety check: only use filtered points if we retained enough
274  if (filtered_points.size() > MIN_FILTERED_POINTS &&
275  filtered_points.size() > MIN_RETENTION_RATIO * cluster.points.size()) {
276  // Use filtered points
277  cluster.points = filtered_points;
278  if (logging)
279  {
280  cluster.outlier_points = outlier_points;
281  // Transform outlier points to map frame
282  // for (size_t j = 0; j < cluster.outlier_points.size(); ++j) {
283  // cluster.outlier_points[j] = sensor_pose * cluster.outlier_points[j];
284  // }
285  }
286  }
287  else
288  {
289  // Safety check failed - keep original unfiltered points
290  // Don't populate outlier_points since we're not using the GMM result
291  ROS_DEBUG("GMM filtering rejected: retained %zu/%zu points",
292  filtered_points.size(), cluster.points.size());
293  }
294 
295  // Calculate convex hull
296  float z_min = 1e9;
297  float z_max = -1e9;
298  std::vector<geo::Vec2f> points_2d(cluster.points.size());
299 
300  for(unsigned int j = 0; j < cluster.points.size(); ++j)
301  {
302  const geo::Vec3& p = cluster.points[j];
303 
304  // Transform sensor point to map frame
305  geo::Vector3 p_map = sensor_pose * p;
306  points_2d[j] = geo::Vec2f(p_map.x, p_map.y);
307  z_min = std::min<float>(z_min, p_map.z);
308  z_max = std::max<float>(z_max, p_map.z);
309  }
310 
311  ed::convex_hull::create(points_2d, z_min, z_max, cluster.chull, cluster.pose_map);
312  cluster.chull.complete = false;
313 
314  // Store in thread-safe pre-allocated array
315  temp_clusters[i] = cluster;
316  valid_cluster[i] = true;
317  }
318 
319  // Sequential section: collect valid clusters
320  clusters.clear();
321  clusters.reserve(masks.size());
322  for (size_t i = 0; i < temp_clusters.size(); ++i) {
323  if (valid_cluster[i]) {
324  clusters.push_back(std::move(temp_clusters[i]));
325  }
326  }
327 
328  return masks;
329 }
geo::Vector3::y
const real & y() const
MinMaxRenderer::MinMaxRenderer
MinMaxRenderer(int width, int height)
Definition: src/kinect/segmenter.cpp:117
EntityUpdate
collection structure for laser entities
Definition: kinect/entity_update.h:9
Segmenter::preprocessRGBForSegmentation
cv::Mat preprocessRGBForSegmentation(const cv::Mat &rgb_image, const cv::Mat &filtered_depth_image) const
Preprocess RGB image for segmentation. This function masks the RGB image with the filtered depth imag...
Definition: src/kinect/segmenter.cpp:184
DepthCamera.h
geo::RenderOptions::setMesh
void setMesh(const geo::Mesh &mesh, const geo::Pose3D &pose=geo::Pose3D::identity())
geo
Segmenter::cluster
std::vector< cv::Mat > cluster(const cv::Mat &depth_image, const geo::DepthCamera &cam_model, const geo::Pose3D &sensor_pose, std::vector< EntityUpdate > &clusters, const cv::Mat &rgb_image, bool logging=false)
Cluster the depth image into segments. Applies new algorithm which is the YOLO - SAM - BMM depth segm...
Definition: src/kinect/segmenter.cpp:202
sam_seg_module.h
ed::convex_hull::create
void create(const std::vector< geo::Vec2f > &points, float z_min, float z_max, ConvexHull &chull, geo::Pose3D &pose)
std::vector::reserve
T reserve(T... args)
Segmenter::config_
tue::Configuration config_
Definition: segmenter.h:66
Segmenter::removeBackground
void removeBackground(cv::Mat &depth_image, const ed::WorldModel &world, const geo::DepthCamera &cam, const geo::Pose3D &sensor_pose, double background_padding)
Definition: src/kinect/segmenter.cpp:77
std::vector
std::vector::size
T size(T... args)
geo::RenderOptions::setBackFaceCulling
void setBackFaceCulling(bool b)
rgb_image
cv::Mat rgb_image
cam
geo::DepthCamera cam
geo::Vec3T
Shape.h
geo::DepthCamera::render
void render(const RenderOptions &opt, RenderResult &res) const
MinMaxRenderer::max_buffer
cv::Mat max_buffer
Definition: src/kinect/segmenter.cpp:141
geo::Transform3T
geo::Shape::getMesh
virtual const Mesh & getMesh() const
queue
geo::Vec2f
Vec2T< float > Vec2f
ed::WorldModel::begin
const_iterator begin() const
ed::WorldModel::const_iterator
EntityIterator const_iterator
std::vector::clear
T clear(T... args)
ed::EntityConstPtr
shared_ptr< const Entity > EntityConstPtr
ed::WorldModel::end
const_iterator end() const
std::vector::push_back
T push_back(T... args)
image
cv::Mat image
tue::config::ReaderWriter
segmenter.h
geo::Transform3T::inverse
Transform3T inverse() const
rgbd::Image
rgbd::View::getRasterizer
const geo::DepthCamera & getRasterizer() const
std::isfinite
T isfinite(T... args)
image.h
geo::RenderResult
geo::RenderOptions
Segmenter::calculatePointsWithin
void calculatePointsWithin(const rgbd::Image &image, const geo::Shape &shape, const geo::Pose3D &shape_pose, cv::Mat &filtered_depth_image) const
Definition: src/kinect/segmenter.cpp:147
geo::Vector3
geo::DepthCamera::project2Dto3D
geo::Vec3T< T > project2Dto3D(int x, int y) const
ed::WorldModel
geo::Vector3::z
const real & z() const
Segmenter::~Segmenter
~Segmenter()
Definition: src/kinect/segmenter.cpp:38
Segmenter::Segmenter
Segmenter(tue::Configuration config)
Definition: src/kinect/segmenter.cpp:31
view.h
tue::config::ReaderWriter::value
bool value(const std::string &name, T &value, RequiredOrOptional opt=REQUIRED)
world_model.h
MinMaxRenderer
Definition: src/kinect/segmenter.cpp:112
SegmentationPipeline
std::vector< cv::Mat > SegmentationPipeline(const cv::Mat &img, tue::Configuration &config)
Segmentation pipeline that processes the input image and generates segmentation masks.
Definition: sam_seg_module.cpp:14
MinMaxRenderer::min_buffer
cv::Mat min_buffer
Definition: src/kinect/segmenter.cpp:140
geo::Vector3::x
const real & x() const
geo::DepthCamera
std::size_t
geo::RenderResult::RenderResult
RenderResult(int width, int height)
std::max
T max(T... args)
entity.h
MinMaxRenderer::renderPixel
void renderPixel(int x, int y, float depth, int)
Definition: src/kinect/segmenter.cpp:123
convex_hull_calc.h
depth_image
cv::Mat depth_image
rgbd::View
geo::Shape
config
tue::config::ReaderWriter config
geo::RenderResult::renderPixel
virtual void renderPixel(int x, int y, float depth, int i_triangle)=0