Skip to content

Commit 340662e

Browse files
nikhilaravi authored and facebook-github-bot committed
CUDA/C++ Rasterizer updates to handle clipped faces
Summary: - Updated the C++/CUDA mesh rasterization kernels to handle clipped faces. In particular, this required careful handling of the distance calculation for faces which are cut into a quadrilateral by the image plane and then split into two sub-triangles, i.e. the two sub-triangles must not both be part of the top K faces for a pixel. - Updated `rasterize_meshes.py` to use the utils functions to clip the meshes and convert the fragments back so that they are expressed in terms of the unclipped mesh. - Added end-to-end tests. Reviewed By: jcjohnson Differential Revision: D26169685 fbshipit-source-id: d64cd0d656109b965f44a35c301b7c81f451cfa0
1 parent 838b73d commit 340662e

12 files changed

+733
-46
lines changed

pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu

Lines changed: 87 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ namespace {
1717
// A structure for holding details about a pixel.
1818
struct Pixel {
1919
float z;
20-
int64_t idx;
21-
float dist;
20+
int64_t idx; // idx of face
21+
float dist; // abs distance of pixel to face
2222
float3 bary;
2323
};
2424

@@ -111,6 +111,7 @@ __device__ bool CheckPointOutsideBoundingBox(
111111
template <typename FaceQ>
112112
__device__ void CheckPixelInsideFace(
113113
const float* face_verts, // (F, 3, 3)
114+
const int64_t* clipped_faces_neighbor_idx, // (F,)
114115
const int face_idx,
115116
int& q_size,
116117
float& q_max_z,
@@ -173,32 +174,72 @@ __device__ void CheckPixelInsideFace(
173174
// face.
174175
const bool inside = p_bary.x > 0.0f && p_bary.y > 0.0f && p_bary.z > 0.0f;
175176
const float signed_dist = inside ? -dist : dist;
176-
177177
// Check if pixel is outside blur region
178178
if (!inside && dist >= blur_radius) {
179179
return;
180180
}
181181

182-
if (q_size < K) {
183-
// Just insert it.
184-
q[q_size] = {pz, face_idx, signed_dist, p_bary_clip};
185-
if (pz > q_max_z) {
186-
q_max_z = pz;
187-
q_max_idx = q_size;
182+
// Handle the case where a face (f) partially behind the image plane is
183+
// clipped to a quadrilateral and then split into two faces (t1, t2). In this
184+
// case we:
185+
// 1. Find the index of the neighboring face (e.g. for t1 need index of t2)
186+
// 2. Check if the neighboring face (t2) is already in the top K faces
187+
// 3. If yes, compare the distance of the pixel to t1 with the distance to t2.
188+
// 4. If dist_t1 < dist_t2, overwrite the values for t2 in the top K faces.
189+
const int neighbor_idx = clipped_faces_neighbor_idx[face_idx];
190+
int neighbor_idx_top_k = -1;
191+
192+
// Check if neighboring face is already in the top K.
193+
// -1 is the fill value in clipped_faces_neighbor_idx
194+
if (neighbor_idx != -1) {
195+
// Only need to loop until q_size.
196+
for (int i = 0; i < q_size; i++) {
197+
if (q[i].idx == neighbor_idx) {
198+
neighbor_idx_top_k = i;
199+
break;
200+
}
201+
}
202+
}
203+
// If neighbor_idx_top_k is not -1 then the neighbor face is in the top K struct.
204+
if (neighbor_idx_top_k != -1) {
205+
// If dist of current face is less than neighbor then overwrite the
206+
// neighbor face values in the top K struct.
207+
float neighbor_dist = abs(q[neighbor_idx_top_k].dist);
208+
if (dist < neighbor_dist) {
209+
// Overwrite the neighbor face values
210+
q[neighbor_idx_top_k] = {pz, face_idx, signed_dist, p_bary_clip};
211+
212+
// If pz > q_max then overwrite the max values and index of the max.
213+
// q_size stays the same.
214+
if (pz > q_max_z) {
215+
q_max_z = pz;
216+
q_max_idx = neighbor_idx_top_k;
217+
}
188218
}
189-
q_size++;
190-
} else if (pz < q_max_z) {
191-
// Overwrite the old max, and find the new max.
192-
q[q_max_idx] = {pz, face_idx, signed_dist, p_bary_clip};
193-
q_max_z = pz;
194-
for (int i = 0; i < K; i++) {
195-
if (q[i].z > q_max_z) {
196-
q_max_z = q[i].z;
197-
q_max_idx = i;
219+
} else {
220+
// Handle as a normal face
221+
if (q_size < K) {
222+
// Just insert it.
223+
q[q_size] = {pz, face_idx, signed_dist, p_bary_clip};
224+
if (pz > q_max_z) {
225+
q_max_z = pz;
226+
q_max_idx = q_size;
227+
}
228+
q_size++;
229+
} else if (pz < q_max_z) {
230+
// Overwrite the old max, and find the new max.
231+
q[q_max_idx] = {pz, face_idx, signed_dist, p_bary_clip};
232+
q_max_z = pz;
233+
for (int i = 0; i < K; i++) {
234+
if (q[i].z > q_max_z) {
235+
q_max_z = q[i].z;
236+
q_max_idx = i;
237+
}
198238
}
199239
}
200240
}
201241
}
242+
202243
} // namespace
203244

204245
// ****************************************************************************
@@ -208,6 +249,7 @@ __global__ void RasterizeMeshesNaiveCudaKernel(
208249
const float* face_verts,
209250
const int64_t* mesh_to_face_first_idx,
210251
const int64_t* num_faces_per_mesh,
252+
const int64_t* clipped_faces_neighbor_idx,
211253
const float blur_radius,
212254
const bool perspective_correct,
213255
const bool clip_barycentric_coords,
@@ -265,6 +307,7 @@ __global__ void RasterizeMeshesNaiveCudaKernel(
265307

266308
CheckPixelInsideFace(
267309
face_verts,
310+
clipped_faces_neighbor_idx,
268311
f,
269312
q_size,
270313
q_max_z,
@@ -298,6 +341,7 @@ RasterizeMeshesNaiveCuda(
298341
const at::Tensor& face_verts,
299342
const at::Tensor& mesh_to_faces_packed_first_idx,
300343
const at::Tensor& num_faces_per_mesh,
344+
const at::Tensor& clipped_faces_neighbor_idx,
301345
const std::tuple<int, int> image_size,
302346
const float blur_radius,
303347
const int num_closest,
@@ -313,6 +357,10 @@ RasterizeMeshesNaiveCuda(
313357
num_faces_per_mesh.size(0) == mesh_to_faces_packed_first_idx.size(0),
314358
"num_faces_per_mesh must have save size first dimension as mesh_to_faces_packed_first_idx");
315359

360+
TORCH_CHECK(
361+
clipped_faces_neighbor_idx.size(0) == face_verts.size(0),
362+
"clipped_faces_neighbor_idx must have save size first dimension as face_verts");
363+
316364
if (num_closest > kMaxPointsPerPixel) {
317365
std::stringstream ss;
318366
ss << "Must have points_per_pixel <= " << kMaxPointsPerPixel;
@@ -323,11 +371,16 @@ RasterizeMeshesNaiveCuda(
323371
at::TensorArg face_verts_t{face_verts, "face_verts", 1},
324372
mesh_to_faces_packed_first_idx_t{
325373
mesh_to_faces_packed_first_idx, "mesh_to_faces_packed_first_idx", 2},
326-
num_faces_per_mesh_t{num_faces_per_mesh, "num_faces_per_mesh", 3};
374+
num_faces_per_mesh_t{num_faces_per_mesh, "num_faces_per_mesh", 3},
375+
clipped_faces_neighbor_idx_t{
376+
clipped_faces_neighbor_idx, "clipped_faces_neighbor_idx", 4};
327377
at::CheckedFrom c = "RasterizeMeshesNaiveCuda";
328378
at::checkAllSameGPU(
329379
c,
330-
{face_verts_t, mesh_to_faces_packed_first_idx_t, num_faces_per_mesh_t});
380+
{face_verts_t,
381+
mesh_to_faces_packed_first_idx_t,
382+
num_faces_per_mesh_t,
383+
clipped_faces_neighbor_idx_t});
331384

332385
// Set the device for the kernel launch based on the device of the input
333386
at::cuda::CUDAGuard device_guard(face_verts.device());
@@ -358,6 +411,7 @@ RasterizeMeshesNaiveCuda(
358411
face_verts.contiguous().data_ptr<float>(),
359412
mesh_to_faces_packed_first_idx.contiguous().data_ptr<int64_t>(),
360413
num_faces_per_mesh.contiguous().data_ptr<int64_t>(),
414+
clipped_faces_neighbor_idx.contiguous().data_ptr<int64_t>(),
361415
blur_radius,
362416
perspective_correct,
363417
clip_barycentric_coords,
@@ -800,6 +854,7 @@ at::Tensor RasterizeMeshesCoarseCuda(
800854
__global__ void RasterizeMeshesFineCudaKernel(
801855
const float* face_verts, // (F, 3, 3)
802856
const int32_t* bin_faces, // (N, BH, BW, T)
857+
const int64_t* clipped_faces_neighbor_idx, // (F,)
803858
const float blur_radius,
804859
const int bin_size,
805860
const bool perspective_correct,
@@ -858,6 +913,7 @@ __global__ void RasterizeMeshesFineCudaKernel(
858913
int q_size = 0;
859914
float q_max_z = -1000;
860915
int q_max_idx = -1;
916+
861917
for (int m = 0; m < M; m++) {
862918
const int f = bin_faces[n * BH * BW * M + by * BW * M + bx * M + m];
863919
if (f < 0) {
@@ -867,6 +923,7 @@ __global__ void RasterizeMeshesFineCudaKernel(
867923
// update q, q_size, q_max_z and q_max_idx in place.
868924
CheckPixelInsideFace(
869925
face_verts,
926+
clipped_faces_neighbor_idx,
870927
f,
871928
q_size,
872929
q_max_z,
@@ -906,6 +963,7 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor>
906963
RasterizeMeshesFineCuda(
907964
const at::Tensor& face_verts,
908965
const at::Tensor& bin_faces,
966+
const at::Tensor& clipped_faces_neighbor_idx,
909967
const std::tuple<int, int> image_size,
910968
const float blur_radius,
911969
const int bin_size,
@@ -918,12 +976,18 @@ RasterizeMeshesFineCuda(
918976
face_verts.size(2) == 3,
919977
"face_verts must have dimensions (num_faces, 3, 3)");
920978
TORCH_CHECK(bin_faces.ndimension() == 4, "bin_faces must have 4 dimensions");
979+
TORCH_CHECK(
980+
clipped_faces_neighbor_idx.size(0) == face_verts.size(0),
981+
"clipped_faces_neighbor_idx must have the same first dimension as face_verts");
921982

922983
// Check inputs are on the same device
923984
at::TensorArg face_verts_t{face_verts, "face_verts", 1},
924-
bin_faces_t{bin_faces, "bin_faces", 2};
985+
bin_faces_t{bin_faces, "bin_faces", 2},
986+
clipped_faces_neighbor_idx_t{
987+
clipped_faces_neighbor_idx, "clipped_faces_neighbor_idx", 3};
925988
at::CheckedFrom c = "RasterizeMeshesFineCuda";
926-
at::checkAllSameGPU(c, {face_verts_t, bin_faces_t});
989+
at::checkAllSameGPU(
990+
c, {face_verts_t, bin_faces_t, clipped_faces_neighbor_idx_t});
927991

928992
// Set the device for the kernel launch based on the device of the input
929993
at::cuda::CUDAGuard device_guard(face_verts.device());
@@ -961,6 +1025,7 @@ RasterizeMeshesFineCuda(
9611025
RasterizeMeshesFineCudaKernel<<<blocks, threads, 0, stream>>>(
9621026
face_verts.contiguous().data_ptr<float>(),
9631027
bin_faces.contiguous().data_ptr<int32_t>(),
1028+
clipped_faces_neighbor_idx.contiguous().data_ptr<int64_t>(),
9641029
blur_radius,
9651030
bin_size,
9661031
perspective_correct,

pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ RasterizeMeshesNaiveCpu(
1515
const torch::Tensor& face_verts,
1616
const torch::Tensor& mesh_to_face_first_idx,
1717
const torch::Tensor& num_faces_per_mesh,
18+
const torch::Tensor& clipped_faces_neighbor_idx,
1819
const std::tuple<int, int> image_size,
1920
const float blur_radius,
2021
const int faces_per_pixel,
@@ -28,6 +29,7 @@ RasterizeMeshesNaiveCuda(
2829
const at::Tensor& face_verts,
2930
const at::Tensor& mesh_to_face_first_idx,
3031
const at::Tensor& num_faces_per_mesh,
32+
const torch::Tensor& clipped_faces_neighbor_idx,
3133
const std::tuple<int, int> image_size,
3234
const float blur_radius,
3335
const int num_closest,
@@ -48,6 +50,12 @@ RasterizeMeshesNaiveCuda(
4850
// the batch where N is the batch size.
4951
// num_faces_per_mesh: LongTensor of shape (N) giving the number of faces
5052
// for each mesh in the batch.
53+
// clipped_faces_neighbor_idx: LongTensor of shape (F,) giving the
54+
// index of the neighboring face for each face which was clipped to a
55+
// quadrilateral and then divided into two triangles.
56+
// e.g. for a face f partially behind the image plane which is split into
57+
// two triangles (t1, t2): clipped_faces_neighbor_idx[t1_idx] = t2_idx
58+
// Faces which are not clipped and subdivided are set to -1.
5159
// image_size: Tuple (H, W) giving the size in pixels of the output
5260
// image to be rasterized.
5361
// blur_radius: float distance in NDC coordinates used to expand the face
@@ -90,6 +98,7 @@ RasterizeMeshesNaive(
9098
const torch::Tensor& face_verts,
9199
const torch::Tensor& mesh_to_face_first_idx,
92100
const torch::Tensor& num_faces_per_mesh,
101+
const torch::Tensor& clipped_faces_neighbor_idx,
93102
const std::tuple<int, int> image_size,
94103
const float blur_radius,
95104
const int faces_per_pixel,
@@ -106,6 +115,7 @@ RasterizeMeshesNaive(
106115
face_verts,
107116
mesh_to_face_first_idx,
108117
num_faces_per_mesh,
118+
clipped_faces_neighbor_idx,
109119
image_size,
110120
blur_radius,
111121
faces_per_pixel,
@@ -120,6 +130,7 @@ RasterizeMeshesNaive(
120130
face_verts,
121131
mesh_to_face_first_idx,
122132
num_faces_per_mesh,
133+
clipped_faces_neighbor_idx,
123134
image_size,
124135
blur_radius,
125136
faces_per_pixel,
@@ -306,6 +317,7 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
306317
RasterizeMeshesFineCuda(
307318
const torch::Tensor& face_verts,
308319
const torch::Tensor& bin_faces,
320+
const torch::Tensor& clipped_faces_neighbor_idx,
309321
const std::tuple<int, int> image_size,
310322
const float blur_radius,
311323
const int bin_size,
@@ -322,6 +334,12 @@ RasterizeMeshesFineCuda(
322334
// in NDC coordinates in the range [-1, 1].
323335
// bin_faces: int32 Tensor of shape (N, B, B, M) giving the indices of faces
324336
// that fall into each bin (output from coarse rasterization).
337+
// clipped_faces_neighbor_idx: LongTensor of shape (F,) giving the
338+
// index of the neighboring face for each face which was clipped to a
339+
// quadrilateral and then divided into two triangles.
340+
// e.g. for a face f partially behind the image plane which is split into
341+
// two triangles (t1, t2): clipped_faces_neighbor_idx[t1_idx] = t2_idx
342+
// Faces which are not clipped and subdivided are set to -1.
325343
// image_size: Tuple (H, W) giving the size in pixels of the output
326344
// image to be rasterized.
327345
// blur_radius: float distance in NDC coordinates used to expand the face
@@ -364,6 +382,7 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
364382
RasterizeMeshesFine(
365383
const torch::Tensor& face_verts,
366384
const torch::Tensor& bin_faces,
385+
const torch::Tensor& clipped_faces_neighbor_idx,
367386
const std::tuple<int, int> image_size,
368387
const float blur_radius,
369388
const int bin_size,
@@ -378,6 +397,7 @@ RasterizeMeshesFine(
378397
return RasterizeMeshesFineCuda(
379398
face_verts,
380399
bin_faces,
400+
clipped_faces_neighbor_idx,
381401
image_size,
382402
blur_radius,
383403
bin_size,
@@ -411,6 +431,12 @@ RasterizeMeshesFine(
411431
// the batch where N is the batch size.
412432
// num_faces_per_mesh: LongTensor of shape (N) giving the number of faces
413433
// for each mesh in the batch.
434+
// clipped_faces_neighbor_idx: LongTensor of shape (F,) giving the
435+
// index of the neighboring face for each face which was clipped to a
436+
// quadrilateral and then divided into two triangles.
437+
// e.g. for a face f partially behind the image plane which is split into
438+
// two triangles (t1, t2): clipped_faces_neighbor_idx[t1_idx] = t2_idx
439+
// Faces which are not clipped and subdivided are set to -1.
414440
// image_size: Tuple (H, W) giving the size in pixels of the output
415441
// image to be rasterized.
416442
// blur_radius: float distance in NDC coordinates used to expand the face
@@ -456,6 +482,7 @@ RasterizeMeshes(
456482
const torch::Tensor& face_verts,
457483
const torch::Tensor& mesh_to_face_first_idx,
458484
const torch::Tensor& num_faces_per_mesh,
485+
const torch::Tensor& clipped_faces_neighbor_idx,
459486
const std::tuple<int, int> image_size,
460487
const float blur_radius,
461488
const int faces_per_pixel,
@@ -477,6 +504,7 @@ RasterizeMeshes(
477504
return RasterizeMeshesFine(
478505
face_verts,
479506
bin_faces,
507+
clipped_faces_neighbor_idx,
480508
image_size,
481509
blur_radius,
482510
bin_size,
@@ -490,6 +518,7 @@ RasterizeMeshes(
490518
face_verts,
491519
mesh_to_face_first_idx,
492520
num_faces_per_mesh,
521+
clipped_faces_neighbor_idx,
493522
image_size,
494523
blur_radius,
495524
faces_per_pixel,

0 commit comments

Comments
 (0)