@@ -17,8 +17,8 @@ namespace {
17
17
// A structure for holding details about one candidate face at a pixel.
// NOTE(review): presumably the element type of the top-K queue `q` that
// CheckPixelInsideFace fills with {pz, face_idx, signed_dist, p_bary_clip}
// -- confirm against the FaceQ template argument.
18
18
struct Pixel {
19
19
float z; // depth key: when the queue is full, the entry with the largest z is overwritten
20
- int64_t idx;
21
- float dist;
20
+ int64_t idx; // index of the face this entry refers to (face_idx at insertion)
21
+ float dist; // signed distance of pixel to face: stored as -dist when the pixel is inside the face, +dist otherwise
22
22
float3 bary; // barycentric coordinates stored for the pixel (p_bary_clip at insertion)
23
23
};
24
24
@@ -111,6 +111,7 @@ __device__ bool CheckPointOutsideBoundingBox(
111
111
template <typename FaceQ>
112
112
__device__ void CheckPixelInsideFace (
113
113
const float * face_verts, // (F, 3, 3)
114
+ const int64_t * clipped_faces_neighbor_idx, // (F,)
114
115
const int face_idx,
115
116
int & q_size,
116
117
float & q_max_z,
@@ -173,32 +174,72 @@ __device__ void CheckPixelInsideFace(
173
174
// face.
174
175
const bool inside = p_bary.x > 0 .0f && p_bary.y > 0 .0f && p_bary.z > 0 .0f ;
175
176
const float signed_dist = inside ? -dist : dist;
176
-
177
177
// Check if pixel is outside blur region
178
178
if (!inside && dist >= blur_radius) {
179
179
return ;
180
180
}
181
181
182
- if (q_size < K) {
183
- // Just insert it.
184
- q[q_size] = {pz, face_idx, signed_dist, p_bary_clip};
185
- if (pz > q_max_z) {
186
- q_max_z = pz;
187
- q_max_idx = q_size;
182
+ // Handle the case where a face (f) partially behind the image plane is
183
+ // clipped to a quadrilateral and then split into two faces (t1, t2). In this
184
+ // case we:
185
+ // 1. Find the index of the neighboring face (e.g. for t1 need index of t2)
186
+ // 2. Check if the neighboring face (t2) is already in the top K faces
187
+ // 3. If yes, compare the distance of the pixel to t1 with the distance to t2.
188
+ // 4. If dist_t1 < dist_t2, overwrite the values for t2 in the top K faces.
189
+ const int neighbor_idx = clipped_faces_neighbor_idx[face_idx];
190
+ int neighbor_idx_top_k = -1 ;
191
+
192
+ // Check if neighboring face is already in the top K.
193
+ // -1 is the fill value in clipped_faces_neighbor_idx
194
+ if (neighbor_idx != -1 ) {
195
+ // Only need to loop until q_size.
196
+ for (int i = 0 ; i < q_size; i++) {
197
+ if (q[i].idx == neighbor_idx) {
198
+ neighbor_idx_top_k = i;
199
+ break ;
200
+ }
201
+ }
202
+ }
203
+ // If neighbor_idx_top_k is not -1 then the neighboring face is already in the top K struct.
204
+ if (neighbor_idx_top_k != -1 ) {
205
+ // If dist of current face is less than neighbor then overwrite the
206
+ // neighbor face values in the top K struct.
207
+ float neighbor_dist = abs (q[neighbor_idx_top_k].dist );
208
+ if (dist < neighbor_dist) {
209
+ // Overwrite the neighbor face values
210
+ q[neighbor_idx_top_k] = {pz, face_idx, signed_dist, p_bary_clip};
211
+
212
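+ // If pz > q_max_z then overwrite the max value and the index of the max.
213
+ // q_size stays the same. NOTE(review): if the overwritten slot was the previous max and pz is smaller, q_max_z is not recomputed here -- confirm this is intended.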
214
+ if (pz > q_max_z) {
215
+ q_max_z = pz;
216
+ q_max_idx = neighbor_idx_top_k;
217
+ }
188
218
}
189
- q_size++;
190
- } else if (pz < q_max_z) {
191
- // Overwrite the old max, and find the new max.
192
- q[q_max_idx] = {pz, face_idx, signed_dist, p_bary_clip};
193
- q_max_z = pz;
194
- for (int i = 0 ; i < K; i++) {
195
- if (q[i].z > q_max_z) {
196
- q_max_z = q[i].z ;
197
- q_max_idx = i;
219
+ } else {
220
+ // Handle as a normal face
221
+ if (q_size < K) {
222
+ // Just insert it.
223
+ q[q_size] = {pz, face_idx, signed_dist, p_bary_clip};
224
+ if (pz > q_max_z) {
225
+ q_max_z = pz;
226
+ q_max_idx = q_size;
227
+ }
228
+ q_size++;
229
+ } else if (pz < q_max_z) {
230
+ // Overwrite the old max, and find the new max.
231
+ q[q_max_idx] = {pz, face_idx, signed_dist, p_bary_clip};
232
+ q_max_z = pz;
233
+ for (int i = 0 ; i < K; i++) {
234
+ if (q[i].z > q_max_z) {
235
+ q_max_z = q[i].z ;
236
+ q_max_idx = i;
237
+ }
198
238
}
199
239
}
200
240
}
201
241
}
242
+
202
243
} // namespace
203
244
204
245
// ****************************************************************************
@@ -208,6 +249,7 @@ __global__ void RasterizeMeshesNaiveCudaKernel(
208
249
const float * face_verts,
209
250
const int64_t * mesh_to_face_first_idx,
210
251
const int64_t * num_faces_per_mesh,
252
+ const int64_t * clipped_faces_neighbor_idx,
211
253
const float blur_radius,
212
254
const bool perspective_correct,
213
255
const bool clip_barycentric_coords,
@@ -265,6 +307,7 @@ __global__ void RasterizeMeshesNaiveCudaKernel(
265
307
266
308
CheckPixelInsideFace (
267
309
face_verts,
310
+ clipped_faces_neighbor_idx,
268
311
f,
269
312
q_size,
270
313
q_max_z,
@@ -298,6 +341,7 @@ RasterizeMeshesNaiveCuda(
298
341
const at::Tensor& face_verts,
299
342
const at::Tensor& mesh_to_faces_packed_first_idx,
300
343
const at::Tensor& num_faces_per_mesh,
344
+ const at::Tensor& clipped_faces_neighbor_idx,
301
345
const std::tuple<int , int > image_size,
302
346
const float blur_radius,
303
347
const int num_closest,
@@ -313,6 +357,10 @@ RasterizeMeshesNaiveCuda(
313
357
num_faces_per_mesh.size (0 ) == mesh_to_faces_packed_first_idx.size (0 ),
314
358
" num_faces_per_mesh must have save size first dimension as mesh_to_faces_packed_first_idx" );
315
359
360
+ TORCH_CHECK (
361
+ clipped_faces_neighbor_idx.size (0 ) == face_verts.size (0 ),
362
+ " clipped_faces_neighbor_idx must have save size first dimension as face_verts" );
363
+
316
364
if (num_closest > kMaxPointsPerPixel ) {
317
365
std::stringstream ss;
318
366
ss << " Must have points_per_pixel <= " << kMaxPointsPerPixel ;
@@ -323,11 +371,16 @@ RasterizeMeshesNaiveCuda(
323
371
at::TensorArg face_verts_t {face_verts, " face_verts" , 1 },
324
372
mesh_to_faces_packed_first_idx_t {
325
373
mesh_to_faces_packed_first_idx, " mesh_to_faces_packed_first_idx" , 2 },
326
- num_faces_per_mesh_t {num_faces_per_mesh, " num_faces_per_mesh" , 3 };
374
+ num_faces_per_mesh_t {num_faces_per_mesh, " num_faces_per_mesh" , 3 },
375
+ clipped_faces_neighbor_idx_t {
376
+ clipped_faces_neighbor_idx, " clipped_faces_neighbor_idx" , 4 };
327
377
at::CheckedFrom c = " RasterizeMeshesNaiveCuda" ;
328
378
at::checkAllSameGPU (
329
379
c,
330
- {face_verts_t , mesh_to_faces_packed_first_idx_t , num_faces_per_mesh_t });
380
+ {face_verts_t ,
381
+ mesh_to_faces_packed_first_idx_t ,
382
+ num_faces_per_mesh_t ,
383
+ clipped_faces_neighbor_idx_t });
331
384
332
385
// Set the device for the kernel launch based on the device of the input
333
386
at::cuda::CUDAGuard device_guard (face_verts.device ());
@@ -358,6 +411,7 @@ RasterizeMeshesNaiveCuda(
358
411
face_verts.contiguous ().data_ptr <float >(),
359
412
mesh_to_faces_packed_first_idx.contiguous ().data_ptr <int64_t >(),
360
413
num_faces_per_mesh.contiguous ().data_ptr <int64_t >(),
414
+ clipped_faces_neighbor_idx.contiguous ().data_ptr <int64_t >(),
361
415
blur_radius,
362
416
perspective_correct,
363
417
clip_barycentric_coords,
@@ -800,6 +854,7 @@ at::Tensor RasterizeMeshesCoarseCuda(
800
854
__global__ void RasterizeMeshesFineCudaKernel (
801
855
const float * face_verts, // (F, 3, 3)
802
856
const int32_t * bin_faces, // (N, BH, BW, T)
857
+ const int64_t * clipped_faces_neighbor_idx, // (F,)
803
858
const float blur_radius,
804
859
const int bin_size,
805
860
const bool perspective_correct,
@@ -858,6 +913,7 @@ __global__ void RasterizeMeshesFineCudaKernel(
858
913
int q_size = 0 ;
859
914
float q_max_z = -1000 ;
860
915
int q_max_idx = -1 ;
916
+
861
917
for (int m = 0 ; m < M; m++) {
862
918
const int f = bin_faces[n * BH * BW * M + by * BW * M + bx * M + m];
863
919
if (f < 0 ) {
@@ -867,6 +923,7 @@ __global__ void RasterizeMeshesFineCudaKernel(
867
923
// update q, q_size, q_max_z and q_max_idx in place.
868
924
CheckPixelInsideFace (
869
925
face_verts,
926
+ clipped_faces_neighbor_idx,
870
927
f,
871
928
q_size,
872
929
q_max_z,
@@ -906,6 +963,7 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor>
906
963
RasterizeMeshesFineCuda (
907
964
const at::Tensor& face_verts,
908
965
const at::Tensor& bin_faces,
966
+ const at::Tensor& clipped_faces_neighbor_idx,
909
967
const std::tuple<int , int > image_size,
910
968
const float blur_radius,
911
969
const int bin_size,
@@ -918,12 +976,18 @@ RasterizeMeshesFineCuda(
918
976
face_verts.size (2 ) == 3 ,
919
977
" face_verts must have dimensions (num_faces, 3, 3)" );
920
978
TORCH_CHECK (bin_faces.ndimension () == 4 , " bin_faces must have 4 dimensions" );
979
+ TORCH_CHECK (
980
+ clipped_faces_neighbor_idx.size (0 ) == face_verts.size (0 ),
981
+ " clipped_faces_neighbor_idx must have the same first dimension as face_verts" );
921
982
922
983
// Check inputs are on the same device
923
984
at::TensorArg face_verts_t {face_verts, " face_verts" , 1 },
924
- bin_faces_t {bin_faces, " bin_faces" , 2 };
985
+ bin_faces_t {bin_faces, " bin_faces" , 2 },
986
+ clipped_faces_neighbor_idx_t {
987
+ clipped_faces_neighbor_idx, " clipped_faces_neighbor_idx" , 3 };
925
988
at::CheckedFrom c = " RasterizeMeshesFineCuda" ;
926
- at::checkAllSameGPU (c, {face_verts_t , bin_faces_t });
989
+ at::checkAllSameGPU (
990
+ c, {face_verts_t , bin_faces_t , clipped_faces_neighbor_idx_t });
927
991
928
992
// Set the device for the kernel launch based on the device of the input
929
993
at::cuda::CUDAGuard device_guard (face_verts.device ());
@@ -961,6 +1025,7 @@ RasterizeMeshesFineCuda(
961
1025
RasterizeMeshesFineCudaKernel<<<blocks, threads, 0 , stream>>> (
962
1026
face_verts.contiguous ().data_ptr <float >(),
963
1027
bin_faces.contiguous ().data_ptr <int32_t >(),
1028
+ clipped_faces_neighbor_idx.contiguous ().data_ptr <int64_t >(),
964
1029
blur_radius,
965
1030
bin_size,
966
1031
perspective_correct,
0 commit comments