Skip to content

Commit ae742d6

Browse files
committed
update with comment
1 parent 4c710e4 commit ae742d6

File tree

2 files changed

+11
-10
lines changed

2 files changed

+11
-10
lines changed

offload/DeviceRTL/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ function(compileDeviceRTLLibrary target_name target_triple)
119119
set(outfile "${outfile}-${target_name}.bc")
120120
set(depfile "${outfile}.d")
121121

122+
# Passing an empty CPU to -march= suppresses target-specific metadata.
122123
add_custom_command(OUTPUT ${outfile}
123124
COMMAND ${CLANG_TOOL}
124125
${bc_flags}

offload/DeviceRTL/src/Reduction.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -73,16 +73,16 @@ static int32_t nvptx_parallel_reduce_nowait(void *reduce_data,
7373
if (NumThreads == 1)
7474
return 1;
7575

76-
//
77-
// This reduce function handles reduction within a team. It handles
78-
// parallel regions in both L1 and L2 parallelism levels. It also
79-
// supports Generic, SPMD, and NoOMP modes.
80-
//
81-
// 1. Reduce within a warp.
82-
// 2. Warp master copies value to warp 0 via shared memory.
83-
// 3. Warp 0 reduces to a single value.
84-
// 4. The reduced value is available in the thread that returns 1.
85-
//
76+
//
77+
// This reduce function handles reduction within a team. It handles
78+
// parallel regions in both L1 and L2 parallelism levels. It also
79+
// supports Generic, SPMD, and NoOMP modes.
80+
//
81+
// 1. Reduce within a warp.
82+
// 2. Warp master copies value to warp 0 via shared memory.
83+
// 3. Warp 0 reduces to a single value.
84+
// 4. The reduced value is available in the thread that returns 1.
85+
//
8686

8787
#if __has_builtin(__nvvm_reflect)
8888
if (__nvvm_reflect("__CUDA_ARCH") >= 700) {

0 commit comments

Comments
 (0)