Skip to content

Commit 1a62373

Browse files
committed
device-libs: Use ballot(true) instead of calling read_exec builtin
The read_exec builtins are implemented with the ballot intrinsic anyway. In the wave32 case, these will optimize down to use just the low 32 bits. This converts a few uses, but others remain. Apparently exec_hi can simply be used as a GPR in wave32, though, so I'm not sure we should be treating the raw exec read as assumed 0.
Change-Id: Id5621bf31b0bb7fa27456938942138f3dea85a0a
1 parent f784bda commit 1a62373

File tree

2 files changed

+2
-18
lines changed

2 files changed

+2
-18
lines changed

amd/device-libs/asanrtl/src/dm.cl

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -113,22 +113,10 @@ first(__global void * v)
113113
}
114114

115115
// The number of active lanes at this point
116-
static uint
117-
active_lane_count_w64(void)
118-
{
119-
return __builtin_popcountl(__builtin_amdgcn_read_exec());
120-
}
121-
122-
static uint
123-
active_lane_count_w32(void)
124-
{
125-
return __builtin_popcount(__builtin_amdgcn_read_exec_lo());
126-
}
127-
128116
static uint
129117
active_lane_count(void)
130118
{
131-
return __oclc_wavefrontsize64 ? active_lane_count_w64() : active_lane_count_w32();
119+
return __builtin_popcountl(__builtin_amdgcn_ballot_w64(true));
132120
}
133121

134122
static ulong

amd/device-libs/opencl/src/pipes/wresvnp.cl

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,7 @@
1212
static uint
1313
active_lane_count(void)
1414
{
15-
if (__oclc_wavefrontsize64) {
16-
return __builtin_popcountl(__builtin_amdgcn_read_exec());
17-
} else {
18-
return __builtin_popcount(__builtin_amdgcn_read_exec_lo());
19-
}
15+
return __builtin_popcountl(__builtin_amdgcn_ballot_w64(true));
2016
}
2117

2218
size_t

0 commit comments

Comments
 (0)