Skip to content

Commit 2adc13d

Browse files
committed
Allow AMDGPU to store doubles to a register
1 parent 908bdef commit 2adc13d

File tree

1 file changed

+6
-2
lines changed
  • libc/benchmarks/gpu/timing/amdgpu

1 file changed

+6
-2
lines changed

libc/benchmarks/gpu/timing/amdgpu/timing.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,9 @@ template <typename F, typename T>
6767

6868
// This inline assembly performs a no-op which both forces the result to be
6969
// used and prevents us from exiting this region before it's complete.
70-
asm("v_or_b32 %[v_reg], 0, %[v_reg]\n" ::[v_reg] "v"(result) :);
70+
asm("v_or_b32 %[v_reg], 0, %[v_reg]\n" ::[v_reg] "v"(
71+
static_cast<uint32_t>(result))
72+
:);
7173

7274
// Obtain the current timestamp after running the calculation and force
7375
// ordering.
@@ -98,7 +100,9 @@ template <typename F, typename T1, typename T2>
98100

99101
auto result = f(arg1, arg2);
100102

101-
asm("v_or_b32 %[v_reg], 0, %[v_reg]\n" ::[v_reg] "v"(result) :);
103+
asm("v_or_b32 %[v_reg], 0, %[v_reg]\n" ::[v_reg] "v"(
104+
static_cast<uint32_t>(result))
105+
:);
102106

103107
uint64_t stop = gpu::processor_clock();
104108
asm("" ::"s"(stop));

0 commit comments

Comments
 (0)