Skip to content

Commit e6caa12

Browse files
author
Cruz Monrreal
authored
Merge pull request #9812 from kjbracey-arm/wait_ns
Add wait_ns API
2 parents eff8b1d + d2df5a0 commit e6caa12

File tree

4 files changed

+196
-4
lines changed

4 files changed

+196
-4
lines changed

TESTS/mbed_platform/wait_ns/main.cpp

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/*
2+
* Copyright (c) 2018, ARM Limited, All Rights Reserved
3+
* SPDX-License-Identifier: Apache-2.0
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License"); you may
6+
* not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13+
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
#include "mbed.h"
19+
#include "greentea-client/test_env.h"
20+
#include "unity.h"
21+
#include "utest.h"
22+
#include "platform/mbed_wait_api.h"
23+
#include "hal/us_ticker_api.h"
24+
#include "hal/lp_ticker_api.h"
25+
26+
using namespace utest::v1;
27+
28+
/* This test is based on the test for the Timer class.
29+
* Since the low power timer is less accurate than the regular
30+
* timer, we need to adjust the delta.
31+
*/
32+
33+
/*
34+
* Define tolerance as follows:
35+
* Timer might be +/-5% out; wait_ns is permitted 40% slow, but not fast.
36+
* Therefore minimum measured time should be 95% of requested, maximum should
37+
* be 145%. Unity doesn't let us specify an asymmetric error though.
38+
*
39+
* Would be nice to have tighter upper tolerance, but in practice we've seen
40+
* a few devices unable to sustain theoretical throughput - flash wait states?
41+
*/
42+
/* Smallest acceptable ratio of measured time to requested time. */
#define TOLERANCE_MIN 0.95f
43+
/* Largest acceptable ratio of measured time to requested time. */
#define TOLERANCE_MAX 1.45f
44+
/* Centre of the acceptance window (1.20); used as the "expected" value so
 * that a symmetric delta can express the asymmetric min/max limits. */
#define MIDPOINT ((TOLERANCE_MIN+TOLERANCE_MAX)/2)
45+
/* Half-width of the acceptance window (0.25): MIDPOINT +/- DELTA spans
 * exactly TOLERANCE_MIN..TOLERANCE_MAX. */
#define DELTA (MIDPOINT-TOLERANCE_MIN)
46+
47+
/* This test verifies if wait_ns's wait time
48+
* is accurate, according to a timer.
49+
*
50+
* Given timer is created.
51+
* When timer is used to measure delay.
52+
* Then the results are valid (within acceptable range).
53+
*/
54+
template<int wait_val_ms, class CompareTimer>
55+
void test_wait_ns_time_measurement()
56+
{
57+
CompareTimer timer;
58+
59+
float wait_val_s = (float)wait_val_ms / 1000;
60+
61+
/* Start the timer. */
62+
timer.start();
63+
64+
/* Wait <wait_val_ms> ms - arithmetic inside wait_ns will overflow if
65+
* asked for too large a delay, so break it up.
66+
*/
67+
for (int i = 0; i < wait_val_ms; i++) {
68+
wait_ns(1000000);
69+
}
70+
71+
/* Stop the timer. */
72+
timer.stop();
73+
74+
/* Check results - wait_val_us us have elapsed. */
75+
TEST_ASSERT_FLOAT_WITHIN(DELTA * wait_val_s, MIDPOINT * wait_val_s, timer.read());
76+
}
77+
78+
/* utest setup handler: announces the test run to the Greentea host side and
 * then defers to the stock verbose setup handler.
 *
 * @param number_of_cases number of test cases in the specification
 * @return whatever verbose_test_setup_handler() reports
 */
utest::v1::status_t test_setup(const size_t number_of_cases)
{
    /* NOTE(review): 15 is presumably the host-side timeout in seconds and
     * "default_auto" the host test name - confirm against greentea-client. */
    GREENTEA_SETUP(15, "default_auto");

    const utest::v1::status_t setup_status = verbose_test_setup_handler(number_of_cases);
    return setup_status;
}
83+
84+
/* Test cases: each one runs the 1000 ms wait_ns measurement against a
 * different reference timer class. */
Case cases[] = {
85+
/* The LowPowerTimer comparison is only built when DEVICE_LPTICKER is set. */
#if DEVICE_LPTICKER
86+
Case("Test: wait_ns - compare with lp_timer 1s", test_wait_ns_time_measurement<1000, LowPowerTimer>),
87+
#endif
88+
Case("Test: wait_ns - compare with us_timer 1s", test_wait_ns_time_measurement<1000, Timer>)
89+
};
90+
91+
/* Binds the setup handler and the case list together for the harness. */
Specification specification(test_setup, cases);
92+
93+
int main()
94+
{
95+
return !Harness::run(specification);
96+
}

platform/mbed_wait_api.h

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,43 @@ void wait_ms(int ms);
7878
*
7979
* @note
8080
* This function always spins to get the exact number of microseconds.
81-
* If RTOS is present, this will affect power (by preventing deep sleep) and
82-
* multithread performance. Therefore, spinning for millisecond wait is not recommended.
81+
* This will affect power and multithread performance. Therefore, spinning for
82+
* millisecond wait is not recommended, and wait_ms() should
83+
* be used instead.
84+
*
85+
* @note You may call this function from ISR context, but large delays may
86+
* impact system stability - interrupt handlers should take less than
87+
* 50us.
8388
*/
8489
void wait_us(int us);
8590

91+
/** Waits a number of nanoseconds.
92+
*
93+
* This function spins the CPU to produce a small delay. It should normally
94+
* only be used for delays of 10us (10000ns) or less. As it is calculated
95+
* based on the expected execution time of a software loop, it may well run
96+
* slower than requested based on activity from other threads and interrupts.
97+
* If greater precision is required, this can be called from inside a critical
98+
* section.
99+
*
100+
* @param ns the number of nanoseconds to wait
101+
*
102+
* @note
103+
* wait_us() will likely give more precise time than wait_ns for large-enough
104+
* delays, as it is based on a timer, but its set-up time may be excessive
105+
* for the smallest microsecond counts, at which point wait_ns() is better.
106+
*
107+
* @note
108+
* Any delay larger than a millisecond (1000000ns) is liable to cause
109+
* overflow in the internal loop calculation. You shouldn't normally be
110+
* using this for such large delays anyway in real code, but be aware if
111+
* calibrating. Make repeated calls for longer test runs.
112+
*
113+
* @note You may call this function from ISR context.
114+
*
115+
*/
116+
void wait_ns(unsigned int ns);
117+
86118
#ifdef __cplusplus
87119
}
88120
#endif

platform/mbed_wait_api_no_rtos.c

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,14 @@
1515
* limitations under the License.
1616
*/
1717

18+
#include "cmsis.h"
19+
#include "platform/mbed_toolchain.h"
20+
#include "platform/mbed_wait_api.h"
21+
1822
// This implementation of the wait functions will be compiled only
1923
// if the RTOS is not present.
2024
#ifndef MBED_CONF_RTOS_PRESENT
2125

22-
#include "platform/mbed_wait_api.h"
2326
#include "hal/us_ticker_api.h"
2427

2528
void wait(float s)
@@ -41,3 +44,64 @@ void wait_us(int us)
4144

4245
#endif // #ifndef MBED_CONF_RTOS_PRESENT
4346

47+
// This wait_ns is used by both RTOS and non-RTOS builds
48+
49+
/* LOOP_SCALER is the divisor wait_ns() applies to
 * (core cycles per microsecond * requested nanoseconds) to obtain a loop
 * iteration count: it is the per-iteration cycle count quoted for each core
 * below, multiplied by 1000. It is left undefined for unrecognised cores.
 */
#ifdef __CORTEX_M
50+
#if (__CORTEX_M == 0 && !defined __CM0PLUS_REV) || __CORTEX_M == 1
51+
// Cortex-M0 and Cortex-M1 take 6 cycles per iteration - SUBS = 1, 2xNOP = 2, BCS = 3
52+
#define LOOP_SCALER 6000
53+
#elif (__CORTEX_M == 0 && defined __CM0PLUS_REV) || __CORTEX_M == 3 || __CORTEX_M == 4 || \
54+
__CORTEX_M == 23 || __CORTEX_M == 33
55+
// Cortex-M0+, M3, M4, M23 and M33 take 5 cycles per iteration - SUBS = 1, 2xNOP = 2, BCS = 2
56+
// TODO - check M33
57+
#define LOOP_SCALER 5000
58+
#elif __CORTEX_M == 7
59+
// Cortex-M7 manages to dual-issue for 2 cycles per iteration (SUB,NOP) = 1, (NOP,BCS) = 1
60+
// (The NOPs were added to stabilise this - with just the SUB and BCS, it seems that the
61+
// M7 sometimes takes 1 cycle, sometimes 2, possibly depending on alignment)
62+
#define LOOP_SCALER 2000
63+
#endif
64+
#elif defined __CORTEX_A
65+
#if __CORTEX_A == 9
66+
// Cortex-A9 is dual-issue, so let's assume same performance as Cortex-M7.
67+
// TODO - test.
68+
#define LOOP_SCALER 2000
69+
#endif
70+
#endif
71+
72+
/* We only define the function if we've identified the CPU. If we haven't,
73+
* leave it undefined rather than faulting with an immediate compile-time
74+
* #error. This leaves the door open to non-ARM builds, or to people
75+
* providing substitutes for other CPUs, and any failure then surfaces only
76+
* if the function is actually needed.
77+
*/
78+
#ifdef LOOP_SCALER
79+
80+
/* Timing seems to depend on alignment, and toolchains do not support aligning
81+
* functions well. So sidestep that by hand-assembling the code. Also avoids
82+
* the hassle of handling multiple toolchains with different assembler
83+
* syntax.
84+
*/
85+
/* Hand-assembled Thumb delay loop, stored as 16-bit opcodes and forced to an
 * 8-byte boundary so its alignment is the same in every build.
 * wait_ns() passes the iteration count as the call's only argument; the
 * instruction comments below show it being consumed from R0.
 */
MBED_ALIGN(8)
86+
static const uint16_t delay_loop_code[] = {
87+
0x1E40, // SUBS R0,R0,#1 - decrement the iteration count, updating flags
88+
0xBF00, // NOP - padding, see the per-core cycle counts above
89+
0xBF00, // NOP
90+
0xD2FB, // BCS .-3 - back to the SUBS (0x00 would be .+2, so 0xFB = -5 = .-3)
91+
0x4770 // BX LR - return to the caller
92+
};
93+
94+
/* Take the address of the code, set LSB to indicate Thumb, and cast to a
 * void() function pointer. The empty parameter list leaves the C function
 * type unprototyped, which is what allows wait_ns() to call it with an
 * argument. */
95+
#define delay_loop ((void(*)()) ((uintptr_t) delay_loop_code | 1))
96+
97+
void wait_ns(unsigned int ns)
98+
{
99+
uint32_t cycles_per_us = SystemCoreClock / 1000000;
100+
// Note that this very calculation, plus call overhead, will take multiple
101+
// cycles. Could well be 100ns on its own... So round down here, startup is
102+
// worth at least one loop iteration.
103+
uint32_t count = (cycles_per_us * ns) / LOOP_SCALER;
104+
105+
delay_loop(count);
106+
}
107+
#endif // LOOP_SCALER

targets/TARGET_NUVOTON/TARGET_M2351/device/M2351.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ typedef enum IRQn
192192
/*@}*/ /* end of group CMSIS */
193193

194194

195-
#include "core_armv8mbl.h" /* Processor and core peripherals */
195+
#include "core_cm23.h" /* Processor and core peripherals */
196196
#include "system_M2351.h" /* System Header */
197197

198198
/**

0 commit comments

Comments
 (0)