Skip to content

Add wait_ns API #9812

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions TESTS/mbed_platform/wait_ns/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Copyright (c) 2018, ARM Limited, All Rights Reserved
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "mbed.h"
#include "greentea-client/test_env.h"
#include "unity.h"
#include "utest.h"
#include "platform/mbed_wait_api.h"
#include "hal/us_ticker_api.h"
#include "hal/lp_ticker_api.h"

using namespace utest::v1;

/* This test is created based on the test for Timer class.
* Since low power timer is less accurate than regular
* timer we need to adjust delta.
*/

/*
* Define tolerance as follows:
* Timer might be +/-5% out; wait_ns is permitted 40% slow, but not fast.
* Therefore minimum measured time should be 95% of requested, maximum should
* be 145%. Unity doesn't let us specify an asymmetric error though.
*
* Would be nice to have tighter upper tolerance, but in practice we've seen
* a few devices unable to sustain theoretical throughput - flash wait states?
*/
#define TOLERANCE_MIN 0.95f
#define TOLERANCE_MAX 1.45f
#define MIDPOINT ((TOLERANCE_MIN+TOLERANCE_MAX)/2)
#define DELTA (MIDPOINT-TOLERANCE_MIN)

/* This test verifies if wait_ns's wait time
* is accurate, according to a timer.
*
* Given timer is created.
* When timer is used to measure delay.
* Then the results are valid (within acceptable range).
*/
/* Checks the accuracy of wait_ns() against a reference timer.
 *
 * Template parameters:
 *   wait_val_ms  - total delay to generate, in milliseconds
 *   CompareTimer - timer class used as the reference clock; it must provide
 *                  start(), stop() and read()
 *
 * Given a reference timer is created.
 * When wait_ns() is called repeatedly while the timer is running.
 * Then the elapsed time reported by the timer lies within
 * [TOLERANCE_MIN, TOLERANCE_MAX] times the requested delay.
 */
template<int wait_val_ms, class CompareTimer>
void test_wait_ns_time_measurement()
{
    /* Requested delay expressed in seconds, to compare with timer.read(). */
    const float expected_s = (float)wait_val_ms / 1000;

    CompareTimer reference;

    /* Begin measuring. */
    reference.start();

    /* Produce <wait_val_ms> ms of delay in 1 ms (1000000 ns) slices: the
     * arithmetic inside wait_ns will overflow if asked for too large a
     * delay in a single call.
     */
    int remaining_ms = wait_val_ms;
    while (remaining_ms-- > 0) {
        wait_ns(1000000);
    }

    /* End measuring. */
    reference.stop();

    /* Check results - wait_val_ms ms have elapsed. Unity only accepts a
     * symmetric delta, so assert around the midpoint of the allowed band.
     */
    TEST_ASSERT_FLOAT_WITHIN(DELTA * expected_s, MIDPOINT * expected_s, reference.read());
}

/* utest set-up handler: performs the Greentea handshake, then defers to the
 * verbose default set-up handler.
 *
 * number_of_cases - number of test cases in the specification, forwarded
 *                   unchanged to verbose_test_setup_handler()
 *
 * Returns whatever status verbose_test_setup_handler() reports.
 */
utest::v1::status_t test_setup(const size_t number_of_cases)
{
    /* Arguments are the Greentea timeout (15) and the host test name. */
    GREENTEA_SETUP(15, "default_auto");

    const utest::v1::status_t setup_status = verbose_test_setup_handler(number_of_cases);
    return setup_status;
}

/* Test cases: each one generates a 1000 ms delay with wait_ns() and checks it
* against a reference timer class. The LowPowerTimer comparison is compiled
* in only when DEVICE_LPTICKER is set; the Timer comparison is always built.
*/
Case cases[] = {
#if DEVICE_LPTICKER
Case("Test: wait_ns - compare with lp_timer 1s", test_wait_ns_time_measurement<1000, LowPowerTimer>),
#endif
Case("Test: wait_ns - compare with us_timer 1s", test_wait_ns_time_measurement<1000, Timer>)
};

/* Binds the set-up handler and the case list into the specification that
* main() hands to the harness.
*/
Specification specification(test_setup, cases);

/* Entry point: runs the test specification through the utest harness.
 * Returns 0 when Harness::run() reports success and 1 otherwise.
 */
int main()
{
    const bool harness_ok = Harness::run(specification);
    return harness_ok ? 0 : 1;
}
36 changes: 34 additions & 2 deletions platform/mbed_wait_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,43 @@ void wait_ms(int ms);
*
* @note
* This function always spins to get the exact number of microseconds.
* This will affect power (if an RTOS is present, by preventing deep sleep) and
* multithread performance. Therefore, spinning for millisecond wait is not
* recommended, and wait_ms() should be used instead.
*
* @note You may call this function from ISR context, but large delays may
* impact system stability - interrupt handlers should take less than
* 50us.
*/
void wait_us(int us);

/** Waits a number of nanoseconds.
*
* This function spins the CPU to produce a small delay. It should normally
* only be used for delays of 10us (10000ns) or less. As it is calculated
* based on the expected execution time of a software loop, it may well run
* slower than requested based on activity from other threads and interrupts.
* If greater precision is required, this can be called from inside a critical
* section.
*
* @param ns the number of nanoseconds to wait
*
* @note
* wait_us() will likely give more precise time than wait_ns() for large-enough
* delays, as it is based on a timer, but its set-up time may be excessive
* for the smallest microsecond counts, at which point wait_ns() is better.
*
* @note
* Any delay larger than a millisecond (1000000ns) is liable to cause
* overflow in the internal loop calculation. You shouldn't normally be
* using this for such large delays anyway in real code, but be aware if
* calibrating. Make repeated calls for longer test runs.
*
* @note
* The implementation is only compiled when the CPU core is recognised (a
* loop scaling factor is defined for it); on other cores the function is
* declared here but left undefined.
*
* @note You may call this function from ISR context.
*
*/
void wait_ns(unsigned int ns);

#ifdef __cplusplus
}
#endif
Expand Down
66 changes: 65 additions & 1 deletion platform/mbed_wait_api_no_rtos.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,14 @@
* limitations under the License.
*/

#include "cmsis.h"
#include "platform/mbed_toolchain.h"
#include "platform/mbed_wait_api.h"

// This implementation of the wait functions will be compiled only
// if the RTOS is not present.
#ifndef MBED_CONF_RTOS_PRESENT

#include "platform/mbed_wait_api.h"
#include "hal/us_ticker_api.h"

void wait(float s)
Expand All @@ -41,3 +44,64 @@ void wait_us(int us)

#endif // #ifndef MBED_CONF_RTOS_PRESENT

// This wait_ns is used by both RTOS and non-RTOS builds

#ifdef __CORTEX_M
#if (__CORTEX_M == 0 && !defined __CM0PLUS_REV) || __CORTEX_M == 1
// Cortex-M0 and Cortex-M1 take 6 cycles per iteration - SUBS = 1, 2xNOP = 2, BCS = 3
#define LOOP_SCALER 6000
#elif (__CORTEX_M == 0 && defined __CM0PLUS_REV) || __CORTEX_M == 3 || __CORTEX_M == 4 || \
__CORTEX_M == 23 || __CORTEX_M == 33
// Cortex-M0+, M3, M4, M23 and M33 take 5 cycles per iteration - SUBS = 1, 2xNOP = 2, BCS = 2
// TODO - check M33
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we resolve the TODO before committing this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't actually have any M33 targets yet, do we? So don't think I've got anything I can test this on.

My belief is that the M33 is basically an M4 in terms of internal architecture, but no firm evidence. Could leave the __CORTEX_M == 33 case out and deal with it when we have the first actual target.

#define LOOP_SCALER 5000
#elif __CORTEX_M == 7
// Cortex-M7 manages to dual-issue for 2 cycles per iteration (SUB,NOP) = 1, (NOP,BCS) = 1
// (The NOPs were added to stabilise this - with just the SUB and BCS, it seems that the
// M7 sometimes takes 1 cycle, sometimes 2, possibly depending on alignment)
#define LOOP_SCALER 2000
#endif
#elif defined __CORTEX_A
#if __CORTEX_A == 9
// Cortex-A9 is dual-issue, so let's assume same performance as Cortex-M7.
// TODO - test.
#define LOOP_SCALER 2000
#endif
#endif

/* We only define the function if we've identified the CPU. If we haven't,
* leave it undefined rather than failing with an immediate compile-time
* #error. This leaves the door open to non-ARM builds, or to people
* providing substitutes for other CPUs, and means the build only fails if
* the function is actually needed.
*/
#ifdef LOOP_SCALER

/* Timing seems to depend on alignment, and toolchains do not support aligning
* functions well. So sidestep that by hand-assembling the code. Also avoids
* the hassle of handling multiple toolchains with different assembler
* syntax.
*
* The array below holds the Thumb instruction encodings of the delay loop as
* 16-bit halfwords, placed on an 8-byte boundary by MBED_ALIGN(8). The loop
* decrements R0 and branches back to the SUBS while the carry flag is set,
* then returns with BX LR. The per-iteration cycle counts assumed for this
* exact instruction sequence are what LOOP_SCALER encodes.
*/
MBED_ALIGN(8)
static const uint16_t delay_loop_code[] = {
0x1E40, // SUBS R0,R0,#1
0xBF00, // NOP
0xBF00, // NOP
0xD2FB, // BCS .-3 (0x00 would be .+2, so 0xFB = -5 = .-3)
0x4770 // BX LR
};

/* Take the address of the code, set LSB to indicate Thumb, and cast to void() function pointer.
* The empty parameter list lets callers pass the iteration count as an argument.
*/
#define delay_loop ((void(*)()) ((uintptr_t) delay_loop_code | 1))

/* Spins for approximately ns nanoseconds by running the hand-assembled delay
* loop for a computed number of iterations.
*
* ns - requested delay in nanoseconds. The product cycles_per_us * ns is
* evaluated in 32 bits, so a sufficiently large ns overflows and gives
* a shorter delay than requested.
*/
void wait_ns(unsigned int ns)
{
// Whole CPU cycles per microsecond; integer division drops any fractional MHz.
uint32_t cycles_per_us = SystemCoreClock / 1000000;
// Note that this very calculation, plus call overhead, will take multiple
// cycles. Could well be 100ns on its own... So round down here, startup is
// worth at least one loop iteration.
// LOOP_SCALER is (cycles per loop iteration) * 1000, so this division turns
// (cycles per us) * ns into a loop iteration count.
uint32_t count = (cycles_per_us * ns) / LOOP_SCALER;

// Jump into delay_loop_code with count as its argument; the loop counts it
// down in R0 (presumably the first-argument register under the ARM
// procedure call standard - confirm against the AAPCS).
delay_loop(count);
}
#endif // LOOP_SCALER
2 changes: 1 addition & 1 deletion targets/TARGET_NUVOTON/TARGET_M2351/device/M2351.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ typedef enum IRQn
/*@}*/ /* end of group CMSIS */


#include "core_armv8mbl.h" /* Processor and core peripherals */
#include "core_cm23.h" /* Processor and core peripherals */
#include "system_M2351.h" /* System Header */

/**
Expand Down