15
15
* limitations under the License.
16
16
*/
17
17
18
+ #include "cmsis.h"
19
+ #include "platform/mbed_wait_api.h"
20
+
18
21
// This implementation of the wait functions will be compiled only
19
22
// if the RTOS is not present.
20
23
#ifndef MBED_CONF_RTOS_PRESENT
21
24
22
- #include "platform/mbed_wait_api.h"
23
25
#include "hal/us_ticker_api.h"
24
26
25
27
void wait (float s )
@@ -41,3 +43,94 @@ void wait_us(int us)
41
43
42
44
#endif // #ifndef MBED_CONF_RTOS_PRESENT
43
45
46
+ // This wait_ns is used by both RTOS and non-RTOS builds
47
+
48
+ #ifdef __CORTEX_M
49
+ #if (__CORTEX_M == 0 && !defined __CM0PLUS_REV ) || __CORTEX_M == 1
50
+ // Cortex-M0 and Cortex-M1 take 6 cycles per iteration - SUBS = 1, 2xNOP = 2, BCS = 3
51
+ #define LOOP_SCALER 6000
52
+ #elif (__CORTEX_M == 0 && defined __CM0PLUS_REV ) || __CORTEX_M == 3 || __CORTEX_M == 4 || \
53
+ __CORTEX_M == 23 || __CORTEX_M == 33
54
+ // Cortex-M0+, M3, M4, M23 and M33 take 5 cycles per iteration - SUBS = 1, 2xNOP = 2, BCS = 2
55
+ // TODO - check M33
56
+ #define LOOP_SCALER 5000
57
+ #elif __CORTEX_M == 7
58
+ // Cortex-M7 manages to dual-issue for 2 cycles per iteration (SUB,NOP) = 1, (NOP,BCS) = 1
59
+ // (The NOPs were added to stabilise this - with just the SUB and BCS, it seems that the
60
+ // M7 sometimes takes 1 cycle, sometimes 2, possibly depending on alignment)
61
+ #define LOOP_SCALER 2000
62
+ #endif
63
+ #elif defined __CORTEX_A
64
+ #if __CORTEX_A == 9
65
+ // Cortex-A9 is dual-issue, so let's assume same performance as Cortex-M7.
66
+ // TODO - test.
67
+ #define LOOP_SCALER 2000
68
+ #endif
69
+ #endif
70
+
71
+ /* We only define the function if we've identified the CPU. If we haven't,
72
+ * we leave it undefined rather than failing the build with an immediate
73
+ * #error. This leaves the door open to non-ARM builds, or to people
74
+ * providing substitutes for other CPUs, and only if they are
75
+ * needed.
76
+ */
77
+ #ifdef LOOP_SCALER
78
+
79
+ // *INDENT-OFF*
80
+ #ifdef __CC_ARM /* ARMC5 */
81
// ARMC5 (__CC_ARM) variant of the busy-wait loop used by wait_ns().
// The loop body is SUBS / NOP / NOP / BCS back to local label 1, followed by
// BX lr to return. The per-iteration cycle cost of exactly this sequence is
// what LOOP_SCALER encodes, so the instruction mix must not be changed
// without re-deriving LOOP_SCALER.
// NOTE(review): a1 is presumably the register carrying `count` under the ARM
// procedure call standard - confirm. The loop exits when the subtraction
// borrows (carry clear), so a count of 0 still executes the body once.
+ __asm static void delay_loop (uint32_t count )
82
+ {
83
+ 1
84
+ SUBS a1 , a1 , #1
85
+ NOP
86
+ NOP
87
+ BCS %BT1
88
+ BX lr
89
+ }
90
+ #elif defined (__ICCARM__ )
91
// IAR (__ICCARM__) variant of the busy-wait loop used by wait_ns().
// Same instruction sequence as the other toolchain variants: SUBS on the
// counter operand, two NOPs, then BCS.n back to the `loop` label. `count` is
// a read-write register operand ("+r") and the condition flags are declared
// clobbered ("cc").
// NOTE(review): `loop` is a plain named label inside the asm text; this
// presumably relies on the function being emitted only once in this
// translation unit - confirm if the function is ever inlined in several places.
+ static void delay_loop (uint32_t count )
92
+ {
93
+ __asm volatile (
94
+ "loop: \n"
95
+ " SUBS %0, %0, #1 \n"
96
+ " NOP\n"
97
+ " NOP\n"
98
+ " BCS.n loop\n"
99
+ : "+r" (count )
100
+ :
101
+ : "cc"
102
+ );
103
+ }
104
+ #else // GCC or ARMC6
105
/* GCC / ARMC6 variant of the busy-wait loop used by wait_ns().
 *
 * Executes SUBS / NOP / NOP / BCS until the subtraction borrows (carry
 * clear), so a count of 0 still runs the body once. The per-iteration cycle
 * cost of exactly this sequence is what LOOP_SCALER encodes; do not alter the
 * instruction mix without re-deriving LOOP_SCALER.
 *
 * @param count  Loop counter, decremented in place by the asm.
 *
 * Operand/clobber notes:
 *  - "+l" makes `count` a read-write operand in a low register, as needed by
 *    the 16-bit Thumb SUB/SUBS encodings.
 *  - "cc" tells the compiler the condition flags are modified.
 *  - "%=" expands to a number unique to this asm instance, giving a local
 *    numeric label that "%=b" branches back to.
 * The constraint and clobber strings must not contain whitespace: " cc " is
 * not a recognised clobber name and "+ l " is not the documented form.
 */
static void delay_loop(uint32_t count)
{
    __asm__ volatile (
        "%=:\n\t"
/* Only GCC insists on non-UAL assembly for Thumb v1 */
#if !defined(__ARMCC_VERSION) && defined(__thumb__) && !defined(__thumb2__)
        "SUB %0, #1\n\t"
#else
        "SUBS %0, %0, #1\n\t"
#endif
        "NOP\n\t"
        "NOP\n\t"
        "BCS %=b\n\t"
        : "+l" (count)
        :
        : "cc"
    );
}
123
+ #endif
124
+ // *INDENT-ON*
125
+
126
+ void wait_ns (unsigned int ns )
127
+ {
128
+ uint32_t cycles_per_us = SystemCoreClock / 1000000 ;
129
+ // Note that this very calculation, plus call overhead, will take multiple
130
+ // cycles. Could well be 100ns on its own... So round down here, startup is
131
+ // worth at least one loop iteration.
132
+ uint32_t count = (cycles_per_us * ns ) / LOOP_SCALER ;
133
+
134
+ delay_loop (count );
135
+ }
136
+ #endif // LOOP_SCALER
0 commit comments