|
| 1 | +/* |
| 2 | + * Copyright (c) 2018-2020, Nuvoton Technology Corporation |
| 3 | + * |
| 4 | + * SPDX-License-Identifier: Apache-2.0 |
| 5 | + * |
| 6 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | + * you may not use this file except in compliance with the License. |
| 8 | + * You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, software |
| 13 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | + * See the License for the specific language governing permissions and |
| 16 | + * limitations under the License. |
| 17 | + */ |
| 18 | + |
#include "cmsis.h"
#include "platform/mbed_mpu_mgmt.h"
#include "platform/mbed_toolchain.h"
#include "platform/mbed_wait_api.h"
| 22 | + |
/* Override wait_ns to provide a more accurate implementation
 *
 * At high HCLK rates, M2351 cannot provide zero-wait-state flash performance. Besides,
 * cache is off for non-secure land (for internal reasons). To fix it, borrowing from
 * wait_ns in mbed-os/platform/mbed_wait_api_no_rtos.c, we relocate 'delay_loop_code'
 * from flash to SRAM to achieve zero-wait-state performance.
 *
 * NOTE1: With MPU, RAM is marked non-executable. We must mark RAM executable for
 *        running 'delay_loop_code' in SRAM.
 * NOTE2: Cache is on for secure land. This override is necessary only for non-secure
 *        land.
 */
| 35 | + |
/* Divisor that turns (CPU cycles per microsecond * nanoseconds) into a number of
 * delay-loop passes.
 *
 * Cortex-M0+, M3, M4 and M23 take 5 cycles per pass through the loop:
 * SUBS = 1, 2 x NOP = 2, BCS = 2. The extra factor of 1000 accounts for the
 * cycle rate being expressed per microsecond while the delay is in nanoseconds.
 */
#define LOOP_SCALER (5 * 1000)
| 38 | + |
| 39 | +MBED_ALIGN(8) |
| 40 | +static uint16_t delay_loop_code[] = { |
| 41 | + 0x1E40, // SUBS R0,R0,#1 |
| 42 | + 0xBF00, // NOP |
| 43 | + 0xBF00, // NOP |
| 44 | + 0xD2FB, // BCS .-3 (0x00 would be .+2, so 0xFB = -5 = .-3) |
| 45 | + 0x4770 // BX LR |
| 46 | +}; |
| 47 | + |
/* Callable view of delay_loop_code.
 *
 * Takes the address of the code array, sets the LSB to indicate Thumb state, and
 * casts the result to a function pointer. The pointer type carries a full
 * prototype (one uint32_t argument, the loop count) instead of an empty
 * parameter list, so the call is type-checked and remains valid in C23, where
 * an empty list means "no parameters".
 */
#define delay_loop ((void (*)(uint32_t)) ((uintptr_t) delay_loop_code | 1))
| 50 | + |
| 51 | +void wait_ns(unsigned int ns) |
| 52 | +{ |
| 53 | + uint32_t cycles_per_us = SystemCoreClock / 1000000; |
| 54 | + // Note that this very calculation, plus call overhead, will take multiple |
| 55 | + // cycles. Could well be 100ns on its own... So round down here, startup is |
| 56 | + // worth at least one loop iteration. |
| 57 | + uint32_t count = (cycles_per_us * ns) / LOOP_SCALER; |
| 58 | + |
| 59 | + mbed_mpu_manager_lock_ram_execution(); |
| 60 | + delay_loop(count); |
| 61 | + mbed_mpu_manager_unlock_ram_execution(); |
| 62 | +} |
0 commit comments