34
34
#ifdef SAMD51
35
35
#include "hri/hri_cmcc_d51.h"
36
36
#include "hri/hri_nvmctrl_d51.h"
37
-
38
- // This magical macro makes sure the delay isn't optimized out and is the
39
- // minimal three instructions.
40
- #define delay_cycles (cycles ) \
41
- { \
42
- uint32_t t; \
43
- asm volatile ( \
44
- "movs %[t], %[c]\n\t" \
45
- "loop%=:\n\t" \
46
- "subs %[t], #1\n\t" \
47
- "bne.n loop%=" : [t] "=r"(t) : [c] "I" (cycles)); \
48
- }
49
37
#endif
50
38
51
- // Ensure this code is compiled with -Os. Any other optimization level may change the timing of it
52
- // and break neopixels.
53
- #pragma GCC push_options
54
- #pragma GCC optimize ("Os")
39
// Bit-bangs a byte buffer out on one GPIO pin with a hand-written, cycle-counted
// Thumb loop. The function is declared naked/noinline/aligned(16); the asm below
// is the whole body, so it saves and restores r4-r6 itself and returns through
// "pop {..., pc}".
//
// Register use, as read from the asm and its inline comments:
//   r0  base address for the stores: [r0, #0] is commented "clr" and [r0, #4] "set"
//       (presumably clraddr, with the set register 4 bytes above the clear
//       register - TODO confirm against the PORT register layout)
//   r1  value written by both the set and clr stores (presumably pinMask)
//   r2  ptr; advanced by one after each byte is loaded
//   r3  becomes the end pointer via "add r3, r2, r3" (presumably ptr + numBytes)
//   r4  current bit mask; starts at 0x80 and shifts right, so bits go out MSB first
//   r5  the byte currently being sent
//   r6  scratch counter for the delay loops
//
// Per bit: store "set", spin 3 iterations, store "clr" only when the masked bit is
// zero, spin 6 iterations, store "clr" unconditionally, shift the mask. While the
// mask is non-zero, spin 2 more iterations and send the next bit; otherwise move
// on to the next byte or stop.
//
// The SAMD21 and SAMD51 variants differ only in mnemonic spelling (sub/subs and
// asr/asrs); the delay counts are the same for both.
//
// NOTE(review): the first byte is loaded and clocked out before ptr is compared
// with the end pointer, so a call with numBytes == 0 still reads and sends one byte.
// NOTE(review): the labels (loopLoad, d0, d1, d2, ...) are plain names rather than
// %=-generated ones; presumably they would clash if this asm were emitted twice in
// one translation unit - confirm noinline is enough to prevent that.
+ __attribute__((naked ,noinline ,aligned (16 )))
40
+ static void neopixel_send_buffer_core (volatile uint32_t * clraddr , uint32_t pinMask ,
41
+ const uint8_t * ptr , int numBytes );
42
+
43
+ static void neopixel_send_buffer_core (volatile uint32_t * clraddr , uint32_t pinMask ,
44
+ const uint8_t * ptr , int numBytes ) {
45
+ asm volatile (" push {r4, r5, r6, lr};"
46
+ " add r3, r2, r3;"
47
+ "loopLoad:"
48
+ " ldrb r5, [r2, #0];" // r5 := *ptr
49
+ " add r2, #1;" // ptr++
50
+ " movs r4, #128;" // r4-mask, 0x80
51
+ "loopBit:"
52
+ " str r1, [r0, #4];" // set
53
+ #ifdef SAMD21
54
+ " movs r6, #3; d2: sub r6, #1; bne d2;" // delay 3
55
+ #endif
56
+ #ifdef SAMD51
57
+ " movs r6, #3; d2: subs r6, #1; bne d2;" // delay 3
58
+ #endif
59
+ " tst r4, r5;" // mask&r5
60
// Bit is 1: skip the early clr so the line stays high through the next delay.
+ " bne skipclr;"
61
+ " str r1, [r0, #0];" // clr
62
+ "skipclr:"
63
+ #ifdef SAMD21
64
+ " movs r6, #6; d0: sub r6, #1; bne d0;" // delay 6
65
+ #endif
66
+ #ifdef SAMD51
67
+ " movs r6, #6; d0: subs r6, #1; bne d0;" // delay 6
68
+ #endif
69
+ " str r1, [r0, #0];" // clr (possibly again, doesn't matter)
70
+ #ifdef SAMD21
71
+ " asr r4, r4, #1;" // mask >>= 1
72
+ #endif
73
+ #ifdef SAMD51
74
+ " asrs r4, r4, #1;" // mask >>= 1
75
+ #endif
76
+ " beq nextbyte;"
77
// NOTE(review): the mask is already <= 0x40 here, so uxtb leaves its value
// unchanged; presumably it is kept as a timing filler - confirm before removing.
+ " uxtb r4, r4;"
78
+ #ifdef SAMD21
79
+ " movs r6, #2; d1: sub r6, #1; bne d1;" // delay 2
80
+ #endif
81
+ #ifdef SAMD51
82
+ " movs r6, #2; d1: subs r6, #1; bne d1;" // delay 2
83
+ #endif
84
+ " b loopBit;"
85
+ "nextbyte:"
86
// bcs after cmp is an unsigned >= test: stop once ptr (r2) has reached end (r3).
+ " cmp r2, r3;"
87
+ " bcs neopixel_stop;"
88
+ " b loopLoad;"
89
+ "neopixel_stop:"
90
+ " pop {r4, r5, r6, pc};"
91
+ "" );
92
+ }
55
93
56
94
uint64_t next_start_tick_ms = 0 ;
57
95
uint32_t next_start_tick_us = 1000 ;
58
96
59
97
void common_hal_neopixel_write (const digitalio_digitalinout_obj_t * digitalinout , uint8_t * pixels , uint32_t numBytes ) {
60
98
// This is adapted directly from the Adafruit NeoPixel library SAMD21G18A code:
61
99
// https://github.com/adafruit/Adafruit_NeoPixel/blob/master/Adafruit_NeoPixel.cpp
62
- uint8_t * ptr , * end , p , bitMask ;
100
+ // and the asm version from https://github.com/microsoft/uf2-samdx1/blob/master/inc/neopixel.h
63
101
uint32_t pinMask ;
64
102
PortGroup * port ;
65
103
@@ -71,100 +109,32 @@ void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout,
71
109
mp_hal_disable_all_interrupts ();
72
110
73
111
74
- #ifdef SAMD21
75
- // Make sure the NVM cache is consistently timed.
76
- NVMCTRL -> CTRLB .bit .READMODE = NVMCTRL_CTRLB_READMODE_DETERMINISTIC_Val ;
77
- #endif
78
-
79
112
#ifdef SAMD51
80
113
// When this routine is positioned at certain addresses, the timing logic
81
114
// below can be too fast by about 2.5x. This is some kind of (un)fortunate code
82
- // positiong with respect to a cache line.
115
+ // positioning with respect to a cache line.
83
116
// Theoretically we should turn off the CMCC caches and the
84
117
// NVM caches to ensure consistent timing. Testing shows that the NVMCTRL
85
118
// cache disabling seems to make the difference. But turn both off to make sure.
86
119
// It's difficult to test because additions to the code before the timing loop
87
- // below change instruction placement. Testing was done by adding cache changes
88
- // below the loop (so only the first time through is wrong).
120
+ // below change instruction placement. (though this should be less true now that
121
+ // the main code is in the cache-aligned function neopixel_send_buffer_core)
122
+ // Testing was done by adding cache changes below the loop (so only the
123
+ // first time through is wrong).
89
124
//
90
125
// Turn off instruction, data, and NVM caches to force consistent timing.
91
126
// Invalidate existing cache entries.
92
127
hri_cmcc_set_CFG_reg (CMCC , CMCC_CFG_DCDIS | CMCC_CFG_ICDIS );
93
128
hri_cmcc_write_MAINT0_reg (CMCC , CMCC_MAINT0_INVALL );
94
129
hri_nvmctrl_set_CTRLA_CACHEDIS0_bit (NVMCTRL );
95
130
hri_nvmctrl_set_CTRLA_CACHEDIS1_bit (NVMCTRL );
96
- #endif
131
+ #endif
97
132
98
133
uint32_t pin = digitalinout -> pin -> number ;
99
134
port = & PORT -> Group [GPIO_PORT (pin )]; // Convert GPIO # to port register
100
135
pinMask = (1UL << (pin % 32 )); // From port_pin_set_output_level ASF code.
101
- ptr = pixels ;
102
- end = ptr + numBytes ;
103
- p = * ptr ++ ;
104
- bitMask = 0x80 ;
105
-
106
- volatile uint32_t * set = & (port -> OUTSET .reg ),
107
- * clr = & (port -> OUTCLR .reg );
108
-
109
- for (;;) {
110
- * set = pinMask ;
111
- // This is the time where the line is always high regardless of the bit.
112
- // For the SK6812 its 0.3us +- 0.15us
113
- #ifdef SAMD21
114
- asm("nop; nop;" );
115
- #endif
116
- #ifdef SAMD51
117
- delay_cycles (2 );
118
- #endif
119
- if ((p & bitMask ) != 0 ) {
120
- // This is the high delay unique to a one bit.
121
- // For the SK6812 its 0.3us
122
- #ifdef SAMD21
123
- asm("nop; nop; nop; nop; nop; nop; nop;" );
124
- #endif
125
- #ifdef SAMD51
126
- delay_cycles (3 );
127
- #endif
128
- * clr = pinMask ;
129
- } else {
130
- * clr = pinMask ;
131
- // This is the low delay unique to a zero bit.
132
- // For the SK6812 its 0.3us
133
- #ifdef SAMD21
134
- asm("nop; nop;" );
135
- #endif
136
- #ifdef SAMD51
137
- delay_cycles (2 );
138
- #endif
139
- }
140
- if ((bitMask >>= 1 ) != 0 ) {
141
- // This is the delay between bits in a byte and is the 1 code low
142
- // level time from the datasheet.
143
- // For the SK6812 its 0.6us +- 0.15us
144
- #ifdef SAMD21
145
- asm("nop; nop; nop; nop; nop;" );
146
- #endif
147
- #ifdef SAMD51
148
- delay_cycles (4 );
149
- #endif
150
- } else {
151
- if (ptr >= end ) break ;
152
- p = * ptr ++ ;
153
- bitMask = 0x80 ;
154
- // This is the delay between bytes. It's similar to the other branch
155
- // in the if statement except its tuned to account for the time the
156
- // above operations take.
157
- // For the SK6812 its 0.6us +- 0.15us
158
- #ifdef SAMD51
159
- delay_cycles (3 );
160
- #endif
161
- }
162
- }
163
-
164
- #ifdef SAMD21
165
- // Speed up! (But inconsistent timing.)
166
- NVMCTRL -> CTRLB .bit .READMODE = NVMCTRL_CTRLB_READMODE_NO_MISS_PENALTY_Val ;
167
- #endif
136
+ volatile uint32_t * clr = & (port -> OUTCLR .reg );
137
+ neopixel_send_buffer_core (clr , pinMask , pixels , numBytes );
168
138
169
139
#ifdef SAMD51
170
140
// Turn instruction, data, and NVM caches back on.
@@ -189,4 +159,3 @@ void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout,
189
159
190
160
}
191
161
192
- #pragma GCC pop_options
0 commit comments