Skip to content

samd: neopixel: Fix neopixels after #2297 #2363

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 10, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 63 additions & 94 deletions ports/atmel-samd/common-hal/neopixel_write/__init__.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,32 +34,70 @@
#ifdef SAMD51
#include "hri/hri_cmcc_d51.h"
#include "hri/hri_nvmctrl_d51.h"

// Busy-wait helper: loads the constant `cycles` into a scratch register and
// counts it down to zero with the minimal three instructions
// (movs / subs / bne.n).
//  - `asm volatile` makes sure the delay loop isn't optimized out.
//  - The "I" constraint means `cycles` must be an immediate constant, not a
//    runtime value.
//  - `%=` expands to a number unique to each asm instance, so the `loop` label
//    does not clash when the macro is expanded several times in one function.
// Note that the argument is the number of loop iterations, not an exact count
// of CPU clock cycles.
#define delay_cycles(cycles) \
{ \
uint32_t t; \
asm volatile ( \
"movs %[t], %[c]\n\t" \
"loop%=:\n\t" \
"subs %[t], #1\n\t" \
"bne.n loop%=" : [t] "=r"(t) : [c] "I" (cycles)); \
}
#endif

// Ensure this code is compiled with -Os. Any other optimization level may change the timing of it
// and break neopixels.
#pragma GCC push_options
#pragma GCC optimize ("Os")
// Bit-bangs `numBytes` bytes starting at `ptr` onto the pin(s) in `pinMask`,
// most significant bit first (the bit mask starts at 0x80 and shifts right).
//
//   clraddr  - address written for "clr" stores; "set" stores go to
//              clraddr + 4. The caller passes the port's OUTCLR register;
//              presumably OUTSET sits 4 bytes after it - confirm against the
//              PORT register map.
//   pinMask  - value stored to the set/clr addresses.
//   ptr      - first byte to transmit.
//   numBytes - number of bytes to transmit (see the note on 0 below).
//
// The function is `naked`, so the compiler emits no prologue/epilogue: the asm
// below saves/restores r4-r6 itself and returns by popping lr into pc. The
// arguments are expected in r0-r3 (clraddr, pinMask, ptr, numBytes) per the ARM
// procedure call standard; the asm's use of r2 as `ptr` matches that.
// `noinline` and `aligned(16)` keep the code as a single, fixed-alignment copy.
//
// Per bit: drive high, short delay, drive low early if the bit is 0, longer
// delay, drive low (ends the high phase of a 1 bit), then a delay before the
// next bit. The delay operands are busy-loop iteration counts.
//
// NOTE: the end-of-buffer check runs only after a byte has been sent, so one
// byte is read from `ptr` and transmitted even when numBytes is 0.
// NOTE(review): the asm labels (loopLoad, d0, ...) are plain global names, not
// uniquified, so this asm must be emitted exactly once in the object file.
__attribute__((naked,noinline,aligned(16)))
static void neopixel_send_buffer_core(volatile uint32_t *clraddr, uint32_t pinMask,
const uint8_t *ptr, int numBytes);

static void neopixel_send_buffer_core(volatile uint32_t *clraddr, uint32_t pinMask,
const uint8_t *ptr, int numBytes) {
// Save the scratch registers and the return address.
asm volatile(" push {r4, r5, r6, lr};"
" add r3, r2, r3;" // r3 := ptr + numBytes (one past the last byte)
"loopLoad:"
" ldrb r5, [r2, #0];" // r5 := *ptr
" add r2, #1;" // ptr++
" movs r4, #128;" // r4-mask, 0x80
"loopBit:"
" str r1, [r0, #4];" // set
// High time common to both 0 and 1 bits. The SAMD21 and SAMD51 variants
// below differ only in the mnemonic spelling (sub vs. subs).
#ifdef SAMD21
" movs r6, #3; d2: sub r6, #1; bne d2;" // delay 3
#endif
#ifdef SAMD51
" movs r6, #3; d2: subs r6, #1; bne d2;" // delay 3
#endif
" tst r4, r5;" // mask&r5
" bne skipclr;" // bit is 1: stay high for now
" str r1, [r0, #0];" // clr
"skipclr:"
// Remainder of the high time for a 1 bit (already low for a 0 bit).
#ifdef SAMD21
" movs r6, #6; d0: sub r6, #1; bne d0;" // delay 6
#endif
#ifdef SAMD51
" movs r6, #6; d0: subs r6, #1; bne d0;" // delay 6
#endif
" str r1, [r0, #0];" // clr (possibly again, doesn't matter)
#ifdef SAMD21
" asr r4, r4, #1;" // mask >>= 1
#endif
#ifdef SAMD51
" asrs r4, r4, #1;" // mask >>= 1
#endif
" beq nextbyte;" // mask shifted to zero: all 8 bits of this byte sent
" uxtb r4, r4;" // keep only the low 8 bits of the mask
// Low time between bits within the same byte.
#ifdef SAMD21
" movs r6, #2; d1: sub r6, #1; bne d1;" // delay 2
#endif
#ifdef SAMD51
" movs r6, #2; d1: subs r6, #1; bne d1;" // delay 2
#endif
" b loopBit;"
"nextbyte:"
" cmp r2, r3;" // reached the end of the buffer?
" bcs neopixel_stop;" // ptr >= end (unsigned): done
" b loopLoad;"
"neopixel_stop:"
" pop {r4, r5, r6, pc};" // restore registers and return
"");
}

uint64_t next_start_tick_ms = 0;
uint32_t next_start_tick_us = 1000;

void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout, uint8_t *pixels, uint32_t numBytes) {
// This is adapted directly from the Adafruit NeoPixel library SAMD21G18A code:
// https://github.com/adafruit/Adafruit_NeoPixel/blob/master/Adafruit_NeoPixel.cpp
uint8_t *ptr, *end, p, bitMask;
// and the asm version from https://github.com/microsoft/uf2-samdx1/blob/master/inc/neopixel.h
uint32_t pinMask;
PortGroup* port;

Expand All @@ -71,100 +109,32 @@ void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout,
mp_hal_disable_all_interrupts();


#ifdef SAMD21
// Make sure the NVM cache is consistently timed.
NVMCTRL->CTRLB.bit.READMODE = NVMCTRL_CTRLB_READMODE_DETERMINISTIC_Val;
#endif

#ifdef SAMD51
// When this routine is positioned at certain addresses, the timing logic
// below can be too fast by about 2.5x. This is some kind of (un)fortunate code
// positiong with respect to a cache line.
// positioning with respect to a cache line.
// Theoretically we should turn off the CMCC caches and the
// NVM caches to ensure consistent timing. Testing shows that the NVMCTRL
// cache disabling seems to make the difference. But turn both off to make sure.
// It's difficult to test because additions to the code before the timing loop
// below change instruction placement. Testing was done by adding cache changes
// below the loop (so only the first time through is wrong).
// below change instruction placement. (though this should be less true now that
// the main code is in the cache-aligned function neopixel_send_buffer_core)
// Testing was done by adding cache changes below the loop (so only the
// first time through is wrong).
//
// Turn off instruction, data, and NVM caches to force consistent timing.
// Invalidate existing cache entries.
hri_cmcc_set_CFG_reg(CMCC, CMCC_CFG_DCDIS | CMCC_CFG_ICDIS);
hri_cmcc_write_MAINT0_reg(CMCC, CMCC_MAINT0_INVALL);
hri_nvmctrl_set_CTRLA_CACHEDIS0_bit(NVMCTRL);
hri_nvmctrl_set_CTRLA_CACHEDIS1_bit(NVMCTRL);
#endif
#endif

uint32_t pin = digitalinout->pin->number;
port = &PORT->Group[GPIO_PORT(pin)]; // Convert GPIO # to port register
pinMask = (1UL << (pin % 32)); // From port_pin_set_output_level ASF code.
ptr = pixels;
end = ptr + numBytes;
p = *ptr++;
bitMask = 0x80;

volatile uint32_t *set = &(port->OUTSET.reg),
*clr = &(port->OUTCLR.reg);

for(;;) {
*set = pinMask;
// This is the time where the line is always high regardless of the bit.
// For the SK6812 its 0.3us +- 0.15us
#ifdef SAMD21
asm("nop; nop;");
#endif
#ifdef SAMD51
delay_cycles(2);
#endif
if((p & bitMask) != 0) {
// This is the high delay unique to a one bit.
// For the SK6812 its 0.3us
#ifdef SAMD21
asm("nop; nop; nop; nop; nop; nop; nop;");
#endif
#ifdef SAMD51
delay_cycles(3);
#endif
*clr = pinMask;
} else {
*clr = pinMask;
// This is the low delay unique to a zero bit.
// For the SK6812 its 0.3us
#ifdef SAMD21
asm("nop; nop;");
#endif
#ifdef SAMD51
delay_cycles(2);
#endif
}
if((bitMask >>= 1) != 0) {
// This is the delay between bits in a byte and is the 1 code low
// level time from the datasheet.
// For the SK6812 its 0.6us +- 0.15us
#ifdef SAMD21
asm("nop; nop; nop; nop; nop;");
#endif
#ifdef SAMD51
delay_cycles(4);
#endif
} else {
if(ptr >= end) break;
p = *ptr++;
bitMask = 0x80;
// This is the delay between bytes. It's similar to the other branch
// in the if statement except its tuned to account for the time the
// above operations take.
// For the SK6812 its 0.6us +- 0.15us
#ifdef SAMD51
delay_cycles(3);
#endif
}
}

#ifdef SAMD21
// Speed up! (But inconsistent timing.)
NVMCTRL->CTRLB.bit.READMODE = NVMCTRL_CTRLB_READMODE_NO_MISS_PENALTY_Val;
#endif
volatile uint32_t *clr = &(port->OUTCLR.reg);
neopixel_send_buffer_core(clr, pinMask, pixels, numBytes);

#ifdef SAMD51
// Turn instruction, data, and NVM caches back on.
Expand All @@ -189,4 +159,3 @@ void common_hal_neopixel_write(const digitalio_digitalinout_obj_t* digitalinout,

}

#pragma GCC pop_options