From 8df09c9a7ce19c18189c8b4408e01ca34ab480fe Mon Sep 17 00:00:00 2001 From: pmarques-dev <72901351+pmarques-dev@users.noreply.github.com> Date: Mon, 5 Jun 2023 16:54:37 +0100 Subject: [PATCH 1/3] quadrature encoder: reduce PIO code size from 29 to 24 (#390) Co-authored-by: Paulo Marques --- pio/quadrature_encoder/quadrature_encoder.c | 8 +- pio/quadrature_encoder/quadrature_encoder.pio | 97 +++++++------------ 2 files changed, 42 insertions(+), 63 deletions(-) diff --git a/pio/quadrature_encoder/quadrature_encoder.c b/pio/quadrature_encoder/quadrature_encoder.c index a11ab89..1818a97 100644 --- a/pio/quadrature_encoder/quadrature_encoder.c +++ b/pio/quadrature_encoder/quadrature_encoder.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2021 pmarques-dev @ github + * Copyright (c) 2021-2023 pmarques-dev @ github * * SPDX-License-Identifier: BSD-3-Clause */ @@ -44,8 +44,10 @@ int main() { PIO pio = pio0; const uint sm = 0; - uint offset = pio_add_program(pio, &quadrature_encoder_program); - quadrature_encoder_program_init(pio, sm, offset, PIN_AB, 0); + // we don't really need to keep the offset, as this program must be loaded + // at offset 0 + pio_add_program(pio, &quadrature_encoder_program); + quadrature_encoder_program_init(pio, sm, PIN_AB, 0); while (1) { // note: thanks to two's complement arithmetic delta will always diff --git a/pio/quadrature_encoder/quadrature_encoder.pio b/pio/quadrature_encoder/quadrature_encoder.pio index d245d4b..63e1dfd 100644 --- a/pio/quadrature_encoder/quadrature_encoder.pio +++ b/pio/quadrature_encoder/quadrature_encoder.pio @@ -1,13 +1,12 @@ ; -; Copyright (c) 2021 pmarques-dev @ github +; Copyright (c) 2021-2023 pmarques-dev @ github ; ; SPDX-License-Identifier: BSD-3-Clause ; .program quadrature_encoder -; this code must be loaded into address 0, but at 29 instructions, it probably -; wouldn't be able to share space with other programs anyway +; the code must be loaded at address 0, because it uses computed jumps .origin 0 @@ -20,11 +19,11 @@ ; keeps the current encoder count and is incremented / decremented according to ; the steps sampled -; writing any non zero value to the TX FIFO makes the state machine push the -; current count to RX FIFO between 6 to 18 clocks afterwards. The worst case -; sampling loop takes 14 cycles, so this program is able to read step rates up -; to sysclk / 14 (e.g., sysclk 125MHz, max step rate = 8.9 Msteps/sec) - +; the program keeps trying to write the current count to the RX FIFO without +; blocking. To read the current count, the user code must drain the FIFO first +; and wait for a fresh sample (takes ~4 SM cycles on average). The worst case +; sampling loop takes 10 cycles, so this program is able to read step rates up +; to sysclk / 10 (e.g., sysclk 125MHz, max step rate = 12.5 Msteps/sec) ; 00 state JMP update ; read 00 @@ -60,41 +59,29 @@ decrement: ; this is where the main loop starts .wrap_target update: - ; we start by checking the TX FIFO to see if the main code is asking for - ; the current count after the PULL noblock, OSR will have either 0 if - ; there was nothing or the value that was there - SET X, 0 - PULL noblock - - ; since there are not many free registers, and PULL is done into OSR, we - ; have to do some juggling to avoid losing the state information and - ; still place the values where we need them - MOV X, OSR - MOV OSR, ISR - - ; the main code did not ask for the count, so just go to "sample_pins" - JMP !X, sample_pins - - ; if it did ask for the count, then we push it - MOV ISR, Y ; we trash ISR, but we already have a copy in OSR - PUSH + MOV ISR, Y + PUSH noblock sample_pins: ; we shift into ISR the last state of the 2 input pins (now in OSR) and ; the new state of the 2 pins, thus producing the 4 bit target for the - ; computed jump into the correct action for this state - MOV ISR, NULL - IN OSR, 2 + ; computed jump into the correct action for this state. Both the PUSH + ; above and the OUT below zero the other bits in ISR + OUT ISR, 2 IN PINS, 2 + + ; save the state in the OSR, so that we can use ISR for other purposes + MOV OSR, ISR + ; jump to the correct state machine action MOV PC, ISR ; the PIO does not have a increment instruction, so to do that we do a ; negate, decrement, negate sequence increment: - MOV X, !Y - JMP X--, increment_cont + MOV Y, ~Y + JMP Y--, increment_cont increment_cont: - MOV Y, !X + MOV Y, ~Y .wrap ; the .wrap here avoids one jump instruction and saves a cycle too @@ -106,16 +93,16 @@ increment_cont: // max_step_rate is used to lower the clock of the state machine to save power // if the application doesn't require a very high sampling rate. Passing zero -// will set the clock to the maximum, which gives a max step rate of around -// 8.9 Msteps/sec at 125MHz +// will set the clock to the maximum -static inline void quadrature_encoder_program_init(PIO pio, uint sm, uint offset, uint pin, int max_step_rate) +static inline void quadrature_encoder_program_init(PIO pio, uint sm, uint pin, int max_step_rate) { pio_sm_set_consecutive_pindirs(pio, sm, pin, 2, false); gpio_pull_up(pin); gpio_pull_up(pin + 1); - pio_sm_config c = quadrature_encoder_program_get_default_config(offset); + pio_sm_config c = quadrature_encoder_program_get_default_config(0); + sm_config_set_in_pins(&c, pin); // for WAIT, IN sm_config_set_jmp_pin(&c, pin); // for JMP // shift to left, autopull disabled @@ -127,38 +114,28 @@ static inline void quadrature_encoder_program_init(PIO pio, uint sm, uint offset if (max_step_rate == 0) { sm_config_set_clkdiv(&c, 1.0); } else { - // one state machine loop takes at most 14 cycles - float div = (float)clock_get_hz(clk_sys) / (14 * max_step_rate); + // one state machine loop takes at most 10 cycles + float div = (float)clock_get_hz(clk_sys) / (10 * max_step_rate); sm_config_set_clkdiv(&c, div); } - pio_sm_init(pio, sm, offset, &c); + pio_sm_init(pio, sm, 0, &c); pio_sm_set_enabled(pio, sm, true); } - -// When requesting the current count we may have to wait a few cycles (average -// ~11 sysclk cycles) for the state machine to reply. If we are reading multiple -// encoders, we may request them all in one go and then fetch them all, thus -// avoiding doing the wait multiple times. If we are reading just one encoder, -// we can use the "get_count" function to request and wait - -static inline void quadrature_encoder_request_count(PIO pio, uint sm) -{ - pio->txf[sm] = 1; -} - -static inline int32_t quadrature_encoder_fetch_count(PIO pio, uint sm) -{ - while (pio_sm_is_rx_fifo_empty(pio, sm)) - tight_loop_contents(); - return pio->rxf[sm]; -} - static inline int32_t quadrature_encoder_get_count(PIO pio, uint sm) { - quadrature_encoder_request_count(pio, sm); - return quadrature_encoder_fetch_count(pio, sm); + uint ret; + int n; + + // if the FIFO has N entries, we fetch them to drain the FIFO, + // plus one entry which will be guaranteed to not be stale + n = pio_sm_get_rx_fifo_level(pio, sm) + 1; + while (n > 0) { + ret = pio_sm_get_blocking(pio, sm); + n--; + } + return ret; } %} From 654aabc6869c80b4b05739e764528907c2d30d7e Mon Sep 17 00:00:00 2001 From: Frederico Pereira <89074716+Ir0nyPT@users.noreply.github.com> Date: Tue, 6 Jun 2023 16:51:55 +0100 Subject: [PATCH 2/3] Update picow_ntp_client.c (#391) Fix "invalid conversion from 'void*' to 'NTP_T*'" --- pico_w/wifi/ntp_client/picow_ntp_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pico_w/wifi/ntp_client/picow_ntp_client.c b/pico_w/wifi/ntp_client/picow_ntp_client.c index fb76304..0fe3f0b 100644 --- a/pico_w/wifi/ntp_client/picow_ntp_client.c +++ b/pico_w/wifi/ntp_client/picow_ntp_client.c @@ -108,7 +108,7 @@ static void ntp_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p, const ip_ad // Perform initialisation static NTP_T* ntp_init(void) { - NTP_T *state = calloc(1, sizeof(NTP_T)); + NTP_T *state = (NTP_T*)calloc(1, sizeof(NTP_T)); if (!state) { printf("failed to allocate state\n"); return NULL; @@ -183,4 +183,4 @@ int main() { run_ntp_test(); cyw43_arch_deinit(); return 0; -} \ No newline at end of file +} From 465a2a3824007249a9a425560231f2e8766c356d Mon Sep 17 00:00:00 2001 From: pmarques-dev <72901351+pmarques-dev@users.noreply.github.com> Date: Thu, 8 Jun 2023 21:22:29 +0100 Subject: [PATCH 3/3] coding style fixes, copyright waiver (#395) Co-authored-by: Paulo Marques --- pio/quadrature_encoder/quadrature_encoder.c | 2 +- pio/quadrature_encoder/quadrature_encoder.pio | 139 +++++++++--------- 2 files changed, 70 insertions(+), 71 deletions(-) diff --git a/pio/quadrature_encoder/quadrature_encoder.c b/pio/quadrature_encoder/quadrature_encoder.c index 1818a97..f402efb 100644 --- a/pio/quadrature_encoder/quadrature_encoder.c +++ b/pio/quadrature_encoder/quadrature_encoder.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2021-2023 pmarques-dev @ github + * Copyright (c) 2023 Raspberry Pi (Trading) Ltd. * * SPDX-License-Identifier: BSD-3-Clause */ diff --git a/pio/quadrature_encoder/quadrature_encoder.pio b/pio/quadrature_encoder/quadrature_encoder.pio index 63e1dfd..d08f699 100644 --- a/pio/quadrature_encoder/quadrature_encoder.pio +++ b/pio/quadrature_encoder/quadrature_encoder.pio @@ -1,5 +1,5 @@ ; -; Copyright (c) 2021-2023 pmarques-dev @ github +; Copyright (c) 2023 Raspberry Pi (Trading) Ltd. ; ; SPDX-License-Identifier: BSD-3-Clause ; @@ -26,63 +26,63 @@ ; to sysclk / 10 (e.g., sysclk 125MHz, max step rate = 12.5 Msteps/sec) ; 00 state - JMP update ; read 00 - JMP decrement ; read 01 - JMP increment ; read 10 - JMP update ; read 11 + JMP update ; read 00 + JMP decrement ; read 01 + JMP increment ; read 10 + JMP update ; read 11 ; 01 state - JMP increment ; read 00 - JMP update ; read 01 - JMP update ; read 10 - JMP decrement ; read 11 + JMP increment ; read 00 + JMP update ; read 01 + JMP update ; read 10 + JMP decrement ; read 11 ; 10 state - JMP decrement ; read 00 - JMP update ; read 01 - JMP update ; read 10 - JMP increment ; read 11 + JMP decrement ; read 00 + JMP update ; read 01 + JMP update ; read 10 + JMP increment ; read 11 ; to reduce code size, the last 2 states are implemented in place and become the ; target for the other jumps ; 11 state - JMP update ; read 00 - JMP increment ; read 01 + JMP update ; read 00 + JMP increment ; read 01 decrement: - ; note: the target of this instruction must be the next address, so that - ; the effect of the instruction does not depend on the value of Y. The - ; same is true for the "JMP X--" below. Basically "JMP Y--, " - ; is just a pure "decrement Y" instruction, with no other side effects - JMP Y--, update ; read 10 + ; note: the target of this instruction must be the next address, so that + ; the effect of the instruction does not depend on the value of Y. The + ; same is true for the "JMP X--" below. Basically "JMP Y--, " + ; is just a pure "decrement Y" instruction, with no other side effects + JMP Y--, update ; read 10 - ; this is where the main loop starts + ; this is where the main loop starts .wrap_target update: - MOV ISR, Y - PUSH noblock + MOV ISR, Y ; read 11 + PUSH noblock sample_pins: - ; we shift into ISR the last state of the 2 input pins (now in OSR) and - ; the new state of the 2 pins, thus producing the 4 bit target for the - ; computed jump into the correct action for this state. Both the PUSH - ; above and the OUT below zero the other bits in ISR - OUT ISR, 2 - IN PINS, 2 + ; we shift into ISR the last state of the 2 input pins (now in OSR) and + ; the new state of the 2 pins, thus producing the 4 bit target for the + ; computed jump into the correct action for this state. Both the PUSH + ; above and the OUT below zero out the other bits in ISR + OUT ISR, 2 + IN PINS, 2 - ; save the state in the OSR, so that we can use ISR for other purposes - MOV OSR, ISR - ; jump to the correct state machine action - MOV PC, ISR + ; save the state in the OSR, so that we can use ISR for other purposes + MOV OSR, ISR + ; jump to the correct state machine action + MOV PC, ISR - ; the PIO does not have a increment instruction, so to do that we do a - ; negate, decrement, negate sequence + ; the PIO does not have a increment instruction, so to do that we do a + ; negate, decrement, negate sequence increment: - MOV Y, ~Y - JMP Y--, increment_cont + MOV Y, ~Y + JMP Y--, increment_cont increment_cont: - MOV Y, ~Y -.wrap ; the .wrap here avoids one jump instruction and saves a cycle too + MOV Y, ~Y +.wrap ; the .wrap here avoids one jump instruction and saves a cycle too @@ -97,46 +97,45 @@ increment_cont: static inline void quadrature_encoder_program_init(PIO pio, uint sm, uint pin, int max_step_rate) { - pio_sm_set_consecutive_pindirs(pio, sm, pin, 2, false); - gpio_pull_up(pin); - gpio_pull_up(pin + 1); + pio_sm_set_consecutive_pindirs(pio, sm, pin, 2, false); + gpio_pull_up(pin); + gpio_pull_up(pin + 1); - pio_sm_config c = quadrature_encoder_program_get_default_config(0); + pio_sm_config c = quadrature_encoder_program_get_default_config(0); - sm_config_set_in_pins(&c, pin); // for WAIT, IN - sm_config_set_jmp_pin(&c, pin); // for JMP - // shift to left, autopull disabled - sm_config_set_in_shift(&c, false, false, 32); - // don't join FIFO's - sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_NONE); + sm_config_set_in_pins(&c, pin); // for WAIT, IN + sm_config_set_jmp_pin(&c, pin); // for JMP + // shift to left, autopull disabled + sm_config_set_in_shift(&c, false, false, 32); + // don't join FIFO's + sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_NONE); - // passing "0" as the sample frequency, - if (max_step_rate == 0) { - sm_config_set_clkdiv(&c, 1.0); - } else { - // one state machine loop takes at most 10 cycles - float div = (float)clock_get_hz(clk_sys) / (10 * max_step_rate); - sm_config_set_clkdiv(&c, div); - } + // passing "0" as the sample frequency, + if (max_step_rate == 0) { + sm_config_set_clkdiv(&c, 1.0); + } else { + // one state machine loop takes at most 10 cycles + float div = (float)clock_get_hz(clk_sys) / (10 * max_step_rate); + sm_config_set_clkdiv(&c, div); + } - pio_sm_init(pio, sm, 0, &c); - pio_sm_set_enabled(pio, sm, true); + pio_sm_init(pio, sm, 0, &c); + pio_sm_set_enabled(pio, sm, true); } static inline int32_t quadrature_encoder_get_count(PIO pio, uint sm) { - uint ret; - int n; + uint ret; + int n; - // if the FIFO has N entries, we fetch them to drain the FIFO, - // plus one entry which will be guaranteed to not be stale - n = pio_sm_get_rx_fifo_level(pio, sm) + 1; - while (n > 0) { - ret = pio_sm_get_blocking(pio, sm); - n--; - } - return ret; + // if the FIFO has N entries, we fetch them to drain the FIFO, + // plus one entry which will be guaranteed to not be stale + n = pio_sm_get_rx_fifo_level(pio, sm) + 1; + while (n > 0) { + ret = pio_sm_get_blocking(pio, sm); + n--; + } + return ret; } %} -