From 8df09c9a7ce19c18189c8b4408e01ca34ab480fe Mon Sep 17 00:00:00 2001
From: pmarques-dev <72901351+pmarques-dev@users.noreply.github.com>
Date: Mon, 5 Jun 2023 16:54:37 +0100
Subject: [PATCH 1/3] quadrature encoder: reduce PIO code size from 29 to 24
 (#390)

Co-authored-by: Paulo Marques <pm@quant-insight.com>
---
 pio/quadrature_encoder/quadrature_encoder.c   |  8 +-
 pio/quadrature_encoder/quadrature_encoder.pio | 97 +++++++------------
 2 files changed, 42 insertions(+), 63 deletions(-)

diff --git a/pio/quadrature_encoder/quadrature_encoder.c b/pio/quadrature_encoder/quadrature_encoder.c
index a11ab89..1818a97 100644
--- a/pio/quadrature_encoder/quadrature_encoder.c
+++ b/pio/quadrature_encoder/quadrature_encoder.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2021 pmarques-dev @ github
+ * Copyright (c) 2021-2023 pmarques-dev @ github
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -44,8 +44,10 @@ int main() {
     PIO pio = pio0;
     const uint sm = 0;
 
-    uint offset = pio_add_program(pio, &quadrature_encoder_program);
-    quadrature_encoder_program_init(pio, sm, offset, PIN_AB, 0);
+    // we don't really need to keep the offset, as this program must be loaded
+    // at offset 0
+    pio_add_program(pio, &quadrature_encoder_program);
+    quadrature_encoder_program_init(pio, sm, PIN_AB, 0);
 
     while (1) {
         // note: thanks to two's complement arithmetic delta will always
diff --git a/pio/quadrature_encoder/quadrature_encoder.pio b/pio/quadrature_encoder/quadrature_encoder.pio
index d245d4b..63e1dfd 100644
--- a/pio/quadrature_encoder/quadrature_encoder.pio
+++ b/pio/quadrature_encoder/quadrature_encoder.pio
@@ -1,13 +1,12 @@
 ;
-; Copyright (c) 2021 pmarques-dev @ github
+; Copyright (c) 2021-2023 pmarques-dev @ github
 ;
 ; SPDX-License-Identifier: BSD-3-Clause
 ;
 
 .program quadrature_encoder
 
-; this code must be loaded into address 0, but at 29 instructions, it probably
-; wouldn't be able to share space with other programs anyway
+; the code must be loaded at address 0, because it uses computed jumps
 .origin 0
 
 
@@ -20,11 +19,11 @@
 ; keeps the current encoder count and is incremented / decremented according to
 ; the steps sampled
 
-; writing any non zero value to the TX FIFO makes the state machine push the
-; current count to RX FIFO between 6 to 18 clocks afterwards. The worst case
-; sampling loop takes 14 cycles, so this program is able to read step rates up
-; to sysclk / 14  (e.g., sysclk 125MHz, max step rate = 8.9 Msteps/sec)
-
+; the program keeps trying to write the current count to the RX FIFO without
+; blocking. To read the current count, the user code must drain the FIFO first
+; and wait for a fresh sample (takes ~4 SM cycles on average). The worst case
+; sampling loop takes 10 cycles, so this program is able to read step rates up
+; to sysclk / 10  (e.g., sysclk 125MHz, max step rate = 12.5 Msteps/sec)
 
 ; 00 state
 	JMP update	; read 00
@@ -60,41 +59,29 @@ decrement:
 	; this is where the main loop starts
 .wrap_target
 update:
-	; we start by checking the TX FIFO to see if the main code is asking for
-	; the current count after the PULL noblock, OSR will have either 0 if
-	; there was nothing or the value that was there
-	SET X, 0
-	PULL noblock
-
-	; since there are not many free registers, and PULL is done into OSR, we
-	; have to do some juggling to avoid losing the state information and
-	; still place the values where we need them
-	MOV X, OSR
-	MOV OSR, ISR
-
-	; the main code did not ask for the count, so just go to "sample_pins"
-	JMP !X, sample_pins
-
-	; if it did ask for the count, then we push it
-	MOV ISR, Y	; we trash ISR, but we already have a copy in OSR
-	PUSH
+	MOV ISR, Y
+	PUSH noblock
 
 sample_pins:
 	; we shift into ISR the last state of the 2 input pins (now in OSR) and
 	; the new state of the 2 pins, thus producing the 4 bit target for the
-	; computed jump into the correct action for this state
-	MOV ISR, NULL
-	IN OSR, 2
+	; computed jump into the correct action for this state. Both the PUSH
+	; above and the OUT below zero the other bits in ISR
+	OUT ISR, 2
 	IN PINS, 2
+
+	; save the state in the OSR, so that we can use ISR for other purposes
+	MOV OSR, ISR
+	; jump to the correct state machine action
 	MOV PC, ISR
 
 	; the PIO does not have a increment instruction, so to do that we do a
 	; negate, decrement, negate sequence
 increment:
-	MOV X, !Y
-	JMP X--, increment_cont
+	MOV Y, ~Y
+	JMP Y--, increment_cont
 increment_cont:
-	MOV Y, !X
+	MOV Y, ~Y
 .wrap	; the .wrap here avoids one jump instruction and saves a cycle too
 
 
@@ -106,16 +93,16 @@ increment_cont:
 
 // max_step_rate is used to lower the clock of the state machine to save power
 // if the application doesn't require a very high sampling rate. Passing zero
-// will set the clock to the maximum, which gives a max step rate of around
-// 8.9 Msteps/sec at 125MHz
+// will set the clock to the maximum
 
-static inline void quadrature_encoder_program_init(PIO pio, uint sm, uint offset, uint pin, int max_step_rate)
+static inline void quadrature_encoder_program_init(PIO pio, uint sm, uint pin, int max_step_rate)
 {
 	pio_sm_set_consecutive_pindirs(pio, sm, pin, 2, false);
 	gpio_pull_up(pin);
 	gpio_pull_up(pin + 1);
 
-	pio_sm_config c = quadrature_encoder_program_get_default_config(offset);
+	pio_sm_config c = quadrature_encoder_program_get_default_config(0);
+
 	sm_config_set_in_pins(&c, pin); // for WAIT, IN
 	sm_config_set_jmp_pin(&c, pin); // for JMP
 	// shift to left, autopull disabled
@@ -127,38 +114,28 @@ static inline void quadrature_encoder_program_init(PIO pio, uint sm, uint offset
 	if (max_step_rate == 0) {
 		sm_config_set_clkdiv(&c, 1.0);
 	} else {
-		// one state machine loop takes at most 14 cycles
-		float div = (float)clock_get_hz(clk_sys) / (14 * max_step_rate);
+		// one state machine loop takes at most 10 cycles
+		float div = (float)clock_get_hz(clk_sys) / (10 * max_step_rate);
 		sm_config_set_clkdiv(&c, div);
 	}
 
-	pio_sm_init(pio, sm, offset, &c);
+	pio_sm_init(pio, sm, 0, &c);
 	pio_sm_set_enabled(pio, sm, true);
 }
 
-
-// When requesting the current count we may have to wait a few cycles (average
-// ~11 sysclk cycles) for the state machine to reply. If we are reading multiple
-// encoders, we may request them all in one go and then fetch them all, thus
-// avoiding doing the wait multiple times. If we are reading just one encoder,
-// we can use the "get_count" function to request and wait
-
-static inline void quadrature_encoder_request_count(PIO pio, uint sm)
-{
-	pio->txf[sm] = 1;
-}
-
-static inline int32_t quadrature_encoder_fetch_count(PIO pio, uint sm)
-{
-	while (pio_sm_is_rx_fifo_empty(pio, sm))
-		tight_loop_contents();
-	return pio->rxf[sm];
-}
-
 static inline int32_t quadrature_encoder_get_count(PIO pio, uint sm)
 {
-	quadrature_encoder_request_count(pio, sm);
-	return quadrature_encoder_fetch_count(pio, sm);
+	uint ret;
+	int n;
+
+	// if the FIFO has N entries, we fetch them to drain the FIFO,
+	// plus one entry which will be guaranteed to not be stale
+	n = pio_sm_get_rx_fifo_level(pio, sm) + 1;
+	while (n > 0) {
+		ret = pio_sm_get_blocking(pio, sm);
+		n--;
+	}
+	return ret;
 }
 
 %}

From 654aabc6869c80b4b05739e764528907c2d30d7e Mon Sep 17 00:00:00 2001
From: Frederico Pereira <89074716+Ir0nyPT@users.noreply.github.com>
Date: Tue, 6 Jun 2023 16:51:55 +0100
Subject: [PATCH 2/3] Update picow_ntp_client.c (#391)

Fix "invalid conversion from 'void*' to 'NTP_T*'"
---
 pico_w/wifi/ntp_client/picow_ntp_client.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pico_w/wifi/ntp_client/picow_ntp_client.c b/pico_w/wifi/ntp_client/picow_ntp_client.c
index fb76304..0fe3f0b 100644
--- a/pico_w/wifi/ntp_client/picow_ntp_client.c
+++ b/pico_w/wifi/ntp_client/picow_ntp_client.c
@@ -108,7 +108,7 @@ static void ntp_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p, const ip_ad
 
 // Perform initialisation
 static NTP_T* ntp_init(void) {
-    NTP_T *state = calloc(1, sizeof(NTP_T));
+    NTP_T *state = (NTP_T*)calloc(1, sizeof(NTP_T));
     if (!state) {
         printf("failed to allocate state\n");
         return NULL;
@@ -183,4 +183,4 @@ int main() {
     run_ntp_test();
     cyw43_arch_deinit();
    return 0;
-}
\ No newline at end of file
+}

From 465a2a3824007249a9a425560231f2e8766c356d Mon Sep 17 00:00:00 2001
From: pmarques-dev <72901351+pmarques-dev@users.noreply.github.com>
Date: Thu, 8 Jun 2023 21:22:29 +0100
Subject: [PATCH 3/3] coding style fixes, copyright waiver (#395)

Co-authored-by: Paulo Marques <pm@quant-insight.com>
---
 pio/quadrature_encoder/quadrature_encoder.c   |   2 +-
 pio/quadrature_encoder/quadrature_encoder.pio | 139 +++++++++---------
 2 files changed, 70 insertions(+), 71 deletions(-)

diff --git a/pio/quadrature_encoder/quadrature_encoder.c b/pio/quadrature_encoder/quadrature_encoder.c
index 1818a97..f402efb 100644
--- a/pio/quadrature_encoder/quadrature_encoder.c
+++ b/pio/quadrature_encoder/quadrature_encoder.c
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2021-2023 pmarques-dev @ github
+ * Copyright (c) 2023 Raspberry Pi (Trading) Ltd.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
diff --git a/pio/quadrature_encoder/quadrature_encoder.pio b/pio/quadrature_encoder/quadrature_encoder.pio
index 63e1dfd..d08f699 100644
--- a/pio/quadrature_encoder/quadrature_encoder.pio
+++ b/pio/quadrature_encoder/quadrature_encoder.pio
@@ -1,5 +1,5 @@
 ;
-; Copyright (c) 2021-2023 pmarques-dev @ github
+; Copyright (c) 2023 Raspberry Pi (Trading) Ltd.
 ;
 ; SPDX-License-Identifier: BSD-3-Clause
 ;
@@ -26,63 +26,63 @@
 ; to sysclk / 10  (e.g., sysclk 125MHz, max step rate = 12.5 Msteps/sec)
 
 ; 00 state
-	JMP update	; read 00
-	JMP decrement	; read 01
-	JMP increment	; read 10
-	JMP update	; read 11
+    JMP update    ; read 00
+    JMP decrement ; read 01
+    JMP increment ; read 10
+    JMP update    ; read 11
 
 ; 01 state
-	JMP increment	; read 00
-	JMP update	; read 01
-	JMP update	; read 10
-	JMP decrement	; read 11
+    JMP increment ; read 00
+    JMP update    ; read 01
+    JMP update    ; read 10
+    JMP decrement ; read 11
 
 ; 10 state
-	JMP decrement	; read 00
-	JMP update	; read 01
-	JMP update	; read 10
-	JMP increment	; read 11
+    JMP decrement ; read 00
+    JMP update    ; read 01
+    JMP update    ; read 10
+    JMP increment ; read 11
 
 ; to reduce code size, the last 2 states are implemented in place and become the
 ; target for the other jumps
 
 ; 11 state
-	JMP update	; read 00
-	JMP increment	; read 01
+    JMP update    ; read 00
+    JMP increment ; read 01
 decrement:
-	; note: the target of this instruction must be the next address, so that
-	; the effect of the instruction does not depend on the value of Y. The
-	; same is true for the "JMP X--" below. Basically "JMP Y--, <next addr>"
-	; is just a pure "decrement Y" instruction, with no other side effects
-	JMP Y--, update	; read 10
+    ; note: the target of this instruction must be the next address, so that
+    ; the effect of the instruction does not depend on the value of Y. The
+    ; same is true for the "JMP X--" below. Basically "JMP Y--, <next addr>"
+    ; is just a pure "decrement Y" instruction, with no other side effects
+    JMP Y--, update ; read 10
 
-	; this is where the main loop starts
+    ; this is where the main loop starts
 .wrap_target
 update:
-	MOV ISR, Y
-	PUSH noblock
+    MOV ISR, Y      ; read 11
+    PUSH noblock
 
 sample_pins:
-	; we shift into ISR the last state of the 2 input pins (now in OSR) and
-	; the new state of the 2 pins, thus producing the 4 bit target for the
-	; computed jump into the correct action for this state. Both the PUSH
-	; above and the OUT below zero the other bits in ISR
-	OUT ISR, 2
-	IN PINS, 2
+    ; we shift into ISR the last state of the 2 input pins (now in OSR) and
+    ; the new state of the 2 pins, thus producing the 4 bit target for the
+    ; computed jump into the correct action for this state. Both the PUSH
+    ; above and the OUT below zero out the other bits in ISR
+    OUT ISR, 2
+    IN PINS, 2
 
-	; save the state in the OSR, so that we can use ISR for other purposes
-	MOV OSR, ISR
-	; jump to the correct state machine action
-	MOV PC, ISR
+    ; save the state in the OSR, so that we can use ISR for other purposes
+    MOV OSR, ISR
+    ; jump to the correct state machine action
+    MOV PC, ISR
 
-	; the PIO does not have a increment instruction, so to do that we do a
-	; negate, decrement, negate sequence
+    ; the PIO does not have a increment instruction, so to do that we do a
+    ; negate, decrement, negate sequence
 increment:
-	MOV Y, ~Y
-	JMP Y--, increment_cont
+    MOV Y, ~Y
+    JMP Y--, increment_cont
 increment_cont:
-	MOV Y, ~Y
-.wrap	; the .wrap here avoids one jump instruction and saves a cycle too
+    MOV Y, ~Y
+.wrap    ; the .wrap here avoids one jump instruction and saves a cycle too
 
 
 
@@ -97,46 +97,45 @@ increment_cont:
 
 static inline void quadrature_encoder_program_init(PIO pio, uint sm, uint pin, int max_step_rate)
 {
-	pio_sm_set_consecutive_pindirs(pio, sm, pin, 2, false);
-	gpio_pull_up(pin);
-	gpio_pull_up(pin + 1);
+    pio_sm_set_consecutive_pindirs(pio, sm, pin, 2, false);
+    gpio_pull_up(pin);
+    gpio_pull_up(pin + 1);
 
-	pio_sm_config c = quadrature_encoder_program_get_default_config(0);
+    pio_sm_config c = quadrature_encoder_program_get_default_config(0);
 
-	sm_config_set_in_pins(&c, pin); // for WAIT, IN
-	sm_config_set_jmp_pin(&c, pin); // for JMP
-	// shift to left, autopull disabled
-	sm_config_set_in_shift(&c, false, false, 32);
-	// don't join FIFO's
-	sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_NONE);
+    sm_config_set_in_pins(&c, pin); // for WAIT, IN
+    sm_config_set_jmp_pin(&c, pin); // for JMP
+    // shift to left, autopull disabled
+    sm_config_set_in_shift(&c, false, false, 32);
+    // don't join FIFO's
+    sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_NONE);
 
-	// passing "0" as the sample frequency,
-	if (max_step_rate == 0) {
-		sm_config_set_clkdiv(&c, 1.0);
-	} else {
-		// one state machine loop takes at most 10 cycles
-		float div = (float)clock_get_hz(clk_sys) / (10 * max_step_rate);
-		sm_config_set_clkdiv(&c, div);
-	}
+    // passing "0" as the sample frequency,
+    if (max_step_rate == 0) {
+        sm_config_set_clkdiv(&c, 1.0);
+    } else {
+        // one state machine loop takes at most 10 cycles
+        float div = (float)clock_get_hz(clk_sys) / (10 * max_step_rate);
+        sm_config_set_clkdiv(&c, div);
+    }
 
-	pio_sm_init(pio, sm, 0, &c);
-	pio_sm_set_enabled(pio, sm, true);
+    pio_sm_init(pio, sm, 0, &c);
+    pio_sm_set_enabled(pio, sm, true);
 }
 
 static inline int32_t quadrature_encoder_get_count(PIO pio, uint sm)
 {
-	uint ret;
-	int n;
+    uint ret;
+    int n;
 
-	// if the FIFO has N entries, we fetch them to drain the FIFO,
-	// plus one entry which will be guaranteed to not be stale
-	n = pio_sm_get_rx_fifo_level(pio, sm) + 1;
-	while (n > 0) {
-		ret = pio_sm_get_blocking(pio, sm);
-		n--;
-	}
-	return ret;
+    // if the FIFO has N entries, we fetch them to drain the FIFO,
+    // plus one entry which will be guaranteed to not be stale
+    n = pio_sm_get_rx_fifo_level(pio, sm) + 1;
+    while (n > 0) {
+        ret = pio_sm_get_blocking(pio, sm);
+        n--;
+    }
+    return ret;
 }
 
 %}
-