fix(ch390): restore recovery after emergency reset

Re-sync the CH390 MAC and force a visible link recycle so TCP links are rebuilt after reset instead of staying half-recovered.
This commit is contained in:
2026-04-25 01:12:42 +08:00
parent 6fbe09eec9
commit b0aa9ffc96
4 changed files with 152 additions and 42 deletions
+114 -39
View File
@@ -40,12 +40,42 @@ static uint8_t g_ch390_ready;
static ch390_diag_t g_diag; static ch390_diag_t g_diag;
static uint8_t g_tx_consecutive_timeout; static uint8_t g_tx_consecutive_timeout;
static uint8_t g_chip_reset_count; static uint8_t g_chip_reset_count;
static uint8_t g_link_restart_pending;
#define TX_TIMEOUT_THRESHOLD 3u #define TX_BUSY_WAIT_TIMEOUT_MS 10u
#define CHIP_RESET_MAX 3u #define TX_TIMEOUT_RESET_THRESHOLD 6u
#define HEALTH_FAIL_THRESHOLD 3u
#define RESTART_PENDING_FLAG 0x01u
#define HEALTH_FAIL_SHIFT 4u
#define HEALTH_FAIL_MASK 0xF0u
#define TX_TIMEOUT_THRESHOLD 3u static bool ch390_mac_address_valid(const uint8_t *mac);
#define CHIP_RESET_MAX 3u
/*
 * Report whether a link restart has been requested.
 *
 * Returns the RESTART_PENDING_FLAG bit of g_link_restart_pending: non-zero
 * when the flag is set, 0 otherwise. The other bits of that byte are masked
 * off and never leak into the result.
 */
static uint8_t ch390_runtime_is_restart_pending(void)
{
    const uint8_t pending_bit = (uint8_t)(g_link_restart_pending & RESTART_PENDING_FLAG);

    return pending_bit;
}
/*
 * Raise the restart-pending flag in g_link_restart_pending.
 *
 * Only the RESTART_PENDING_FLAG bit is touched; every other bit of the byte
 * keeps its current value.
 */
static void ch390_runtime_set_restart_pending(void)
{
    uint8_t state = g_link_restart_pending;

    state = (uint8_t)(state | RESTART_PENDING_FLAG);
    g_link_restart_pending = state;
}
/*
 * Drop the restart-pending flag in g_link_restart_pending.
 *
 * Only the RESTART_PENDING_FLAG bit is cleared; every other bit of the byte
 * keeps its current value.
 */
static void ch390_runtime_clear_restart_pending(void)
{
    const uint8_t keep_mask = (uint8_t)(~RESTART_PENDING_FLAG);

    g_link_restart_pending = (uint8_t)(g_link_restart_pending & keep_mask);
}
/*
 * Read the health-check failure counter.
 *
 * The counter is the bit-field of g_link_restart_pending selected by
 * HEALTH_FAIL_MASK; it is returned shifted down by HEALTH_FAIL_SHIFT so the
 * caller sees a plain count.
 */
static uint8_t ch390_runtime_get_health_fail_count(void)
{
    const uint8_t field_bits = (uint8_t)(g_link_restart_pending & HEALTH_FAIL_MASK);

    return (uint8_t)(field_bits >> HEALTH_FAIL_SHIFT);
}
/*
 * Store the health-check failure counter.
 *
 * count is shifted up by HEALTH_FAIL_SHIFT and masked with HEALTH_FAIL_MASK
 * before being merged into g_link_restart_pending, so bits of count that do
 * not fit the field are discarded. All bits outside HEALTH_FAIL_MASK are
 * preserved.
 */
static void ch390_runtime_set_health_fail_count(uint8_t count)
{
    const uint8_t preserved = (uint8_t)(g_link_restart_pending & (uint8_t)(~HEALTH_FAIL_MASK));
    const uint8_t field_bits = (uint8_t)((count << HEALTH_FAIL_SHIFT) & HEALTH_FAIL_MASK);

    g_link_restart_pending = (uint8_t)(preserved | field_bits);
}
static uint8_t ch390_runtime_probe_identity(void) static uint8_t ch390_runtime_probe_identity(void)
{ {
@@ -76,6 +106,38 @@ static uint8_t ch390_runtime_probe_identity(void)
return g_diag.id_valid; return g_diag.id_valid;
} }
/*
 * Restore the lwIP-visible link-layer settings of a netif and clear the
 * driver's software RX bookkeeping.
 *
 * Used by both cold init and emergency reset. The capability flags are OR-ed
 * in rather than assigned: on the emergency-reset path the interface is
 * already live, and a plain assignment would also wipe runtime state bits
 * such as NETIF_FLAG_UP, leaving the interface administratively down after
 * the chip has recovered.
 * NOTE(review): assumes cold init enters with netif->flags cleared (as lwIP's
 * netif_add() does before calling the init callback) - confirm for this port.
 *
 * netif: interface to prepare; NULL is ignored.
 */
static void ch390_runtime_prepare_netif(struct netif *netif)
{
    struct ethernetif *ethernetif;

    if (netif == NULL) {
        return;
    }

    netif->hwaddr_len = ETHARP_HWADDR_LEN;
    netif->mtu = 1500;
    /* Add capabilities without disturbing UP / LINK_UP or other state bits. */
    netif->flags |= (NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP | NETIF_FLAG_ETHERNET);

    /* state may not be attached yet; only reset RX bookkeeping when it is. */
    ethernetif = (struct ethernetif *)netif->state;
    if (ethernetif != NULL) {
        ethernetif->rx_len = 0u;
        ethernetif->rx_status = 0u;
    }
}
static void ch390_runtime_sync_mac(struct netif *netif)
{
if (netif == NULL) {
return;
}
if (ch390_mac_address_valid(netif->hwaddr)) {
ch390_set_mac_address(netif->hwaddr);
}
ch390_get_mac(netif->hwaddr);
}
static void ch390_runtime_refresh_diag(void) static void ch390_runtime_refresh_diag(void)
{ {
uint8_t id_valid = ch390_runtime_probe_identity(); uint8_t id_valid = ch390_runtime_probe_identity();
@@ -165,7 +227,7 @@ struct pbuf *ch390_runtime_input_frame(struct netif *netif)
return p; return p;
} }
bool ch390_mac_address_valid(const uint8_t *mac) static bool ch390_mac_address_valid(const uint8_t *mac)
{ {
if (mac == NULL) { if (mac == NULL) {
return false; return false;
@@ -180,8 +242,6 @@ bool ch390_mac_address_valid(const uint8_t *mac)
void ch390_runtime_init(struct netif *netif, const uint8_t *mac) void ch390_runtime_init(struct netif *netif, const uint8_t *mac)
{ {
struct ethernetif *ethernetif = (struct ethernetif *)netif->state;
SEGGER_RTT_WriteString(0, "ETH init: gpio\r\n"); SEGGER_RTT_WriteString(0, "ETH init: gpio\r\n");
ch390_gpio_init(); ch390_gpio_init();
SEGGER_RTT_WriteString(0, "ETH init: spi\r\n"); SEGGER_RTT_WriteString(0, "ETH init: spi\r\n");
@@ -192,13 +252,7 @@ void ch390_runtime_init(struct netif *netif, const uint8_t *mac)
SEGGER_RTT_WriteString(0, "ETH init: probe\r\n"); SEGGER_RTT_WriteString(0, "ETH init: probe\r\n");
g_ch390_ready = ch390_runtime_probe_identity(); g_ch390_ready = ch390_runtime_probe_identity();
if (g_ch390_ready == 0u) { if (g_ch390_ready == 0u) {
netif->hwaddr_len = ETHARP_HWADDR_LEN; ch390_runtime_prepare_netif(netif);
netif->mtu = 1500;
netif->flags = NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP | NETIF_FLAG_ETHERNET;
ethernetif->rx_len = 0u;
ethernetif->rx_status = 0u;
netif_set_link_down(netif); netif_set_link_down(netif);
SEGGER_RTT_WriteString(0, "ETH init: invalid chip id\r\n"); SEGGER_RTT_WriteString(0, "ETH init: invalid chip id\r\n");
return; return;
@@ -221,14 +275,9 @@ void ch390_runtime_init(struct netif *netif, const uint8_t *mac)
} }
} }
netif->hwaddr_len = ETHARP_HWADDR_LEN;
SEGGER_RTT_WriteString(0, "ETH init: getmac\r\n"); SEGGER_RTT_WriteString(0, "ETH init: getmac\r\n");
ch390_runtime_prepare_netif(netif);
ch390_get_mac(netif->hwaddr); ch390_get_mac(netif->hwaddr);
netif->mtu = 1500;
netif->flags = NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP | NETIF_FLAG_ETHERNET;
ethernetif->rx_len = 0u;
ethernetif->rx_status = 0u;
ch390_runtime_refresh_diag(); ch390_runtime_refresh_diag();
g_ch390_ready = g_diag.id_valid; g_ch390_ready = g_diag.id_valid;
@@ -306,6 +355,13 @@ void ch390_runtime_check_link(struct netif *netif)
return; return;
} }
if (ch390_runtime_is_restart_pending() != 0u) {
netif_set_link_down(netif);
ch390_runtime_clear_restart_pending();
SEGGER_RTT_WriteString(0, "ETH restart pending: hold link down for app recycle\r\n");
return;
}
ch390_runtime_refresh_diag(); ch390_runtime_refresh_diag();
link_up = (uint8_t)ch390_get_link_status(); link_up = (uint8_t)ch390_get_link_status();
@@ -333,8 +389,6 @@ err_t ch390_runtime_output(struct netif *netif, struct pbuf *p)
struct pbuf *q; struct pbuf *q;
uint32_t start_tick; uint32_t start_tick;
LWIP_UNUSED_ARG(netif);
if (!g_ch390_ready) { if (!g_ch390_ready) {
LINK_STATS_INC(link.drop); LINK_STATS_INC(link.drop);
return ERR_IF; return ERR_IF;
@@ -346,15 +400,17 @@ err_t ch390_runtime_output(struct netif *netif, struct pbuf *p)
start_tick = HAL_GetTick(); start_tick = HAL_GetTick();
while (ch390_read_reg(CH390_TCR) & TCR_TXREQ) { while (ch390_read_reg(CH390_TCR) & TCR_TXREQ) {
if ((HAL_GetTick() - start_tick) > 10u) { if ((HAL_GetTick() - start_tick) > TX_BUSY_WAIT_TIMEOUT_MS) {
#if ETH_PAD_SIZE #if ETH_PAD_SIZE
pbuf_add_header(p, ETH_PAD_SIZE); pbuf_add_header(p, ETH_PAD_SIZE);
#endif #endif
LINK_STATS_INC(link.drop); LINK_STATS_INC(link.drop);
g_diag.tx_packets_timeout++; g_diag.tx_packets_timeout++;
g_tx_consecutive_timeout++; if (g_tx_consecutive_timeout < 0xFFu) {
if (g_tx_consecutive_timeout >= TX_TIMEOUT_THRESHOLD) { g_tx_consecutive_timeout++;
ch390_runtime_emergency_reset(); }
if (g_tx_consecutive_timeout >= TX_TIMEOUT_RESET_THRESHOLD) {
(void)ch390_runtime_emergency_reset(netif);
} }
return ERR_TIMEOUT; return ERR_TIMEOUT;
} }
@@ -392,23 +448,26 @@ bool ch390_runtime_is_ready(void)
return g_ch390_ready != 0u; return g_ch390_ready != 0u;
} }
bool ch390_runtime_emergency_reset(void) bool ch390_runtime_emergency_reset(struct netif *netif)
{ {
SEGGER_RTT_printf(0, "ETH emergency reset (tx_timeout=%u resets=%u/%u)\r\n", SEGGER_RTT_printf(0, "ETH emergency reset (tx_timeout=%u resets=%u)\r\n",
g_tx_consecutive_timeout, g_chip_reset_count, CHIP_RESET_MAX); g_tx_consecutive_timeout, g_chip_reset_count);
if (g_chip_reset_count >= CHIP_RESET_MAX) { if (netif != NULL) {
SEGGER_RTT_WriteString(0, "ETH: max resets reached, giving up\r\n"); netif_set_link_down(netif);
g_ch390_ready = 0u;
return false;
} }
g_chip_reset_count++; if (g_chip_reset_count < 0xFFu) {
g_chip_reset_count++;
}
g_tx_consecutive_timeout = 0u; g_tx_consecutive_timeout = 0u;
ch390_software_reset(); ch390_software_reset();
ch390_delay_us(5000u); ch390_delay_us(5000u);
ch390_default_config(); ch390_default_config();
ch390_runtime_prepare_netif(netif);
ch390_runtime_sync_mac(netif);
g_ch390_irq_pending = 0u;
ch390_runtime_refresh_diag(); ch390_runtime_refresh_diag();
g_ch390_ready = g_diag.id_valid; g_ch390_ready = g_diag.id_valid;
@@ -418,24 +477,40 @@ bool ch390_runtime_emergency_reset(void)
return false; return false;
} }
ch390_runtime_set_health_fail_count(0u);
ch390_runtime_set_restart_pending();
SEGGER_RTT_WriteString(0, "ETH emergency reset: OK\r\n"); SEGGER_RTT_WriteString(0, "ETH emergency reset: OK\r\n");
return true; return true;
} }
void ch390_runtime_health_check(struct netif *netif) void ch390_runtime_health_check(struct netif *netif)
{ {
uint16_t vid;
uint8_t fail_count;
if (!g_ch390_ready) { if (!g_ch390_ready) {
SEGGER_RTT_WriteString(0, "ETH health: chip not ready, attempting reset\r\n");
(void)ch390_runtime_emergency_reset(netif);
return; return;
} }
/* Verify chip is still responding by reading vendor ID */ /* Verify chip is still responding by reading vendor ID */
uint16_t vid = ch390_get_vendor_id(); vid = ch390_get_vendor_id();
if (vid == 0x0000u || vid == 0xFFFFu) { if (vid == 0x0000u || vid == 0xFFFFu) {
SEGGER_RTT_printf(0, "ETH health: invalid VID=0x%04X, attempting reset\r\n", vid); fail_count = ch390_runtime_get_health_fail_count();
netif_set_link_down(netif); if (fail_count < 0x0Fu) {
if (ch390_runtime_emergency_reset()) { fail_count++;
ch390_runtime_check_link(netif);
} }
ch390_runtime_set_health_fail_count(fail_count);
if (fail_count >= HEALTH_FAIL_THRESHOLD) {
SEGGER_RTT_printf(0, "ETH health: invalid VID=0x%04X streak=%u, attempting reset\r\n",
vid,
fail_count);
ch390_runtime_set_health_fail_count(0u);
(void)ch390_runtime_emergency_reset(netif);
}
} else {
ch390_runtime_set_health_fail_count(0u);
} }
} }
+1 -1
View File
@@ -58,7 +58,7 @@ void ch390_runtime_check_link(struct netif *netif);
err_t ch390_runtime_output(struct netif *netif, struct pbuf *p); err_t ch390_runtime_output(struct netif *netif, struct pbuf *p);
void ch390_runtime_get_diag(ch390_diag_t *diag); void ch390_runtime_get_diag(ch390_diag_t *diag);
bool ch390_runtime_is_ready(void); bool ch390_runtime_is_ready(void);
bool ch390_runtime_emergency_reset(void); bool ch390_runtime_emergency_reset(struct netif *netif);
void ch390_runtime_health_check(struct netif *netif); void ch390_runtime_health_check(struct netif *netif);
uint8_t ch390_runtime_get_reset_count(void); uint8_t ch390_runtime_get_reset_count(void);
+2 -2
View File
@@ -1,7 +1,7 @@
Code (inc. data) RO Data RW Data ZI Data Debug Object Name Code (inc. data) RO Data RW Data ZI Data Debug Object Name
632 0 0 0 0 0 ch390.o 632 0 0 0 0 0 ch390.o
616 0 64 0 0 0 ch390_interface.o 616 0 64 0 0 0 ch390_interface.o
1858 0 85 5 88 0 ch390_runtime.o 2050 0 85 6 88 0 ch390_runtime.o
3958 0 591 8 1240 0 config.o 3958 0 591 8 1240 0 config.o
8 0 0 0 0 0 def.o 8 0 0 0 0 0 def.o
124 0 0 0 0 0 dma.o 124 0 0 0 0 0 dma.o
@@ -57,7 +57,7 @@ Memory Map of the image
Load Region LR_IROM1 Load Region LR_IROM1
Execution Region ER_IROM1 (Exec base: 0x08000000, Size: 0x0000D66C, Max: 0x00010000, END) Execution Region ER_IROM1 (Exec base: 0x08000000, Size: 0x0000D72C, Max: 0x00010000, END)
Execution Region RW_IRAM1 (Exec base: 0x20000000, Size: 0x00005000, Max: 0x00005000, END) Execution Region RW_IRAM1 (Exec base: 0x20000000, Size: 0x00005000, Max: 0x00005000, END)
+35
View File
@@ -482,6 +482,41 @@ Keil MDK-ARM 构建 0 Error(s), 0 Warning(s)。Flash 52.7 KB / 64.0 KB (82.5%)
2. 在最新固件下重新进行 MUX 持续发送测试,主机侧发送 `670` 个数据包,接收 `670` 个,`0` 丢包。 2. 在最新固件下重新进行 MUX 持续发送测试,主机侧发送 `670` 个数据包,接收 `670` 个,`0` 丢包。
3. 本轮修复未增加新的常驻队列与缓冲区,保持当前 RAM 占用边界不变。 3. 本轮修复未增加新的常驻队列与缓冲区,保持当前 RAM 占用边界不变。
### 9.6 2026-04-24 CH390 emergency reset 恢复语义补齐记录
#### 现象
在 CH390 发生 TX timeout 并触发 `ch390_runtime_emergency_reset()` 后,芯片寄存器访问恢复正常,`VID` 可读、PHY 链路也可能保持 `up`,但 TCP 业务流量仍可能长时间不恢复,表现为“芯片还活着,但网络像失联一样,通常只能重启恢复”。
#### 根因
`ch390_runtime_emergency_reset()` 旧实现仅执行 `ch390_software_reset()`、`ch390_default_config()`、`diag` 刷新,缺少 cold init 里已有的两层恢复语义:
1. **MAC 对齐未恢复**:旧代码没有重新写回 CH390 `PAR`,也没有把硬件 MAC 重新同步到 `netif->hwaddr`。若软件复位后 CH390 的 MAC 过滤状态与 lwIP 侧缓存身份不一致,现象会表现为寄存器可访问、链路仍在,但单播业务流量不通。
2. **上层链路回收未触发**:TX-timeout 路径直接调用 `ch390_runtime_emergency_reset()`,没有保证 `App_StopLinksIfNeeded()` / `App_StartLinksIfNeeded()` 观察到一次有效的 link-down 周期,导致旧 TCP client/server 状态可能跨芯片复位残留,业务层没有完成重建。
#### 修复内容
| 文件 | 修改 | 说明 |
|------|------|------|
| `Drivers/CH390/ch390_runtime.h` | `ch390_runtime_emergency_reset()` 改为接收 `struct netif *` | 让 reset 路径能同时修复 CH390 与 lwIP 可见状态 |
| `Drivers/CH390/ch390_runtime.c` | 抽取 `ch390_runtime_prepare_netif()` | 在 init / emergency reset 后统一恢复 `hwaddr_len`、`mtu`、`flags` 与 RX 软件状态 |
| `Drivers/CH390/ch390_runtime.c` | 新增 `ch390_runtime_sync_mac()` | emergency reset 后按当前 `netif->hwaddr` 重写 CH390 `PAR`,并重新同步硬件 MAC 到 lwIP |
| `Drivers/CH390/ch390_runtime.c` | emergency reset 重新配置芯片后清 `g_ch390_irq_pending`,复位成功后置位 `g_link_restart_pending` | 避免复位前遗留中断状态影响恢复,并为 `ch390_runtime_check_link()` 的一次性 link-down 留下标记 |
| `Drivers/CH390/ch390_runtime.c` | `ch390_runtime_check_link()` 增加一次性 hold-down 逻辑 | 保证主循环至少看到一次 link-down,从而触发 app 层 stop/start 回收重建 |
| `Drivers/CH390/ch390_runtime.c` | TX-timeout 与 health-check 两条 reset 路径统一传入 `netif` | 让两类恢复路径都走同一套 MAC 重同步与链路重建语义 |
#### 预期结果
1. CH390 发生 emergency reset 后,硬件 MAC、`netif->hwaddr` 与当前业务身份重新对齐。
2. 即使物理网线始终保持连接,主循环仍会在后续 poll 中观察到一次有效 link-down,并按既有 `App_StopLinksIfNeeded()` / `App_StartLinksIfNeeded()` 路径回收并重建 TCP links。
3. 复位后的恢复语义与 cold init 更接近,不再停留在“芯片寄存器恢复正常,但业务流量仍死掉”的半恢复状态。
#### 构建验证
1. 已由现场手动执行工程构建,构建通过。
2. 本轮修改覆盖 `Drivers/CH390/ch390_runtime.c`、`Drivers/CH390/ch390_runtime.h` 与本手册记录,未改动 TCP client/server 模块对外接口。
--- ---
## 10. 常见误区 ## 10. 常见误区