zephyr/subsys/net/ip/tcp.c
Robert Lubos 7207f35758 net: tcp: Fix ACK check in LAST_ACK state
The final ACK check during passive close was wrong - we should not
compare its SEQ number with the ACK number we've sent last, but rather
compare the ACK number it acknowledges matches our current SEQ number on
the connection. This ensures, that the ACK received is really
acknowledging the FIN packet we've sent from our side, and is not just
some earlier retransmission. Currently the latter could be the case, and
we've closed the connection prematurely. In result, when the real "final
ACK" arrived, the TCP stack replied with RST.

Subsequently, we should increment the SEQ number on the connection after
sending FIN packet, so that we are able to identify final ACK correctly,
just as it's done in active close cases.

Signed-off-by: Robert Lubos <robert.lubos@nordicsemi.no>
2024-03-22 15:56:50 -05:00

4465 lines
108 KiB
C

/*
* Copyright (c) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <zephyr/logging/log.h>
LOG_MODULE_REGISTER(net_tcp, CONFIG_NET_TCP_LOG_LEVEL);
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <zephyr/kernel.h>
#include <zephyr/random/random.h>
#if defined(CONFIG_NET_TCP_ISN_RFC6528)
#include <mbedtls/md5.h>
#endif
#include <zephyr/net/net_pkt.h>
#include <zephyr/net/net_context.h>
#include <zephyr/net/udp.h>
#include "ipv4.h"
#include "ipv6.h"
#include "connection.h"
#include "net_stats.h"
#include "net_private.h"
#include "tcp_internal.h"
#define ACK_TIMEOUT_MS CONFIG_NET_TCP_ACK_TIMEOUT
#define ACK_TIMEOUT K_MSEC(ACK_TIMEOUT_MS)
#define LAST_ACK_TIMEOUT_MS tcp_fin_timeout_ms
#define LAST_ACK_TIMEOUT K_MSEC(LAST_ACK_TIMEOUT_MS)
#define FIN_TIMEOUT K_MSEC(tcp_fin_timeout_ms)
#define ACK_DELAY K_MSEC(100)
#define ZWP_MAX_DELAY_MS 120000
#define DUPLICATE_ACK_RETRANSMIT_TRHESHOLD 3
static int tcp_rto = CONFIG_NET_TCP_INIT_RETRANSMISSION_TIMEOUT;
static int tcp_retries = CONFIG_NET_TCP_RETRY_COUNT;
static int tcp_fin_timeout_ms;
static int tcp_rx_window =
#if (CONFIG_NET_TCP_MAX_RECV_WINDOW_SIZE != 0)
CONFIG_NET_TCP_MAX_RECV_WINDOW_SIZE;
#else
#if defined(CONFIG_NET_BUF_FIXED_DATA_SIZE)
(CONFIG_NET_BUF_RX_COUNT * CONFIG_NET_BUF_DATA_SIZE) / 3;
#else
CONFIG_NET_BUF_DATA_POOL_SIZE / 3;
#endif /* CONFIG_NET_BUF_FIXED_DATA_SIZE */
#endif
static int tcp_tx_window =
#if (CONFIG_NET_TCP_MAX_SEND_WINDOW_SIZE != 0)
CONFIG_NET_TCP_MAX_SEND_WINDOW_SIZE;
#else
#if defined(CONFIG_NET_BUF_FIXED_DATA_SIZE)
(CONFIG_NET_BUF_TX_COUNT * CONFIG_NET_BUF_DATA_SIZE) / 3;
#else
CONFIG_NET_BUF_DATA_POOL_SIZE / 3;
#endif /* CONFIG_NET_BUF_FIXED_DATA_SIZE */
#endif
#ifdef CONFIG_NET_TCP_RANDOMIZED_RTO
#define TCP_RTO_MS (conn->rto)
#else
#define TCP_RTO_MS (tcp_rto)
#endif
/* Define the number of MSS sections the congestion window is initialized at */
#define TCP_CONGESTION_INITIAL_WIN 1
#define TCP_CONGESTION_INITIAL_SSTHRESH 3
static sys_slist_t tcp_conns = SYS_SLIST_STATIC_INIT(&tcp_conns);
static K_MUTEX_DEFINE(tcp_lock);
K_MEM_SLAB_DEFINE_STATIC(tcp_conns_slab, sizeof(struct tcp),
CONFIG_NET_MAX_CONTEXTS, 4);
static struct k_work_q tcp_work_q;
static K_KERNEL_STACK_DEFINE(work_q_stack, CONFIG_NET_TCP_WORKQ_STACK_SIZE);
static enum net_verdict tcp_in(struct tcp *conn, struct net_pkt *pkt);
static bool is_destination_local(struct net_pkt *pkt);
static void tcp_out(struct tcp *conn, uint8_t flags);
static const char *tcp_state_to_str(enum tcp_state state, bool prefix);
int (*tcp_send_cb)(struct net_pkt *pkt) = NULL;
size_t (*tcp_recv_cb)(struct tcp *conn, struct net_pkt *pkt) = NULL;
static uint32_t tcp_get_seq(struct net_buf *buf)
{
return *(uint32_t *)net_buf_user_data(buf);
}
static void tcp_set_seq(struct net_buf *buf, uint32_t seq)
{
*(uint32_t *)net_buf_user_data(buf) = seq;
}
static int tcp_pkt_linearize(struct net_pkt *pkt, size_t pos, size_t len)
{
struct net_buf *buf, *first = pkt->cursor.buf, *second = first->frags;
int ret = 0;
size_t len1, len2;
if (net_pkt_get_len(pkt) < (pos + len)) {
NET_ERR("Insufficient packet len=%zd (pos+len=%zu)",
net_pkt_get_len(pkt), pos + len);
ret = -EINVAL;
goto out;
}
buf = net_pkt_get_frag(pkt, len, TCP_PKT_ALLOC_TIMEOUT);
if (!buf || buf->size < len) {
if (buf) {
net_buf_unref(buf);
}
ret = -ENOBUFS;
goto out;
}
net_buf_linearize(buf->data, buf->size, pkt->frags, pos, len);
net_buf_add(buf, len);
len1 = first->len - (pkt->cursor.pos - pkt->cursor.buf->data);
len2 = len - len1;
first->len -= len1;
while (len2) {
size_t pull_len = MIN(second->len, len2);
struct net_buf *next;
len2 -= pull_len;
net_buf_pull(second, pull_len);
next = second->frags;
if (second->len == 0) {
net_buf_unref(second);
}
second = next;
}
buf->frags = second;
first->frags = buf;
out:
return ret;
}
static struct tcphdr *th_get(struct net_pkt *pkt)
{
size_t ip_len = net_pkt_ip_hdr_len(pkt) + net_pkt_ip_opts_len(pkt);
struct tcphdr *th = NULL;
again:
net_pkt_cursor_init(pkt);
net_pkt_set_overwrite(pkt, true);
if (net_pkt_skip(pkt, ip_len) != 0) {
goto out;
}
if (!net_pkt_is_contiguous(pkt, sizeof(*th))) {
if (tcp_pkt_linearize(pkt, ip_len, sizeof(*th)) < 0) {
goto out;
}
goto again;
}
th = net_pkt_cursor_get_pos(pkt);
out:
return th;
}
static size_t tcp_endpoint_len(sa_family_t af)
{
return (af == AF_INET) ? sizeof(struct sockaddr_in) :
sizeof(struct sockaddr_in6);
}
static int tcp_endpoint_set(union tcp_endpoint *ep, struct net_pkt *pkt,
enum pkt_addr src)
{
int ret = 0;
switch (net_pkt_family(pkt)) {
case AF_INET:
if (IS_ENABLED(CONFIG_NET_IPV4)) {
struct net_ipv4_hdr *ip = NET_IPV4_HDR(pkt);
struct tcphdr *th;
th = th_get(pkt);
if (!th) {
return -ENOBUFS;
}
memset(ep, 0, sizeof(*ep));
ep->sin.sin_port = src == TCP_EP_SRC ? th_sport(th) :
th_dport(th);
net_ipv4_addr_copy_raw((uint8_t *)&ep->sin.sin_addr,
src == TCP_EP_SRC ?
ip->src : ip->dst);
ep->sa.sa_family = AF_INET;
} else {
ret = -EINVAL;
}
break;
case AF_INET6:
if (IS_ENABLED(CONFIG_NET_IPV6)) {
struct net_ipv6_hdr *ip = NET_IPV6_HDR(pkt);
struct tcphdr *th;
th = th_get(pkt);
if (!th) {
return -ENOBUFS;
}
memset(ep, 0, sizeof(*ep));
ep->sin6.sin6_port = src == TCP_EP_SRC ? th_sport(th) :
th_dport(th);
net_ipv6_addr_copy_raw((uint8_t *)&ep->sin6.sin6_addr,
src == TCP_EP_SRC ?
ip->src : ip->dst);
ep->sa.sa_family = AF_INET6;
} else {
ret = -EINVAL;
}
break;
default:
NET_ERR("Unknown address family: %hu", net_pkt_family(pkt));
ret = -EINVAL;
}
return ret;
}
static const char *tcp_flags(uint8_t flags)
{
#define BUF_SIZE 25 /* 6 * 4 + 1 */
static char buf[BUF_SIZE];
int len = 0;
buf[0] = '\0';
if (flags) {
if (flags & SYN) {
len += snprintk(buf + len, BUF_SIZE - len, "SYN,");
}
if (flags & FIN) {
len += snprintk(buf + len, BUF_SIZE - len, "FIN,");
}
if (flags & ACK) {
len += snprintk(buf + len, BUF_SIZE - len, "ACK,");
}
if (flags & PSH) {
len += snprintk(buf + len, BUF_SIZE - len, "PSH,");
}
if (flags & RST) {
len += snprintk(buf + len, BUF_SIZE - len, "RST,");
}
if (flags & URG) {
len += snprintk(buf + len, BUF_SIZE - len, "URG,");
}
if (len > 0) {
buf[len - 1] = '\0'; /* delete the last comma */
}
}
#undef BUF_SIZE
return buf;
}
static size_t tcp_data_len(struct net_pkt *pkt)
{
struct tcphdr *th = th_get(pkt);
size_t tcp_options_len = (th_off(th) - 5) * 4;
int len = net_pkt_get_len(pkt) - net_pkt_ip_hdr_len(pkt) -
net_pkt_ip_opts_len(pkt) - sizeof(*th) - tcp_options_len;
return len > 0 ? (size_t)len : 0;
}
static const char *tcp_th(struct net_pkt *pkt)
{
#define BUF_SIZE 80
static char buf[BUF_SIZE];
int len = 0;
struct tcphdr *th = th_get(pkt);
buf[0] = '\0';
if (th_off(th) < 5) {
len += snprintk(buf + len, BUF_SIZE - len,
"bogus th_off: %hu", (uint16_t)th_off(th));
goto end;
}
len += snprintk(buf + len, BUF_SIZE - len,
"%s Seq=%u", tcp_flags(th_flags(th)), th_seq(th));
if (th_flags(th) & ACK) {
len += snprintk(buf + len, BUF_SIZE - len,
" Ack=%u", th_ack(th));
}
len += snprintk(buf + len, BUF_SIZE - len,
" Len=%ld", (long)tcp_data_len(pkt));
end:
#undef BUF_SIZE
return buf;
}
#define is_6lo_technology(pkt) \
(IS_ENABLED(CONFIG_NET_IPV6) && net_pkt_family(pkt) == AF_INET6 && \
((IS_ENABLED(CONFIG_NET_L2_BT) && \
net_pkt_lladdr_dst(pkt)->type == NET_LINK_BLUETOOTH) || \
(IS_ENABLED(CONFIG_NET_L2_IEEE802154) && \
net_pkt_lladdr_dst(pkt)->type == NET_LINK_IEEE802154)))
static void tcp_send(struct net_pkt *pkt)
{
tcp_pkt_ref(pkt);
if (tcp_send_cb) {
if (tcp_send_cb(pkt) < 0) {
NET_ERR("net_send_data()");
tcp_pkt_unref(pkt);
}
goto out;
}
/* We must have special handling for some network technologies that
* tweak the IP protocol headers during packet sending. This happens
* with Bluetooth and IEEE 802.15.4 which use IPv6 header compression
* (6lo) and alter the sent network packet. So in order to avoid any
* corruption of the original data buffer, we must copy the sent data.
* For Bluetooth, its fragmentation code will even mangle the data
* part of the message so we need to copy those too.
*/
if (is_6lo_technology(pkt)) {
struct net_pkt *new_pkt;
new_pkt = tcp_pkt_clone(pkt);
if (!new_pkt) {
/* The caller of this func assumes that the net_pkt
* is consumed by this function. We call unref here
* so that the unref at the end of the func will
* free the net_pkt.
*/
tcp_pkt_unref(pkt);
goto out;
}
if (net_send_data(new_pkt) < 0) {
tcp_pkt_unref(new_pkt);
}
/* We simulate sending of the original pkt and unref it like
* the device driver would do.
*/
tcp_pkt_unref(pkt);
} else {
if (net_send_data(pkt) < 0) {
NET_ERR("net_send_data()");
tcp_pkt_unref(pkt);
}
}
out:
tcp_pkt_unref(pkt);
}
static void tcp_derive_rto(struct tcp *conn)
{
#ifdef CONFIG_NET_TCP_RANDOMIZED_RTO
/* Compute a randomized rto 1 and 1.5 times tcp_rto */
uint32_t gain;
uint8_t gain8;
uint32_t rto;
/* Getting random is computational expensive, so only use 8 bits */
sys_rand_get(&gain8, sizeof(uint8_t));
gain = (uint32_t)gain8;
gain += 1 << 9;
rto = (uint32_t)tcp_rto;
rto = (gain * rto) >> 9;
conn->rto = (uint16_t)rto;
#else
ARG_UNUSED(conn);
#endif
}
#ifdef CONFIG_NET_TCP_CONGESTION_AVOIDANCE
/* Implementation according to RFC6582 */
static void tcp_new_reno_log(struct tcp *conn, char *step)
{
NET_DBG("conn: %p, ca %s, cwnd=%d, ssthres=%d, fast_pend=%i",
conn, step, conn->ca.cwnd, conn->ca.ssthresh,
conn->ca.pending_fast_retransmit_bytes);
}
static void tcp_new_reno_init(struct tcp *conn)
{
conn->ca.cwnd = conn_mss(conn) * TCP_CONGESTION_INITIAL_WIN;
conn->ca.ssthresh = conn_mss(conn) * TCP_CONGESTION_INITIAL_SSTHRESH;
conn->ca.pending_fast_retransmit_bytes = 0;
tcp_new_reno_log(conn, "init");
}
static void tcp_new_reno_fast_retransmit(struct tcp *conn)
{
if (conn->ca.pending_fast_retransmit_bytes == 0) {
conn->ca.ssthresh = MAX(conn_mss(conn) * 2, conn->unacked_len / 2);
/* Account for the lost segments */
conn->ca.cwnd = conn_mss(conn) * 3 + conn->ca.ssthresh;
conn->ca.pending_fast_retransmit_bytes = conn->unacked_len;
tcp_new_reno_log(conn, "fast_retransmit");
}
}
static void tcp_new_reno_timeout(struct tcp *conn)
{
conn->ca.ssthresh = MAX(conn_mss(conn) * 2, conn->unacked_len / 2);
conn->ca.cwnd = conn_mss(conn);
tcp_new_reno_log(conn, "timeout");
}
/* For every duplicate ack increment the cwnd by mss */
static void tcp_new_reno_dup_ack(struct tcp *conn)
{
int32_t new_win = conn->ca.cwnd;
new_win += conn_mss(conn);
conn->ca.cwnd = MIN(new_win, UINT16_MAX);
tcp_new_reno_log(conn, "dup_ack");
}
static void tcp_new_reno_pkts_acked(struct tcp *conn, uint32_t acked_len)
{
int32_t new_win = conn->ca.cwnd;
int32_t win_inc = MIN(acked_len, conn_mss(conn));
if (conn->ca.pending_fast_retransmit_bytes == 0) {
if (conn->ca.cwnd < conn->ca.ssthresh) {
new_win += win_inc;
} else {
/* Implement a div_ceil to avoid rounding to 0 */
new_win += ((win_inc * win_inc) + conn->ca.cwnd - 1) / conn->ca.cwnd;
}
conn->ca.cwnd = MIN(new_win, UINT16_MAX);
} else {
/* Check if it is still in fast recovery mode */
if (conn->ca.pending_fast_retransmit_bytes <= acked_len) {
conn->ca.pending_fast_retransmit_bytes = 0;
conn->ca.cwnd = conn->ca.ssthresh;
} else {
conn->ca.pending_fast_retransmit_bytes -= acked_len;
conn->ca.cwnd -= acked_len;
}
}
tcp_new_reno_log(conn, "pkts_acked");
}
static void tcp_ca_init(struct tcp *conn)
{
tcp_new_reno_init(conn);
}
static void tcp_ca_fast_retransmit(struct tcp *conn)
{
tcp_new_reno_fast_retransmit(conn);
}
static void tcp_ca_timeout(struct tcp *conn)
{
tcp_new_reno_timeout(conn);
}
static void tcp_ca_dup_ack(struct tcp *conn)
{
tcp_new_reno_dup_ack(conn);
}
static void tcp_ca_pkts_acked(struct tcp *conn, uint32_t acked_len)
{
tcp_new_reno_pkts_acked(conn, acked_len);
}
#else
static void tcp_ca_init(struct tcp *conn) { }
static void tcp_ca_fast_retransmit(struct tcp *conn) { }
static void tcp_ca_timeout(struct tcp *conn) { }
static void tcp_ca_dup_ack(struct tcp *conn) { }
static void tcp_ca_pkts_acked(struct tcp *conn, uint32_t acked_len) { }
#endif
#if defined(CONFIG_NET_TCP_KEEPALIVE)
static void tcp_send_keepalive_probe(struct k_work *work);
static void keep_alive_timer_init(struct tcp *conn)
{
conn->keep_alive = false;
conn->keep_idle = CONFIG_NET_TCP_KEEPIDLE_DEFAULT;
conn->keep_intvl = CONFIG_NET_TCP_KEEPINTVL_DEFAULT;
conn->keep_cnt = CONFIG_NET_TCP_KEEPCNT_DEFAULT;
NET_DBG("keepalive timer init idle = %d, interval = %d, cnt = %d",
conn->keep_idle, conn->keep_intvl, conn->keep_cnt);
k_work_init_delayable(&conn->keepalive_timer, tcp_send_keepalive_probe);
}
static void keep_alive_param_copy(struct tcp *to, struct tcp *from)
{
to->keep_alive = from->keep_alive;
to->keep_idle = from->keep_idle;
to->keep_intvl = from->keep_intvl;
to->keep_cnt = from->keep_cnt;
}
static void keep_alive_timer_restart(struct tcp *conn)
{
if (!conn->keep_alive || conn->state != TCP_ESTABLISHED) {
return;
}
conn->keep_cur = 0;
k_work_reschedule_for_queue(&tcp_work_q, &conn->keepalive_timer,
K_SECONDS(conn->keep_idle));
}
static void keep_alive_timer_stop(struct tcp *conn)
{
k_work_cancel_delayable(&conn->keepalive_timer);
}
static int set_tcp_keep_alive(struct tcp *conn, const void *value, size_t len)
{
int keep_alive;
if (conn == NULL || value == NULL || len != sizeof(int)) {
return -EINVAL;
}
keep_alive = *(int *)value;
if ((keep_alive < 0) || (keep_alive > 1)) {
return -EINVAL;
}
conn->keep_alive = (bool)keep_alive;
if (keep_alive) {
keep_alive_timer_restart(conn);
} else {
keep_alive_timer_stop(conn);
}
return 0;
}
static int set_tcp_keep_idle(struct tcp *conn, const void *value, size_t len)
{
int keep_idle;
if (conn == NULL || value == NULL || len != sizeof(int)) {
return -EINVAL;
}
keep_idle = *(int *)value;
if (keep_idle < 1) {
return -EINVAL;
}
conn->keep_idle = keep_idle;
keep_alive_timer_restart(conn);
return 0;
}
static int set_tcp_keep_intvl(struct tcp *conn, const void *value, size_t len)
{
int keep_intvl;
if (conn == NULL || value == NULL || len != sizeof(int)) {
return -EINVAL;
}
keep_intvl = *(int *)value;
if (keep_intvl < 1) {
return -EINVAL;
}
conn->keep_intvl = keep_intvl;
keep_alive_timer_restart(conn);
return 0;
}
static int set_tcp_keep_cnt(struct tcp *conn, const void *value, size_t len)
{
int keep_cnt;
if (conn == NULL || value == NULL || len != sizeof(int)) {
return -EINVAL;
}
keep_cnt = *(int *)value;
if (keep_cnt < 1) {
return -EINVAL;
}
conn->keep_cnt = keep_cnt;
keep_alive_timer_restart(conn);
return 0;
}
static int get_tcp_keep_alive(struct tcp *conn, void *value, size_t *len)
{
if (conn == NULL || value == NULL || len == NULL ||
*len != sizeof(int)) {
return -EINVAL;
}
*((int *)value) = (int)conn->keep_alive;
return 0;
}
static int get_tcp_keep_idle(struct tcp *conn, void *value, size_t *len)
{
if (conn == NULL || value == NULL || len == NULL ||
*len != sizeof(int)) {
return -EINVAL;
}
*((int *)value) = (int)conn->keep_idle;
return 0;
}
static int get_tcp_keep_intvl(struct tcp *conn, void *value, size_t *len)
{
if (conn == NULL || value == NULL || len == NULL ||
*len != sizeof(int)) {
return -EINVAL;
}
*((int *)value) = (int)conn->keep_intvl;
return 0;
}
static int get_tcp_keep_cnt(struct tcp *conn, void *value, size_t *len)
{
if (conn == NULL || value == NULL || len == NULL ||
*len != sizeof(int)) {
return -EINVAL;
}
*((int *)value) = (int)conn->keep_cnt;
return 0;
}
#else /* CONFIG_NET_TCP_KEEPALIVE */
#define keep_alive_timer_init(...)
#define keep_alive_param_copy(...)
#define keep_alive_timer_restart(...)
#define keep_alive_timer_stop(...)
#define set_tcp_keep_alive(...) (-ENOPROTOOPT)
#define set_tcp_keep_idle(...) (-ENOPROTOOPT)
#define set_tcp_keep_intvl(...) (-ENOPROTOOPT)
#define set_tcp_keep_cnt(...) (-ENOPROTOOPT)
#define get_tcp_keep_alive(...) (-ENOPROTOOPT)
#define get_tcp_keep_idle(...) (-ENOPROTOOPT)
#define get_tcp_keep_intvl(...) (-ENOPROTOOPT)
#define get_tcp_keep_cnt(...) (-ENOPROTOOPT)
#endif /* CONFIG_NET_TCP_KEEPALIVE */
static void tcp_send_queue_flush(struct tcp *conn)
{
struct net_pkt *pkt;
k_work_cancel_delayable(&conn->send_timer);
while ((pkt = tcp_slist(conn, &conn->send_queue, get,
struct net_pkt, next))) {
tcp_pkt_unref(pkt);
}
}
static void tcp_conn_release(struct k_work *work)
{
struct tcp *conn = CONTAINER_OF(work, struct tcp, conn_release);
struct net_pkt *pkt;
#if defined(CONFIG_NET_TEST)
if (conn->test_closed_cb != NULL) {
conn->test_closed_cb(conn, conn->test_user_data);
}
#endif
/* Application is no longer there, unref any remaining packets on the
* fifo (although there shouldn't be any at this point.)
*/
while ((pkt = k_fifo_get(&conn->recv_data, K_NO_WAIT)) != NULL) {
tcp_pkt_unref(pkt);
}
k_mutex_lock(&conn->lock, K_FOREVER);
if (conn->context->conn_handler) {
net_conn_unregister(conn->context->conn_handler);
conn->context->conn_handler = NULL;
}
conn->context->tcp = NULL;
conn->state = TCP_UNUSED;
tcp_send_queue_flush(conn);
(void)k_work_cancel_delayable(&conn->send_data_timer);
tcp_pkt_unref(conn->send_data);
if (CONFIG_NET_TCP_RECV_QUEUE_TIMEOUT) {
tcp_pkt_unref(conn->queue_recv_data);
}
(void)k_work_cancel_delayable(&conn->timewait_timer);
(void)k_work_cancel_delayable(&conn->fin_timer);
(void)k_work_cancel_delayable(&conn->persist_timer);
(void)k_work_cancel_delayable(&conn->ack_timer);
(void)k_work_cancel_delayable(&conn->send_timer);
(void)k_work_cancel_delayable(&conn->recv_queue_timer);
keep_alive_timer_stop(conn);
k_mutex_unlock(&conn->lock);
net_context_unref(conn->context);
conn->context = NULL;
k_mutex_lock(&tcp_lock, K_FOREVER);
sys_slist_find_and_remove(&tcp_conns, &conn->next);
k_mutex_unlock(&tcp_lock);
k_mem_slab_free(&tcp_conns_slab, (void *)conn);
}
#if defined(CONFIG_NET_TEST)
void tcp_install_close_cb(struct net_context *ctx,
net_tcp_closed_cb_t cb,
void *user_data)
{
NET_ASSERT(ctx->tcp != NULL);
((struct tcp *)ctx->tcp)->test_closed_cb = cb;
((struct tcp *)ctx->tcp)->test_user_data = user_data;
}
#endif
static int tcp_conn_unref(struct tcp *conn)
{
int ref_count = atomic_get(&conn->ref_count);
NET_DBG("conn: %p, ref_count=%d", conn, ref_count);
k_mutex_lock(&conn->lock, K_FOREVER);
#if !defined(CONFIG_NET_TEST_PROTOCOL)
if (conn->in_connect) {
conn->in_connect = false;
k_sem_reset(&conn->connect_sem);
}
#endif /* CONFIG_NET_TEST_PROTOCOL */
k_mutex_unlock(&conn->lock);
ref_count = atomic_dec(&conn->ref_count) - 1;
if (ref_count != 0) {
tp_out(net_context_get_family(conn->context), conn->iface,
"TP_TRACE", "event", "CONN_DELETE");
return ref_count;
}
/* Release the TCP context from the TCP workqueue. This will ensure,
* that all pending TCP works are cancelled properly, when the context
* is released.
*/
k_work_submit_to_queue(&tcp_work_q, &conn->conn_release);
return ref_count;
}
#if CONFIG_NET_TCP_LOG_LEVEL >= LOG_LEVEL_DBG
#define tcp_conn_close(conn, status) \
tcp_conn_close_debug(conn, status, __func__, __LINE__)
static int tcp_conn_close_debug(struct tcp *conn, int status,
const char *caller, int line)
#else
static int tcp_conn_close(struct tcp *conn, int status)
#endif
{
#if CONFIG_NET_TCP_LOG_LEVEL >= LOG_LEVEL_DBG
NET_DBG("conn: %p closed by TCP stack (%s():%d)", conn, caller, line);
#endif
k_mutex_lock(&conn->lock, K_FOREVER);
conn_state(conn, TCP_CLOSED);
keep_alive_timer_stop(conn);
k_mutex_unlock(&conn->lock);
if (conn->in_connect) {
if (conn->connect_cb) {
conn->connect_cb(conn->context, status, conn->context->user_data);
/* Make sure the connect_cb is only called once. */
conn->connect_cb = NULL;
}
} else if (conn->context->recv_cb) {
conn->context->recv_cb(conn->context, NULL, NULL, NULL,
status, conn->recv_user_data);
}
k_sem_give(&conn->tx_sem);
return tcp_conn_unref(conn);
}
static bool tcp_send_process_no_lock(struct tcp *conn)
{
bool unref = false;
struct net_pkt *pkt;
bool local = false;
pkt = tcp_slist(conn, &conn->send_queue, peek_head,
struct net_pkt, next);
if (!pkt) {
goto out;
}
NET_DBG("%s %s", tcp_th(pkt), conn->in_retransmission ?
"in_retransmission" : "");
if (conn->in_retransmission) {
if (conn->send_retries > 0) {
struct net_pkt *clone = tcp_pkt_clone(pkt);
if (clone) {
tcp_send(clone);
conn->send_retries--;
}
} else {
unref = true;
goto out;
}
} else {
uint8_t fl = th_get(pkt)->th_flags;
bool forget = ACK == fl || PSH == fl || (ACK | PSH) == fl ||
RST & fl;
pkt = forget ? tcp_slist(conn, &conn->send_queue, get,
struct net_pkt, next) :
tcp_pkt_clone(pkt);
if (!pkt) {
NET_ERR("net_pkt alloc failure");
goto out;
}
if (is_destination_local(pkt)) {
local = true;
}
tcp_send(pkt);
if (forget == false &&
!k_work_delayable_remaining_get(&conn->send_timer)) {
conn->send_retries = tcp_retries;
conn->in_retransmission = true;
}
}
if (conn->in_retransmission) {
k_work_reschedule_for_queue(&tcp_work_q, &conn->send_timer,
K_MSEC(TCP_RTO_MS));
} else if (local && !sys_slist_is_empty(&conn->send_queue)) {
k_work_reschedule_for_queue(&tcp_work_q, &conn->send_timer,
K_NO_WAIT);
}
out:
return unref;
}
static void tcp_send_process(struct k_work *work)
{
struct k_work_delayable *dwork = k_work_delayable_from_work(work);
struct tcp *conn = CONTAINER_OF(dwork, struct tcp, send_timer);
bool unref;
k_mutex_lock(&conn->lock, K_FOREVER);
unref = tcp_send_process_no_lock(conn);
k_mutex_unlock(&conn->lock);
if (unref) {
tcp_conn_close(conn, -ETIMEDOUT);
}
}
static void tcp_send_timer_cancel(struct tcp *conn)
{
if (conn->in_retransmission == false) {
return;
}
k_work_cancel_delayable(&conn->send_timer);
{
struct net_pkt *pkt = tcp_slist(conn, &conn->send_queue, get,
struct net_pkt, next);
if (pkt) {
NET_DBG("%s", tcp_th(pkt));
tcp_pkt_unref(pkt);
}
}
if (sys_slist_is_empty(&conn->send_queue)) {
conn->in_retransmission = false;
} else {
conn->send_retries = tcp_retries;
k_work_reschedule_for_queue(&tcp_work_q, &conn->send_timer,
K_MSEC(TCP_RTO_MS));
}
}
#if defined(CONFIG_NET_TCP_IPV6_ND_REACHABILITY_HINT)
static void tcp_nbr_reachability_hint(struct tcp *conn)
{
int64_t now;
struct net_if *iface;
if (net_context_get_family(conn->context) != AF_INET6) {
return;
}
now = k_uptime_get();
iface = net_context_get_iface(conn->context);
/* Ensure that Neighbor Reachability hints are rate-limited (using threshold
* of half of reachable time).
*/
if ((now - conn->last_nd_hint_time) > (net_if_ipv6_get_reachable_time(iface) / 2)) {
net_ipv6_nbr_reachability_hint(iface, &conn->dst.sin6.sin6_addr);
conn->last_nd_hint_time = now;
}
}
#else /* CONFIG_NET_TCP_IPV6_ND_REACHABILITY_HINT */
#define tcp_nbr_reachability_hint(...)
#endif /* CONFIG_NET_TCP_IPV6_ND_REACHABILITY_HINT */
static const char *tcp_state_to_str(enum tcp_state state, bool prefix)
{
const char *s = NULL;
#define _(_x) case _x: do { s = #_x; goto out; } while (0)
switch (state) {
_(TCP_UNUSED);
_(TCP_LISTEN);
_(TCP_SYN_SENT);
_(TCP_SYN_RECEIVED);
_(TCP_ESTABLISHED);
_(TCP_FIN_WAIT_1);
_(TCP_FIN_WAIT_2);
_(TCP_CLOSE_WAIT);
_(TCP_CLOSING);
_(TCP_LAST_ACK);
_(TCP_TIME_WAIT);
_(TCP_CLOSED);
}
#undef _
NET_ASSERT(s, "Invalid TCP state: %u", state);
out:
return prefix ? s : (s + 4);
}
static const char *tcp_conn_state(struct tcp *conn, struct net_pkt *pkt)
{
#define BUF_SIZE 160
static char buf[BUF_SIZE];
snprintk(buf, BUF_SIZE, "%s [%s Seq=%u Ack=%u]", pkt ? tcp_th(pkt) : "",
tcp_state_to_str(conn->state, false),
conn->seq, conn->ack);
#undef BUF_SIZE
return buf;
}
static uint8_t *tcp_options_get(struct net_pkt *pkt, int tcp_options_len,
uint8_t *buf, size_t buf_len)
{
struct net_pkt_cursor backup;
int ret;
net_pkt_cursor_backup(pkt, &backup);
net_pkt_cursor_init(pkt);
net_pkt_skip(pkt, net_pkt_ip_hdr_len(pkt) + net_pkt_ip_opts_len(pkt) +
sizeof(struct tcphdr));
ret = net_pkt_read(pkt, buf, MIN(tcp_options_len, buf_len));
if (ret < 0) {
buf = NULL;
}
net_pkt_cursor_restore(pkt, &backup);
return buf;
}
static bool tcp_options_check(struct tcp_options *recv_options,
struct net_pkt *pkt, ssize_t len)
{
uint8_t options_buf[40]; /* TCP header max options size is 40 */
bool result = len > 0 && ((len % 4) == 0) ? true : false;
uint8_t *options = tcp_options_get(pkt, len, options_buf,
sizeof(options_buf));
uint8_t opt, opt_len;
NET_DBG("len=%zd", len);
recv_options->mss_found = false;
recv_options->wnd_found = false;
for ( ; options && len >= 1; options += opt_len, len -= opt_len) {
opt = options[0];
if (opt == NET_TCP_END_OPT) {
break;
} else if (opt == NET_TCP_NOP_OPT) {
opt_len = 1;
continue;
} else {
if (len < 2) { /* Only END and NOP can have length 1 */
NET_ERR("Illegal option %d with length %zd",
opt, len);
result = false;
break;
}
opt_len = options[1];
}
NET_DBG("opt: %hu, opt_len: %hu",
(uint16_t)opt, (uint16_t)opt_len);
if (opt_len < 2 || opt_len > len) {
result = false;
break;
}
switch (opt) {
case NET_TCP_MSS_OPT:
if (opt_len != 4) {
result = false;
goto end;
}
recv_options->mss =
ntohs(UNALIGNED_GET((uint16_t *)(options + 2)));
recv_options->mss_found = true;
NET_DBG("MSS=%hu", recv_options->mss);
break;
case NET_TCP_WINDOW_SCALE_OPT:
if (opt_len != 3) {
result = false;
goto end;
}
recv_options->window = opt;
recv_options->wnd_found = true;
break;
default:
continue;
}
}
end:
if (false == result) {
NET_WARN("Invalid TCP options");
}
return result;
}
static bool tcp_short_window(struct tcp *conn)
{
int32_t threshold = MIN(conn_mss(conn), conn->recv_win_max / 2);
if (conn->recv_win > threshold) {
return false;
}
return true;
}
/**
* @brief Update TCP receive window
*
* @param conn TCP network connection
* @param delta Receive window delta
*
* @return 0 on success, -EINVAL
* if the receive window delta is out of bounds
*/
static int tcp_update_recv_wnd(struct tcp *conn, int32_t delta)
{
int32_t new_win;
bool short_win_before;
bool short_win_after;
new_win = conn->recv_win + delta;
if (new_win < 0) {
new_win = 0;
} else if (new_win > conn->recv_win_max) {
new_win = conn->recv_win_max;
}
short_win_before = tcp_short_window(conn);
conn->recv_win = new_win;
short_win_after = tcp_short_window(conn);
if (short_win_before && !short_win_after &&
conn->state == TCP_ESTABLISHED) {
k_work_cancel_delayable(&conn->ack_timer);
tcp_out(conn, ACK);
}
return 0;
}
static size_t tcp_check_pending_data(struct tcp *conn, struct net_pkt *pkt,
size_t len)
{
size_t pending_len = 0;
if (CONFIG_NET_TCP_RECV_QUEUE_TIMEOUT &&
!net_pkt_is_empty(conn->queue_recv_data)) {
/* Some potentential cases:
* Note: MI = MAX_INT
* Packet | Queued| End off | Gap size | Required handling
* Seq|Len|Seq|Len| | |
* 3 | 3 | 6 | 4 | 3+3-6= 0 | 6-3-3=0 | Append
* 3 | 4 | 6 | 4 | 3+4-6 = 1 | 6-3-4=-1 | Append, pull from queue
* 3 | 7 | 6 | 4 | 3+7-6 = 4 | 6-3-7=-4 | Drop queued data
* 3 | 8 | 6 | 4 | 3+8-6 = 5 | 6-3-8=-5 | Drop queued data
* 6 | 5 | 6 | 4 | 6+5-6 = 5 | 6-6-5=-5 | Drop queued data
* 6 | 4 | 6 | 4 | 6+4-6 = 4 | 6-6-4=-4 | Drop queued data / packet
* 10 | 2 | 6 | 4 | 10+2-6= 6 | 6-10-2=-6| Should not happen, dropping queue
* 7 | 4 | 6 | 4 | 7+4-6 = 5 | 6-7-4=-5 | Should not happen, dropping queue
* 11 | 2 | 6 | 4 | 11+2-6= 7 | 6-11-2=-7| Should not happen, dropping queue
* 2 | 3 | 6 | 4 | 2+3-6= MI | 6-2-3=1 | Keep queued data
*/
struct tcphdr *th = th_get(pkt);
uint32_t expected_seq = th_seq(th) + len;
uint32_t pending_seq;
int32_t gap_size;
uint32_t end_offset;
pending_seq = tcp_get_seq(conn->queue_recv_data->buffer);
end_offset = expected_seq - pending_seq;
gap_size = (int32_t)(pending_seq - th_seq(th) - ((uint32_t)len));
pending_len = net_pkt_get_len(conn->queue_recv_data);
if (end_offset < pending_len) {
if (end_offset) {
net_pkt_remove_tail(pkt, end_offset);
pending_len -= end_offset;
}
NET_DBG("Found pending data seq %u len %zd",
expected_seq, pending_len);
net_buf_frag_add(pkt->buffer,
conn->queue_recv_data->buffer);
conn->queue_recv_data->buffer = NULL;
k_work_cancel_delayable(&conn->recv_queue_timer);
} else {
/* Check if the queued data is just a section of the incoming data */
if (gap_size <= 0) {
net_buf_unref(conn->queue_recv_data->buffer);
conn->queue_recv_data->buffer = NULL;
k_work_cancel_delayable(&conn->recv_queue_timer);
}
pending_len = 0;
}
}
return pending_len;
}
static enum net_verdict tcp_data_get(struct tcp *conn, struct net_pkt *pkt, size_t *len)
{
enum net_verdict ret = NET_DROP;
if (tcp_recv_cb) {
tcp_recv_cb(conn, pkt);
goto out;
}
if (conn->context->recv_cb) {
/* If there is any out-of-order pending data, then pass it
* to the application here.
*/
*len += tcp_check_pending_data(conn, pkt, *len);
net_pkt_cursor_init(pkt);
net_pkt_set_overwrite(pkt, true);
net_pkt_skip(pkt, net_pkt_get_len(pkt) - *len);
tcp_update_recv_wnd(conn, -*len);
/* Do not pass data to application with TCP conn
* locked as there could be an issue when the app tries
* to send the data and the conn is locked. So the recv
* data is placed in fifo which is flushed in tcp_in()
* after unlocking the conn
*/
k_fifo_put(&conn->recv_data, pkt);
ret = NET_OK;
}
out:
return ret;
}
static int tcp_finalize_pkt(struct net_pkt *pkt)
{
net_pkt_cursor_init(pkt);
if (IS_ENABLED(CONFIG_NET_IPV4) && net_pkt_family(pkt) == AF_INET) {
return net_ipv4_finalize(pkt, IPPROTO_TCP);
}
if (IS_ENABLED(CONFIG_NET_IPV6) && net_pkt_family(pkt) == AF_INET6) {
return net_ipv6_finalize(pkt, IPPROTO_TCP);
}
return -EINVAL;
}
static int tcp_header_add(struct tcp *conn, struct net_pkt *pkt, uint8_t flags,
uint32_t seq)
{
NET_PKT_DATA_ACCESS_DEFINE(tcp_access, struct tcphdr);
struct tcphdr *th;
th = (struct tcphdr *)net_pkt_get_data(pkt, &tcp_access);
if (!th) {
return -ENOBUFS;
}
memset(th, 0, sizeof(struct tcphdr));
UNALIGNED_PUT(conn->src.sin.sin_port, &th->th_sport);
UNALIGNED_PUT(conn->dst.sin.sin_port, &th->th_dport);
th->th_off = 5;
if (conn->send_options.mss_found) {
th->th_off++;
}
UNALIGNED_PUT(flags, &th->th_flags);
UNALIGNED_PUT(htons(conn->recv_win), &th->th_win);
UNALIGNED_PUT(htonl(seq), &th->th_seq);
if (ACK & flags) {
UNALIGNED_PUT(htonl(conn->ack), &th->th_ack);
}
return net_pkt_set_data(pkt, &tcp_access);
}
static int ip_header_add(struct tcp *conn, struct net_pkt *pkt)
{
if (IS_ENABLED(CONFIG_NET_IPV4) && net_pkt_family(pkt) == AF_INET) {
return net_context_create_ipv4_new(conn->context, pkt,
&conn->src.sin.sin_addr,
&conn->dst.sin.sin_addr);
}
if (IS_ENABLED(CONFIG_NET_IPV6) && net_pkt_family(pkt) == AF_INET6) {
return net_context_create_ipv6_new(conn->context, pkt,
&conn->src.sin6.sin6_addr,
&conn->dst.sin6.sin6_addr);
}
return -EINVAL;
}
static int set_tcp_nodelay(struct tcp *conn, const void *value, size_t len)
{
int no_delay_int;
if (len != sizeof(int)) {
return -EINVAL;
}
no_delay_int = *(int *)value;
if ((no_delay_int < 0) || (no_delay_int > 1)) {
return -EINVAL;
}
conn->tcp_nodelay = (bool)no_delay_int;
return 0;
}
static int get_tcp_nodelay(struct tcp *conn, void *value, size_t *len)
{
int no_delay_int = (int)conn->tcp_nodelay;
*((int *)value) = no_delay_int;
if (len) {
*len = sizeof(int);
}
return 0;
}
static int net_tcp_set_mss_opt(struct tcp *conn, struct net_pkt *pkt)
{
NET_PKT_DATA_ACCESS_DEFINE(mss_opt_access, struct tcp_mss_option);
struct tcp_mss_option *mss;
uint32_t recv_mss;
mss = net_pkt_get_data(pkt, &mss_opt_access);
if (!mss) {
return -ENOBUFS;
}
recv_mss = net_tcp_get_supported_mss(conn);
recv_mss |= (NET_TCP_MSS_OPT << 24) | (NET_TCP_MSS_SIZE << 16);
UNALIGNED_PUT(htonl(recv_mss), (uint32_t *)mss);
return net_pkt_set_data(pkt, &mss_opt_access);
}
static bool is_destination_local(struct net_pkt *pkt)
{
if (IS_ENABLED(CONFIG_NET_IPV4) && net_pkt_family(pkt) == AF_INET) {
if (net_ipv4_is_addr_loopback(
(struct in_addr *)NET_IPV4_HDR(pkt)->dst) ||
net_ipv4_is_my_addr(
(struct in_addr *)NET_IPV4_HDR(pkt)->dst)) {
return true;
}
}
if (IS_ENABLED(CONFIG_NET_IPV6) && net_pkt_family(pkt) == AF_INET6) {
if (net_ipv6_is_addr_loopback(
(struct in6_addr *)NET_IPV6_HDR(pkt)->dst) ||
net_ipv6_is_my_addr(
(struct in6_addr *)NET_IPV6_HDR(pkt)->dst)) {
return true;
}
}
return false;
}
void net_tcp_reply_rst(struct net_pkt *pkt)
{
NET_PKT_DATA_ACCESS_DEFINE(tcp_access_rst, struct tcphdr);
struct tcphdr *th_pkt = th_get(pkt);
struct tcphdr *th_rst;
struct net_pkt *rst;
int ret;
if (th_pkt == NULL || (th_flags(th_pkt) & RST)) {
/* Don't reply to a RST segment. */
return;
}
rst = tcp_pkt_alloc_no_conn(pkt->iface, pkt->family,
sizeof(struct tcphdr));
if (rst == NULL) {
return;
}
/* IP header */
if (IS_ENABLED(CONFIG_NET_IPV4) && net_pkt_family(pkt) == AF_INET) {
ret = net_ipv4_create(rst,
(struct in_addr *)NET_IPV4_HDR(pkt)->dst,
(struct in_addr *)NET_IPV4_HDR(pkt)->src);
} else if (IS_ENABLED(CONFIG_NET_IPV6) && net_pkt_family(pkt) == AF_INET6) {
ret = net_ipv6_create(rst,
(struct in6_addr *)NET_IPV6_HDR(pkt)->dst,
(struct in6_addr *)NET_IPV6_HDR(pkt)->src);
} else {
ret = -EINVAL;
}
if (ret < 0) {
goto err;
}
/* TCP header */
th_rst = (struct tcphdr *)net_pkt_get_data(rst, &tcp_access_rst);
if (th_rst == NULL) {
goto err;
}
memset(th_rst, 0, sizeof(struct tcphdr));
UNALIGNED_PUT(th_pkt->th_dport, &th_rst->th_sport);
UNALIGNED_PUT(th_pkt->th_sport, &th_rst->th_dport);
th_rst->th_off = 5;
if (th_flags(th_pkt) & ACK) {
UNALIGNED_PUT(RST, &th_rst->th_flags);
UNALIGNED_PUT(th_pkt->th_ack, &th_rst->th_seq);
} else {
uint32_t ack = ntohl(th_pkt->th_seq) + tcp_data_len(pkt);
UNALIGNED_PUT(RST | ACK, &th_rst->th_flags);
UNALIGNED_PUT(htonl(ack), &th_rst->th_ack);
}
ret = net_pkt_set_data(rst, &tcp_access_rst);
if (ret < 0) {
goto err;
}
ret = tcp_finalize_pkt(rst);
if (ret < 0) {
goto err;
}
NET_DBG("%s", tcp_th(rst));
tcp_send(rst);
return;
err:
tcp_pkt_unref(rst);
}
static int tcp_out_ext(struct tcp *conn, uint8_t flags, struct net_pkt *data,
uint32_t seq)
{
size_t alloc_len = sizeof(struct tcphdr);
struct net_pkt *pkt;
int ret = 0;
if (conn->send_options.mss_found) {
alloc_len += sizeof(uint32_t);
}
pkt = tcp_pkt_alloc(conn, alloc_len);
if (!pkt) {
ret = -ENOBUFS;
goto out;
}
if (data) {
/* Append the data buffer to the pkt */
net_pkt_append_buffer(pkt, data->buffer);
data->buffer = NULL;
}
ret = ip_header_add(conn, pkt);
if (ret < 0) {
tcp_pkt_unref(pkt);
goto out;
}
ret = tcp_header_add(conn, pkt, flags, seq);
if (ret < 0) {
tcp_pkt_unref(pkt);
goto out;
}
if (conn->send_options.mss_found) {
ret = net_tcp_set_mss_opt(conn, pkt);
if (ret < 0) {
tcp_pkt_unref(pkt);
goto out;
}
}
ret = tcp_finalize_pkt(pkt);
if (ret < 0) {
tcp_pkt_unref(pkt);
goto out;
}
if (tcp_send_cb) {
ret = tcp_send_cb(pkt);
goto out;
}
sys_slist_append(&conn->send_queue, &pkt->next);
if (is_destination_local(pkt)) {
/* If the destination is local, we have to let the current
* thread to finish with any state-machine changes before
* sending the packet, or it might lead to state inconsistencies
*/
k_work_schedule_for_queue(&tcp_work_q,
&conn->send_timer, K_NO_WAIT);
} else if (tcp_send_process_no_lock(conn)) {
tcp_conn_close(conn, -ETIMEDOUT);
}
out:
return ret;
}
static void tcp_out(struct tcp *conn, uint8_t flags)
{
(void)tcp_out_ext(conn, flags, NULL /* no data */, conn->seq);
}
static int tcp_pkt_pull(struct net_pkt *pkt, size_t len)
{
int total = net_pkt_get_len(pkt);
int ret = 0;
if (len > total) {
ret = -EINVAL;
goto out;
}
net_pkt_cursor_init(pkt);
net_pkt_set_overwrite(pkt, true);
net_pkt_pull(pkt, len);
net_pkt_trim_buffer(pkt);
out:
return ret;
}
static int tcp_pkt_peek(struct net_pkt *to, struct net_pkt *from, size_t pos,
size_t len)
{
net_pkt_cursor_init(to);
net_pkt_cursor_init(from);
if (pos) {
net_pkt_set_overwrite(from, true);
net_pkt_skip(from, pos);
}
return net_pkt_copy(to, from, len);
}
static int tcp_pkt_append(struct net_pkt *pkt, const uint8_t *data, size_t len)
{
size_t alloc_len = len;
struct net_buf *buf = NULL;
int ret = 0;
if (pkt->buffer) {
buf = net_buf_frag_last(pkt->buffer);
if (len > net_buf_tailroom(buf)) {
alloc_len -= net_buf_tailroom(buf);
} else {
alloc_len = 0;
}
}
if (alloc_len > 0) {
ret = net_pkt_alloc_buffer_raw(pkt, alloc_len,
TCP_PKT_ALLOC_TIMEOUT);
if (ret < 0) {
return -ENOBUFS;
}
}
if (buf == NULL) {
buf = pkt->buffer;
}
while (buf != NULL && len > 0) {
size_t write_len = MIN(len, net_buf_tailroom(buf));
net_buf_add_mem(buf, data, write_len);
data += write_len;
len -= write_len;
buf = buf->frags;
}
NET_ASSERT(len == 0, "Not all bytes written");
return ret;
}
static bool tcp_window_full(struct tcp *conn)
{
bool window_full = (conn->send_data_total >= conn->send_win);
#ifdef CONFIG_NET_TCP_CONGESTION_AVOIDANCE
window_full = window_full || (conn->send_data_total >= conn->ca.cwnd);
#endif
if (window_full) {
NET_DBG("conn: %p TX window_full", conn);
}
return window_full;
}
static int tcp_unsent_len(struct tcp *conn)
{
int unsent_len;
if (conn->unacked_len > conn->send_data_total) {
NET_ERR("total=%zu, unacked_len=%d",
conn->send_data_total, conn->unacked_len);
unsent_len = -ERANGE;
goto out;
}
unsent_len = conn->send_data_total - conn->unacked_len;
if (conn->unacked_len >= conn->send_win) {
unsent_len = 0;
} else {
unsent_len = MIN(unsent_len, conn->send_win - conn->unacked_len);
#ifdef CONFIG_NET_TCP_CONGESTION_AVOIDANCE
if (conn->unacked_len >= conn->ca.cwnd) {
unsent_len = 0;
} else {
unsent_len = MIN(unsent_len, conn->ca.cwnd - conn->unacked_len);
}
#endif
}
out:
return unsent_len;
}
static int tcp_send_data(struct tcp *conn)
{
int ret = 0;
int len;
struct net_pkt *pkt;
len = MIN(tcp_unsent_len(conn), conn_mss(conn));
if (len < 0) {
ret = len;
goto out;
}
if (len == 0) {
NET_DBG("conn: %p no data to send", conn);
ret = -ENODATA;
goto out;
}
pkt = tcp_pkt_alloc(conn, len);
if (!pkt) {
NET_ERR("conn: %p packet allocation failed, len=%d", conn, len);
ret = -ENOBUFS;
goto out;
}
ret = tcp_pkt_peek(pkt, conn->send_data, conn->unacked_len, len);
if (ret < 0) {
tcp_pkt_unref(pkt);
ret = -ENOBUFS;
goto out;
}
ret = tcp_out_ext(conn, PSH | ACK, pkt, conn->seq + conn->unacked_len);
if (ret == 0) {
conn->unacked_len += len;
if (conn->data_mode == TCP_DATA_MODE_RESEND) {
net_stats_update_tcp_resent(conn->iface, len);
net_stats_update_tcp_seg_rexmit(conn->iface);
} else {
net_stats_update_tcp_sent(conn->iface, len);
net_stats_update_tcp_seg_sent(conn->iface);
}
}
/* The data we want to send, has been moved to the send queue so we
* can unref the head net_pkt. If there was an error, we need to remove
* the packet anyway.
*/
tcp_pkt_unref(pkt);
conn_send_data_dump(conn);
out:
return ret;
}
/* Send all queued but unsent data from the send_data packet by packet
* until the receiver's window is full. */
static int tcp_send_queued_data(struct tcp *conn)
{
int ret = 0;
bool subscribe = false;
if (conn->data_mode == TCP_DATA_MODE_RESEND) {
goto out;
}
while (tcp_unsent_len(conn) > 0) {
/* Implement Nagle's algorithm */
if ((conn->tcp_nodelay == false) && (conn->unacked_len > 0)) {
/* If there is already pending data */
if (tcp_unsent_len(conn) < conn_mss(conn)) {
/* The number of bytes to be transmitted is less than an MSS,
* skip transmission for now.
* Wait for more data to be transmitted or all pending data
* being acknowledged.
*/
break;
}
}
ret = tcp_send_data(conn);
if (ret < 0) {
break;
}
}
if (conn->send_data_total) {
subscribe = true;
}
if (k_work_delayable_remaining_get(&conn->send_data_timer)) {
subscribe = false;
}
if (subscribe) {
conn->send_data_retries = 0;
k_work_reschedule_for_queue(&tcp_work_q, &conn->send_data_timer,
K_MSEC(TCP_RTO_MS));
}
out:
return ret;
}
static void tcp_cleanup_recv_queue(struct k_work *work)
{
struct k_work_delayable *dwork = k_work_delayable_from_work(work);
struct tcp *conn = CONTAINER_OF(dwork, struct tcp, recv_queue_timer);
k_mutex_lock(&conn->lock, K_FOREVER);
NET_DBG("Cleanup recv queue conn %p len %zd seq %u", conn,
net_pkt_get_len(conn->queue_recv_data),
tcp_get_seq(conn->queue_recv_data->buffer));
net_buf_unref(conn->queue_recv_data->buffer);
conn->queue_recv_data->buffer = NULL;
k_mutex_unlock(&conn->lock);
}
static void tcp_resend_data(struct k_work *work)
{
struct k_work_delayable *dwork = k_work_delayable_from_work(work);
struct tcp *conn = CONTAINER_OF(dwork, struct tcp, send_data_timer);
bool conn_unref = false;
int ret;
int exp_tcp_rto;
k_mutex_lock(&conn->lock, K_FOREVER);
NET_DBG("send_data_retries=%hu", conn->send_data_retries);
if (conn->send_data_retries >= tcp_retries) {
NET_DBG("conn: %p close, data retransmissions exceeded", conn);
conn_unref = true;
goto out;
}
if (IS_ENABLED(CONFIG_NET_TCP_CONGESTION_AVOIDANCE) &&
(conn->send_data_retries == 0)) {
tcp_ca_timeout(conn);
if (tcp_window_full(conn)) {
(void)k_sem_take(&conn->tx_sem, K_NO_WAIT);
}
}
conn->data_mode = TCP_DATA_MODE_RESEND;
conn->unacked_len = 0;
ret = tcp_send_data(conn);
conn->send_data_retries++;
if (ret == 0) {
if (conn->in_close && conn->send_data_total == 0) {
NET_DBG("TCP connection in %s close, "
"not disposing yet (waiting %dms)",
"active", tcp_fin_timeout_ms);
k_work_reschedule_for_queue(&tcp_work_q,
&conn->fin_timer,
FIN_TIMEOUT);
conn_state(conn, TCP_FIN_WAIT_1);
ret = tcp_out_ext(conn, FIN | ACK, NULL,
conn->seq + conn->unacked_len);
if (ret == 0) {
conn_seq(conn, + 1);
}
keep_alive_timer_stop(conn);
goto out;
}
} else if (ret == -ENODATA) {
conn->data_mode = TCP_DATA_MODE_SEND;
goto out;
} else if (ret == -ENOBUFS) {
NET_ERR("TCP failed to allocate buffer in retransmission");
}
exp_tcp_rto = TCP_RTO_MS;
/* The last retransmit does not need to wait that long */
if (conn->send_data_retries < tcp_retries) {
/* Every retransmit, the retransmission timeout increases by a factor 1.5 */
for (int i = 0; i < conn->send_data_retries; i++) {
exp_tcp_rto += exp_tcp_rto >> 1;
}
}
k_work_reschedule_for_queue(&tcp_work_q, &conn->send_data_timer,
K_MSEC(exp_tcp_rto));
out:
k_mutex_unlock(&conn->lock);
if (conn_unref) {
tcp_conn_close(conn, -ETIMEDOUT);
}
}
static void tcp_timewait_timeout(struct k_work *work)
{
struct k_work_delayable *dwork = k_work_delayable_from_work(work);
struct tcp *conn = CONTAINER_OF(dwork, struct tcp, timewait_timer);
/* no need to acquire the conn->lock as there is nothing scheduled here */
NET_DBG("conn: %p %s", conn, tcp_conn_state(conn, NULL));
(void)tcp_conn_close(conn, -ETIMEDOUT);
}
static void tcp_establish_timeout(struct tcp *conn)
{
NET_DBG("Did not receive %s in %dms", "ACK", ACK_TIMEOUT_MS);
NET_DBG("conn: %p %s", conn, tcp_conn_state(conn, NULL));
(void)tcp_conn_close(conn, -ETIMEDOUT);
}
static void tcp_fin_timeout(struct k_work *work)
{
struct k_work_delayable *dwork = k_work_delayable_from_work(work);
struct tcp *conn = CONTAINER_OF(dwork, struct tcp, fin_timer);
/* no need to acquire the conn->lock as there is nothing scheduled here */
if (conn->state == TCP_SYN_RECEIVED) {
tcp_establish_timeout(conn);
return;
}
NET_DBG("Did not receive %s in %dms", "FIN", tcp_fin_timeout_ms);
NET_DBG("conn: %p %s", conn, tcp_conn_state(conn, NULL));
(void)tcp_conn_close(conn, -ETIMEDOUT);
}
static void tcp_last_ack_timeout(struct k_work *work)
{
struct k_work_delayable *dwork = k_work_delayable_from_work(work);
struct tcp *conn = CONTAINER_OF(dwork, struct tcp, fin_timer);
NET_DBG("Did not receive %s in %dms", "last ACK", LAST_ACK_TIMEOUT_MS);
NET_DBG("conn: %p %s", conn, tcp_conn_state(conn, NULL));
(void)tcp_conn_close(conn, -ETIMEDOUT);
}
static void tcp_setup_last_ack_timer(struct tcp *conn)
{
/* Just in case the last ack is lost, install a timer that will
* close the connection in that case. Use the fin_timer for that
* as the fin handling cannot be done in this passive close state.
* Instead of default tcp_fin_timeout() function, have a separate
* function to catch this last ack case.
*/
k_work_init_delayable(&conn->fin_timer, tcp_last_ack_timeout);
NET_DBG("TCP connection in %s close, "
"not disposing yet (waiting %dms)",
"passive", LAST_ACK_TIMEOUT_MS);
k_work_reschedule_for_queue(&tcp_work_q,
&conn->fin_timer,
LAST_ACK_TIMEOUT);
}
static void tcp_cancel_last_ack_timer(struct tcp *conn)
{
k_work_cancel_delayable(&conn->fin_timer);
}
#if defined(CONFIG_NET_TCP_KEEPALIVE)
static void tcp_send_keepalive_probe(struct k_work *work)
{
struct k_work_delayable *dwork = k_work_delayable_from_work(work);
struct tcp *conn = CONTAINER_OF(dwork, struct tcp, keepalive_timer);
if (conn->state != TCP_ESTABLISHED) {
NET_DBG("conn: %p TCP connection not established", conn);
return;
}
if (!conn->keep_alive) {
NET_DBG("conn: %p keepalive is not enabled", conn);
return;
}
conn->keep_cur++;
if (conn->keep_cur > conn->keep_cnt) {
NET_DBG("conn: %p keepalive probe failed multiple times",
conn);
tcp_conn_close(conn, -ETIMEDOUT);
return;
}
NET_DBG("conn: %p keepalive probe", conn);
k_work_reschedule_for_queue(&tcp_work_q, &conn->keepalive_timer,
K_SECONDS(conn->keep_intvl));
(void)tcp_out_ext(conn, ACK, NULL, conn->seq - 1);
}
#endif /* CONFIG_NET_TCP_KEEPALIVE */
static void tcp_send_zwp(struct k_work *work)
{
struct k_work_delayable *dwork = k_work_delayable_from_work(work);
struct tcp *conn = CONTAINER_OF(dwork, struct tcp, persist_timer);
k_mutex_lock(&conn->lock, K_FOREVER);
(void)tcp_out_ext(conn, ACK, NULL, conn->seq - 1);
tcp_derive_rto(conn);
if (conn->send_win == 0) {
uint64_t timeout = TCP_RTO_MS;
/* Make sure the bitwise shift does not result in undefined behaviour */
if (conn->zwp_retries < 63) {
conn->zwp_retries++;
}
timeout <<= conn->zwp_retries;
if (timeout == 0 || timeout > ZWP_MAX_DELAY_MS) {
timeout = ZWP_MAX_DELAY_MS;
}
(void)k_work_reschedule_for_queue(
&tcp_work_q, &conn->persist_timer, K_MSEC(timeout));
}
k_mutex_unlock(&conn->lock);
}
static void tcp_send_ack(struct k_work *work)
{
struct k_work_delayable *dwork = k_work_delayable_from_work(work);
struct tcp *conn = CONTAINER_OF(dwork, struct tcp, ack_timer);
k_mutex_lock(&conn->lock, K_FOREVER);
tcp_out(conn, ACK);
k_mutex_unlock(&conn->lock);
}
static void tcp_conn_ref(struct tcp *conn)
{
int ref_count = atomic_inc(&conn->ref_count) + 1;
NET_DBG("conn: %p, ref_count: %d", conn, ref_count);
}
static struct tcp *tcp_conn_alloc(void)
{
struct tcp *conn = NULL;
int ret;
ret = k_mem_slab_alloc(&tcp_conns_slab, (void **)&conn, K_NO_WAIT);
if (ret) {
NET_ERR("Cannot allocate slab");
goto out;
}
memset(conn, 0, sizeof(*conn));
if (CONFIG_NET_TCP_RECV_QUEUE_TIMEOUT) {
conn->queue_recv_data = tcp_rx_pkt_alloc(conn, 0);
if (conn->queue_recv_data == NULL) {
NET_ERR("Cannot allocate %s queue for conn %p", "recv",
conn);
goto fail;
}
}
conn->send_data = tcp_pkt_alloc(conn, 0);
if (conn->send_data == NULL) {
NET_ERR("Cannot allocate %s queue for conn %p", "send", conn);
goto fail;
}
k_mutex_init(&conn->lock);
k_fifo_init(&conn->recv_data);
k_sem_init(&conn->connect_sem, 0, K_SEM_MAX_LIMIT);
k_sem_init(&conn->tx_sem, 1, 1);
conn->in_connect = false;
conn->state = TCP_LISTEN;
conn->recv_win_max = tcp_rx_window;
conn->recv_win = conn->recv_win_max;
conn->send_win_max = MAX(tcp_tx_window, NET_IPV6_MTU);
conn->send_win = conn->send_win_max;
conn->tcp_nodelay = false;
#ifdef CONFIG_NET_TCP_FAST_RETRANSMIT
conn->dup_ack_cnt = 0;
#endif
#ifdef CONFIG_NET_TCP_CONGESTION_AVOIDANCE
/* Initially set the congestion window at its max size, since only the MSS
* is available as soon as the connection is established
*/
conn->ca.cwnd = UINT16_MAX;
#endif
/* The ISN value will be set when we get the connection attempt or
* when trying to create a connection.
*/
conn->seq = 0U;
sys_slist_init(&conn->send_queue);
k_work_init_delayable(&conn->send_timer, tcp_send_process);
k_work_init_delayable(&conn->timewait_timer, tcp_timewait_timeout);
k_work_init_delayable(&conn->fin_timer, tcp_fin_timeout);
k_work_init_delayable(&conn->send_data_timer, tcp_resend_data);
k_work_init_delayable(&conn->recv_queue_timer, tcp_cleanup_recv_queue);
k_work_init_delayable(&conn->persist_timer, tcp_send_zwp);
k_work_init_delayable(&conn->ack_timer, tcp_send_ack);
k_work_init(&conn->conn_release, tcp_conn_release);
keep_alive_timer_init(conn);
tcp_conn_ref(conn);
k_mutex_lock(&tcp_lock, K_FOREVER);
sys_slist_append(&tcp_conns, &conn->next);
k_mutex_unlock(&tcp_lock);
out:
NET_DBG("conn: %p", conn);
return conn;
fail:
if (CONFIG_NET_TCP_RECV_QUEUE_TIMEOUT && conn->queue_recv_data) {
tcp_pkt_unref(conn->queue_recv_data);
conn->queue_recv_data = NULL;
}
k_mem_slab_free(&tcp_conns_slab, (void *)conn);
return NULL;
}
int net_tcp_get(struct net_context *context)
{
int ret = 0;
struct tcp *conn;
conn = tcp_conn_alloc();
if (conn == NULL) {
ret = -ENOMEM;
return ret;
}
/* Mutually link the net_context and tcp connection */
conn->context = context;
context->tcp = conn;
return ret;
}
static bool tcp_endpoint_cmp(union tcp_endpoint *ep, struct net_pkt *pkt,
enum pkt_addr which)
{
union tcp_endpoint ep_tmp;
if (tcp_endpoint_set(&ep_tmp, pkt, which) < 0) {
return false;
}
return !memcmp(ep, &ep_tmp, tcp_endpoint_len(ep->sa.sa_family));
}
static bool tcp_conn_cmp(struct tcp *conn, struct net_pkt *pkt)
{
return tcp_endpoint_cmp(&conn->src, pkt, TCP_EP_DST) &&
tcp_endpoint_cmp(&conn->dst, pkt, TCP_EP_SRC);
}
static struct tcp *tcp_conn_search(struct net_pkt *pkt)
{
bool found = false;
struct tcp *conn;
struct tcp *tmp;
k_mutex_lock(&tcp_lock, K_FOREVER);
SYS_SLIST_FOR_EACH_CONTAINER_SAFE(&tcp_conns, conn, tmp, next) {
found = tcp_conn_cmp(conn, pkt);
if (found) {
break;
}
}
k_mutex_unlock(&tcp_lock);
return found ? conn : NULL;
}
static struct tcp *tcp_conn_new(struct net_pkt *pkt);
static enum net_verdict tcp_recv(struct net_conn *net_conn,
struct net_pkt *pkt,
union net_ip_header *ip,
union net_proto_header *proto,
void *user_data)
{
struct tcp *conn;
struct tcphdr *th;
enum net_verdict verdict = NET_DROP;
ARG_UNUSED(net_conn);
ARG_UNUSED(proto);
conn = tcp_conn_search(pkt);
if (conn) {
goto in;
}
th = th_get(pkt);
if (th_flags(th) & SYN && !(th_flags(th) & ACK)) {
struct tcp *conn_old = ((struct net_context *)user_data)->tcp;
conn = tcp_conn_new(pkt);
if (!conn) {
NET_ERR("Cannot allocate a new TCP connection");
goto in;
}
conn->accepted_conn = conn_old;
}
in:
if (conn) {
verdict = tcp_in(conn, pkt);
} else {
net_tcp_reply_rst(pkt);
}
return verdict;
}
static uint32_t seq_scale(uint32_t seq)
{
return seq + (k_ticks_to_ns_floor32(k_uptime_ticks()) >> 6);
}
static uint8_t unique_key[16]; /* MD5 128 bits as described in RFC6528 */
static uint32_t tcpv6_init_isn(struct in6_addr *saddr,
struct in6_addr *daddr,
uint16_t sport,
uint16_t dport)
{
struct {
uint8_t key[sizeof(unique_key)];
struct in6_addr saddr;
struct in6_addr daddr;
uint16_t sport;
uint16_t dport;
} buf = {
.saddr = *(struct in6_addr *)saddr,
.daddr = *(struct in6_addr *)daddr,
.sport = sport,
.dport = dport
};
uint8_t hash[16];
static bool once;
if (!once) {
sys_rand_get(unique_key, sizeof(unique_key));
once = true;
}
memcpy(buf.key, unique_key, sizeof(buf.key));
#if defined(CONFIG_NET_TCP_ISN_RFC6528)
mbedtls_md5((const unsigned char *)&buf, sizeof(buf), hash);
#endif
return seq_scale(UNALIGNED_GET((uint32_t *)&hash[0]));
}
static uint32_t tcpv4_init_isn(struct in_addr *saddr,
struct in_addr *daddr,
uint16_t sport,
uint16_t dport)
{
struct {
uint8_t key[sizeof(unique_key)];
struct in_addr saddr;
struct in_addr daddr;
uint16_t sport;
uint16_t dport;
} buf = {
.saddr = *(struct in_addr *)saddr,
.daddr = *(struct in_addr *)daddr,
.sport = sport,
.dport = dport
};
uint8_t hash[16];
static bool once;
if (!once) {
sys_rand_get(unique_key, sizeof(unique_key));
once = true;
}
memcpy(buf.key, unique_key, sizeof(unique_key));
#if defined(CONFIG_NET_TCP_ISN_RFC6528)
mbedtls_md5((const unsigned char *)&buf, sizeof(buf), hash);
#endif
return seq_scale(UNALIGNED_GET((uint32_t *)&hash[0]));
}
static uint32_t tcp_init_isn(struct sockaddr *saddr, struct sockaddr *daddr)
{
if (IS_ENABLED(CONFIG_NET_TCP_ISN_RFC6528)) {
if (IS_ENABLED(CONFIG_NET_IPV6) &&
saddr->sa_family == AF_INET6) {
return tcpv6_init_isn(&net_sin6(saddr)->sin6_addr,
&net_sin6(daddr)->sin6_addr,
net_sin6(saddr)->sin6_port,
net_sin6(daddr)->sin6_port);
} else if (IS_ENABLED(CONFIG_NET_IPV4) &&
saddr->sa_family == AF_INET) {
return tcpv4_init_isn(&net_sin(saddr)->sin_addr,
&net_sin(daddr)->sin_addr,
net_sin(saddr)->sin_port,
net_sin(daddr)->sin_port);
}
}
return sys_rand32_get();
}
/* Create a new tcp connection, as a part of it, create and register
* net_context
*/
static struct tcp *tcp_conn_new(struct net_pkt *pkt)
{
struct tcp *conn = NULL;
struct net_context *context = NULL;
sa_family_t af = net_pkt_family(pkt);
struct sockaddr local_addr = { 0 };
int ret;
ret = net_context_get(af, SOCK_STREAM, IPPROTO_TCP, &context);
if (ret < 0) {
NET_ERR("net_context_get(): %d", ret);
goto err;
}
conn = context->tcp;
conn->iface = pkt->iface;
tcp_derive_rto(conn);
net_context_set_family(conn->context, net_pkt_family(pkt));
if (tcp_endpoint_set(&conn->dst, pkt, TCP_EP_SRC) < 0) {
net_context_put(context);
conn = NULL;
goto err;
}
if (tcp_endpoint_set(&conn->src, pkt, TCP_EP_DST) < 0) {
net_context_put(context);
conn = NULL;
goto err;
}
NET_DBG("conn: src: %s, dst: %s",
net_sprint_addr(conn->src.sa.sa_family,
(const void *)&conn->src.sin.sin_addr),
net_sprint_addr(conn->dst.sa.sa_family,
(const void *)&conn->dst.sin.sin_addr));
memcpy(&context->remote, &conn->dst, sizeof(context->remote));
context->flags |= NET_CONTEXT_REMOTE_ADDR_SET;
net_sin_ptr(&context->local)->sin_family = af;
local_addr.sa_family = net_context_get_family(context);
if (IS_ENABLED(CONFIG_NET_IPV6) &&
net_context_get_family(context) == AF_INET6) {
net_ipaddr_copy(&net_sin6(&local_addr)->sin6_addr,
&conn->src.sin6.sin6_addr);
} else if (IS_ENABLED(CONFIG_NET_IPV4) &&
net_context_get_family(context) == AF_INET) {
net_ipaddr_copy(&net_sin(&local_addr)->sin_addr,
&conn->src.sin.sin_addr);
}
ret = net_context_bind(context, &local_addr, sizeof(local_addr));
if (ret < 0) {
NET_DBG("Cannot bind accepted context, connection reset");
net_context_put(context);
conn = NULL;
goto err;
}
/* The newly created context object for the new TCP client connection needs
* all four parameters of the tuple (local address, local port, remote
* address, remote port) to be properly identified. Remote address and port
* are already copied above from conn->dst. The call to net_context_bind
* with the prepared local_addr further copies the local address. However,
* this call wont copy the local port, as the bind would then fail due to
* an address/port reuse without the REUSEPORT option enables for both
* connections. Therefore, we copy the port after the bind call.
* It is safe to bind to this address/port combination, as the new TCP
* client connection is separated from the local listening connection
* by the specified remote address and remote port.
*/
if (IS_ENABLED(CONFIG_NET_IPV6) &&
net_context_get_family(context) == AF_INET6) {
net_sin6_ptr(&context->local)->sin6_port = conn->src.sin6.sin6_port;
} else if (IS_ENABLED(CONFIG_NET_IPV4) &&
net_context_get_family(context) == AF_INET) {
net_sin_ptr(&context->local)->sin_port = conn->src.sin.sin_port;
}
if (!(IS_ENABLED(CONFIG_NET_TEST_PROTOCOL) ||
IS_ENABLED(CONFIG_NET_TEST))) {
conn->seq = tcp_init_isn(&local_addr, &context->remote);
}
NET_DBG("context: local: %s, remote: %s",
net_sprint_addr(local_addr.sa_family,
(const void *)&net_sin(&local_addr)->sin_addr),
net_sprint_addr(context->remote.sa_family,
(const void *)&net_sin(&context->remote)->sin_addr));
ret = net_conn_register(IPPROTO_TCP, af,
&context->remote, &local_addr,
ntohs(conn->dst.sin.sin_port),/* local port */
ntohs(conn->src.sin.sin_port),/* remote port */
context, tcp_recv, context,
&context->conn_handler);
if (ret < 0) {
NET_ERR("net_conn_register(): %d", ret);
net_context_put(context);
conn = NULL;
goto err;
}
err:
if (!conn) {
net_stats_update_tcp_seg_conndrop(net_pkt_iface(pkt));
}
return conn;
}
static bool tcp_validate_seq(struct tcp *conn, struct tcphdr *hdr)
{
return (net_tcp_seq_cmp(th_seq(hdr), conn->ack) >= 0) &&
(net_tcp_seq_cmp(th_seq(hdr), conn->ack + conn->recv_win) < 0);
}
static int32_t tcp_compute_new_length(struct tcp *conn, struct tcphdr *hdr, size_t len,
bool fin_received)
{
int32_t new_len = 0;
if (len > 0) {
/* Cases:
* - Data already received earlier: new_len <= 0
* - Partially new data new_len > 0
* - Out of order data new_len > 0,
* should be checked by sequence number
*/
new_len = (int32_t)(len) - net_tcp_seq_cmp(conn->ack, th_seq(hdr));
if (fin_received) {
/* Add with one additional byte as the FIN flag has to be subtracted */
new_len++;
}
}
return new_len;
}
static enum tcp_state tcp_enter_time_wait(struct tcp *conn)
{
tcp_send_timer_cancel(conn);
/* Entering TIME-WAIT, so cancel the timer and start the TIME-WAIT timer */
k_work_cancel_delayable(&conn->fin_timer);
k_work_reschedule_for_queue(
&tcp_work_q, &conn->timewait_timer,
K_MSEC(CONFIG_NET_TCP_TIME_WAIT_DELAY));
return TCP_TIME_WAIT;
}
static bool check_seq_list(struct net_buf *buf)
{
struct net_buf *last = NULL;
struct net_buf *tmp = buf;
uint32_t seq;
uint32_t next_seq = 0;
bool result = true;
while (tmp) {
seq = tcp_get_seq(tmp);
NET_DBG("buf %p seq %u len %d", tmp, seq, tmp->len);
if (last != NULL) {
if (next_seq != seq) {
result = false;
}
}
next_seq = seq + tmp->len;
last = tmp;
tmp = tmp->frags;
}
return result;
}
static void tcp_queue_recv_data(struct tcp *conn, struct net_pkt *pkt,
size_t len, uint32_t seq)
{
uint32_t seq_start = seq;
bool inserted = false;
struct net_buf *tmp;
NET_DBG("conn: %p len %zd seq %u ack %u", conn, len, seq, conn->ack);
tmp = pkt->buffer;
tcp_set_seq(tmp, seq);
seq += tmp->len;
tmp = tmp->frags;
while (tmp) {
tcp_set_seq(tmp, seq);
seq += tmp->len;
tmp = tmp->frags;
}
if (IS_ENABLED(CONFIG_NET_TCP_LOG_LEVEL_DBG)) {
NET_DBG("Queuing data: conn %p", conn);
}
if (!net_pkt_is_empty(conn->queue_recv_data)) {
/* Place the data to correct place in the list. If the data
* would not be sequential, then drop this packet.
*
* Only work with subtractions between sequence numbers in uint32_t format
* to proper handle cases that are around the wrapping point.
*/
/* Some potentential cases:
* Note: MI = MAX_INT
* Packet | Queued| End off1 | Start off| End off2 | Required handling
* Seq|Len|Seq|Len| | | |
* 3 | 3 | 6 | 4 | 3+3-6= 0 | NA | NA | Prepend
* 3 | 4 | 6 | 4 | 3+4-6 = 1 | NA | NA | Prepend, pull from buffer
* 3 | 7 | 6 | 4 | 3+7-6 = 4 | 6-3=3 | 6+4-3=7 | Drop queued data
* 3 | 8 | 6 | 4 | 3+8-6 = 5 | 6-3=3 | 6+4-3=7 | Drop queued data
* 6 | 5 | 6 | 4 | 6+5-6 = 5 | 6-6=0 | 6+4-6=4 | Drop queued data
* 6 | 4 | 6 | 4 | 6+4-6 = 4 | 6-6=0 | 6+4-6=4 | Drop queued data / packet
* 7 | 2 | 6 | 4 | 7+2-6 = 3 | 6-7=MI | 6+4-7=3 | Drop packet
* 10 | 2 | 6 | 4 | 10+2-6= 6 | 6-10=MI-3| 6+4-10=0 | Append
* 7 | 4 | 6 | 4 | 7+4-6 = 5 | 6-7 =MI | 6+4-7 =3 | Append, pull from packet
* 11 | 2 | 6 | 4 | 11+2-6= 7 | 6-11=MI-6| 6+4-11=MI-1 | Drop incoming packet
* 2 | 3 | 6 | 4 | 2+3-6= MI | 6-2=4 | 6+4-2=8 | Drop incoming packet
*/
uint32_t pending_seq;
uint32_t start_offset;
uint32_t end_offset;
size_t pending_len;
pending_seq = tcp_get_seq(conn->queue_recv_data->buffer);
end_offset = seq - pending_seq;
pending_len = net_pkt_get_len(conn->queue_recv_data);
if (end_offset < pending_len) {
if (end_offset < len) {
if (end_offset) {
net_pkt_remove_tail(pkt, end_offset);
}
/* Put new data before the pending data */
net_buf_frag_add(pkt->buffer,
conn->queue_recv_data->buffer);
NET_DBG("Adding at before queue, end_offset %i, pending_len %zu",
end_offset, pending_len);
conn->queue_recv_data->buffer = pkt->buffer;
inserted = true;
}
} else {
struct net_buf *last;
last = net_buf_frag_last(conn->queue_recv_data->buffer);
pending_seq = tcp_get_seq(last);
start_offset = pending_seq - seq_start;
/* Compute the offset w.r.t. the start point of the new packet */
end_offset = (pending_seq + last->len) - seq_start;
/* Check if queue start with within the within the new packet */
if ((start_offset < len) && (end_offset <= len)) {
/* The queued data is irrelevant since the new packet overlaps the
* new packet, take the new packet as contents
*/
net_buf_unref(conn->queue_recv_data->buffer);
conn->queue_recv_data->buffer = pkt->buffer;
inserted = true;
} else {
if (end_offset < len) {
if (end_offset) {
net_pkt_remove_tail(conn->queue_recv_data,
end_offset);
}
/* Put new data after pending data */
NET_DBG("Adding at end of queue, start %i, end %i, len %zu",
start_offset, end_offset, len);
net_buf_frag_add(conn->queue_recv_data->buffer,
pkt->buffer);
inserted = true;
}
}
}
if (inserted) {
NET_DBG("All pending data: conn %p", conn);
if (check_seq_list(conn->queue_recv_data->buffer) == false) {
NET_ERR("Incorrect order in out of order sequence for conn %p",
conn);
/* error in sequence list, drop it */
net_buf_unref(conn->queue_recv_data->buffer);
conn->queue_recv_data->buffer = NULL;
}
} else {
NET_DBG("Cannot add new data to queue");
}
} else {
net_pkt_append_buffer(conn->queue_recv_data, pkt->buffer);
inserted = true;
}
if (inserted) {
/* We need to keep the received data but free the pkt */
pkt->buffer = NULL;
if (!k_work_delayable_is_pending(&conn->recv_queue_timer)) {
k_work_reschedule_for_queue(
&tcp_work_q, &conn->recv_queue_timer,
K_MSEC(CONFIG_NET_TCP_RECV_QUEUE_TIMEOUT));
}
}
}
static enum net_verdict tcp_data_received(struct tcp *conn, struct net_pkt *pkt,
size_t *len)
{
enum net_verdict ret;
if (*len == 0) {
return NET_DROP;
}
ret = tcp_data_get(conn, pkt, len);
net_stats_update_tcp_seg_recv(conn->iface);
conn_ack(conn, *len);
/* Delay ACK response in case of small window or missing PSH,
* as described in RFC 813.
*/
if (tcp_short_window(conn)) {
k_work_schedule_for_queue(&tcp_work_q, &conn->ack_timer,
ACK_DELAY);
} else {
k_work_cancel_delayable(&conn->ack_timer);
tcp_out(conn, ACK);
}
return ret;
}
static void tcp_out_of_order_data(struct tcp *conn, struct net_pkt *pkt,
size_t data_len, uint32_t seq)
{
size_t headers_len;
if (data_len == 0) {
return;
}
headers_len = net_pkt_get_len(pkt) - data_len;
/* Get rid of protocol headers from the data */
if (tcp_pkt_pull(pkt, headers_len) < 0) {
return;
}
/* We received out-of-order data. Try to queue it.
*/
tcp_queue_recv_data(conn, pkt, data_len, seq);
}
static void tcp_check_sock_options(struct tcp *conn)
{
int sndbuf_opt = 0;
int rcvbuf_opt = 0;
if (IS_ENABLED(CONFIG_NET_CONTEXT_SNDBUF)) {
(void)net_context_get_option(conn->context, NET_OPT_SNDBUF,
&sndbuf_opt, NULL);
}
if (IS_ENABLED(CONFIG_NET_CONTEXT_RCVBUF)) {
(void)net_context_get_option(conn->context, NET_OPT_RCVBUF,
&rcvbuf_opt, NULL);
}
if (sndbuf_opt > 0 && sndbuf_opt != conn->send_win_max) {
k_mutex_lock(&conn->lock, K_FOREVER);
conn->send_win_max = sndbuf_opt;
if (conn->send_win > conn->send_win_max) {
conn->send_win = conn->send_win_max;
}
k_mutex_unlock(&conn->lock);
}
if (rcvbuf_opt > 0 && rcvbuf_opt != conn->recv_win_max) {
int diff;
k_mutex_lock(&conn->lock, K_FOREVER);
diff = rcvbuf_opt - conn->recv_win_max;
conn->recv_win_max = rcvbuf_opt;
tcp_update_recv_wnd(conn, diff);
k_mutex_unlock(&conn->lock);
}
}
/* TCP state machine, everything happens here */
static enum net_verdict tcp_in(struct tcp *conn, struct net_pkt *pkt)
{
struct tcphdr *th = pkt ? th_get(pkt) : NULL;
uint8_t next = 0, fl = 0;
bool do_close = false;
bool connection_ok = false;
size_t tcp_options_len = th ? (th_off(th) - 5) * 4 : 0;
struct net_conn *conn_handler = NULL;
struct net_pkt *recv_pkt;
void *recv_user_data;
struct k_fifo *recv_data_fifo;
size_t len;
int ret;
int close_status = 0;
enum net_verdict verdict = NET_DROP;
if (th) {
/* Currently we ignore ECN and CWR flags */
fl = th_flags(th) & ~(ECN | CWR);
}
if (conn->state != TCP_SYN_SENT) {
tcp_check_sock_options(conn);
}
k_mutex_lock(&conn->lock, K_FOREVER);
/* Connection context was already freed. */
if (conn->state == TCP_UNUSED) {
k_mutex_unlock(&conn->lock);
return NET_DROP;
}
NET_DBG("%s", tcp_conn_state(conn, pkt));
if (th && th_off(th) < 5) {
tcp_out(conn, RST);
do_close = true;
close_status = -ECONNRESET;
goto out;
}
if (FL(&fl, &, RST)) {
/* We only accept RST packet that has valid seq field. */
if (!tcp_validate_seq(conn, th)) {
net_stats_update_tcp_seg_rsterr(net_pkt_iface(pkt));
k_mutex_unlock(&conn->lock);
return NET_DROP;
}
/* Valid RST received. */
verdict = NET_OK;
net_stats_update_tcp_seg_rst(net_pkt_iface(pkt));
do_close = true;
close_status = -ECONNRESET;
/* If we receive RST and ACK for the sent SYN, it means
* that there is no socket listening the port we are trying
* to connect to. Set the errno properly in this case.
*/
if (conn->in_connect) {
fl = th_flags(th);
if (FL(&fl, ==, RST | ACK)) {
close_status = -ECONNREFUSED;
}
}
goto out;
}
if (tcp_options_len && !tcp_options_check(&conn->recv_options, pkt,
tcp_options_len)) {
NET_DBG("DROP: Invalid TCP option list");
tcp_out(conn, RST);
do_close = true;
close_status = -ECONNRESET;
goto out;
}
if (th && (conn->state != TCP_LISTEN) && (conn->state != TCP_SYN_SENT) &&
tcp_validate_seq(conn, th) && FL(&fl, &, SYN)) {
/* According to RFC 793, ch 3.9 Event Processing, receiving SYN
* once the connection has been established is an error
* condition, reset should be sent and connection closed.
*/
NET_DBG("conn: %p, SYN received in %s state, dropping connection",
conn, tcp_state_to_str(conn->state, false));
net_stats_update_tcp_seg_drop(conn->iface);
tcp_out(conn, RST);
do_close = true;
close_status = -ECONNRESET;
goto out;
}
if (th) {
conn->send_win = ntohs(th_win(th));
if (conn->send_win > conn->send_win_max) {
NET_DBG("Lowering send window from %u to %u",
conn->send_win, conn->send_win_max);
conn->send_win = conn->send_win_max;
}
if (conn->send_win == 0) {
if (!k_work_delayable_is_pending(&conn->persist_timer)) {
conn->zwp_retries = 0;
(void)k_work_reschedule_for_queue(
&tcp_work_q, &conn->persist_timer,
K_MSEC(TCP_RTO_MS));
}
} else {
(void)k_work_cancel_delayable(&conn->persist_timer);
}
if (tcp_window_full(conn)) {
(void)k_sem_take(&conn->tx_sem, K_NO_WAIT);
} else {
k_sem_give(&conn->tx_sem);
}
}
next_state:
len = pkt ? tcp_data_len(pkt) : 0;
switch (conn->state) {
case TCP_LISTEN:
if (FL(&fl, ==, SYN)) {
/* Make sure our MSS is also sent in the ACK */
conn->send_options.mss_found = true;
conn_ack(conn, th_seq(th) + 1); /* capture peer's isn */
tcp_out(conn, SYN | ACK);
conn->send_options.mss_found = false;
conn_seq(conn, + 1);
next = TCP_SYN_RECEIVED;
/* Close the connection if we do not receive ACK on time.
*/
k_work_reschedule_for_queue(&tcp_work_q,
&conn->establish_timer,
ACK_TIMEOUT);
verdict = NET_OK;
} else {
conn->send_options.mss_found = true;
tcp_out(conn, SYN);
conn->send_options.mss_found = false;
conn_seq(conn, + 1);
next = TCP_SYN_SENT;
tcp_conn_ref(conn);
}
break;
case TCP_SYN_RECEIVED:
if (FL(&fl, &, ACK, th_ack(th) == conn->seq &&
th_seq(th) == conn->ack)) {
net_tcp_accept_cb_t accept_cb = NULL;
struct net_context *context = NULL;
if (conn->accepted_conn != NULL) {
accept_cb = conn->accepted_conn->accept_cb;
context = conn->accepted_conn->context;
keep_alive_param_copy(conn, conn->accepted_conn);
}
k_work_cancel_delayable(&conn->establish_timer);
tcp_send_timer_cancel(conn);
tcp_conn_ref(conn);
net_context_set_state(conn->context,
NET_CONTEXT_CONNECTED);
/* Make sure the accept_cb is only called once. */
conn->accepted_conn = NULL;
if (accept_cb == NULL) {
/* In case of no accept_cb registered,
* application will not take ownership of the
* connection. To prevent connection leak, unref
* the TCP context and put the connection into
* active close (TCP_FIN_WAIT_1).
*/
net_tcp_put(conn->context);
break;
}
keep_alive_timer_restart(conn);
net_ipaddr_copy(&conn->context->remote, &conn->dst.sa);
/* Check if v4-mapping-to-v6 needs to be done for
* the accepted socket.
*/
if (IS_ENABLED(CONFIG_NET_IPV4_MAPPING_TO_IPV6) &&
net_context_get_family(conn->context) == AF_INET &&
net_context_get_family(context) == AF_INET6 &&
!net_context_is_v6only_set(context)) {
struct in6_addr mapped;
net_ipv6_addr_create_v4_mapped(
&net_sin(&conn->context->remote)->sin_addr,
&mapped);
net_ipaddr_copy(&net_sin6(&conn->context->remote)->sin6_addr,
&mapped);
net_sin6(&conn->context->remote)->sin6_family = AF_INET6;
NET_DBG("Setting v4 mapped address %s",
net_sprint_ipv6_addr(&mapped));
/* Note that we cannot set the local address to IPv6 one
* as that is used to match the connection, and not just
* for printing. The remote address is only used for
* passing it to accept() and printing it by "net conn"
* command.
*/
}
accept_cb(conn->context, &conn->context->remote,
net_context_get_family(context) == AF_INET6 ?
sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in),
0, context);
next = TCP_ESTABLISHED;
tcp_ca_init(conn);
if (len) {
verdict = tcp_data_get(conn, pkt, &len);
if (verdict == NET_OK) {
/* net_pkt owned by the recv fifo now */
pkt = NULL;
}
conn_ack(conn, + len);
tcp_out(conn, ACK);
} else {
verdict = NET_OK;
}
/* ACK for SYN | ACK has been received. This signilizes that
* the connection makes a "forward progress".
*/
tcp_nbr_reachability_hint(conn);
}
break;
case TCP_SYN_SENT:
/* if we are in SYN SENT and receive only a SYN without an
* ACK , shouldn't we go to SYN RECEIVED state? See Figure
* 6 of RFC 793
*/
if (FL(&fl, &, SYN | ACK, th && th_ack(th) == conn->seq)) {
tcp_send_timer_cancel(conn);
conn_ack(conn, th_seq(th) + 1);
if (len) {
verdict = tcp_data_get(conn, pkt, &len);
if (verdict == NET_OK) {
/* net_pkt owned by the recv fifo now */
pkt = NULL;
}
conn_ack(conn, + len);
} else {
verdict = NET_OK;
}
next = TCP_ESTABLISHED;
net_context_set_state(conn->context,
NET_CONTEXT_CONNECTED);
tcp_ca_init(conn);
tcp_out(conn, ACK);
keep_alive_timer_restart(conn);
/* The connection semaphore is released *after*
* we have changed the connection state. This way
* the application can send data and it is queued
* properly even if this thread is running in lower
* priority.
*/
connection_ok = true;
/* ACK for SYN has been received. This signilizes that
* the connection makes a "forward progress".
*/
tcp_nbr_reachability_hint(conn);
} else if (pkt) {
net_tcp_reply_rst(pkt);
}
break;
case TCP_ESTABLISHED:
/* full-close */
if (th && FL(&fl, ==, (FIN | ACK), th_seq(th) == conn->ack)) {
if (net_tcp_seq_cmp(th_ack(th), conn->seq) > 0) {
uint32_t len_acked = th_ack(th) - conn->seq;
conn_seq(conn, + len_acked);
}
conn_ack(conn, + 1);
tcp_out(conn, FIN | ACK);
conn_seq(conn, + 1);
next = TCP_LAST_ACK;
verdict = NET_OK;
keep_alive_timer_stop(conn);
tcp_setup_last_ack_timer(conn);
break;
} else if (th && FL(&fl, ==, FIN, th_seq(th) == conn->ack)) {
conn_ack(conn, + 1);
tcp_out(conn, ACK);
next = TCP_CLOSE_WAIT;
verdict = NET_OK;
keep_alive_timer_stop(conn);
break;
} else if (th && FL(&fl, ==, (FIN | ACK | PSH),
th_seq(th) == conn->ack)) {
if (len) {
verdict = tcp_data_get(conn, pkt, &len);
if (verdict == NET_OK) {
/* net_pkt owned by the recv fifo now */
pkt = NULL;
}
} else {
verdict = NET_OK;
}
conn_ack(conn, + len + 1);
tcp_out(conn, FIN | ACK);
conn_seq(conn, + 1);
next = TCP_LAST_ACK;
keep_alive_timer_stop(conn);
tcp_setup_last_ack_timer(conn);
break;
}
/* Whatever we've received, we know that peer is alive, so reset
* the keepalive timer.
*/
keep_alive_timer_restart(conn);
#ifdef CONFIG_NET_TCP_FAST_RETRANSMIT
if (th && (net_tcp_seq_cmp(th_ack(th), conn->seq) == 0)) {
/* Only if there is pending data, increment the duplicate ack count */
if (conn->send_data_total > 0) {
/* There could be also payload, only without payload account them */
if (len == 0) {
/* Increment the duplicate acc counter,
* but maximize the value
*/
conn->dup_ack_cnt = MIN(conn->dup_ack_cnt + 1,
DUPLICATE_ACK_RETRANSMIT_TRHESHOLD + 1);
tcp_ca_dup_ack(conn);
}
} else {
conn->dup_ack_cnt = 0;
}
/* Only do fast retransmit when not already in a resend state */
if ((conn->data_mode == TCP_DATA_MODE_SEND) &&
(conn->dup_ack_cnt == DUPLICATE_ACK_RETRANSMIT_TRHESHOLD)) {
/* Apply a fast retransmit */
int temp_unacked_len = conn->unacked_len;
conn->unacked_len = 0;
(void)tcp_send_data(conn);
/* Restore the current transmission */
conn->unacked_len = temp_unacked_len;
tcp_ca_fast_retransmit(conn);
if (tcp_window_full(conn)) {
(void)k_sem_take(&conn->tx_sem, K_NO_WAIT);
}
}
}
#endif
NET_ASSERT((conn->send_data_total == 0) ||
k_work_delayable_is_pending(&conn->send_data_timer),
"conn: %p, Missing a subscription "
"of the send_data queue timer", conn);
if (th && (net_tcp_seq_cmp(th_ack(th), conn->seq) > 0)) {
uint32_t len_acked = th_ack(th) - conn->seq;
NET_DBG("conn: %p len_acked=%u", conn, len_acked);
if ((conn->send_data_total < len_acked) ||
(tcp_pkt_pull(conn->send_data,
len_acked) < 0)) {
NET_ERR("conn: %p, Invalid len_acked=%u "
"(total=%zu)", conn, len_acked,
conn->send_data_total);
net_stats_update_tcp_seg_drop(conn->iface);
tcp_out(conn, RST);
do_close = true;
close_status = -ECONNRESET;
break;
}
#ifdef CONFIG_NET_TCP_FAST_RETRANSMIT
/* New segment, reset duplicate ack counter */
conn->dup_ack_cnt = 0;
#endif
tcp_ca_pkts_acked(conn, len_acked);
conn->send_data_total -= len_acked;
if (conn->unacked_len < len_acked) {
conn->unacked_len = 0;
} else {
conn->unacked_len -= len_acked;
}
if (!tcp_window_full(conn)) {
k_sem_give(&conn->tx_sem);
}
conn_seq(conn, + len_acked);
net_stats_update_tcp_seg_recv(conn->iface);
/* Receipt of an acknowledgment that covers a sequence number
* not previously acknowledged indicates that the connection
* makes a "forward progress".
*/
tcp_nbr_reachability_hint(conn);
conn_send_data_dump(conn);
conn->send_data_retries = 0;
if (conn->data_mode == TCP_DATA_MODE_RESEND) {
conn->unacked_len = 0;
tcp_derive_rto(conn);
}
conn->data_mode = TCP_DATA_MODE_SEND;
if (conn->send_data_total > 0) {
k_work_reschedule_for_queue(&tcp_work_q, &conn->send_data_timer,
K_MSEC(TCP_RTO_MS));
}
/* We are closing the connection, send a FIN to peer */
if (conn->in_close && conn->send_data_total == 0) {
tcp_send_timer_cancel(conn);
next = TCP_FIN_WAIT_1;
k_work_reschedule_for_queue(&tcp_work_q,
&conn->fin_timer,
FIN_TIMEOUT);
tcp_out(conn, FIN | ACK);
conn_seq(conn, + 1);
verdict = NET_OK;
keep_alive_timer_stop(conn);
break;
}
ret = tcp_send_queued_data(conn);
if (ret < 0 && ret != -ENOBUFS) {
tcp_out(conn, RST);
do_close = true;
close_status = ret;
verdict = NET_OK;
break;
}
if (tcp_window_full(conn)) {
(void)k_sem_take(&conn->tx_sem, K_NO_WAIT);
}
}
if (th) {
if (th_seq(th) == conn->ack) {
if (len > 0) {
verdict = tcp_data_received(conn, pkt, &len);
if (verdict == NET_OK) {
/* net_pkt owned by the recv fifo now */
pkt = NULL;
}
} else {
/* ACK, no data */
verdict = NET_OK;
}
} else if (net_tcp_seq_greater(conn->ack, th_seq(th))) {
/* This should handle the acknowledgements of keep alive
* packets and retransmitted data.
* RISK:
* There is a tiny risk of creating a ACK loop this way when
* both ends of the connection are out of order due to packet
* loss is a simulatanious bidirectional data flow.
*/
tcp_out(conn, ACK); /* peer has resent */
net_stats_update_tcp_seg_ackerr(conn->iface);
verdict = NET_OK;
} else if (CONFIG_NET_TCP_RECV_QUEUE_TIMEOUT) {
tcp_out_of_order_data(conn, pkt, len,
th_seq(th));
/* Send out a duplicated ACK */
if ((len > 0) || FL(&fl, &, FIN)) {
tcp_out(conn, ACK);
}
verdict = NET_OK;
}
}
/* Check if there is any data left to retransmit possibly*/
if (conn->send_data_total == 0) {
conn->send_data_retries = 0;
k_work_cancel_delayable(&conn->send_data_timer);
}
/* A lot could have happened to the transmission window check the situation here */
if (tcp_window_full(conn)) {
(void)k_sem_take(&conn->tx_sem, K_NO_WAIT);
} else {
k_sem_give(&conn->tx_sem);
}
break;
case TCP_CLOSE_WAIT:
tcp_out(conn, FIN);
conn_seq(conn, + 1);
next = TCP_LAST_ACK;
tcp_setup_last_ack_timer(conn);
break;
case TCP_LAST_ACK:
if (th && FL(&fl, ==, ACK, th_ack(th) == conn->seq)) {
tcp_send_timer_cancel(conn);
do_close = true;
verdict = NET_OK;
close_status = 0;
/* Remove the last ack timer if we received it in time */
tcp_cancel_last_ack_timer(conn);
}
break;
case TCP_CLOSED:
break;
case TCP_FIN_WAIT_1:
/*
* FIN1:
* Acknowledge path and sequence path are independent, treat them that way
* The table of incoming messages and their destination states:
* - & - -> TCP_FIN_WAIT_1
* FIN & - -> TCP_CLOSING
* - & ACK -> TCP_FIN_WAIT_2
* FIN & ACK -> TCP_TIME_WAIT
*/
if (th) {
bool fin_acked = false;
if (tcp_compute_new_length(conn, th, len, false) > 0) {
/* We do not implement half closed sockets, therefore
* cannot accept new data in after sending our FIN, as
* we are in sequence can send a reset now.
*/
net_stats_update_tcp_seg_drop(conn->iface);
next = tcp_enter_time_wait(conn);
tcp_out(conn, RST);
break;
}
if (FL(&fl, &, ACK, th_ack(th) == conn->seq)) {
NET_DBG("conn %p: FIN acknowledged, going to FIN_WAIT_2 "
"state seq %u, ack %u"
, conn, conn->seq, conn->ack);
tcp_send_timer_cancel(conn);
fin_acked = true;
next = TCP_FIN_WAIT_2;
verdict = NET_OK;
}
/*
* There can also be data in the message, so compute with the length
* of the packet to check the sequence number of the FIN flag with the ACK
*/
if (FL(&fl, &, FIN, net_tcp_seq_cmp(th_seq(th) + len, conn->ack) == 0)) {
conn_ack(conn, + 1);
/* State path is dependent on if the acknowledge is in */
if (fin_acked) {
/* Already acknowledged, we can go further */
NET_DBG("conn %p: FIN received, going to TIME WAIT", conn);
next = tcp_enter_time_wait(conn);
tcp_out(conn, ACK);
} else {
/* Fin not yet acknowledged, waiting for the ack in CLOSING
*/
NET_DBG("conn %p: FIN received, going to CLOSING as no "
"ACK has been received", conn);
tcp_send_timer_cancel(conn);
tcp_out_ext(conn, FIN | ACK, NULL, conn->seq - 1);
next = TCP_CLOSING;
}
verdict = NET_OK;
} else {
if (len > 0) {
if (fin_acked) {
/* Send out a duplicate ACK */
tcp_send_timer_cancel(conn);
tcp_out(conn, ACK);
} else {
/* In FIN1 state
* Send out a duplicate ACK, with the pending FIN
* flag
*/
tcp_send_timer_cancel(conn);
tcp_out_ext(conn, FIN | ACK, NULL, conn->seq - 1);
}
verdict = NET_OK;
}
}
}
break;
case TCP_FIN_WAIT_2:
/*
* FIN2:
* Only FIN is relevant in this state, as our FIN was already acknowledged
* - -> TCP_FIN_WAIT_2
* FIN -> TCP_TIME_WAIT
*/
if (th) {
/* No tcp_send_timer_cancel call required here, as is has been called
* before entering this state, only allowed through the
* tcp_enter_time_wait function.
*/
/* Compute if there is new data after our close */
if (tcp_compute_new_length(conn, th, len, false) > 0) {
/* We do not implement half closed sockets, therefore
* cannot accept new data in after sending our FIN, as
* we are in sequence can send a reset now.
*/
net_stats_update_tcp_seg_drop(conn->iface);
next = tcp_enter_time_wait(conn);
tcp_out(conn, RST);
break;
}
/*
* There can also be data in the message, so compute with the length
* of the packet to check the sequence number of the FIN flag with the ACK
*/
if (FL(&fl, &, FIN, net_tcp_seq_cmp(th_seq(th) + len, conn->ack) == 0)) {
conn_ack(conn, + 1);
NET_DBG("conn %p: FIN received, going to TIME WAIT", conn);
next = tcp_enter_time_wait(conn);
verdict = NET_OK;
tcp_out(conn, ACK);
} else {
if (len > 0) {
/* Send out a duplicate ACK */
tcp_out(conn, ACK);
verdict = NET_OK;
}
}
}
break;
case TCP_CLOSING:
if (th) {
bool fin_acked = false;
/*
* Closing:
* Our FIN has to be acknowledged
* - -> TCP_CLOSING
* ACK -> TCP_TIME_WAIT
*/
int32_t new_len = tcp_compute_new_length(conn, th, len, true);
if (new_len > 0) {
/* This should not happen here, as no data can be send after
* the FIN flag has been send.
*/
NET_ERR("conn: %p, new bytes %u during CLOSING state "
"sending reset", conn, new_len);
net_stats_update_tcp_seg_drop(conn->iface);
next = tcp_enter_time_wait(conn);
tcp_out(conn, RST);
break;
}
if (FL(&fl, &, ACK, th_ack(th) == conn->seq)) {
NET_DBG("conn %p: FIN acknowledged, going to TIME WAIT "
"state seq %u, ack %u"
, conn, conn->seq, conn->ack);
next = tcp_enter_time_wait(conn);
fin_acked = true;
verdict = NET_OK;
}
/*
* There can also be data in the message, so compute with the length
* of the packet to check with the ack
* Since the conn->ack was already incremented in TCP_FIN_WAIT_1
* add 1 in the comparison sequence
*/
if ((FL(&fl, &, FIN,
net_tcp_seq_cmp(th_seq(th) + len + 1, conn->ack) == 0)) ||
(len > 0)) {
tcp_send_timer_cancel(conn);
if (fin_acked) {
/* Send out a duplicate ACK */
tcp_out(conn, ACK);
} else {
/* Send out a duplicate ACK, with the pending FIN
* flag
*/
tcp_out_ext(conn, FIN | ACK, NULL, conn->seq - 1);
}
verdict = NET_OK;
}
}
break;
case TCP_TIME_WAIT:
if (th) {
int32_t new_len = tcp_compute_new_length(conn, th, len, true);
/* No tcp_send_timer_cancel call required here, as is has been called
* before entering this state, only allowed through the
* tcp_enter_time_wait function.
*/
if (new_len > 0) {
/* This should not happen here, as no data can be send after
* the FIN flag has been send.
*/
NET_ERR("conn: %p, new bytes %u during TIME-WAIT state "
"sending reset", conn, new_len);
net_stats_update_tcp_seg_drop(conn->iface);
tcp_out(conn, RST);
} else {
/* Acknowledge any FIN attempts, in case retransmission took
* place.
*/
if ((FL(&fl, &, FIN,
net_tcp_seq_cmp(th_seq(th) + 1, conn->ack) == 0)) ||
(len > 0)) {
tcp_out(conn, ACK);
verdict = NET_OK;
}
}
}
break;
default:
NET_ASSERT(false, "%s is unimplemented",
tcp_state_to_str(conn->state, true));
}
out:
if (pkt) {
if (verdict == NET_OK) {
net_pkt_unref(pkt);
}
pkt = NULL;
}
if (next) {
th = NULL;
conn_state(conn, next);
next = 0;
if (connection_ok) {
conn->in_connect = false;
if (conn->connect_cb) {
conn->connect_cb(conn->context, 0, conn->context->user_data);
/* Make sure the connect_cb is only called once. */
conn->connect_cb = NULL;
}
k_sem_give(&conn->connect_sem);
}
goto next_state;
}
if (conn->context) {
/* If the conn->context is not set, then the connection was
* already closed.
*/
conn_handler = (struct net_conn *)conn->context->conn_handler;
}
recv_user_data = conn->recv_user_data;
recv_data_fifo = &conn->recv_data;
k_mutex_unlock(&conn->lock);
/* Pass all the received data stored in recv fifo to the application.
* This is done like this so that we do not have any connection lock
* held.
*/
while (conn_handler && atomic_get(&conn->ref_count) > 0 &&
(recv_pkt = k_fifo_get(recv_data_fifo, K_NO_WAIT)) != NULL) {
if (net_context_packet_received(conn_handler, recv_pkt, NULL,
NULL, recv_user_data) ==
NET_DROP) {
/* Application is no longer there, unref the pkt */
tcp_pkt_unref(recv_pkt);
}
}
/* Make sure we close the connection only once by checking connection
* state.
*/
if (do_close && conn->state != TCP_UNUSED && conn->state != TCP_CLOSED) {
tcp_conn_close(conn, close_status);
}
return verdict;
}
/* Active connection close: send FIN and go to FIN_WAIT_1 state */
int net_tcp_put(struct net_context *context)
{
struct tcp *conn = context->tcp;
if (!conn) {
return -ENOENT;
}
k_mutex_lock(&conn->lock, K_FOREVER);
NET_DBG("%s", conn ? tcp_conn_state(conn, NULL) : "");
NET_DBG("context %p %s", context,
({ const char *state = net_context_state(context);
state ? state : "<unknown>"; }));
if (conn && (conn->state == TCP_ESTABLISHED ||
conn->state == TCP_SYN_RECEIVED)) {
/* Send all remaining data if possible. */
if (conn->send_data_total > 0) {
NET_DBG("conn %p pending %zu bytes", conn,
conn->send_data_total);
conn->in_close = true;
/* How long to wait until all the data has been sent?
*/
k_work_reschedule_for_queue(&tcp_work_q,
&conn->send_data_timer,
K_MSEC(TCP_RTO_MS));
} else {
int ret;
NET_DBG("TCP connection in %s close, "
"not disposing yet (waiting %dms)",
"active", tcp_fin_timeout_ms);
k_work_reschedule_for_queue(&tcp_work_q,
&conn->fin_timer,
FIN_TIMEOUT);
ret = tcp_out_ext(conn, FIN | ACK, NULL,
conn->seq + conn->unacked_len);
if (ret == 0) {
conn_seq(conn, + 1);
}
conn_state(conn, TCP_FIN_WAIT_1);
keep_alive_timer_stop(conn);
}
} else if (conn && conn->in_connect) {
conn->in_connect = false;
}
k_mutex_unlock(&conn->lock);
tcp_conn_unref(conn);
return 0;
}
int net_tcp_listen(struct net_context *context)
{
/* when created, tcp connections are in state TCP_LISTEN */
net_context_set_state(context, NET_CONTEXT_LISTENING);
return 0;
}
int net_tcp_update_recv_wnd(struct net_context *context, int32_t delta)
{
struct tcp *conn = context->tcp;
int ret;
if (!conn) {
NET_ERR("context->tcp == NULL");
return -EPROTOTYPE;
}
k_mutex_lock(&conn->lock, K_FOREVER);
ret = tcp_update_recv_wnd((struct tcp *)context->tcp, delta);
k_mutex_unlock(&conn->lock);
return ret;
}
int net_tcp_queue(struct net_context *context, const void *data, size_t len,
const struct msghdr *msg)
{
struct tcp *conn = context->tcp;
size_t queued_len = 0;
int ret = 0;
if (!conn || conn->state != TCP_ESTABLISHED) {
return -ENOTCONN;
}
k_mutex_lock(&conn->lock, K_FOREVER);
/* If there is no space to transmit, try at a later time.
* The ZWP will make sure the window becomes available at
* some point in time.
*/
if (tcp_window_full(conn)) {
ret = -EAGAIN;
goto out;
}
if (msg) {
len = 0;
for (int i = 0; i < msg->msg_iovlen; i++) {
len += msg->msg_iov[i].iov_len;
}
}
/* Queue no more than TX window permits. It's guaranteed at this point
* that conn->send_data_total is less than conn->send_win, as it was
* verified in tcp_window_full() check above. As the connection mutex
* is held, their values shall not change since.
*/
len = MIN(conn->send_win - conn->send_data_total, len);
if (msg) {
for (int i = 0; i < msg->msg_iovlen; i++) {
int iovlen = MIN(msg->msg_iov[i].iov_len, len);
ret = tcp_pkt_append(conn->send_data,
msg->msg_iov[i].iov_base,
iovlen);
if (ret < 0) {
if (queued_len == 0) {
goto out;
} else {
break;
}
}
queued_len += iovlen;
len -= iovlen;
if (len == 0) {
break;
}
}
} else {
ret = tcp_pkt_append(conn->send_data, data, len);
if (ret < 0) {
goto out;
}
queued_len = len;
}
conn->send_data_total += queued_len;
/* Successfully queued data for transmission. Even if there's a transmit
* failure now (out-of-buf case), it can be ignored for now, retransmit
* timer will take care of queued data retransmission.
*/
ret = tcp_send_queued_data(conn);
if (ret < 0 && ret != -ENOBUFS) {
tcp_conn_close(conn, ret);
goto out;
}
if (tcp_window_full(conn)) {
(void)k_sem_take(&conn->tx_sem, K_NO_WAIT);
}
ret = queued_len;
out:
k_mutex_unlock(&conn->lock);
return ret;
}
/* net context is about to send out queued data - inform caller only */
int net_tcp_send_data(struct net_context *context, net_context_send_cb_t cb,
void *user_data)
{
if (cb) {
cb(context, 0, user_data);
}
return 0;
}
/* When connect() is called on a TCP socket, register the socket for incoming
* traffic with net context and give the TCP packet receiving function, which
* in turn will call tcp_in() to deliver the TCP packet to the stack
*/
int net_tcp_connect(struct net_context *context,
const struct sockaddr *remote_addr,
struct sockaddr *local_addr,
uint16_t remote_port, uint16_t local_port,
k_timeout_t timeout, net_context_connect_cb_t cb,
void *user_data)
{
struct tcp *conn;
int ret = 0;
NET_DBG("context: %p, local: %s, remote: %s", context,
net_sprint_addr(local_addr->sa_family,
(const void *)&net_sin(local_addr)->sin_addr),
net_sprint_addr(remote_addr->sa_family,
(const void *)&net_sin(remote_addr)->sin_addr));
conn = context->tcp;
conn->iface = net_context_get_iface(context);
tcp_derive_rto(conn);
switch (net_context_get_family(context)) {
const struct in_addr *ip4;
const struct in6_addr *ip6;
case AF_INET:
if (!IS_ENABLED(CONFIG_NET_IPV4)) {
ret = -EINVAL;
goto out;
}
memset(&conn->src, 0, sizeof(struct sockaddr_in));
memset(&conn->dst, 0, sizeof(struct sockaddr_in));
conn->src.sa.sa_family = AF_INET;
conn->dst.sa.sa_family = AF_INET;
conn->dst.sin.sin_port = remote_port;
conn->src.sin.sin_port = local_port;
/* we have to select the source address here as
* net_context_create_ipv4_new() is not called in the packet
* output chain
*/
if (net_ipv4_is_addr_unspecified(
&net_sin(local_addr)->sin_addr)) {
ip4 = net_if_ipv4_select_src_addr(
net_context_get_iface(context),
&net_sin(remote_addr)->sin_addr);
net_ipaddr_copy(&conn->src.sin.sin_addr, ip4);
} else {
net_ipaddr_copy(&conn->src.sin.sin_addr,
&net_sin(local_addr)->sin_addr);
}
net_ipaddr_copy(&conn->dst.sin.sin_addr,
&net_sin(remote_addr)->sin_addr);
break;
case AF_INET6:
if (!IS_ENABLED(CONFIG_NET_IPV6)) {
ret = -EINVAL;
goto out;
}
memset(&conn->src, 0, sizeof(struct sockaddr_in6));
memset(&conn->dst, 0, sizeof(struct sockaddr_in6));
conn->src.sin6.sin6_family = AF_INET6;
conn->dst.sin6.sin6_family = AF_INET6;
conn->dst.sin6.sin6_port = remote_port;
conn->src.sin6.sin6_port = local_port;
if (net_ipv6_is_addr_unspecified(
&net_sin6(local_addr)->sin6_addr)) {
ip6 = net_if_ipv6_select_src_addr(
net_context_get_iface(context),
&net_sin6(remote_addr)->sin6_addr);
net_ipaddr_copy(&conn->src.sin6.sin6_addr, ip6);
} else {
net_ipaddr_copy(&conn->src.sin6.sin6_addr,
&net_sin6(local_addr)->sin6_addr);
}
net_ipaddr_copy(&conn->dst.sin6.sin6_addr,
&net_sin6(remote_addr)->sin6_addr);
break;
default:
ret = -EPROTONOSUPPORT;
}
if (!(IS_ENABLED(CONFIG_NET_TEST_PROTOCOL) ||
IS_ENABLED(CONFIG_NET_TEST))) {
conn->seq = tcp_init_isn(&conn->src.sa, &conn->dst.sa);
}
NET_DBG("conn: %p src: %s, dst: %s", conn,
net_sprint_addr(conn->src.sa.sa_family,
(const void *)&conn->src.sin.sin_addr),
net_sprint_addr(conn->dst.sa.sa_family,
(const void *)&conn->dst.sin.sin_addr));
net_context_set_state(context, NET_CONTEXT_CONNECTING);
ret = net_conn_register(net_context_get_proto(context),
net_context_get_family(context),
remote_addr, local_addr,
ntohs(remote_port), ntohs(local_port),
context, tcp_recv, context,
&context->conn_handler);
if (ret < 0) {
goto out;
}
conn->connect_cb = cb;
context->user_data = user_data;
/* Input of a (nonexistent) packet with no flags set will cause
* a TCP connection to be established
*/
conn->in_connect = !IS_ENABLED(CONFIG_NET_TEST_PROTOCOL);
(void)tcp_in(conn, NULL);
if (!IS_ENABLED(CONFIG_NET_TEST_PROTOCOL)) {
if ((K_TIMEOUT_EQ(timeout, K_NO_WAIT)) &&
conn->state != TCP_ESTABLISHED) {
ret = -EINPROGRESS;
goto out;
} else if (k_sem_take(&conn->connect_sem, timeout) != 0 &&
conn->state != TCP_ESTABLISHED) {
if (conn->in_connect) {
conn->in_connect = false;
tcp_conn_close(conn, -ETIMEDOUT);
}
ret = -ETIMEDOUT;
goto out;
}
conn->in_connect = false;
}
out:
NET_DBG("conn: %p, ret=%d", conn, ret);
return ret;
}
int net_tcp_accept(struct net_context *context, net_tcp_accept_cb_t cb,
void *user_data)
{
struct tcp *conn = context->tcp;
struct sockaddr local_addr = { };
uint16_t local_port, remote_port;
if (!conn) {
return -EINVAL;
}
NET_DBG("context: %p, tcp: %p, cb: %p", context, conn, cb);
if (conn->state != TCP_LISTEN) {
return -EINVAL;
}
conn->accept_cb = cb;
local_addr.sa_family = net_context_get_family(context);
switch (local_addr.sa_family) {
struct sockaddr_in *in;
struct sockaddr_in6 *in6;
case AF_INET:
if (!IS_ENABLED(CONFIG_NET_IPV4)) {
return -EINVAL;
}
in = (struct sockaddr_in *)&local_addr;
if (net_sin_ptr(&context->local)->sin_addr) {
net_ipaddr_copy(&in->sin_addr,
net_sin_ptr(&context->local)->sin_addr);
}
in->sin_port =
net_sin((struct sockaddr *)&context->local)->sin_port;
local_port = ntohs(in->sin_port);
remote_port = ntohs(net_sin(&context->remote)->sin_port);
break;
case AF_INET6:
if (!IS_ENABLED(CONFIG_NET_IPV6)) {
return -EINVAL;
}
in6 = (struct sockaddr_in6 *)&local_addr;
if (net_sin6_ptr(&context->local)->sin6_addr) {
net_ipaddr_copy(&in6->sin6_addr,
net_sin6_ptr(&context->local)->sin6_addr);
}
in6->sin6_port =
net_sin6((struct sockaddr *)&context->local)->sin6_port;
local_port = ntohs(in6->sin6_port);
remote_port = ntohs(net_sin6(&context->remote)->sin6_port);
break;
default:
return -EINVAL;
}
context->user_data = user_data;
/* Remove the temporary connection handler and register
* a proper now as we have an established connection.
*/
net_conn_unregister(context->conn_handler);
return net_conn_register(net_context_get_proto(context),
local_addr.sa_family,
context->flags & NET_CONTEXT_REMOTE_ADDR_SET ?
&context->remote : NULL,
&local_addr,
remote_port, local_port,
context, tcp_recv, context,
&context->conn_handler);
}
int net_tcp_recv(struct net_context *context, net_context_recv_cb_t cb,
void *user_data)
{
struct tcp *conn = context->tcp;
NET_DBG("context: %p, cb: %p, user_data: %p", context, cb, user_data);
context->recv_cb = cb;
if (conn) {
conn->recv_user_data = user_data;
}
return 0;
}
int net_tcp_finalize(struct net_pkt *pkt, bool force_chksum)
{
NET_PKT_DATA_ACCESS_DEFINE(tcp_access, struct net_tcp_hdr);
struct net_tcp_hdr *tcp_hdr;
tcp_hdr = (struct net_tcp_hdr *)net_pkt_get_data(pkt, &tcp_access);
if (!tcp_hdr) {
return -ENOBUFS;
}
tcp_hdr->chksum = 0U;
if (net_if_need_calc_tx_checksum(net_pkt_iface(pkt)) || force_chksum) {
tcp_hdr->chksum = net_calc_chksum_tcp(pkt);
net_pkt_set_chksum_done(pkt, true);
}
return net_pkt_set_data(pkt, &tcp_access);
}
struct net_tcp_hdr *net_tcp_input(struct net_pkt *pkt,
struct net_pkt_data_access *tcp_access)
{
struct net_tcp_hdr *tcp_hdr;
if (IS_ENABLED(CONFIG_NET_TCP_CHECKSUM) &&
(net_if_need_calc_rx_checksum(net_pkt_iface(pkt)) ||
net_pkt_is_ip_reassembled(pkt)) &&
net_calc_chksum_tcp(pkt) != 0U) {
NET_DBG("DROP: checksum mismatch");
goto drop;
}
tcp_hdr = (struct net_tcp_hdr *)net_pkt_get_data(pkt, tcp_access);
if (tcp_hdr && !net_pkt_set_data(pkt, tcp_access)) {
return tcp_hdr;
}
drop:
net_stats_update_tcp_seg_chkerr(net_pkt_iface(pkt));
return NULL;
}
#if defined(CONFIG_NET_TEST_PROTOCOL)
static enum net_verdict tcp_input(struct net_conn *net_conn,
struct net_pkt *pkt,
union net_ip_header *ip,
union net_proto_header *proto,
void *user_data)
{
struct tcphdr *th = th_get(pkt);
enum net_verdict verdict = NET_DROP;
if (th) {
struct tcp *conn = tcp_conn_search(pkt);
if (conn == NULL && SYN == th_flags(th)) {
struct net_context *context =
tcp_calloc(1, sizeof(struct net_context));
net_tcp_get(context);
net_context_set_family(context, net_pkt_family(pkt));
conn = context->tcp;
tcp_endpoint_set(&conn->dst, pkt, TCP_EP_SRC);
tcp_endpoint_set(&conn->src, pkt, TCP_EP_DST);
/* Make an extra reference, the sanity check suite
* will delete the connection explicitly
*/
tcp_conn_ref(conn);
}
if (conn) {
conn->iface = pkt->iface;
verdict = tcp_in(conn, pkt);
}
}
return verdict;
}
static size_t tp_tcp_recv_cb(struct tcp *conn, struct net_pkt *pkt)
{
ssize_t len = tcp_data_len(pkt);
struct net_pkt *up = tcp_pkt_clone(pkt);
NET_DBG("pkt: %p, len: %zu", pkt, net_pkt_get_len(pkt));
net_pkt_cursor_init(up);
net_pkt_set_overwrite(up, true);
net_pkt_pull(up, net_pkt_get_len(up) - len);
for (struct net_buf *buf = pkt->buffer; buf != NULL; buf = buf->frags) {
net_tcp_queue(conn->context, buf->data, buf->len);
}
return len;
}
static ssize_t tp_tcp_recv(int fd, void *buf, size_t len, int flags)
{
return 0;
}
static void tp_init(struct tcp *conn, struct tp *tp)
{
struct tp out = {
.msg = "",
.status = "",
.state = tcp_state_to_str(conn->state, true),
.seq = conn->seq,
.ack = conn->ack,
.rcv = "",
.data = "",
.op = "",
};
*tp = out;
}
static void tcp_to_json(struct tcp *conn, void *data, size_t *data_len)
{
struct tp tp;
tp_init(conn, &tp);
tp_encode(&tp, data, data_len);
}
enum net_verdict tp_input(struct net_conn *net_conn,
struct net_pkt *pkt,
union net_ip_header *ip_hdr,
union net_proto_header *proto,
void *user_data)
{
struct net_udp_hdr *uh = net_udp_get_hdr(pkt, NULL);
size_t data_len = ntohs(uh->len) - sizeof(*uh);
struct tcp *conn = tcp_conn_search(pkt);
size_t json_len = 0;
struct tp *tp;
struct tp_new *tp_new;
enum tp_type type;
bool responded = false;
static char buf[512];
enum net_verdict verdict = NET_DROP;
net_pkt_cursor_init(pkt);
net_pkt_set_overwrite(pkt, true);
net_pkt_skip(pkt, net_pkt_ip_hdr_len(pkt) +
net_pkt_ip_opts_len(pkt) + sizeof(*uh));
net_pkt_read(pkt, buf, data_len);
buf[data_len] = '\0';
data_len += 1;
type = json_decode_msg(buf, data_len);
data_len = ntohs(uh->len) - sizeof(*uh);
net_pkt_cursor_init(pkt);
net_pkt_set_overwrite(pkt, true);
net_pkt_skip(pkt, net_pkt_ip_hdr_len(pkt) +
net_pkt_ip_opts_len(pkt) + sizeof(*uh));
net_pkt_read(pkt, buf, data_len);
buf[data_len] = '\0';
data_len += 1;
switch (type) {
case TP_CONFIG_REQUEST:
tp_new = json_to_tp_new(buf, data_len);
break;
default:
tp = json_to_tp(buf, data_len);
break;
}
switch (type) {
case TP_COMMAND:
if (is("CONNECT", tp->op)) {
tp_output(pkt->family, pkt->iface, buf, 1);
responded = true;
{
struct net_context *context = tcp_calloc(1,
sizeof(struct net_context));
net_tcp_get(context);
net_context_set_family(context,
net_pkt_family(pkt));
conn = context->tcp;
tcp_endpoint_set(&conn->dst, pkt, TCP_EP_SRC);
tcp_endpoint_set(&conn->src, pkt, TCP_EP_DST);
conn->iface = pkt->iface;
tcp_conn_ref(conn);
}
conn->seq = tp->seq;
verdict = tcp_in(conn, NULL);
}
if (is("CLOSE", tp->op)) {
tp_trace = false;
{
struct net_context *context;
conn = (void *)sys_slist_peek_head(&tcp_conns);
context = conn->context;
while (tcp_conn_close(conn, 0))
;
tcp_free(context);
}
tp_mem_stat();
tp_nbuf_stat();
tp_pkt_stat();
tp_seq_stat();
}
if (is("CLOSE2", tp->op)) {
struct tcp *conn =
(void *)sys_slist_peek_head(&tcp_conns);
net_tcp_put(conn->context);
}
if (is("RECV", tp->op)) {
#define HEXSTR_SIZE 64
char hexstr[HEXSTR_SIZE];
ssize_t len = tp_tcp_recv(0, buf, sizeof(buf), 0);
tp_init(conn, tp);
bin2hex(buf, len, hexstr, HEXSTR_SIZE);
tp->data = hexstr;
NET_DBG("%zd = tcp_recv(\"%s\")", len, tp->data);
json_len = sizeof(buf);
tp_encode(tp, buf, &json_len);
}
if (is("SEND", tp->op)) {
ssize_t len = tp_str_to_hex(buf, sizeof(buf), tp->data);
struct tcp *conn =
(void *)sys_slist_peek_head(&tcp_conns);
tp_output(pkt->family, pkt->iface, buf, 1);
responded = true;
NET_DBG("tcp_send(\"%s\")", tp->data);
{
net_tcp_queue(conn->context, buf, len);
}
}
break;
case TP_CONFIG_REQUEST:
tp_new_find_and_apply(tp_new, "tcp_rto", &tcp_rto, TP_INT);
tp_new_find_and_apply(tp_new, "tcp_retries", &tcp_retries,
TP_INT);
tp_new_find_and_apply(tp_new, "tcp_window", &tcp_rx_window,
TP_INT);
tp_new_find_and_apply(tp_new, "tp_trace", &tp_trace, TP_BOOL);
break;
case TP_INTROSPECT_REQUEST:
json_len = sizeof(buf);
conn = (void *)sys_slist_peek_head(&tcp_conns);
tcp_to_json(conn, buf, &json_len);
break;
case TP_DEBUG_STOP: case TP_DEBUG_CONTINUE:
tp_state = tp->type;
break;
default:
NET_ASSERT(false, "Unimplemented tp command: %s", tp->msg);
}
if (json_len) {
tp_output(pkt->family, pkt->iface, buf, json_len);
} else if ((TP_CONFIG_REQUEST == type || TP_COMMAND == type)
&& responded == false) {
tp_output(pkt->family, pkt->iface, buf, 1);
}
return verdict;
}
static void test_cb_register(sa_family_t family, uint8_t proto, uint16_t remote_port,
uint16_t local_port, net_conn_cb_t cb)
{
struct net_conn_handle *conn_handle = NULL;
const struct sockaddr addr = { .sa_family = family, };
int ret = net_conn_register(proto,
family,
&addr, /* remote address */
&addr, /* local address */
local_port,
remote_port,
NULL,
cb,
NULL, /* user_data */
&conn_handle);
if (ret < 0) {
NET_ERR("net_conn_register(): %d", ret);
}
}
#endif /* CONFIG_NET_TEST_PROTOCOL */
void net_tcp_foreach(net_tcp_cb_t cb, void *user_data)
{
struct tcp *conn;
struct tcp *tmp;
k_mutex_lock(&tcp_lock, K_FOREVER);
SYS_SLIST_FOR_EACH_CONTAINER_SAFE(&tcp_conns, conn, tmp, next) {
if (atomic_get(&conn->ref_count) > 0) {
k_mutex_unlock(&tcp_lock);
cb(conn, user_data);
k_mutex_lock(&tcp_lock, K_FOREVER);
}
}
k_mutex_unlock(&tcp_lock);
}
uint16_t net_tcp_get_supported_mss(const struct tcp *conn)
{
sa_family_t family = net_context_get_family(conn->context);
if (family == AF_INET) {
#if defined(CONFIG_NET_IPV4)
struct net_if *iface = net_context_get_iface(conn->context);
int mss = 0;
if (iface && net_if_get_mtu(iface) >= NET_IPV4TCPH_LEN) {
/* Detect MSS based on interface MTU minus "TCP,IP
* header size"
*/
mss = net_if_get_mtu(iface) - NET_IPV4TCPH_LEN;
}
if (mss == 0) {
mss = NET_IPV4_MTU - NET_IPV4TCPH_LEN;
}
return mss;
#else
return 0;
#endif /* CONFIG_NET_IPV4 */
}
#if defined(CONFIG_NET_IPV6)
else if (family == AF_INET6) {
struct net_if *iface = net_context_get_iface(conn->context);
int mss = 0;
if (iface && net_if_get_mtu(iface) >= NET_IPV6TCPH_LEN) {
/* Detect MSS based on interface MTU minus "TCP,IP
* header size"
*/
mss = net_if_get_mtu(iface) - NET_IPV6TCPH_LEN;
}
if (mss == 0) {
mss = NET_IPV6_MTU - NET_IPV6TCPH_LEN;
}
return mss;
}
#endif /* CONFIG_NET_IPV6 */
return 0;
}
int net_tcp_set_option(struct net_context *context,
enum tcp_conn_option option,
const void *value, size_t len)
{
int ret = 0;
NET_ASSERT(context);
struct tcp *conn = context->tcp;
NET_ASSERT(conn);
k_mutex_lock(&conn->lock, K_FOREVER);
switch (option) {
case TCP_OPT_NODELAY:
ret = set_tcp_nodelay(conn, value, len);
break;
case TCP_OPT_KEEPALIVE:
ret = set_tcp_keep_alive(conn, value, len);
break;
case TCP_OPT_KEEPIDLE:
ret = set_tcp_keep_idle(conn, value, len);
break;
case TCP_OPT_KEEPINTVL:
ret = set_tcp_keep_intvl(conn, value, len);
break;
case TCP_OPT_KEEPCNT:
ret = set_tcp_keep_cnt(conn, value, len);
break;
}
k_mutex_unlock(&conn->lock);
return ret;
}
int net_tcp_get_option(struct net_context *context,
enum tcp_conn_option option,
void *value, size_t *len)
{
int ret = 0;
NET_ASSERT(context);
struct tcp *conn = context->tcp;
NET_ASSERT(conn);
k_mutex_lock(&conn->lock, K_FOREVER);
switch (option) {
case TCP_OPT_NODELAY:
ret = get_tcp_nodelay(conn, value, len);
break;
case TCP_OPT_KEEPALIVE:
ret = get_tcp_keep_alive(conn, value, len);
break;
case TCP_OPT_KEEPIDLE:
ret = get_tcp_keep_idle(conn, value, len);
break;
case TCP_OPT_KEEPINTVL:
ret = get_tcp_keep_intvl(conn, value, len);
break;
case TCP_OPT_KEEPCNT:
ret = get_tcp_keep_cnt(conn, value, len);
break;
}
k_mutex_unlock(&conn->lock);
return ret;
}
const char *net_tcp_state_str(enum tcp_state state)
{
return tcp_state_to_str(state, false);
}
struct k_sem *net_tcp_tx_sem_get(struct net_context *context)
{
struct tcp *conn = context->tcp;
return &conn->tx_sem;
}
struct k_sem *net_tcp_conn_sem_get(struct net_context *context)
{
struct tcp *conn = context->tcp;
return &conn->connect_sem;
}
void net_tcp_init(void)
{
int i;
int rto;
#if defined(CONFIG_NET_TEST_PROTOCOL)
/* Register inputs for TTCN-3 based TCP sanity check */
test_cb_register(AF_INET, IPPROTO_TCP, 4242, 4242, tcp_input);
test_cb_register(AF_INET6, IPPROTO_TCP, 4242, 4242, tcp_input);
test_cb_register(AF_INET, IPPROTO_UDP, 4242, 4242, tp_input);
test_cb_register(AF_INET6, IPPROTO_UDP, 4242, 4242, tp_input);
tcp_recv_cb = tp_tcp_recv_cb;
#endif
#if defined(CONFIG_NET_TC_THREAD_COOPERATIVE)
#define THREAD_PRIORITY K_PRIO_COOP(CONFIG_NET_TCP_WORKER_PRIO)
#else
#define THREAD_PRIORITY K_PRIO_PREEMPT(CONFIG_NET_TCP_WORKER_PRIO)
#endif
/* Use private workqueue in order not to block the system work queue.
*/
k_work_queue_start(&tcp_work_q, work_q_stack,
K_KERNEL_STACK_SIZEOF(work_q_stack), THREAD_PRIORITY,
NULL);
/* Compute the largest possible retransmission timeout */
tcp_fin_timeout_ms = 0;
rto = tcp_rto;
for (i = 0; i < tcp_retries; i++) {
tcp_fin_timeout_ms += rto;
rto += rto >> 1;
}
/* At the last timeout cicle */
tcp_fin_timeout_ms += tcp_rto;
/* When CONFIG_NET_TCP_RANDOMIZED_RTO is active in can be worse case 1.5 times larger */
if (IS_ENABLED(CONFIG_NET_TCP_RANDOMIZED_RTO)) {
tcp_fin_timeout_ms += tcp_fin_timeout_ms >> 1;
}
k_thread_name_set(&tcp_work_q.thread, "tcp_work");
NET_DBG("Workq started. Thread ID: %p", &tcp_work_q.thread);
}