/***
 *
 *  ipv4/tcp/tcp.c - TCP implementation for RTnet
 *
 *  Copyright (C) 2009 Vladimir Zapolskiy <vladimir.zapolskiy@siemens.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License, version 2, as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#include <linux/moduleparam.h>
#include <linux/list.h>
#include <linux/skbuff.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/completion.h>
#include <net/tcp_states.h>
#include <net/tcp.h>

#include <rtdm/driver.h>
#include <rtnet_rtpc.h>
#include <rtskb.h>
#include <rtdev.h>
#include <rtnet_port.h>
#include <rtnet_checksum.h>
#include <ipv4/tcp.h>
#include <ipv4/ip_sock.h>
#include <ipv4/ip_output.h>
#include <ipv4/ip_fragment.h>
#include <ipv4/route.h>
#include <ipv4/af_inet.h>
#include "timerwheel.h"

static unsigned int close_timeout = 1000;
module_param(close_timeout, uint, 0664);
MODULE_PARM_DESC(close_timeout,
		 "max time (ms) to wait during close for FIN-ACK handshake to complete, default 1000");

#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION

static unsigned int error_rate;
module_param(error_rate, uint, 0664);
MODULE_PARM_DESC(error_rate, "simulate packet loss after every n packets");

static unsigned int multi_error = 1;
module_param(multi_error, uint, 0664);
MODULE_PARM_DESC(multi_error, "on simulated error, drop n packets in a row");

static unsigned int counter_start = 1234;
module_param(counter_start, uint, 0664);
MODULE_PARM_DESC(counter_start, "start value of per-socket packet counter "
		 "(used for error injection)");

#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION */

struct tcp_sync {
	u32 seq;
	u32 ack_seq;

	/* Local window size sent to peer */
	u16 window;
	/* Last received destination peer window size */
	u16 dst_window;
};

/*
  connection timeout
*/
/* 1 second */
static const nanosecs_rel_t rt_tcp_connection_timeout = 1000000000ull;

/* retransmission timerwheel timeout */
static const u64 rt_tcp_retransmit_timeout = 100000000ull;

/*
  keepalive constants
*/
/* 75 seconds */
static const u64 rt_tcp_keepalive_intvl = 75000000000ull;
/* 9 probes to send */
static const u8 rt_tcp_keepalive_probes = 9;
/* 2 hours */
static const u64 rt_tcp_keepalive_timeout = 7200000000000ull;

/*
  retransmission timeout
*/
/* 50 milliseconds */
static const nanosecs_rel_t rt_tcp_retransmission_timeout = 50000000ull;
/*
  maximum allowed number of retransmissions
*/
static const unsigned int max_retransmits = 3;

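/*
  Editorial note (not from the original source): with the values above, an
  unacknowledged segment is retried every 50 ms up to max_retransmits (3)
  times, so a dead peer is declared lost roughly 4 * 50 ms = 200 ms after
  the first transmission, assuming the timerwheel fires on schedule.
*/
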
struct tcp_keepalive {
	u8 enabled;
	u32 probes;
	rtdm_timer_t timer;
};

/***
 *  This structure is used to register a TCP socket for reception. All
 *  structures are kept in the port_registry array to increase the cache
 *  locality during the critical port lookup in rt_tcp_v4_lookup().
 */

/* If dport and daddr are zero, the socket is a listening socket;
   otherwise the structure describes a connection. */

/* NB: sock->prot.inet.saddr & sock->prot.inet.sport values are not used */
struct tcp_socket {
|
struct rtsocket sock; /* set up by rt_socket_init() implicitly */
|
u16 sport; /* local port */
|
u32 saddr; /* local ip-addr */
|
u16 dport; /* destination port */
|
u32 daddr; /* destination ip-addr */
|
|
u8 tcp_state; /* tcp connection state */
|
|
u8 is_binding; /* if set, tcp socket is in port binding progress */
|
u8 is_bound; /* if set, tcp socket is already port bound */
|
u8 is_valid; /* if set, read() and write() can process */
|
u8 is_accepting; /* if set, accept() is in progress */
|
u8 is_accepted; /* if set, accept() is already called */
|
u8 is_closed; /* close() call for resource deallocation follows */
|
|
rtdm_event_t send_evt; /* write request is permissible */
|
rtdm_event_t conn_evt; /* connection event */
|
|
struct dest_route rt;
|
struct tcp_sync sync;
|
struct tcp_keepalive keepalive;
|
rtdm_lock_t socket_lock;
|
|
struct hlist_node link;
|
|
nanosecs_rel_t sk_sndtimeo;
|
|
/* retransmission routine data */
|
u32 nacked_first;
|
unsigned int timer_state;
|
struct rtskb_queue retransmit_queue;
|
struct timerwheel_timer timer;
|
|
struct completion fin_handshake;
|
rtdm_nrtsig_t close_sig;
|
|
#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION
|
unsigned int packet_counter;
|
unsigned int error_rate;
|
unsigned int multi_error;
|
#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION */
|
};
|
|
struct rt_tcp_dispatched_packet_send_cmd {
	__be32 flags; /* packet flags value */
	struct tcp_socket *ts;
};

/***
 *  Automatic port number assignment
 *
 *  The automatic assignment of port numbers to unbound sockets is realised
 *  as a simple addition of two values:
 *  - the socket ID (lower 8 bits of the file descriptor), which is set
 *    during initialisation and left unchanged afterwards
 *  - the start value tcp_auto_port_start, which is a module parameter
 *
 *  tcp_auto_port_mask, also a module parameter, is used to define the
 *  range of port numbers which are used for automatic assignment. Any
 *  number within this range will be rejected when passed to bind_rt().
 */

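/*
  Editorial example (assumed values, not from the original source): with
  tcp_auto_port_start = 1024 and RT_TCP_SOCKETS = 64, the automatic range
  covers ports 1024..1087; the socket with ID 5 gets source port
  1024 + 5 = 1029, and an explicit bind_rt() to any port inside 1024..1087
  fails with -EINVAL.
*/
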
MODULE_LICENSE("GPL");
|
|
static struct {
	struct rtdm_dev_context dummy;
	struct tcp_socket rst_socket;
} rst_socket_container;

#define rst_fd (&rst_socket_container.dummy.fd)
#define rst_socket (*(struct tcp_socket *)rtdm_fd_to_private(rst_fd))

static u32 tcp_auto_port_start = 1024;
static u32 tcp_auto_port_mask = ~(RT_TCP_SOCKETS - 1);
static u32 free_ports = RT_TCP_SOCKETS;
#define RT_PORT_BITMAP_WORDS \
	((RT_TCP_SOCKETS + BITS_PER_LONG - 1) / BITS_PER_LONG)
static unsigned long port_bitmap[RT_PORT_BITMAP_WORDS];

static struct tcp_socket *port_registry[RT_TCP_SOCKETS];
static DEFINE_RTDM_LOCK(tcp_socket_base_lock);

static struct hlist_head port_hash[RT_TCP_SOCKETS * 2];
#define port_hash_mask (RT_TCP_SOCKETS * 2 - 1)

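/*
  Editorial note: port_hash has RT_TCP_SOCKETS * 2 buckets, and
  port_hash_mask only selects a valid bucket because that size is a power
  of two (RT_TCP_SOCKETS is assumed to be a power of two here; the
  tcp_auto_port_mask computation above relies on the same assumption).
*/
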
module_param(tcp_auto_port_start, uint, 0444);
module_param(tcp_auto_port_mask, uint, 0444);
MODULE_PARM_DESC(tcp_auto_port_start, "Start of automatically assigned "
		 "port range for TCP");
MODULE_PARM_DESC(tcp_auto_port_mask, "Mask that defines port range for TCP "
		 "for automatic assignment");

static inline struct tcp_socket *port_hash_search(u32 saddr, u16 sport)
{
	u32 bucket = sport & port_hash_mask;
	struct tcp_socket *ts;

	hlist_for_each_entry (ts, &port_hash[bucket], link)
		if (ts->sport == sport &&
		    (saddr == INADDR_ANY || ts->saddr == saddr ||
		     ts->saddr == INADDR_ANY))
			return ts;

	return NULL;
}

static int port_hash_insert(struct tcp_socket *ts, u32 saddr, u16 sport)
{
	u32 bucket;

	if (port_hash_search(saddr, sport))
		return -EADDRINUSE;

	bucket = sport & port_hash_mask;
	ts->saddr = saddr;
	ts->sport = sport;
	ts->daddr = 0;
	ts->dport = 0;

	hlist_add_head(&ts->link, &port_hash[bucket]);

	return 0;
}

static inline void port_hash_del(struct tcp_socket *ts)
{
	hlist_del(&ts->link);
}

/***
 *  rt_tcp_v4_lookup
 */
static struct rtsocket *rt_tcp_v4_lookup(u32 daddr, u16 dport)
{
	rtdm_lockctx_t context;
	struct tcp_socket *ts;
	int ret;

	rtdm_lock_get_irqsave(&tcp_socket_base_lock, context);
	ts = port_hash_search(daddr, dport);

	if (ts != NULL) {
		ret = rt_socket_reference(&ts->sock);
		if (ret == 0 || (ret == -EIDRM && ts->is_closed)) {
			rtdm_lock_put_irqrestore(&tcp_socket_base_lock,
						 context);

			return &ts->sock;
		}
	}

	rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context);

	return NULL;
}

/* test seq1 <= seq2 */
static inline int rt_tcp_before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1 - seq2) <= 0;
}

/* test seq1 >= seq2 */
static inline int rt_tcp_after(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq2 - seq1) <= 0;
}

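/*
  Editorial example: the signed subtraction handles 32-bit sequence number
  wraparound, e.g. rt_tcp_before(0xfffffff0, 0x10) is true because
  (__s32)(0xfffffff0 - 0x10) = (__s32)0xffffffe0 is negative, even though
  0xfffffff0 > 0x10 as an unsigned comparison.
*/
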
static inline u32 rt_tcp_compute_ack_seq(struct tcphdr *th, u32 len)
{
	u32 ack_seq = ntohl(th->seq) + len;

	if (unlikely(th->syn || th->fin))
		ack_seq++;

	return ack_seq;
}

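/*
  Editorial note: SYN and FIN each occupy one sequence number of their own,
  which is why the acknowledgement is advanced one beyond the payload
  length when either flag is set (e.g. a bare SYN with len == 0 is acked
  with seq + 1).
*/
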
static void rt_tcp_keepalive_start(struct tcp_socket *ts)
{
	if (ts->tcp_state == TCP_ESTABLISHED) {
		rtdm_timer_start(&ts->keepalive.timer, rt_tcp_keepalive_timeout,
				 0, RTDM_TIMERMODE_RELATIVE);
	}
}

static void rt_tcp_keepalive_stop(struct tcp_socket *ts)
{
	if (ts->tcp_state == TCP_ESTABLISHED) {
		rtdm_timer_stop(&ts->keepalive.timer);
	}
}

#ifdef YET_UNUSED
static void rt_tcp_keepalive_timer(rtdm_timer_t *timer);

static void rt_tcp_keepalive_enable(struct tcp_socket *ts)
{
	rtdm_lockctx_t context;
	struct tcp_keepalive *keepalive;

	rtdm_lock_get_irqsave(&ts->socket_lock, context);

	keepalive = &ts->keepalive;

	if (keepalive->enabled) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		return;
	}

	keepalive->probes = rt_tcp_keepalive_probes;

	rtdm_timer_init(&keepalive->timer, rt_tcp_keepalive_timer,
			"RT TCP keepalive timer");

	rt_tcp_keepalive_start(ts);

	keepalive->enabled = 1;

	rtdm_lock_put_irqrestore(&ts->socket_lock, context);
}
#endif

static void rt_tcp_keepalive_disable(struct tcp_socket *ts)
{
	struct tcp_keepalive *keepalive;

	keepalive = &ts->keepalive;

	if (!keepalive->enabled) {
		return;
	}

	rt_tcp_keepalive_stop(ts);
	rtdm_timer_destroy(&keepalive->timer);

	keepalive->enabled = 0;
}

static void rt_tcp_keepalive_feed(struct tcp_socket *ts)
{
	rtdm_lockctx_t context;
	struct tcp_keepalive *keepalive;

	rtdm_lock_get_irqsave(&ts->socket_lock, context);

	keepalive = &ts->keepalive;

	if (ts->tcp_state == TCP_ESTABLISHED && keepalive->enabled) {
		keepalive->probes = rt_tcp_keepalive_probes;

		/* Restart keepalive timer */
		rtdm_timer_stop(&keepalive->timer);
		rtdm_timer_start(&keepalive->timer, rt_tcp_keepalive_timeout, 0,
				 RTDM_TIMERMODE_RELATIVE);
	}

	rtdm_lock_put_irqrestore(&ts->socket_lock, context);
}

static int rt_tcp_socket_invalidate(struct tcp_socket *ts, u8 to_state)
{
	int signal = ts->is_valid;

	ts->tcp_state = to_state;

	/*
	  Multiple invalidations can happen without fuss, see rt_tcp_close(),
	  rt_tcp_rcv(), timeout expiration etc.
	*/
	if (ts->is_valid) {
		ts->is_valid = 0;

		if (ts->keepalive.enabled) {
			rt_tcp_keepalive_stop(ts);
		}
	}

	return signal;
}

static void rt_tcp_socket_invalidate_signal(struct tcp_socket *ts)
{
	/* wake up all readers and writers by destroying the events */
	rtdm_sem_destroy(&ts->sock.pending_sem);
	rtdm_event_destroy(&ts->send_evt);
}

static void rt_tcp_socket_validate(struct tcp_socket *ts)
{
	ts->tcp_state = TCP_ESTABLISHED;

	ts->is_valid = 1;

	if (ts->keepalive.enabled) {
		rt_tcp_keepalive_start(ts);
	}

	rtdm_event_init(&ts->send_evt, 0);
}

/***
 *  rt_tcp_retransmit_handler - timerwheel handler to process a retransmission
 *  @data: pointer to a rttcp socket structure
 */
static void rt_tcp_retransmit_handler(void *data)
{
	struct tcp_socket *ts = (struct tcp_socket *)data;
	struct rtskb *skb;
	rtdm_lockctx_t context;
	int signal;

	rtdm_lock_get_irqsave(&ts->socket_lock, context);

	if (unlikely(rtskb_queue_empty(&ts->retransmit_queue))) {
		/* handler invoked, but the retransmission queue is empty */
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		rtdm_printk("rttcp: bug in RT TCP retransmission routine\n");
		return;
	}

	if (ts->tcp_state == TCP_CLOSE) {
		/* socket is already closed */
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		return;
	}

	if (ts->timer_state) {
		/* more tries */
		ts->timer_state--;
		timerwheel_add_timer(&ts->timer, rt_tcp_retransmission_timeout);

		/* warning, rtskb_clone is called under lock */
		skb = rtskb_clone(ts->retransmit_queue.first,
				  &ts->sock.skb_pool);
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);

		/* BUG, window changes are not respected */
		if (unlikely(rtdev_xmit(skb) != 0)) {
			kfree_rtskb(skb);
			rtdm_printk(
				"rttcp: packet retransmission from timer failed\n");
		}
	} else {
		ts->timer_state = max_retransmits;

		/* report the lost connection */
		signal = rt_tcp_socket_invalidate(ts, TCP_CLOSE);
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);

		if (signal)
			rt_tcp_socket_invalidate_signal(ts);

		/* retransmission queue will be cleaned up in rt_tcp_socket_destruct */
		rtdm_printk("rttcp: connection lost due to NACK timeout\n");
	}
}

/***
 *  rt_tcp_retransmit_ack - remove skbs from retransmission queue on ACK
 *  @ts: rttcp socket
 *  @ack_seq: received ACK sequence value
 */
static void rt_tcp_retransmit_ack(struct tcp_socket *ts, u32 ack_seq)
{
	struct rtskb *skb;
	rtdm_lockctx_t context;

	rtdm_lock_get_irqsave(&ts->socket_lock, context);

	/*
	  ACK, but the retransmission queue is empty.
	  This can happen on repeated ACKs.
	*/
	if (rtskb_queue_empty(&ts->retransmit_queue)) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		return;
	}

	/*
	  Check the ts->nacked_first value first to ensure that an skb for
	  retransmission is present in the queue; otherwise the
	  retransmission queue would be drained completely.
	*/
	if (!rt_tcp_before(ts->nacked_first, ack_seq)) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		return;
	}

	if (timerwheel_remove_timer(&ts->timer) != 0) {
		/* already timed out */
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		return;
	}

dequeue_loop:
	if (ts->tcp_state == TCP_CLOSE) {
		/* keep the queue safe in a race with anyone
		   who closes the socket */
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		return;
	}

	if ((skb = __rtskb_dequeue(&ts->retransmit_queue)) == NULL) {
		ts->timer_state = max_retransmits;
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		return;
	}

	if (rt_tcp_before(ts->nacked_first, ack_seq)) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		kfree_rtskb(skb);
		rtdm_lock_get_irqsave(&ts->socket_lock, context);
		goto dequeue_loop;
	}

	/* Put the NACKed skb back to the queue */
	/* BUG, need to respect half-acknowledged packets */
	ts->nacked_first = ntohl(skb->h.th->seq) + 1;

	__rtskb_queue_head(&ts->retransmit_queue, skb);

	/* More packets in the retransmission queue, restart the timer */
	timerwheel_add_timer(&ts->timer, rt_tcp_retransmission_timeout);

	rtdm_lock_put_irqrestore(&ts->socket_lock, context);
}

/***
 *  rt_tcp_retransmit_send - enqueue an skb to the retransmission queue (not locked)
 *  @ts: rttcp socket
 *  @skb: a copied skb for enqueueing
 */
static void rt_tcp_retransmit_send(struct tcp_socket *ts, struct rtskb *skb)
{
	if (rtskb_queue_empty(&ts->retransmit_queue)) {
		/* retransmission queue is empty */
		ts->nacked_first = ntohl(skb->h.th->seq) + 1;

		__rtskb_queue_tail(&ts->retransmit_queue, skb);

		timerwheel_add_timer(&ts->timer, rt_tcp_retransmission_timeout);
	} else {
		/* retransmission queue is not empty */
		__rtskb_queue_tail(&ts->retransmit_queue, skb);
	}
}

static int rt_ip_build_frame(struct rtskb *skb, struct rtsocket *sk,
			     struct dest_route *rt, struct iphdr *iph)
{
	int ret;
	struct rtnet_device *rtdev = rt->rtdev;

	RTNET_ASSERT(rtdev->hard_header, return -EBADF;);

	if (!rtdev_reference(rt->rtdev))
		return -EIDRM;

	iph->ihl = 5; /* 20 byte header only - no TCP options */

	skb->nh.iph = iph;

	iph->version = 4;
	iph->tos = sk->prot.inet.tos;
	iph->tot_len = htons(skb->len); /* length of IP header and IP payload */
	iph->id = htons(0x00); /* zero IP frame id */
	iph->frag_off = htons(IP_DF); /* don't fragment */
	iph->ttl = 255;
	iph->protocol = sk->protocol;
	iph->saddr = rtdev->local_ip;
	iph->daddr = rt->ip;
	iph->check = 0; /* required to compute correct checksum */
	iph->check = ip_fast_csum((u8 *)iph, 5 /*iph->ihl*/);

	ret = rtdev->hard_header(skb, rtdev, ETH_P_IP, rt->dev_addr,
				 rtdev->dev_addr, skb->len);
	rtdev_dereference(rt->rtdev);

	if (ret != rtdev->hard_header_len) {
		rtdm_printk("rttcp: rt_ip_build_frame: error on lower level\n");
		return -EINVAL;
	}

	return 0;
}

static void rt_tcp_build_header(struct tcp_socket *ts, struct rtskb *skb,
				__be32 flags, u8 is_keepalive)
{
	u32 wcheck;
	u8 tcphdrlen = 20;
	u8 iphdrlen = 20;
	struct tcphdr *th;

	th = skb->h.th;
	th->source = ts->sport;
	th->dest = ts->dport;

	/* a keepalive probe reuses the sequence number just before snd_nxt;
	   the decrement must happen in host byte order, not on the
	   network-order field */
	if (unlikely(is_keepalive))
		th->seq = htonl(ts->sync.seq - 1);
	else
		th->seq = htonl(ts->sync.seq);

	tcp_flag_word(th) = flags;
	th->ack_seq = htonl(ts->sync.ack_seq);
	th->window = htons(ts->sync.window);

	th->doff = tcphdrlen >> 2; /* No options for now */
	th->res1 = 0;
	th->check = 0;
	th->urg_ptr = 0;

	/* compute checksum */
	wcheck = rtnet_csum(th, tcphdrlen, 0);

	if (skb->len - tcphdrlen - iphdrlen) {
		wcheck = rtnet_csum(skb->data + tcphdrlen + iphdrlen,
				    skb->len - tcphdrlen - iphdrlen, wcheck);
	}

	th->check =
		tcp_v4_check(skb->len - iphdrlen, ts->saddr, ts->daddr, wcheck);
}

static int rt_tcp_segment(struct dest_route *rt, struct tcp_socket *ts,
			  __be32 flags, u32 data_len, u8 *data_ptr,
			  u8 is_keepalive)
{
	struct tcphdr *th;
	struct rtsocket *sk = &ts->sock;
	struct rtnet_device *rtdev = rt->rtdev;
	struct rtskb *skb;
	struct iphdr *iph;
	struct rtskb *cloned_skb;
	rtdm_lockctx_t context;

	int ret;

	u32 hh_len = (rtdev->hard_header_len + 15) & ~15;
	u32 prio = (volatile unsigned int)sk->priority;
	u32 mtu = rtdev->get_mtu(rtdev, prio);

	u8 *data = NULL;

	if ((skb = alloc_rtskb(mtu + hh_len + 15, &sk->skb_pool)) == NULL) {
		rtdm_printk(
			"rttcp: no more elements in skb_pool for allocation\n");
		return -ENOBUFS;
	}

	/* rtskb_reserve(skb, hh_len + 20); */
	rtskb_reserve(skb, hh_len);

	iph = (struct iphdr *)rtskb_put(skb, 20); /* length of IP header */
	skb->nh.iph = iph;

	th = (struct tcphdr *)rtskb_put(skb, 20); /* length of TCP header */
	skb->h.th = th;

	if (data_len) { /* check for available space */
		data = (u8 *)rtskb_put(skb,
				       data_len); /* length of TCP payload */
		if (!memcpy(data, (void *)data_ptr, data_len)) {
			ret = -EFAULT;
			goto error;
		}
	}

	/* use the local phy MTU value */
	if (data_len > mtu)
		data_len = mtu;

	skb->rtdev = rtdev;
	skb->priority = prio;

	/* do not validate the socket connection on xmit,
	   this should be done at the upper level */

	rtdm_lock_get_irqsave(&ts->socket_lock, context);
	rt_tcp_build_header(ts, skb, flags, is_keepalive);

	if ((ret = rt_ip_build_frame(skb, sk, rt, iph)) != 0) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		goto error;
	}

	/* add the rtskb entry to the socket retransmission queue */
	if (ts->tcp_state != TCP_CLOSE &&
	    ((flags & (TCP_FLAG_SYN | TCP_FLAG_FIN)) || data_len)) {
		/* rtskb_clone below is called under lock, this is an admission,
		   because for now there is no rtskb copy by reference */
		cloned_skb = rtskb_clone(skb, &ts->sock.skb_pool);
		if (!cloned_skb) {
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			rtdm_printk("rttcp: can't clone skb\n");
			ret = -ENOMEM;
			goto error;
		}

		rt_tcp_retransmit_send(ts, cloned_skb);
	}

	/* The sync must be updated here, because this is the safe way in
	   comparison with races on a fast ACK response */
	if (flags & (TCP_FLAG_FIN | TCP_FLAG_SYN))
		ts->sync.seq++;

	ts->sync.seq += data_len;
	ts->sync.dst_window -= data_len;

	rtdm_lock_put_irqrestore(&ts->socket_lock, context);

	/* ignore the return value from rtdev_xmit: the packet was enqueued,
	   and on error it will be retransmitted later; on a critical error,
	   the connection will be closed after the retransmission timeout
	   as a lost connection */
	rtdev_xmit(skb);

	return data_len;

error:
	kfree_rtskb(skb);
	return ret;
}

static int rt_tcp_send(struct tcp_socket *ts, __be32 flags)
{
	struct dest_route rt;
	int ret;

	/*
	 * We may not have a route yet during setup. But once it is set, it
	 * stays until the socket dies.
	 */
	if (likely(ts->rt.rtdev)) {
		ret = rt_tcp_segment(&ts->rt, ts, flags, 0, NULL, 0);
	} else {
		ret = rt_ip_route_output(&rt, ts->daddr, ts->saddr);
		if (ret == 0) {
			ret = rt_tcp_segment(&rt, ts, flags, 0, NULL, 0);
			rtdev_dereference(rt.rtdev);
		}
	}
	if (ret < 0)
		rtdm_printk("rttcp: can't send a packet: err %d\n", -ret);
	return ret;
}

#ifdef YET_UNUSED
static void rt_tcp_keepalive_timer(rtdm_timer_t *timer)
{
	rtdm_lockctx_t context;
	struct tcp_keepalive *keepalive =
		container_of(timer, struct tcp_keepalive, timer);

	struct tcp_socket *ts =
		container_of(keepalive, struct tcp_socket, keepalive);
	int signal = 0;

	rtdm_lock_get_irqsave(&ts->socket_lock, context);

	if (keepalive->probes) {
		/* Send a probe */
		if (rt_tcp_segment(&ts->rt, ts, 0, 0, NULL, 1) < 0) {
			/* data receiving and sending is not possible anymore */
			signal = rt_tcp_socket_invalidate(ts, TCP_TIME_WAIT);
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		} else {
			keepalive->probes--;
			rtdm_timer_start_in_handler(&keepalive->timer,
						    rt_tcp_keepalive_intvl, 0,
						    RTDM_TIMERMODE_RELATIVE);
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		}
	} else {
		/* data receiving and sending is not possible anymore */
		signal = rt_tcp_socket_invalidate(ts, TCP_TIME_WAIT);
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
	}

	if (signal)
		rt_tcp_socket_invalidate_signal(ts);
}
#endif

static inline u32 rt_tcp_initial_seq(void)
{
	uint64_t clock_val = rtdm_clock_read_monotonic();
	return (u32)(clock_val ^ (clock_val >> 32));
}

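/*
  Editorial note: the initial sequence number is derived by folding the
  64-bit monotonic clock into 32 bits. This gives per-connection variation,
  but unlike the RFC 6528 scheme used by mainline Linux it is not
  cryptographically hardened against ISN prediction.
*/
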
/***
 *  rt_tcp_dest_socket
 */
static struct rtsocket *rt_tcp_dest_socket(struct rtskb *skb)
{
	struct tcphdr *th = skb->h.th;

	u32 saddr = skb->nh.iph->saddr;
	u32 daddr = skb->nh.iph->daddr;
	u32 sport = th->source;
	u32 dport = th->dest;

	u32 data_len;

	if (tcp_v4_check(skb->len, saddr, daddr,
			 rtnet_csum(skb->data, skb->len, 0))) {
		rtdm_printk("rttcp: invalid TCP packet checksum, dropped\n");
		return NULL; /* Invalid checksum, drop the packet */
	}

	/* find the destination socket */
	if ((skb->sk = rt_tcp_v4_lookup(daddr, dport)) == NULL) {
		/*
		  rtdm_printk("Not found addr:0x%08x, port: 0x%04x\n", daddr, dport);
		*/
		if (!th->rst) {
			/* No listening socket found, send RST|ACK */
			rst_socket.saddr = daddr;
			rst_socket.daddr = saddr;
			rst_socket.sport = dport;
			rst_socket.dport = sport;

			data_len = skb->len - (th->doff << 2);

			rst_socket.sync.seq = 0;
			rst_socket.sync.ack_seq =
				rt_tcp_compute_ack_seq(th, data_len);

			if (rt_ip_route_output(&rst_socket.rt, daddr, saddr) ==
			    0) {
				rt_socket_reference(&rst_socket.sock);
				rt_tcp_send(&rst_socket,
					    TCP_FLAG_ACK | TCP_FLAG_RST);
				rtdev_dereference(rst_socket.rt.rtdev);
			}
		}
	}

	return skb->sk;
}

static void rt_tcp_window_update(struct tcp_socket *ts, u16 window)
{
	rtdm_lockctx_t context;

	rtdm_lock_get_irqsave(&ts->socket_lock, context);

	if (ts->sync.dst_window) {
		ts->sync.dst_window = window;
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		if (!window) {
			/* clear send event status */
			rtdm_event_clear(&ts->send_evt);
		}
	} else {
		if (window) {
			ts->sync.dst_window = window;
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			/* set send event status */
			rtdm_event_signal(&ts->send_evt);
		} else {
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		}
	}
}

/***
 *  rt_tcp_rcv
 */
static void rt_tcp_rcv(struct rtskb *skb)
{
	rtdm_lockctx_t context;
	struct tcp_socket *ts;
	struct tcphdr *th = skb->h.th;
	unsigned int data_len = skb->len - (th->doff << 2);
	u32 seq = ntohl(th->seq);
	int signal;

	ts = container_of(skb->sk, struct tcp_socket, sock);

	rtdm_lock_get_irqsave(&ts->socket_lock, context);

#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION
	if (ts->error_rate > 0) {
		if ((ts->packet_counter++ % ts->error_rate) < ts->multi_error) {
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			goto drop;
		}
	}
#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION */

	/* Check whether daddr/dport correspond to the values stored in the
	   socket selected from the hash */
	if (ts->tcp_state != TCP_LISTEN && (ts->daddr != skb->nh.iph->saddr ||
					    ts->dport != skb->h.th->source)) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		goto drop;
	}

	/* Check if it is a keepalive probe */
	if (ts->sync.ack_seq == (seq + 1) && ts->tcp_state == TCP_ESTABLISHED) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		rt_tcp_send(ts, TCP_FLAG_ACK);
		goto feed;
	}

	if (ts->tcp_state == TCP_SYN_SENT) {
		ts->sync.ack_seq = rt_tcp_compute_ack_seq(th, data_len);

		if (th->syn && th->ack) {
			rt_tcp_socket_validate(ts);
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			rtdm_event_signal(&ts->conn_evt);
			/* Send ACK */
			rt_tcp_send(ts, TCP_FLAG_ACK);
			goto feed;
		}

		ts->tcp_state = TCP_CLOSE;
		ts->sync.seq = ntohl(th->ack_seq);
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);

		/* Send RST|ACK */
		rtdm_event_signal(&ts->conn_evt);
		rt_tcp_send(ts, TCP_FLAG_RST | TCP_FLAG_ACK);
		goto drop;
	}

	/* Check the SEQ correspondence to determine the connection relevance */

	/* OR-list of conditions to be satisfied:
	 *
	 * th->ack && rt_tcp_after(ts->nacked_first, ntohl(th->ack_seq))
	 * th->ack && th->rst && ...
	 * th->syn && (ts->tcp_state == TCP_LISTEN ||
	 *             ts->tcp_state == TCP_SYN_SENT)
	 * rt_tcp_after(seq, ts->sync.ack_seq) &&
	 *     rt_tcp_before(seq, ts->sync.ack_seq + ts->sync.window)
	 */

	if ((rt_tcp_after(seq, ts->sync.ack_seq) &&
	     rt_tcp_before(seq, ts->sync.ack_seq + ts->sync.window)) ||
	    th->rst ||
	    (th->syn &&
	     (ts->tcp_state == TCP_LISTEN || ts->tcp_state == TCP_SYN_SENT))) {
		/* everything is ok */
	} else if (rt_tcp_after(seq, ts->sync.ack_seq - data_len)) {
		/* retransmission of data we have already acked */
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		rt_tcp_send(ts, TCP_FLAG_ACK);
		goto drop;
	} else {
		/* drop a forward ack */
		if (th->ack &&
		    /* but reset an ack from an old connection */
		    ts->tcp_state == TCP_ESTABLISHED) {
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			rtdm_printk(
				"rttcp: dropped inappropriate ACK packet %u\n",
				ts->sync.ack_seq);
			goto drop;
		}

		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		rtdm_printk("rttcp: sequence number is not in window, "
			    "dropped (failed: %u <= %u <= %u)\n",
			    ts->sync.ack_seq, seq,
			    ts->sync.ack_seq + ts->sync.window);

		/* That's a forced RST for a lost connection */
		rst_socket.saddr = skb->nh.iph->daddr;
		rst_socket.daddr = skb->nh.iph->saddr;
		rst_socket.sport = th->dest;
		rst_socket.dport = th->source;

		rst_socket.sync.seq = ntohl(th->ack_seq);
		rst_socket.sync.ack_seq = rt_tcp_compute_ack_seq(th, data_len);

		if (rt_ip_route_output(&rst_socket.rt, rst_socket.daddr,
				       rst_socket.saddr) == 0) {
			rt_socket_reference(&rst_socket.sock);
			rt_tcp_send(&rst_socket, TCP_FLAG_RST | TCP_FLAG_ACK);
			rtdev_dereference(rst_socket.rt.rtdev);
		}
		goto drop;
	}

	if (th->rst) {
		if (ts->tcp_state == TCP_SYN_RECV) {
			ts->tcp_state = TCP_LISTEN;
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			goto drop;
		} else {
			/* Drop our half-open connection, peer obviously went away. */
			signal = rt_tcp_socket_invalidate(ts, TCP_CLOSE);
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);

			if (signal)
				rt_tcp_socket_invalidate_signal(ts);

			goto drop;
		}
	}

	ts->sync.ack_seq = rt_tcp_compute_ack_seq(th, data_len);

	if (th->fin) {
		if (ts->tcp_state == TCP_ESTABLISHED) {
			/* Send ACK */
			signal = rt_tcp_socket_invalidate(ts, TCP_CLOSE_WAIT);
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);

			if (signal)
				rt_tcp_socket_invalidate_signal(ts);

			rt_tcp_send(ts, TCP_FLAG_ACK);
			goto feed;
		} else if ((ts->tcp_state == TCP_FIN_WAIT1 && th->ack) ||
			   ts->tcp_state == TCP_FIN_WAIT2) {
			/* Send ACK */
			ts->tcp_state = TCP_TIME_WAIT;
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			rt_tcp_send(ts, TCP_FLAG_ACK);
			/* data receiving is not possible anymore */
			rtdm_sem_destroy(&ts->sock.pending_sem);
			rtdm_nrtsig_pend(&ts->close_sig);
			goto feed;
		} else if (ts->tcp_state == TCP_FIN_WAIT1) {
			/* Send ACK */
			ts->tcp_state = TCP_CLOSING;
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			rt_tcp_send(ts, TCP_FLAG_ACK);
			/* data receiving is not possible anymore */
			rtdm_sem_destroy(&ts->sock.pending_sem);
			goto feed;
		} else {
			/* just drop it */
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			goto drop;
		}
	}

	if (th->syn) {
		/* Need to differentiate a LISTEN socket from an ESTABLISHED one */
		/* Both of them have the same sport/saddr, but different dport/daddr */
		/* dport is unknown if it is the first connection of n */

		if (ts->tcp_state == TCP_LISTEN) {
			/* Need to store ts->seq while sending SYN earlier */
			/* The socket shall be in TCP_LISTEN state */

			/* safe to update ts->saddr here due to a single task for
			   rt_tcp_rcv() and rt_tcp_dest_socket() callers */
			ts->saddr = skb->nh.iph->daddr;

			ts->daddr = skb->nh.iph->saddr;
			ts->dport = th->source;
			ts->sync.seq = rt_tcp_initial_seq();
			ts->sync.window = 4096;
			ts->tcp_state = TCP_SYN_RECV;
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);

			/* Send SYN|ACK */
			rt_tcp_send(ts, TCP_FLAG_SYN | TCP_FLAG_ACK);
			goto drop;
		}

		/* Send RST|ACK */
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		rt_tcp_send(ts, TCP_FLAG_RST | TCP_FLAG_ACK);
		goto drop;
	}

	/* ACK received without SYN, FIN or RST flags */
	if (th->ack) {
		/* Check the ack sequence */
		if (rt_tcp_before(ts->sync.seq + 1, ntohl(th->ack_seq))) {
			rtdm_printk("rttcp: unexpected ACK %u %u %u\n",
				    ts->sync.seq, ts->nacked_first,
				    ntohl(th->ack_seq));
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			goto drop;
		}

		if (ts->tcp_state == TCP_LAST_ACK) {
			/* close the connection and free the socket data */
			ts->tcp_state = TCP_CLOSE;
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			/* socket destruction will be done on close() */
			rtdm_nrtsig_pend(&ts->close_sig);
			goto drop;
		} else if (ts->tcp_state == TCP_FIN_WAIT1) {
			ts->tcp_state = TCP_FIN_WAIT2;
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			goto feed;
		} else if (ts->tcp_state == TCP_SYN_RECV) {
			rt_tcp_socket_validate(ts);
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			rtdm_event_signal(&ts->conn_evt);
			goto feed;
		} else if (ts->tcp_state == TCP_CLOSING) {
			ts->tcp_state = TCP_TIME_WAIT;
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			/* socket destruction will be done on close() */
			rtdm_nrtsig_pend(&ts->close_sig);
			goto feed;
		}
	}

	if (ts->tcp_state != TCP_ESTABLISHED) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		goto drop;
	}

	if (data_len == 0) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		goto feed;
	}

	/* Send ACK */
	ts->sync.window -= data_len;
	rtdm_lock_put_irqrestore(&ts->socket_lock, context);
	rt_tcp_send(ts, TCP_FLAG_ACK);

	rtskb_queue_tail(&skb->sk->incoming, skb);
	rtdm_sem_up(&ts->sock.pending_sem);

	/* inform the retransmission subsystem about the arrived ack */
	if (th->ack) {
		rt_tcp_retransmit_ack(ts, ntohl(th->ack_seq));
	}

	rt_tcp_keepalive_feed(ts);
	rt_tcp_window_update(ts, ntohs(th->window));

	return;

feed:
	/* inform the retransmission subsystem about the arrived ack */
	if (th->ack) {
		rt_tcp_retransmit_ack(ts, ntohl(th->ack_seq));
	}

	rt_tcp_keepalive_feed(ts);
	rt_tcp_window_update(ts, ntohs(th->window));

drop:
	kfree_rtskb(skb);
	return;
}

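/*
  Editorial summary of the receive path above: after the SYN_SENT special
  case and the window-relevance check, the flag dispatch order is RST, then
  FIN (moving the socket through CLOSE_WAIT, TIME_WAIT or CLOSING), then
  SYN (passive open on a LISTEN socket), then plain ACK state advances
  (LAST_ACK -> CLOSE, FIN_WAIT1 -> FIN_WAIT2, SYN_RECV -> ESTABLISHED,
  CLOSING -> TIME_WAIT), loosely following the RFC 793 state diagram.
*/
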
/***
 *  rt_tcp_rcv_err
 */
static void rt_tcp_rcv_err(struct rtskb *skb)
{
	rtdm_printk("rttcp: rt_tcp_rcv err\n");
}

static int rt_tcp_window_send(struct tcp_socket *ts, u32 data_len, u8 *data_ptr)
{
	u32 dst_window = ts->sync.dst_window;
	int ret;

	if (data_len > dst_window)
		data_len = dst_window;

	if ((ret = rt_tcp_segment(&ts->rt, ts, TCP_FLAG_ACK, data_len, data_ptr,
				  0)) < 0) {
		rtdm_printk("rttcp: can't send a packet: err %d\n", -ret);
		return ret;
	}

	return ret;
}

static void rt_tcp_close_signal_handler(rtdm_nrtsig_t *nrtsig, void *arg)
{
	complete_all((struct completion *)arg);
}

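/*
  Editorial note: rtdm_nrtsig_pend() may be called from real-time context;
  the handler above then runs in Linux (non-RT) context and completes
  fin_handshake, the completion that rt_tcp_close() waits on with
  wait_for_completion_interruptible_timeout().
*/
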
static int rt_tcp_socket_create(struct tcp_socket *ts)
{
	rtdm_lockctx_t context;
	int i;
	int index;
	struct rtsocket *sock = &ts->sock;

	sock->prot.inet.saddr = INADDR_ANY;
	sock->prot.inet.state = TCP_CLOSE;
	sock->prot.inet.tos = 0;
	/*
	  rtdm_printk("rttcp: rt_tcp_socket_create 0x%p\n", ts);
	*/
	rtdm_lock_init(&ts->socket_lock);

	ts->rt.rtdev = NULL;

	ts->tcp_state = TCP_CLOSE;

	ts->is_accepting = 0;
	ts->is_accepted = 0;
	ts->is_binding = 0;
	ts->is_bound = 0;
	ts->is_valid = 0;
	ts->is_closed = 0;

	ts->sk_sndtimeo = RTDM_TIMEOUT_INFINITE;

	rtdm_event_init(&ts->conn_evt, 0);

	ts->keepalive.enabled = 0;

	ts->timer_state = max_retransmits;
	timerwheel_init_timer(&ts->timer, rt_tcp_retransmit_handler, ts);
	rtskb_queue_init(&ts->retransmit_queue);

	init_completion(&ts->fin_handshake);
	rtdm_nrtsig_init(&ts->close_sig, rt_tcp_close_signal_handler,
			 &ts->fin_handshake);

#ifdef CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION
	ts->packet_counter = counter_start;
	ts->error_rate = error_rate;
	ts->multi_error = multi_error;
#endif /* CONFIG_XENO_DRIVERS_NET_RTIPV4_TCP_ERROR_INJECTION */

	rtdm_lock_get_irqsave(&tcp_socket_base_lock, context);

	/* enforce the maximum number of TCP sockets */
	if (free_ports == 0) {
		rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context);
		rtdm_nrtsig_destroy(&ts->close_sig);
		return -EAGAIN;
	}
	free_ports--;

	/* find a free auto-port in the bitmap */
	for (i = 0; i < RT_PORT_BITMAP_WORDS; i++)
		if (port_bitmap[i] != (unsigned long)-1)
			break;
	index = ffz(port_bitmap[i]);
	set_bit(index, &port_bitmap[i]);
	index += i * BITS_PER_LONG;
	sock->prot.inet.reg_index = index;
	sock->prot.inet.sport = index + tcp_auto_port_start;

	/* register the TCP socket */
	port_registry[index] = ts;
	port_hash_insert(ts, INADDR_ANY, sock->prot.inet.sport);

	rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context);

	return 0;
}

/***
 *  rt_tcp_socket - create a new TCP socket
 *  @fd: file descriptor of the new socket
 */
static int rt_tcp_socket(struct rtdm_fd *fd)
{
	struct tcp_socket *ts = rtdm_fd_to_private(fd);
	int ret;

	if ((ret = rt_socket_init(fd, IPPROTO_TCP)) != 0)
		return ret;

	if ((ret = rt_tcp_socket_create(ts)) != 0)
		rt_socket_cleanup(fd);

	return ret;
}

static int rt_tcp_dispatched_packet_send(struct rt_proc_call *call)
{
	int ret;
	struct rt_tcp_dispatched_packet_send_cmd *cmd;

	cmd = rtpc_get_priv(call, struct rt_tcp_dispatched_packet_send_cmd);
	ret = rt_tcp_send(cmd->ts, cmd->flags);

	return ret;
}

/***
 *  rt_tcp_socket_destruct
 *  this function requires non-realtime context
 */
static void rt_tcp_socket_destruct(struct tcp_socket *ts)
{
	rtdm_lockctx_t context;
	struct rtskb *skb;
	int index;
	int signal;
	struct rtsocket *sock = &ts->sock;

	/*
	  rtdm_printk("rttcp: rt_tcp_socket_destruct 0x%p\n", ts);
	*/

	rtdm_lock_get_irqsave(&tcp_socket_base_lock, context);
	if (sock->prot.inet.reg_index >= 0) {
		index = sock->prot.inet.reg_index;

		clear_bit(index % BITS_PER_LONG,
			  &port_bitmap[index / BITS_PER_LONG]);
		port_hash_del(port_registry[index]);
		free_ports++;
		sock->prot.inet.reg_index = -1;
	}
	rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context);

	rtdm_lock_get_irqsave(&ts->socket_lock, context);

	signal = rt_tcp_socket_invalidate(ts, TCP_CLOSE);

	rt_tcp_keepalive_disable(ts);

	sock->prot.inet.state = TCP_CLOSE;

	/* dereference the rtdev */
	if (ts->rt.rtdev != NULL) {
		rtdev_dereference(ts->rt.rtdev);
		ts->rt.rtdev = NULL;
	}

	rtdm_lock_put_irqrestore(&ts->socket_lock, context);

	if (signal)
		rt_tcp_socket_invalidate_signal(ts);

	rtdm_event_destroy(&ts->conn_evt);

	rtdm_nrtsig_destroy(&ts->close_sig);

	/* clean up already collected fragments */
	rt_ip_frag_invalidate_socket(sock);

	/* free the packets in the incoming queue */
	while ((skb = rtskb_dequeue(&sock->incoming)) != NULL)
		kfree_rtskb(skb);

	/* ensure that the timer is no longer running */
	timerwheel_remove_timer_sync(&ts->timer);

	/* free the packets in the retransmission queue */
	while ((skb = __rtskb_dequeue(&ts->retransmit_queue)) != NULL)
		kfree_rtskb(skb);
}

/***
 *  rt_tcp_close
 */
static void rt_tcp_close(struct rtdm_fd *fd)
{
	struct tcp_socket *ts = rtdm_fd_to_private(fd);
	struct rt_tcp_dispatched_packet_send_cmd send_cmd;
	rtdm_lockctx_t context;
	int signal = 0;

	rtdm_lock_get_irqsave(&ts->socket_lock, context);

	ts->is_closed = 1;

	if (ts->tcp_state == TCP_ESTABLISHED || ts->tcp_state == TCP_SYN_RECV) {
		/* close() from ESTABLISHED */
		send_cmd.ts = ts;
		send_cmd.flags = TCP_FLAG_FIN | TCP_FLAG_ACK;
		signal = rt_tcp_socket_invalidate(ts, TCP_FIN_WAIT1);

		rtdm_lock_put_irqrestore(&ts->socket_lock, context);

		rtpc_dispatch_call(rt_tcp_dispatched_packet_send, 0, &send_cmd,
				   sizeof(send_cmd), NULL, NULL);
		/* result is ignored */

		/* Give the peer some time to reply to our FIN.
		   Since it is not relevant what exactly causes the wait
		   function to return, its result is ignored. */
		wait_for_completion_interruptible_timeout(&ts->fin_handshake,
							  msecs_to_jiffies(close_timeout));
	} else if (ts->tcp_state == TCP_CLOSE_WAIT) {
		/* Send FIN in CLOSE_WAIT */
		send_cmd.ts = ts;
		send_cmd.flags = TCP_FLAG_FIN | TCP_FLAG_ACK;
		signal = rt_tcp_socket_invalidate(ts, TCP_LAST_ACK);

		rtdm_lock_put_irqrestore(&ts->socket_lock, context);

		rtpc_dispatch_call(rt_tcp_dispatched_packet_send, 0, &send_cmd,
				   sizeof(send_cmd), NULL, NULL);
		/* result is ignored */

		/* Give the peer some time to reply to our FIN.
		   Since it is not relevant what exactly causes the wait
		   function to return, its result is ignored. */
		wait_for_completion_interruptible_timeout(&ts->fin_handshake,
							  msecs_to_jiffies(close_timeout));
	} else {
		/*
		  rt_tcp_socket_validate() has not been called at all, hence
		  the socket state is TCP_SYN_SENT or TCP_LISTEN, or the
		  socket is in one of the close states, hence
		  rt_tcp_socket_invalidate() was already called, but close()
		  is called for the first time.
		*/
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
	}

	if (signal)
		rt_tcp_socket_invalidate_signal(ts);

	rt_tcp_socket_destruct(ts);

	rt_socket_cleanup(fd);
}

/***
 *  rt_tcp_bind - bind socket to local address
 *  @fd: file descriptor
 *  @ts: rttcp socket
 *  @addr: local address
 *  @addrlen: length of local address
 */
static int rt_tcp_bind(struct rtdm_fd *fd, struct tcp_socket *ts,
		       const struct sockaddr __user *addr, socklen_t addrlen)
{
	struct sockaddr_in *usin, _usin;
	rtdm_lockctx_t context;
	int index;
	int bound = 0;
	int ret = 0;

	usin = rtnet_get_arg(fd, &_usin, addr, sizeof(_usin));
	if (IS_ERR(usin))
		return PTR_ERR(usin);

	if ((addrlen < (int)sizeof(struct sockaddr_in)) ||
	    ((usin->sin_port & tcp_auto_port_mask) == tcp_auto_port_start))
		return -EINVAL;

	rtdm_lock_get_irqsave(&ts->socket_lock, context);
	if (ts->tcp_state != TCP_CLOSE || ts->is_bound || ts->is_binding) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		return -EINVAL;
	}

	ts->is_binding = 1;
	rtdm_lock_put_irqrestore(&ts->socket_lock, context);

	rtdm_lock_get_irqsave(&tcp_socket_base_lock, context);

	if ((index = ts->sock.prot.inet.reg_index) < 0) {
		/* socket is destroyed */
		ret = -EBADF;
		goto unlock_out;
	}

	port_hash_del(ts);
	if (port_hash_insert(ts, usin->sin_addr.s_addr,
			     usin->sin_port ?: index + tcp_auto_port_start)) {
		port_hash_insert(ts, ts->saddr, ts->sport);

		ret = -EADDRINUSE;
		goto unlock_out;
	}

	bound = 1;

unlock_out:
	rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context);

	rtdm_lock_get_irqsave(&ts->socket_lock, context);
	ts->is_bound = bound;
	ts->is_binding = 0;
	rtdm_lock_put_irqrestore(&ts->socket_lock, context);

	return ret;
}

/***
 *  rt_tcp_connect
 */
static int rt_tcp_connect(struct rtdm_fd *fd, struct tcp_socket *ts,
			  const struct sockaddr __user *serv_addr,
			  socklen_t addrlen)
{
	struct sockaddr_in *usin, _usin;
	struct dest_route rt;
	rtdm_lockctx_t context;
	int ret;

	if (addrlen < (int)sizeof(struct sockaddr_in))
		return -EINVAL;

	usin = rtnet_get_arg(fd, &_usin, serv_addr, sizeof(_usin));
	if (IS_ERR(usin))
		return PTR_ERR(usin);

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	ret = rt_ip_route_output(&rt, usin->sin_addr.s_addr, ts->saddr);
	if (ret < 0) {
		/* no route to host */
		return -ENETUNREACH;
	}

	rtdm_lock_get_irqsave(&ts->socket_lock, context);

	if (ts->is_closed) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		ret = -EBADF;
		goto err_deref;
	}

	if (ts->tcp_state != TCP_CLOSE || ts->is_binding) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		ret = -EINVAL;
		goto err_deref;
	}

	if (ts->rt.rtdev == NULL)
		memcpy(&ts->rt, &rt, sizeof(rt));
	else
		rtdev_dereference(rt.rtdev);

	ts->saddr = rt.rtdev->local_ip;

	ts->daddr = usin->sin_addr.s_addr;
	ts->dport = usin->sin_port;

	ts->sync.seq = rt_tcp_initial_seq();
	ts->sync.ack_seq = 0;
	ts->sync.window = 4096;
	ts->sync.dst_window = 0;

	ts->tcp_state = TCP_SYN_SENT;

	rtdm_lock_put_irqrestore(&ts->socket_lock, context);

	/* Complete the three-way handshake */
	ret = rt_tcp_send(ts, TCP_FLAG_SYN);
	if (ret < 0) {
		rtdm_printk("rttcp: can't send SYN\n");
		return ret;
	}

	ret = rtdm_event_timedwait(&ts->conn_evt, rt_tcp_connection_timeout,
				   NULL);
	if (unlikely(ret < 0))
		switch (ret) {
		case -EWOULDBLOCK:
		case -ETIMEDOUT:
		case -EINTR:
			return ret;

		default:
			return -EBADF;
		}

	if (ts->tcp_state == TCP_SYN_SENT) {
		/* received conn_evt, but the connection is not established */
		return -ECONNREFUSED;
	}

	return ret;

err_deref:
	rtdev_dereference(rt.rtdev);

	return ret;
}

/***
 *  rt_tcp_listen
 */
static int rt_tcp_listen(struct tcp_socket *ts, unsigned long backlog)
{
	int ret;
	rtdm_lockctx_t context;

	/* The backlog value is ignored, the maximum number of queued
	   connections is 1 */

	rtdm_lock_get_irqsave(&ts->socket_lock, context);
	if (ts->is_closed) {
		ret = -EBADF;
		goto unlock_out;
	}

	if (ts->tcp_state != TCP_CLOSE || ts->is_binding) {
		ret = -EINVAL;
		goto unlock_out;
	}

	ts->tcp_state = TCP_LISTEN;
	ret = 0;

unlock_out:
	rtdm_lock_put_irqrestore(&ts->socket_lock, context);

	return ret;
}

/***
 *  rt_tcp_accept
 */
static int rt_tcp_accept(struct rtdm_fd *fd, struct tcp_socket *ts,
			 struct sockaddr *addr, socklen_t __user *addrlen)
{
	/* Return the peer sockaddr; the socket was already set up by
	   rt_socket_init(), so it is possible to read from and write to it
	   afterwards; return a valid file descriptor */

	int ret;
	socklen_t *uaddrlen, _uaddrlen;
	struct sockaddr_in sin;
	nanosecs_rel_t timeout = ts->sock.timeout;
	rtdm_lockctx_t context;
	struct dest_route rt;

	uaddrlen = rtnet_get_arg(fd, &_uaddrlen, addrlen, sizeof(_uaddrlen));
	if (IS_ERR(uaddrlen))
		return PTR_ERR(uaddrlen);

	rtdm_lock_get_irqsave(&ts->socket_lock, context);
	if (ts->is_accepting || ts->is_accepted) {
		/* socket is already accepted or is accepting a connection right now */
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		return -EALREADY;
	}

	if (ts->tcp_state != TCP_LISTEN ||
	    *uaddrlen < sizeof(struct sockaddr_in)) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		return -EINVAL;
	}

	ts->is_accepting = 1;
	rtdm_lock_put_irqrestore(&ts->socket_lock, context);

	ret = rtdm_event_timedwait(&ts->conn_evt, timeout, NULL);

	if (unlikely(ret < 0))
		switch (ret) {
		case -ETIMEDOUT:
		case -EINTR:
			goto err;

		default:
			ret = -EBADF;
			goto err;
		}

	/* conn_evt reported that the connection was established */
	ret = rt_ip_route_output(&rt, ts->daddr, ts->saddr);
	if (ret < 0) {
		/* strange, no route to host, keep status quo */
		ret = -EPROTO;
		goto err;
	}

	if (addr) {
		sin.sin_family = AF_INET;
		sin.sin_port = ts->dport;
		sin.sin_addr.s_addr = ts->daddr;
		ret = rtnet_put_arg(fd, addr, &sin, sizeof(sin));
		if (ret) {
			rtdev_dereference(rt.rtdev);
			ret = -EFAULT;
			goto err;
		}
	}

	rtdm_lock_get_irqsave(&ts->socket_lock, context);

	if (ts->tcp_state != TCP_ESTABLISHED) {
		/* protocol error */
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		rtdev_dereference(rt.rtdev);
		ret = -EPROTO;
		goto err;
	}

	if (ts->rt.rtdev == NULL)
		memcpy(&ts->rt, &rt, sizeof(rt));
	else
		rtdev_dereference(rt.rtdev);

	ts->is_accepted = 1;
	rtdm_lock_put_irqrestore(&ts->socket_lock, context);

	ret = rtdm_fd_ufd(rt_socket_fd(&ts->sock));

err:
	/* it is not critical to leave this unlocked
	   due to the single-entry nature of accept() */
	ts->is_accepting = 0;

	return ret;
}

/***
 *  rt_tcp_shutdown
 */
static int rt_tcp_shutdown(struct tcp_socket *ts, unsigned long how)
{
	return -EOPNOTSUPP;
}

/***
 *  rt_tcp_setsockopt
 */
static int rt_tcp_setsockopt(struct rtdm_fd *fd, struct tcp_socket *ts,
			     int level, int optname, const void *optval,
			     socklen_t optlen)
{
	/* uint64_t val; */
	struct __kernel_old_timeval tv;
	rtdm_lockctx_t context;

	switch (optname) {
	case SO_KEEPALIVE:
		if (optlen < sizeof(unsigned int))
			return -EINVAL;

		/* commented out, because the current implementation transmits
		   keepalive probes from interrupt context */
		/*
		  val = *(unsigned long *)optval;

		  if (val)
			  rt_tcp_keepalive_enable(ts);
		  else
			  rt_tcp_keepalive_disable(ts);
		*/
		return 0;

	case SO_SNDTIMEO_OLD:
		if (optlen < sizeof(tv))
			return -EINVAL;
		if (rtdm_copy_from_user(fd, &tv, optval, sizeof(tv)))
			return -EFAULT;
		if (tv.tv_usec < 0 || tv.tv_usec >= 1000000)
			return -EDOM;

		rtdm_lock_get_irqsave(&ts->socket_lock, context);

		if (tv.tv_sec < 0) {
			ts->sk_sndtimeo = RTDM_TIMEOUT_NONE;
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			return 0;
		}

		ts->sk_sndtimeo = RTDM_TIMEOUT_INFINITE;
		if (tv.tv_sec == 0 && tv.tv_usec == 0) {
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			return 0;
		}

		if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / 1000000000ull - 1))
			ts->sk_sndtimeo =
				(tv.tv_sec * 1000000 + tv.tv_usec) * 1000;

		rtdm_lock_put_irqrestore(&ts->socket_lock, context);

		return 0;

	case SO_REUSEADDR:
		/* to be implemented */
		return -EOPNOTSUPP;
	}

	return -ENOPROTOOPT;
}

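/*
  Editorial usage sketch (assumed user-space flow, not from the original
  source): a 500 ms send timeout would be requested as

	struct timeval tv = { .tv_sec = 0, .tv_usec = 500000 };
	setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));

  which rt_tcp_setsockopt() above converts to nanoseconds for sk_sndtimeo.
*/
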
/***
 *  rt_tcp_getsockopt
 */
static int rt_tcp_getsockopt(struct rtdm_fd *fd, struct tcp_socket *ts,
			     int level, int optname, void *optval,
			     socklen_t *optlen)
{
	int ret = 0;

	if (*optlen < sizeof(unsigned int))
		return -EINVAL;

	switch (optname) {
	case SO_ERROR:
		ret = 0; /* used in nonblocking connect(), extend later */
		break;

	default:
		ret = -ENOPROTOOPT;
		break;
	}

	return ret;
}

/***
 *  rt_tcp_ioctl
 */
static int rt_tcp_ioctl(struct rtdm_fd *fd, unsigned int request,
			void __user *arg)
{
	struct tcp_socket *ts = rtdm_fd_to_private(fd);
	const struct _rtdm_setsockaddr_args *setaddr;
	struct _rtdm_setsockaddr_args _setaddr;
	const struct _rtdm_getsockaddr_args *getaddr;
	struct _rtdm_getsockaddr_args _getaddr;
	const struct _rtdm_getsockopt_args *getopt;
	struct _rtdm_getsockopt_args _getopt;
	const struct _rtdm_setsockopt_args *setopt;
	struct _rtdm_setsockopt_args _setopt;
	int in_rt;

	/* fast path for common socket IOCTLs */
	if (_IOC_TYPE(request) == RTIOC_TYPE_NETWORK)
		return rt_socket_common_ioctl(fd, request, arg);

	in_rt = rtdm_in_rt_context();

	switch (request) {
	case _RTIOC_BIND:
		setaddr = rtnet_get_arg(fd, &_setaddr, arg, sizeof(_setaddr));
		if (IS_ERR(setaddr))
			return PTR_ERR(setaddr);
		return rt_tcp_bind(fd, ts, setaddr->addr, setaddr->addrlen);

	case _RTIOC_CONNECT:
		if (!in_rt)
			return -ENOSYS;
		setaddr = rtnet_get_arg(fd, &_setaddr, arg, sizeof(_setaddr));
		if (IS_ERR(setaddr))
			return PTR_ERR(setaddr);
		return rt_tcp_connect(fd, ts, setaddr->addr, setaddr->addrlen);

	case _RTIOC_LISTEN:
		return rt_tcp_listen(ts, (unsigned long)arg);

	case _RTIOC_ACCEPT:
		if (!in_rt)
			return -ENOSYS;
		getaddr = rtnet_get_arg(fd, &_getaddr, arg, sizeof(_getaddr));
		if (IS_ERR(getaddr))
			return PTR_ERR(getaddr);
		return rt_tcp_accept(fd, ts, getaddr->addr, getaddr->addrlen);

	case _RTIOC_SHUTDOWN:
		return rt_tcp_shutdown(ts, (unsigned long)arg);

	case _RTIOC_SETSOCKOPT:
		setopt = rtnet_get_arg(fd, &_setopt, arg, sizeof(_setopt));
		if (IS_ERR(setopt))
			return PTR_ERR(setopt);

		if (setopt->level != SOL_SOCKET)
			break;

		return rt_tcp_setsockopt(fd, ts, setopt->level, setopt->optname,
					 setopt->optval, setopt->optlen);

	case _RTIOC_GETSOCKOPT:
		getopt = rtnet_get_arg(fd, &_getopt, arg, sizeof(_getopt));
		if (IS_ERR(getopt))
			return PTR_ERR(getopt);

		if (getopt->level != SOL_SOCKET)
			break;

		return rt_tcp_getsockopt(fd, ts, getopt->level, getopt->optname,
					 getopt->optval, getopt->optlen);
	default:
		break;
	}

	return rt_ip_ioctl(fd, request, arg);
}

/***
 *  rt_tcp_read
 */
static ssize_t rt_tcp_read(struct rtdm_fd *fd, void *buf, size_t nbyte)
{
	struct tcp_socket *ts = rtdm_fd_to_private(fd);
	struct rtsocket *sock = &ts->sock;

	struct rtskb *skb;
	struct rtskb *first_skb;
	nanosecs_rel_t timeout = sock->timeout;
	size_t data_len;
	size_t th_len;
	size_t copied = 0;
	size_t block_size;
	u8 *user_buf = buf;
	int ret;
	rtdm_lockctx_t context;

	rtdm_toseq_t timeout_seq;

	if (!rtdm_fd_is_user(fd)) {
		return -EFAULT;
	}

	rtdm_lock_get_irqsave(&ts->socket_lock, context);

	if (ts->is_closed) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		return -EBADF;
	}

	if (!ts->is_valid) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		return 0;
	}

	if (ts->tcp_state != TCP_ESTABLISHED &&
	    ts->tcp_state != TCP_FIN_WAIT2) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		return -EINVAL;
	}
	rtdm_lock_put_irqrestore(&ts->socket_lock, context);

	rtdm_toseq_init(&timeout_seq, timeout);

	while (copied < nbyte) {
		ret = rtdm_sem_timeddown(&ts->sock.pending_sem, timeout,
					 &timeout_seq);

		if (unlikely(ret < 0))
			switch (ret) {
			case -EWOULDBLOCK:
			case -ETIMEDOUT:
			case -EINTR:
				return (copied ? copied : ret);

			case -EIDRM: /* event is destroyed */
				if (ts->is_closed)
					return -EBADF;

				return copied;

			default:
				if (ts->is_closed) {
					return -EBADF;
				}

				return 0;
			}

		skb = rtskb_dequeue_chain(&sock->incoming);
		RTNET_ASSERT(skb != NULL, return -EFAULT;);

		th_len = (skb->h.th->doff) << 2;

		data_len = skb->len - th_len;

		__rtskb_pull(skb, th_len);

		first_skb = skb;

		/* iterate over all IP fragments */
	iterate_fragments:
		block_size = skb->len;
		copied += block_size;
		data_len -= block_size;

		if (copied > nbyte) {
			block_size -= copied - nbyte;
			copied = nbyte;

			if (rtdm_copy_to_user(fd, user_buf, skb->data,
					      block_size)) {
				kfree_rtskb(first_skb); /* or store the data? */
				return -EFAULT;
			}
			rtdm_lock_get_irqsave(&ts->socket_lock, context);
			if (ts->sync.window) {
				ts->sync.window += block_size;
				rtdm_lock_put_irqrestore(&ts->socket_lock,
							 context);
			} else {
				ts->sync.window = block_size;
				rtdm_lock_put_irqrestore(&ts->socket_lock,
							 context);
				rt_tcp_send(ts,
					    TCP_FLAG_ACK); /* window update */
			}

			__rtskb_pull(skb, block_size);
			__rtskb_push(first_skb, sizeof(struct tcphdr));
			first_skb->h.th->doff = 5;
			rtskb_queue_head(&sock->incoming, first_skb);
			rtdm_sem_up(&ts->sock.pending_sem);

			return copied;
		}

		if (rtdm_copy_to_user(fd, user_buf, skb->data, block_size)) {
			kfree_rtskb(first_skb); /* or store the data? */
			return -EFAULT;
		}
		rtdm_lock_get_irqsave(&ts->socket_lock, context);
		if (ts->sync.window) {
			ts->sync.window += block_size;
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		} else {
			ts->sync.window = block_size;
			rtdm_lock_put_irqrestore(&ts->socket_lock, context);
			rt_tcp_send(ts, TCP_FLAG_ACK); /* window update */
		}

		if ((skb = skb->next) != NULL) {
			user_buf += data_len;
			goto iterate_fragments;
		}

		kfree_rtskb(first_skb);
	}

	return copied;
}

/***
 *  rt_tcp_write
 */
static ssize_t rt_tcp_write(struct rtdm_fd *fd, const void __user *user_buf,
			    size_t nbyte)
{
	struct tcp_socket *ts = rtdm_fd_to_private(fd);
	uint32_t sent_len = 0;
	rtdm_lockctx_t context;
	int ret = 0;
	nanosecs_rel_t sk_sndtimeo;
	void *buf;

	if (!rtdm_fd_is_user(fd)) {
		return -EFAULT;
	}

	rtdm_lock_get_irqsave(&ts->socket_lock, context);

	sk_sndtimeo = ts->sk_sndtimeo;

	if (!ts->is_valid) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		return -EPIPE;
	}

	if ((ts->daddr | ts->dport) == 0 || ts->tcp_state != TCP_ESTABLISHED) {
		rtdm_lock_put_irqrestore(&ts->socket_lock, context);
		return -EINVAL;
	}

	rtdm_lock_put_irqrestore(&ts->socket_lock, context);

	buf = xnmalloc(nbyte);
	if (buf == NULL)
		return -ENOMEM;

	ret = rtdm_copy_from_user(fd, buf, user_buf, nbyte);
	if (ret) {
		xnfree(buf);
		return ret;
	}

	while (sent_len < nbyte) {
		ret = rtdm_event_timedwait(&ts->send_evt, sk_sndtimeo, NULL);

		if (unlikely(ret < 0))
			switch (ret) {
			case -EWOULDBLOCK:
			case -ETIMEDOUT:
			case -EINTR:
				xnfree(buf);
				return sent_len ?: ret;

			case -EIDRM: /* event is destroyed */
			default:
				if (ts->is_closed) {
					xnfree(buf);
					return -EBADF;
				}

				xnfree(buf);
				return sent_len ?: ret;
			}

		ret = rt_tcp_window_send(ts, nbyte - sent_len,
					 ((u8 *)buf) + sent_len);

		if (ret < 0) { /* check this branch correctness */
			rtdm_event_signal(&ts->send_evt);
			break;
		}

		sent_len += ret;
		if (ts->sync.dst_window)
			rtdm_event_signal(&ts->send_evt);
	}

	xnfree(buf);
	return (ret < 0 ? ret : sent_len);
}

/***
 *  rt_tcp_recvmsg
 */
static ssize_t rt_tcp_recvmsg(struct rtdm_fd *fd, struct user_msghdr *msg,
			      int msg_flags)
{
	struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov;
	ssize_t ret;
	size_t len;
	void *buf;

	if (msg_flags)
		return -EOPNOTSUPP;

	/* loop over all vectors to be implemented */
	if (msg->msg_iovlen != 1)
		return -EOPNOTSUPP;

	ret = rtdm_get_iovec(fd, &iov, msg, iov_fast);
	if (ret)
		return ret;

	len = iov[0].iov_len;
	if (len > 0) {
		buf = iov[0].iov_base;
		ret = rt_tcp_read(fd, buf, len);
	}

	rtdm_drop_iovec(iov, iov_fast);

	return ret;
}

/***
 *  rt_tcp_sendmsg
 */
static ssize_t rt_tcp_sendmsg(struct rtdm_fd *fd, const struct user_msghdr *msg,
			      int msg_flags)
{
	struct iovec iov_fast[RTDM_IOV_FASTMAX], *iov;
	ssize_t ret;
	size_t len;

	if (msg_flags)
		return -EOPNOTSUPP;

	/* loop over all vectors to be implemented */
	if (msg->msg_iovlen != 1)
		return -EOPNOTSUPP;

	ret = rtdm_get_iovec(fd, &iov, msg, iov_fast);
	if (ret)
		return ret;

	len = iov[0].iov_len;
	if (len > 0)
		ret = rt_tcp_write(fd, iov[0].iov_base, len);

	rtdm_drop_iovec(iov, iov_fast);

	return ret;
}

/***
 *  rt_tcp_select
 */
static int rt_tcp_select(struct rtdm_fd *fd, rtdm_selector_t *selector,
			 enum rtdm_selecttype type, unsigned fd_index)
{
	struct tcp_socket *ts = rtdm_fd_to_private(fd);

	switch (type) {
	case XNSELECT_READ:
		return rtdm_sem_select(&ts->sock.pending_sem, selector,
				       XNSELECT_READ, fd_index);
	case XNSELECT_WRITE:
		return rtdm_event_select(&ts->send_evt, selector,
					 XNSELECT_WRITE, fd_index);
	default:
		return -EBADF;
	}

	return -EINVAL;
}

/***
 *  TCP initialisation
 */
static struct rtinet_protocol tcp_protocol = {
	.protocol = IPPROTO_TCP,
	.dest_socket = &rt_tcp_dest_socket,
	.rcv_handler = &rt_tcp_rcv,
	.err_handler = &rt_tcp_rcv_err,
	.init_socket = &rt_tcp_socket,
};

static struct rtdm_driver tcp_driver = {
	.profile_info = RTDM_PROFILE_INFO(tcp,
					  RTDM_CLASS_NETWORK,
					  RTDM_SUBCLASS_RTNET,
					  RTNET_RTDM_VER),
	.device_flags = RTDM_PROTOCOL_DEVICE,
	.device_count = 1,
	.context_size = sizeof(struct tcp_socket),

	.protocol_family = PF_INET,
	.socket_type = SOCK_STREAM,

	.ops = {
		.socket = rt_inet_socket,
		.close = rt_tcp_close,
		.ioctl_rt = rt_tcp_ioctl,
		.ioctl_nrt = rt_tcp_ioctl,
		.read_rt = rt_tcp_read,
		.write_rt = rt_tcp_write,
		.recvmsg_rt = rt_tcp_recvmsg,
		.sendmsg_rt = rt_tcp_sendmsg,
		.select = rt_tcp_select,
	},
};

static struct rtdm_device tcp_device = {
	.driver = &tcp_driver,
	.label = "tcp",
};

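/*
  Editorial usage sketch (assumed user-space flow, not from the original
  source): with RTnet loaded, an application reaches the handlers above
  through the ordinary BSD socket calls routed via the RTDM protocol
  device:

	int fd = socket(AF_INET, SOCK_STREAM, 0);             -> rt_inet_socket
	connect(fd, (struct sockaddr *)&peer, sizeof(peer));  -> rt_tcp_connect
	write(fd, data, len);                                 -> rt_tcp_write
	close(fd);                                            -> rt_tcp_close
*/
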
#ifdef CONFIG_XENO_OPT_VFILE
/***
 *  rt_tcp_proc_read
 */
static inline char *rt_tcp_string_of_state(u8 state)
{
	switch (state) {
	case TCP_ESTABLISHED:
		return "ESTABLISHED";
	case TCP_SYN_SENT:
		return "SYN_SENT";
	case TCP_SYN_RECV:
		return "SYN_RECV";
	case TCP_FIN_WAIT1:
		return "FIN_WAIT1";
	case TCP_FIN_WAIT2:
		return "FIN_WAIT2";
	case TCP_TIME_WAIT:
		return "TIME_WAIT";
	case TCP_CLOSE:
		return "CLOSE";
	case TCP_CLOSE_WAIT:
		return "CLOSE_WAIT";
	case TCP_LAST_ACK:
		return "LAST_ACK";
	case TCP_LISTEN:
		return "LISTEN";
	case TCP_CLOSING:
		return "CLOSING";
	default:
		return "UNKNOWN";
	}
}

static int rtnet_ipv4_tcp_show(struct xnvfile_regular_iterator *it, void *data)
{
	rtdm_lockctx_t context;
	struct tcp_socket *ts;
	u32 saddr, daddr;
	u16 sport = 0, dport = 0; /* set to 0 to silence the compiler */
	char sbuffer[24];
	char dbuffer[24];
	int state;
	int index;

	xnvfile_printf(it, "Hash Local Address           "
			   "Foreign Address         State\n");

	for (index = 0; index < RT_TCP_SOCKETS; index++) {
		rtdm_lock_get_irqsave(&tcp_socket_base_lock, context);

		ts = port_registry[index];
		state = ts ? ts->tcp_state : TCP_CLOSE;

		if (ts && ts->tcp_state != TCP_CLOSE) {
			saddr = ts->saddr;
			sport = ts->sport;
			daddr = ts->daddr;
			dport = ts->dport;
		}

		rtdm_lock_put_irqrestore(&tcp_socket_base_lock, context);

		if (state != TCP_CLOSE) {
			snprintf(sbuffer, sizeof(sbuffer), "%u.%u.%u.%u:%u",
				 NIPQUAD(saddr), ntohs(sport));
			snprintf(dbuffer, sizeof(dbuffer), "%u.%u.%u.%u:%u",
				 NIPQUAD(daddr), ntohs(dport));

			xnvfile_printf(it, "%04X %-23s %-23s %s\n",
				       sport & port_hash_mask, sbuffer, dbuffer,
				       rt_tcp_string_of_state(state));
		}
	}

	return 0;
}

static struct xnvfile_regular_ops rtnet_ipv4_tcp_vfile_ops = {
	.show = rtnet_ipv4_tcp_show,
};

static struct xnvfile_regular rtnet_ipv4_tcp_vfile = {
	.ops = &rtnet_ipv4_tcp_vfile_ops,
};

/***
 *  rt_tcp_proc_register
 */
static int __init rt_tcp_proc_register(void)
{
	return xnvfile_init_regular("tcp", &rtnet_ipv4_tcp_vfile,
				    &ipv4_proc_root);
}

/***
 *  rt_tcp_proc_unregister
 */
static void rt_tcp_proc_unregister(void)
{
	xnvfile_destroy_regular(&rtnet_ipv4_tcp_vfile);
}
#endif /* CONFIG_XENO_OPT_VFILE */

/***
 *  rt_tcp_init
 */
int __init rt_tcp_init(void)
{
	unsigned int skbs;
	int i;
	int ret;

	if ((tcp_auto_port_start < 0) ||
	    (tcp_auto_port_start >= 0x10000 - RT_TCP_SOCKETS))
		tcp_auto_port_start = 1024;
	tcp_auto_port_start =
		htons(tcp_auto_port_start & (tcp_auto_port_mask & 0xFFFF));
	tcp_auto_port_mask = htons(tcp_auto_port_mask | 0xFFFF0000);

	for (i = 0; i < ARRAY_SIZE(port_hash); i++)
		INIT_HLIST_HEAD(&port_hash[i]);

	/* Perform essential initialization of the RST|ACK socket */
	skbs = rt_bare_socket_init(rst_fd, IPPROTO_TCP, RT_TCP_RST_PRIO,
				   RT_TCP_RST_POOL_SIZE);
	if (skbs < RT_TCP_RST_POOL_SIZE)
		printk("rttcp: allocated only %d RST|ACK rtskbs\n", skbs);
	rst_socket.sock.prot.inet.tos = 0;
	rst_fd->refs = 1;
	rtdm_lock_init(&rst_socket.socket_lock);

	/*
	 * 100 ms forwarding timer with 8.38 ms slots
	 */
	ret = timerwheel_init(100000000ull, 23);
	if (ret < 0) {
		rtdm_printk("rttcp: can't initialize timerwheel task: %d\n",
			    -ret);
		goto out_1;
	}

#ifdef CONFIG_XENO_OPT_VFILE
	if ((ret = rt_tcp_proc_register()) < 0) {
		rtdm_printk("rttcp: can't initialize proc entry: %d\n", -ret);
		goto out_2;
	}
#endif /* CONFIG_XENO_OPT_VFILE */

	rt_inet_add_protocol(&tcp_protocol);

	ret = rtdm_dev_register(&tcp_device);
	if (ret < 0) {
		rtdm_printk("rttcp: can't register RT TCP: %d\n", -ret);
		goto out_3;
	}

	return ret;

out_3:
	rt_inet_del_protocol(&tcp_protocol);
#ifdef CONFIG_XENO_OPT_VFILE
	rt_tcp_proc_unregister();
#endif /* CONFIG_XENO_OPT_VFILE */

out_2:
	timerwheel_cleanup();

out_1:
	rt_bare_socket_cleanup(&rst_socket.sock);

	return ret;
}

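/*
  Editorial note: the second timerwheel_init() argument is assumed to be a
  slot-size shift in nanoseconds, i.e. 2^23 ns = 8388608 ns, which matches
  the "8.38 ms slots" mentioned above (a 100 ms wheel divided into ~12 such
  slots).
*/
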
/***
 *  rt_tcp_release
 */
void __exit rt_tcp_release(void)
{
	rt_inet_del_protocol(&tcp_protocol);

#ifdef CONFIG_XENO_OPT_VFILE
	rt_tcp_proc_unregister();
#endif /* CONFIG_XENO_OPT_VFILE */

	timerwheel_cleanup();

	rt_bare_socket_cleanup(&rst_socket.sock);

	rtdm_dev_unregister(&tcp_device);
}

module_init(rt_tcp_init);
module_exit(rt_tcp_release);