summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/netdev_features.h7
-rw-r--r--include/linux/netdevice.h1
-rw-r--r--include/linux/skbuff.h2
-rw-r--r--include/net/sctp/sctp.h4
-rw-r--r--include/net/sctp/structs.h5
-rw-r--r--net/core/ethtool.c1
-rw-r--r--net/core/skbuff.c3
-rw-r--r--net/sctp/Makefile3
-rw-r--r--net/sctp/input.c12
-rw-r--r--net/sctp/inqueue.c51
-rw-r--r--net/sctp/offload.c98
-rw-r--r--net/sctp/output.c363
-rw-r--r--net/sctp/protocol.c3
-rw-r--r--net/sctp/socket.c2
14 files changed, 429 insertions, 126 deletions
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index aa7b2400f98c..9c6c8ef2e9e7 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -53,8 +53,9 @@ enum {
* headers in software.
*/
NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */
+ NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */
/**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
- NETIF_F_GSO_TUNNEL_REMCSUM_BIT,
+ NETIF_F_GSO_SCTP_BIT,
NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */
NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */
@@ -128,6 +129,7 @@ enum {
#define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID)
#define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL)
#define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM)
+#define NETIF_F_GSO_SCTP __NETIF_F(GSO_SCTP)
#define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
#define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX)
#define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX)
@@ -166,7 +168,8 @@ enum {
NETIF_F_FSO)
/* List of features with software fallbacks. */
-#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO)
+#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO | \
+ NETIF_F_GSO_SCTP)
/*
* If one device supports one of these features, then enable them
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f45929ce8157..fa6df2699532 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4012,6 +4012,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT));
+ BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT));
return (features & feature) == feature;
}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index aa3f9d7e8d5c..dc0fca747c5e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -487,6 +487,8 @@ enum {
SKB_GSO_PARTIAL = 1 << 13,
SKB_GSO_TUNNEL_REMCSUM = 1 << 14,
+
+ SKB_GSO_SCTP = 1 << 15,
};
#if BITS_PER_LONG > 32
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index b392ac8382f2..632e205ca54b 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -186,6 +186,10 @@ void sctp_assocs_proc_exit(struct net *net);
int sctp_remaddr_proc_init(struct net *net);
void sctp_remaddr_proc_exit(struct net *net);
+/*
+ * sctp/offload.c
+ */
+int sctp_offload_init(void);
/*
* Module global variables
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 16b013a6191c..83c5ec58b93a 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -566,6 +566,9 @@ struct sctp_chunk {
/* This points to the sk_buff containing the actual data. */
struct sk_buff *skb;
+ /* In case of GSO packets, this will store the head one */
+ struct sk_buff *head_skb;
+
/* These are the SCTP headers by reverse order in a packet.
* Note that some of these may happen more than once. In that
* case, we point at the "current" one, whatever that means
@@ -696,6 +699,8 @@ struct sctp_packet {
size_t overhead;
/* This is the total size of all chunks INCLUDING padding. */
size_t size;
+ /* This is the maximum size this packet may have */
+ size_t max_size;
/* The packet is destined for this transport address.
* The function we finally use to pass down to the next lower
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f4034817d255..977489820eb9 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -89,6 +89,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
[NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
+ [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5ca562b56ec3..b6e0f95bef36 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -49,6 +49,7 @@
#include <linux/slab.h>
#include <linux/tcp.h>
#include <linux/udp.h>
+#include <linux/sctp.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NET_CLS_ACT
#include <net/pkt_sched.h>
@@ -4383,6 +4384,8 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
thlen += inner_tcp_hdrlen(skb);
} else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
thlen = tcp_hdrlen(skb);
+ } else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) {
+ thlen = sizeof(struct sctphdr);
}
/* UFO sets gso_size to the size of the fragmentation
* payload, i.e. the size of the L4 (UDP) header is already
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 0fca5824ad0e..6c4f7496cec6 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -11,7 +11,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
transport.o chunk.o sm_make_chunk.o ulpevent.o \
inqueue.o outqueue.o ulpqueue.o \
tsnmap.o bind_addr.o socket.o primitive.o \
- output.o input.o debug.o ssnmap.o auth.o
+ output.o input.o debug.o ssnmap.o auth.o \
+ offload.o
sctp_probe-y := probe.o
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 5cff2546c3dd..6f8e676d285e 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -139,7 +139,9 @@ int sctp_rcv(struct sk_buff *skb)
skb->csum_valid = 0; /* Previous value not applicable */
if (skb_csum_unnecessary(skb))
__skb_decr_checksum_unnecessary(skb);
- else if (!sctp_checksum_disable && sctp_rcv_checksum(net, skb) < 0)
+ else if (!sctp_checksum_disable &&
+ !(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
+ sctp_rcv_checksum(net, skb) < 0)
goto discard_it;
skb->csum_valid = 1;
@@ -1175,6 +1177,14 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
{
sctp_chunkhdr_t *ch;
+ /* We do not allow GSO frames here as we need to linearize and
+ * then cannot guarantee frame boundaries. This shouldn't be an
+ * issue as packets hitting this are mostly INIT or INIT-ACK and
+ * those cannot be on GSO-style anyway.
+ */
+ if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
+ return NULL;
+
if (skb_linearize(skb))
return NULL;
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 5ba08ceda3ab..edabbbdfca54 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -138,6 +138,17 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
if (chunk->singleton ||
chunk->end_of_packet ||
chunk->pdiscard) {
+ if (chunk->head_skb == chunk->skb) {
+ chunk->skb = skb_shinfo(chunk->skb)->frag_list;
+ goto new_skb;
+ }
+ if (chunk->skb->next) {
+ chunk->skb = chunk->skb->next;
+ goto new_skb;
+ }
+
+ if (chunk->head_skb)
+ chunk->skb = chunk->head_skb;
sctp_chunk_free(chunk);
chunk = queue->in_progress = NULL;
} else {
@@ -155,15 +166,15 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
next_chunk:
/* Is the queue empty? */
- if (list_empty(&queue->in_chunk_list))
+ entry = sctp_list_dequeue(&queue->in_chunk_list);
+ if (!entry)
return NULL;
- entry = queue->in_chunk_list.next;
chunk = list_entry(entry, struct sctp_chunk, list);
- list_del_init(entry);
/* Linearize if it's not GSO */
- if (skb_is_nonlinear(chunk->skb)) {
+ if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) != SKB_GSO_SCTP &&
+ skb_is_nonlinear(chunk->skb)) {
if (skb_linearize(chunk->skb)) {
__SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
sctp_chunk_free(chunk);
@@ -174,15 +185,39 @@ next_chunk:
chunk->sctp_hdr = sctp_hdr(chunk->skb);
}
+ if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) {
+ /* GSO-marked skbs but without frags, handle
+ * them normally
+ */
+ if (skb_shinfo(chunk->skb)->frag_list)
+ chunk->head_skb = chunk->skb;
+
+ /* skbs with "cover letter" */
+ if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len)
+ chunk->skb = skb_shinfo(chunk->skb)->frag_list;
+
+ if (WARN_ON(!chunk->skb)) {
+ __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
+ sctp_chunk_free(chunk);
+ goto next_chunk;
+ }
+ }
+
+ if (chunk->asoc)
+ sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb);
+
queue->in_progress = chunk;
+new_skb:
/* This is the first chunk in the packet. */
- chunk->singleton = 1;
ch = (sctp_chunkhdr_t *) chunk->skb->data;
+ chunk->singleton = 1;
chunk->data_accepted = 0;
-
- if (chunk->asoc)
- sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb);
+ chunk->pdiscard = 0;
+ chunk->auth = 0;
+ chunk->has_asconf = 0;
+ chunk->end_of_packet = 0;
+ chunk->ecn_ce_done = 0;
}
chunk->chunk_hdr = ch;
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
new file mode 100644
index 000000000000..a37887b373a7
--- /dev/null
+++ b/net/sctp/offload.c
@@ -0,0 +1,98 @@
+/*
+ * sctp_offload - GRO/GSO Offloading for SCTP
+ *
+ * Copyright (C) 2015, Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/socket.h>
+#include <linux/sctp.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/kfifo.h>
+#include <linux/time.h>
+#include <net/net_namespace.h>
+
+#include <linux/skbuff.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/checksum.h>
+#include <net/protocol.h>
+
+static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
+{
+ skb->ip_summed = CHECKSUM_NONE;
+ return sctp_compute_cksum(skb, skb_transport_offset(skb));
+}
+
+static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct sctphdr *sh;
+
+ sh = sctp_hdr(skb);
+ if (!pskb_may_pull(skb, sizeof(*sh)))
+ goto out;
+
+ __skb_pull(skb, sizeof(*sh));
+
+ if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+ /* Packet is from an untrusted source, reset gso_segs. */
+ struct skb_shared_info *pinfo = skb_shinfo(skb);
+ struct sk_buff *frag_iter;
+
+ pinfo->gso_segs = 0;
+ if (skb->len != skb->data_len) {
+ /* Means we have chunks in here too */
+ pinfo->gso_segs++;
+ }
+
+ skb_walk_frags(skb, frag_iter)
+ pinfo->gso_segs++;
+
+ segs = NULL;
+ goto out;
+ }
+
+ segs = skb_segment(skb, features | NETIF_F_HW_CSUM);
+ if (IS_ERR(segs))
+ goto out;
+
+ /* All that is left is update SCTP CRC if necessary */
+ if (!(features & NETIF_F_SCTP_CRC)) {
+ for (skb = segs; skb; skb = skb->next) {
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ sh = sctp_hdr(skb);
+ sh->checksum = sctp_gso_make_checksum(skb);
+ }
+ }
+ }
+
+out:
+ return segs;
+}
+
+static const struct net_offload sctp_offload = {
+ .callbacks = {
+ .gso_segment = sctp_gso_segment,
+ },
+};
+
+int __init sctp_offload_init(void)
+{
+ return inet_add_offload(&sctp_offload, IPPROTO_SCTP);
+}
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 9844fe573029..60499a69179d 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -84,18 +84,42 @@ static void sctp_packet_reset(struct sctp_packet *packet)
struct sctp_packet *sctp_packet_config(struct sctp_packet *packet,
__u32 vtag, int ecn_capable)
{
- struct sctp_chunk *chunk = NULL;
+ struct sctp_transport *tp = packet->transport;
+ struct sctp_association *asoc = tp->asoc;
pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
packet->vtag = vtag;
+ if (asoc && tp->dst) {
+ struct sock *sk = asoc->base.sk;
+
+ rcu_read_lock();
+ if (__sk_dst_get(sk) != tp->dst) {
+ dst_hold(tp->dst);
+ sk_setup_caps(sk, tp->dst);
+ }
+
+ if (sk_can_gso(sk)) {
+ struct net_device *dev = tp->dst->dev;
+
+ packet->max_size = dev->gso_max_size;
+ } else {
+ packet->max_size = asoc->pathmtu;
+ }
+ rcu_read_unlock();
+
+ } else {
+ packet->max_size = tp->pathmtu;
+ }
+
if (ecn_capable && sctp_packet_empty(packet)) {
- chunk = sctp_get_ecne_prepend(packet->transport->asoc);
+ struct sctp_chunk *chunk;
/* If there a is a prepend chunk stick it on the list before
* any other chunks get appended.
*/
+ chunk = sctp_get_ecne_prepend(asoc);
if (chunk)
sctp_packet_append_chunk(packet, chunk);
}
@@ -381,12 +405,15 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
struct sctp_transport *tp = packet->transport;
struct sctp_association *asoc = tp->asoc;
struct sctphdr *sh;
- struct sk_buff *nskb;
+ struct sk_buff *nskb = NULL, *head = NULL;
struct sctp_chunk *chunk, *tmp;
struct sock *sk;
int err = 0;
int padding; /* How much padding do we need? */
+ int pkt_size;
__u8 has_data = 0;
+ int gso = 0;
+ int pktcount = 0;
struct dst_entry *dst;
unsigned char *auth = NULL; /* pointer to auth in skb data */
@@ -400,18 +427,37 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
sk = chunk->skb->sk;
- /* Allocate the new skb. */
- nskb = alloc_skb(packet->size + MAX_HEADER, gfp);
- if (!nskb)
+ /* Allocate the head skb, or main one if not in GSO */
+ if (packet->size > tp->pathmtu && !packet->ipfragok) {
+ if (sk_can_gso(sk)) {
+ gso = 1;
+ pkt_size = packet->overhead;
+ } else {
+ /* If this happens, we trash this packet and try
+ * to build a new one, hopefully correct this
+ * time. Application may notice this error.
+ */
+ pr_err_once("Trying to GSO but underlying device doesn't support it.");
+ goto nomem;
+ }
+ } else {
+ pkt_size = packet->size;
+ }
+ head = alloc_skb(pkt_size + MAX_HEADER, gfp);
+ if (!head)
goto nomem;
+ if (gso) {
+ NAPI_GRO_CB(head)->last = head;
+ skb_shinfo(head)->gso_type = sk->sk_gso_type;
+ }
/* Make sure the outbound skb has enough header room reserved. */
- skb_reserve(nskb, packet->overhead + MAX_HEADER);
+ skb_reserve(head, packet->overhead + MAX_HEADER);
/* Set the owning socket so that we know where to get the
* destination IP address.
*/
- sctp_packet_set_owner_w(nskb, sk);
+ sctp_packet_set_owner_w(head, sk);
if (!sctp_transport_dst_check(tp)) {
sctp_transport_route(tp, NULL, sctp_sk(sk));
@@ -422,11 +468,11 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
dst = dst_clone(tp->dst);
if (!dst)
goto no_route;
- skb_dst_set(nskb, dst);
+ skb_dst_set(head, dst);
/* Build the SCTP header. */
- sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr));
- skb_reset_transport_header(nskb);
+ sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr));
+ skb_reset_transport_header(head);
sh->source = htons(packet->source_port);
sh->dest = htons(packet->destination_port);
@@ -441,90 +487,133 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
sh->vtag = htonl(packet->vtag);
sh->checksum = 0;
- /**
- * 6.10 Bundling
- *
- * An endpoint bundles chunks by simply including multiple
- * chunks in one outbound SCTP packet. ...
- */
-
- /**
- * 3.2 Chunk Field Descriptions
- *
- * The total length of a chunk (including Type, Length and
- * Value fields) MUST be a multiple of 4 bytes. If the length
- * of the chunk is not a multiple of 4 bytes, the sender MUST
- * pad the chunk with all zero bytes and this padding is not
- * included in the chunk length field. The sender should
- * never pad with more than 3 bytes.
- *
- * [This whole comment explains WORD_ROUND() below.]
- */
-
pr_debug("***sctp_transmit_packet***\n");
- list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
- list_del_init(&chunk->list);
- if (sctp_chunk_is_data(chunk)) {
- /* 6.3.1 C4) When data is in flight and when allowed
- * by rule C5, a new RTT measurement MUST be made each
- * round trip. Furthermore, new RTT measurements
- * SHOULD be made no more than once per round-trip
- * for a given destination transport address.
- */
+ do {
+ /* Set up convenience variables... */
+ chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
+ pktcount++;
- if (!chunk->resent && !tp->rto_pending) {
- chunk->rtt_in_progress = 1;
- tp->rto_pending = 1;
+ /* Calculate packet size, so it fits in PMTU. Leave
+ * other chunks for the next packets.
+ */
+ if (gso) {
+ pkt_size = packet->overhead;
+ list_for_each_entry(chunk, &packet->chunk_list, list) {
+ int padded = WORD_ROUND(chunk->skb->len);
+
+ if (pkt_size + padded > tp->pathmtu)
+ break;
+ pkt_size += padded;
}
- has_data = 1;
- }
+ /* Allocate a new skb. */
+ nskb = alloc_skb(pkt_size + MAX_HEADER, gfp);
+ if (!nskb)
+ goto nomem;
- padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
- if (padding)
- memset(skb_put(chunk->skb, padding), 0, padding);
+ /* Make sure the outbound skb has enough header
+ * room reserved.
+ */
+ skb_reserve(nskb, packet->overhead + MAX_HEADER);
+ } else {
+ nskb = head;
+ }
- /* if this is the auth chunk that we are adding,
- * store pointer where it will be added and put
- * the auth into the packet.
+ /**
+ * 3.2 Chunk Field Descriptions
+ *
+ * The total length of a chunk (including Type, Length and
+ * Value fields) MUST be a multiple of 4 bytes. If the length
+ * of the chunk is not a multiple of 4 bytes, the sender MUST
+ * pad the chunk with all zero bytes and this padding is not
+ * included in the chunk length field. The sender should
+ * never pad with more than 3 bytes.
+ *
+ * [This whole comment explains WORD_ROUND() below.]
*/
- if (chunk == packet->auth)
- auth = skb_tail_pointer(nskb);
- memcpy(skb_put(nskb, chunk->skb->len),
+ pkt_size -= packet->overhead;
+ list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
+ list_del_init(&chunk->list);
+ if (sctp_chunk_is_data(chunk)) {
+ /* 6.3.1 C4) When data is in flight and when allowed
+ * by rule C5, a new RTT measurement MUST be made each
+ * round trip. Furthermore, new RTT measurements
+ * SHOULD be made no more than once per round-trip
+ * for a given destination transport address.
+ */
+
+ if (!chunk->resent && !tp->rto_pending) {
+ chunk->rtt_in_progress = 1;
+ tp->rto_pending = 1;
+ }
+
+ has_data = 1;
+ }
+
+ padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
+ if (padding)
+ memset(skb_put(chunk->skb, padding), 0, padding);
+
+ /* if this is the auth chunk that we are adding,
+ * store pointer where it will be added and put
+ * the auth into the packet.
+ */
+ if (chunk == packet->auth)
+ auth = skb_tail_pointer(nskb);
+
+ memcpy(skb_put(nskb, chunk->skb->len),
chunk->skb->data, chunk->skb->len);
- pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, "
- "rtt_in_progress:%d\n", chunk,
- sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)),
- chunk->has_tsn ? "TSN" : "No TSN",
- chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0,
- ntohs(chunk->chunk_hdr->length), chunk->skb->len,
- chunk->rtt_in_progress);
-
- /*
- * If this is a control chunk, this is our last
- * reference. Free data chunks after they've been
- * acknowledged or have failed.
- */
- if (!sctp_chunk_is_data(chunk))
- sctp_chunk_free(chunk);
- }
+ pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n",
+ chunk,
+ sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)),
+ chunk->has_tsn ? "TSN" : "No TSN",
+ chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0,
+ ntohs(chunk->chunk_hdr->length), chunk->skb->len,
+ chunk->rtt_in_progress);
+
+ /* If this is a control chunk, this is our last
+ * reference. Free data chunks after they've been
+ * acknowledged or have failed.
+ * Re-queue auth chunks if needed.
+ */
+ pkt_size -= WORD_ROUND(chunk->skb->len);
- /* SCTP-AUTH, Section 6.2
- * The sender MUST calculate the MAC as described in RFC2104 [2]
- * using the hash function H as described by the MAC Identifier and
- * the shared association key K based on the endpoint pair shared key
- * described by the shared key identifier. The 'data' used for the
- * computation of the AUTH-chunk is given by the AUTH chunk with its
- * HMAC field set to zero (as shown in Figure 6) followed by all
- * chunks that are placed after the AUTH chunk in the SCTP packet.
- */
- if (auth)
- sctp_auth_calculate_hmac(asoc, nskb,
- (struct sctp_auth_chunk *)auth,
- gfp);
+ if (chunk == packet->auth && !list_empty(&packet->chunk_list))
+ list_add(&chunk->list, &packet->chunk_list);
+ else if (!sctp_chunk_is_data(chunk))
+ sctp_chunk_free(chunk);
+
+ if (!pkt_size)
+ break;
+ }
+
+ /* SCTP-AUTH, Section 6.2
+ * The sender MUST calculate the MAC as described in RFC2104 [2]
+ * using the hash function H as described by the MAC Identifier and
+ * the shared association key K based on the endpoint pair shared key
+ * described by the shared key identifier. The 'data' used for the
+ * computation of the AUTH-chunk is given by the AUTH chunk with its
+ * HMAC field set to zero (as shown in Figure 6) followed by all
+ * chunks that are placed after the AUTH chunk in the SCTP packet.
+ */
+ if (auth)
+ sctp_auth_calculate_hmac(asoc, nskb,
+ (struct sctp_auth_chunk *)auth,
+ gfp);
+
+ if (!gso)
+ break;
+
+ if (skb_gro_receive(&head, nskb))
+ goto nomem;
+ nskb = NULL;
+ if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
+ sk->sk_gso_max_segs))
+ goto nomem;
+ } while (!list_empty(&packet->chunk_list));
/* 2) Calculate the Adler-32 checksum of the whole packet,
* including the SCTP common header and all the
@@ -532,16 +621,18 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
*
* Note: Adler-32 is no longer applicable, as has been replaced
* by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
+ *
+ * If it's a GSO packet, it's postponed to sctp_skb_segment.
*/
- if (!sctp_checksum_disable) {
- if (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
- (dst_xfrm(dst) != NULL) || packet->ipfragok) {
- sh->checksum = sctp_compute_cksum(nskb, 0);
+ if (!sctp_checksum_disable || gso) {
+ if (!gso && (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
+ dst_xfrm(dst) || packet->ipfragok)) {
+ sh->checksum = sctp_compute_cksum(head, 0);
} else {
/* no need to seed pseudo checksum for SCTP */
- nskb->ip_summed = CHECKSUM_PARTIAL;
- nskb->csum_start = skb_transport_header(nskb) - nskb->head;
- nskb->csum_offset = offsetof(struct sctphdr, checksum);
+ head->ip_summed = CHECKSUM_PARTIAL;
+ head->csum_start = skb_transport_header(head) - head->head;
+ head->csum_offset = offsetof(struct sctphdr, checksum);
}
}
@@ -557,7 +648,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
* Note: The works for IPv6 layer checks this bit too later
* in transmission. See IP6_ECN_flow_xmit().
*/
- tp->af_specific->ecn_capable(nskb->sk);
+ tp->af_specific->ecn_capable(sk);
/* Set up the IP options. */
/* BUG: not implemented
@@ -566,7 +657,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
/* Dump that on IP! */
if (asoc) {
- asoc->stats.opackets++;
+ asoc->stats.opackets += pktcount;
if (asoc->peer.last_sent_to != tp)
/* Considering the multiple CPU scenario, this is a
* "correcter" place for last_sent_to. --xguo
@@ -589,16 +680,36 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
}
}
- pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len);
+ pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len);
+
+ if (gso) {
+ /* Cleanup our debris for IP stacks */
+ memset(head->cb, 0, max(sizeof(struct inet_skb_parm),
+ sizeof(struct inet6_skb_parm)));
- nskb->ignore_df = packet->ipfragok;
- tp->af_specific->sctp_xmit(nskb, tp);
+ skb_shinfo(head)->gso_segs = pktcount;
+ skb_shinfo(head)->gso_size = GSO_BY_FRAGS;
+
+ /* We have to refresh this in case we are xmiting to
+ * more than one transport at a time
+ */
+ rcu_read_lock();
+ if (__sk_dst_get(sk) != tp->dst) {
+ dst_hold(tp->dst);
+ sk_setup_caps(sk, tp->dst);
+ }
+ rcu_read_unlock();
+ }
+ head->ignore_df = packet->ipfragok;
+ tp->af_specific->sctp_xmit(head, tp);
out:
sctp_packet_reset(packet);
return err;
no_route:
- kfree_skb(nskb);
+ kfree_skb(head);
+ if (nskb != head)
+ kfree_skb(nskb);
if (asoc)
IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
@@ -751,39 +862,63 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
struct sctp_chunk *chunk,
u16 chunk_len)
{
- size_t psize;
- size_t pmtu;
- int too_big;
+ size_t psize, pmtu;
sctp_xmit_t retval = SCTP_XMIT_OK;
psize = packet->size;
- pmtu = ((packet->transport->asoc) ?
- (packet->transport->asoc->pathmtu) :
- (packet->transport->pathmtu));
-
- too_big = (psize + chunk_len > pmtu);
+ if (packet->transport->asoc)
+ pmtu = packet->transport->asoc->pathmtu;
+ else
+ pmtu = packet->transport->pathmtu;
/* Decide if we need to fragment or resubmit later. */
- if (too_big) {
- /* It's OK to fragmet at IP level if any one of the following
+ if (psize + chunk_len > pmtu) {
+ /* It's OK to fragment at IP level if any one of the following
* is true:
- * 1. The packet is empty (meaning this chunk is greater
- * the MTU)
- * 2. The chunk we are adding is a control chunk
- * 3. The packet doesn't have any data in it yet and data
- * requires authentication.
+ * 1. The packet is empty (meaning this chunk is greater
+ * the MTU)
+ * 2. The packet doesn't have any data in it yet and data
+ * requires authentication.
*/
- if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) ||
+ if (sctp_packet_empty(packet) ||
(!packet->has_data && chunk->auth)) {
/* We no longer do re-fragmentation.
* Just fragment at the IP layer, if we
* actually hit this condition
*/
packet->ipfragok = 1;
- } else {
- retval = SCTP_XMIT_PMTU_FULL;
+ goto out;
}
+
+ /* It is also okay to fragment if the chunk we are
+ * adding is a control chunk, but only if current packet
+ * is not a GSO one otherwise it causes fragmentation of
+ * a large frame. So in this case we allow the
+ * fragmentation by forcing it to be in a new packet.
+ */
+ if (!sctp_chunk_is_data(chunk) && packet->has_data)
+ retval = SCTP_XMIT_PMTU_FULL;
+
+ if (psize + chunk_len > packet->max_size)
+ /* Hit GSO/PMTU limit, gotta flush */
+ retval = SCTP_XMIT_PMTU_FULL;
+
+ if (!packet->transport->burst_limited &&
+ psize + chunk_len > (packet->transport->cwnd >> 1))
+ /* Do not allow a single GSO packet to use more
+ * than half of cwnd.
+ */
+ retval = SCTP_XMIT_PMTU_FULL;
+
+ if (packet->transport->burst_limited &&
+ psize + chunk_len > (packet->transport->burst_limited >> 1))
+ /* Do not allow a single GSO packet to use more
+ * than half of original cwnd.
+ */
+ retval = SCTP_XMIT_PMTU_FULL;
+ /* Otherwise it will fit in the GSO packet */
}
+out:
return retval;
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index d3d50daa248b..40022ee885d7 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1516,6 +1516,9 @@ static __init int sctp_init(void)
if (status)
goto err_v6_add_protocol;
+ if (sctp_offload_init() < 0)
+ pr_crit("%s: Cannot add SCTP protocol offload\n", __func__);
+
out:
return status;
err_v6_add_protocol:
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 67154b848aa9..712fb2339baa 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4003,6 +4003,8 @@ static int sctp_init_sock(struct sock *sk)
return -ESOCKTNOSUPPORT;
}
+ sk->sk_gso_type = SKB_GSO_SCTP;
+
/* Initialize default send parameters. These parameters can be
* modified with the SCTP_DEFAULT_SEND_PARAM socket option.
*/