author     Robert-Ionut Alexa <robert-ionut.alexa@nxp.com>   2022-07-06 20:31:50 +0300
committer  Ioana Ciornei <ioana.ciornei@nxp.com>             2022-08-03 19:20:12 +0300
commit     65d47bbe663e751c042ad7c2516fa23f49c0efa4 (patch)
tree       4078c9554dd21576ccb3195b72a772748f308a4d /drivers
parent     30a74a3ff12becbd8cfeba934e1e397cea0e929e (diff)
net: dpaa2: AF_XDP TX zero copy support
Add support for packet processing on the Tx path using AF_XDP zero copy mode.

Signed-off-by: Robert-Ionut Alexa <robert-ionut.alexa@nxp.com>
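For context, and not part of this patch: a minimal userspace sketch, using the libxdp/libbpf xsk helpers, of how an application would request zero-copy mode on one queue of a dpaa2 interface. The interface name ("eth0"), queue id, ring sizes and the default UMEM configuration are assumptions made for illustration only; note that the patch below additionally requires the buffer pool's Tx headroom to cover the driver's tx_data_offset, so a real setup may need a non-default frame headroom.

/* Illustrative sketch only, not part of this patch. */
#include <sys/mman.h>
#include <linux/if_xdp.h>
#include <xdp/xsk.h>		/* or <bpf/xsk.h> with older libbpf */

#define NUM_FRAMES	4096
#define FRAME_SIZE	XSK_UMEM__DEFAULT_FRAME_SIZE

int main(void)
{
	struct xsk_ring_prod fill, tx;
	struct xsk_ring_cons comp, rx;
	struct xsk_umem *umem;
	struct xsk_socket *xsk;
	void *bufs;

	/* Memory backing the UMEM shared between userspace and the kernel */
	bufs = mmap(NULL, NUM_FRAMES * FRAME_SIZE, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (bufs == MAP_FAILED)
		return 1;

	if (xsk_umem__create(&umem, bufs, NUM_FRAMES * FRAME_SIZE,
			     &fill, &comp, NULL /* default config */))
		return 1;

	/* Ask for zero-copy explicitly; the bind fails if the driver
	 * cannot honor it on this queue.
	 */
	struct xsk_socket_config cfg = {
		.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY,
	};

	if (xsk_socket__create(&xsk, "eth0", 0 /* queue id */, umem,
			       &rx, &tx, &cfg))
		return 1;

	/* ... produce Tx descriptors; see the sketch after the diff ... */

	xsk_socket__delete(xsk);
	xsk_umem__delete(umem);
	return 0;
}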
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-trace.h    8
-rw-r--r--  drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c          56
-rw-r--r--  drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h          16
-rw-r--r--  drivers/net/ethernet/freescale/dpaa2/dpaa2-xsk.c          121
4 files changed, 187 insertions, 14 deletions
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-trace.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-trace.h
index 57792e284e40..9b43fadb9b11 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-trace.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-trace.h
@@ -73,6 +73,14 @@ DEFINE_EVENT(dpaa2_eth_fd, dpaa2_tx_fd,
TP_ARGS(netdev, fd)
);
+/* Tx (egress) XSK fd */
+DEFINE_EVENT(dpaa2_eth_fd, dpaa2_tx_xsk_fd,
+ TP_PROTO(struct net_device *netdev,
+ const struct dpaa2_fd *fd),
+
+ TP_ARGS(netdev, fd)
+);
+
/* Rx fd */
DEFINE_EVENT(dpaa2_eth_fd, dpaa2_rx_fd,
TP_PROTO(struct net_device *netdev,
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 453c4525f701..2858d449289b 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1134,9 +1134,10 @@ static int dpaa2_eth_build_single_fd(struct dpaa2_eth_priv *priv,
* This can be called either from dpaa2_eth_tx_conf() or on the error path of
* dpaa2_eth_tx().
*/
-static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv,
- struct dpaa2_eth_fq *fq,
- const struct dpaa2_fd *fd, bool in_napi)
+void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv,
+ struct dpaa2_eth_channel *ch,
+ struct dpaa2_eth_fq *fq,
+ const struct dpaa2_fd *fd, bool in_napi)
{
struct device *dev = priv->net_dev->dev.parent;
dma_addr_t fd_addr, sg_addr;
@@ -1164,7 +1165,8 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv,
skb_tail_pointer(skb) - buffer_start,
DMA_BIDIRECTIONAL);
} else {
- WARN_ONCE(swa->type != DPAA2_ETH_SWA_XDP, "Wrong SWA type");
+ WARN_ONCE(swa->type != DPAA2_ETH_SWA_XDP && swa->type != DPAA2_ETH_SWA_XSK,
+ "Wrong SWA type");
dma_unmap_single(dev, fd_addr, swa->xdp.dma_size,
DMA_BIDIRECTIONAL);
}
@@ -1220,6 +1222,11 @@ static void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv,
return;
}
+ if (swa->type == DPAA2_ETH_SWA_XSK) {
+ ch->xsk_frames_done++;
+ return;
+ }
+
if (swa->type != DPAA2_ETH_SWA_XDP && in_napi) {
fq->dq_frames++;
fq->dq_bytes += fd_len;
@@ -1400,7 +1407,7 @@ err_alloc_tso_hdr:
err_sgt_get:
/* Free all the other FDs that were already fully created */
for (i = 0; i < index; i++)
- dpaa2_eth_free_tx_fd(priv, NULL, &fd_start[i], false);
+ dpaa2_eth_free_tx_fd(priv, NULL, NULL, &fd_start[i], false);
return err;
}
@@ -1498,7 +1505,7 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb,
if (dpaa2_eth_ceetm_is_enabled(priv)) {
err = dpaa2_ceetm_classify(skb, net_dev->qdisc, &ch_id, &prio);
if (err) {
- dpaa2_eth_free_tx_fd(priv, fq, fd, false);
+ dpaa2_eth_free_tx_fd(priv, NULL, fq, fd, false);
percpu_stats->tx_dropped++;
return NETDEV_TX_OK;
}
@@ -1526,7 +1533,7 @@ static netdev_tx_t __dpaa2_eth_tx(struct sk_buff *skb,
if (unlikely(err < 0)) {
percpu_stats->tx_errors++;
/* Clean up everything, including freeing the skb */
- dpaa2_eth_free_tx_fd(priv, fq, fd, false);
+ dpaa2_eth_free_tx_fd(priv, NULL, fq, fd, false);
netdev_tx_completed_queue(nq, 1, fd_len);
} else {
percpu_stats->tx_packets += total_enqueued;
@@ -1619,7 +1626,7 @@ static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv,
/* Check frame errors in the FD field */
fd_errors = dpaa2_fd_get_ctrl(fd) & DPAA2_FD_TX_ERR_MASK;
- dpaa2_eth_free_tx_fd(priv, fq, fd, true);
+ dpaa2_eth_free_tx_fd(priv, ch, fq, fd, true);
if (likely(!fd_errors))
return;
@@ -1960,6 +1967,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
struct dpaa2_eth_fq *fq, *txc_fq = NULL;
struct netdev_queue *nq;
int store_cleaned, work_done;
+ bool work_done_zc = false;
struct list_head rx_list;
int retries = 0;
u16 flowid;
@@ -1969,6 +1977,11 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
ch->xdp.res = 0;
priv = ch->priv;
+ /* Tx ZC */
+ if (ch->xsk_zc)
+ work_done_zc = dpaa2_xsk_tx(priv, ch);
+
+ /* Rx or Tx conf slow path */
INIT_LIST_HEAD(&rx_list);
ch->rx_list = &rx_list;
@@ -1981,8 +1994,12 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
dpaa2_eth_refill_pool(priv, ch);
store_cleaned = dpaa2_eth_consume_frames(ch, &fq);
- if (store_cleaned <= 0)
- break;
+ if (store_cleaned <= 0) {
+ if (!work_done_zc)
+ break;
+ if (work_done_zc)
+ goto out;
+ }
if (fq->type == DPAA2_RX_FQ) {
rx_cleaned += store_cleaned;
flowid = fq->flowid;
@@ -1996,7 +2013,8 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
* or we reached the Tx confirmations threshold, we're done.
*/
if (rx_cleaned >= budget ||
- txconf_cleaned >= DPAA2_ETH_TXCONF_PER_NAPI) {
+ txconf_cleaned >= DPAA2_ETH_TXCONF_PER_NAPI ||
+ work_done_zc) {
work_done = budget;
goto out;
}
@@ -2024,6 +2042,11 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
out:
netif_receive_skb_list(ch->rx_list);
+ if (ch->xsk_zc && ch->xsk_pool && ch->xsk_frames_done) {
+ xsk_tx_completed(ch->xsk_pool, ch->xsk_frames_done);
+ ch->xsk_frames_done = 0;
+ }
+
if (txc_fq && txc_fq->dq_frames) {
nq = netdev_get_tx_queue(priv->net_dev, txc_fq->flowid);
netdev_tx_completed_queue(nq, txc_fq->dq_frames,
@@ -2037,7 +2060,10 @@ out:
else if (rx_cleaned && ch->xdp.res & XDP_TX)
dpaa2_eth_xdp_tx_flush(priv, ch, &priv->fq[flowid]);
- return work_done;
+ if (!ch->xsk_zc)
+ return work_done;
+ else
+ return work_done_zc ? budget : work_done;
}
static void dpaa2_eth_enable_ch_napi(struct dpaa2_eth_priv *priv)
@@ -3034,7 +3060,11 @@ static void dpaa2_eth_cdan_cb(struct dpaa2_io_notification_ctx *ctx)
/* Update NAPI statistics */
ch->stats.cdan++;
- napi_schedule(&ch->napi);
+ /* NAPI can also be scheduled from the AF_XDP Tx path. Mark it as
+ * missed so that it gets rescheduled.
+ */
+ if (!napi_if_scheduled_mark_missed(&ch->napi))
+ napi_schedule(&ch->napi);
}
/* Allocate and configure a DPCON object */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index baf3366e003c..e40f66576eda 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -53,6 +53,12 @@
*/
#define DPAA2_ETH_TXCONF_PER_NAPI 256
+/* Maximum number of Tx frames to be processed in a single NAPI
+ * call when AF_XDP is running. Bind it to DPAA2_ETH_TXCONF_PER_NAPI
+ * to maximize the throughput.
+ */
+#define DPAA2_ETH_TX_ZC_PER_NAPI DPAA2_ETH_TXCONF_PER_NAPI
+
/* Buffer quota per channel. We want to keep in check number of ingress frames
* in flight: for small sized frames, congestion group taildrop may kick in
* first; for large sizes, Rx FQ taildrop threshold will ensure only a
@@ -494,6 +500,7 @@ struct dpaa2_eth_channel {
int recycled_bufs_cnt;
bool xsk_zc;
+ int xsk_frames_done;
struct xsk_buff_pool *xsk_pool;
struct dpaa2_eth_buf_pool *bp;
};
@@ -530,7 +537,7 @@ struct dpaa2_eth_trap_data {
#define DPAA2_ETH_DEFAULT_COPYBREAK 512
-#define DPAA2_ETH_ENQUEUE_MAX_FDS 200
+#define DPAA2_ETH_ENQUEUE_MAX_FDS 256
struct dpaa2_eth_fds {
struct dpaa2_fd array[DPAA2_ETH_ENQUEUE_MAX_FDS];
};
@@ -831,4 +838,11 @@ int dpaa2_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
int dpaa2_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool,
u16 qid);
+void dpaa2_eth_free_tx_fd(struct dpaa2_eth_priv *priv,
+ struct dpaa2_eth_channel *ch,
+ struct dpaa2_eth_fq *fq,
+ const struct dpaa2_fd *fd, bool in_napi);
+bool dpaa2_xsk_tx(struct dpaa2_eth_priv *priv,
+ struct dpaa2_eth_channel *ch);
+
#endif /* __DPAA2_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-xsk.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-xsk.c
index 9d45ccf4f873..912cf041c552 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-xsk.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-xsk.c
@@ -172,6 +172,7 @@ static int dpaa2_xsk_disable_pool(struct net_device *dev, u16 qid)
priv->channel[qid]->xsk_zc = false;
priv->channel[qid]->xsk_pool = NULL;
+ priv->channel[qid]->xsk_frames_done = 0;
priv->channel[qid]->bp = priv->bp[DPAA2_ETH_DEFAULT_BP];
/* Restore Rx callback to slow path */
@@ -213,6 +214,12 @@ static int dpaa2_xsk_enable_pool(struct net_device *dev,
return -EOPNOTSUPP;
}
+ if (pool->tx_headroom < priv->tx_data_offset) {
+ netdev_err(dev, "Must reserve at least %d bytes of Tx headroom within the frame buffer\n",
+ priv->tx_data_offset);
+ return -EOPNOTSUPP;
+ }
+
up = netif_running(dev);
if (up)
dpaa2_eth_stop(dev);
@@ -294,5 +301,119 @@ int dpaa2_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16
int dpaa2_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
{
+ struct dpaa2_eth_priv *priv = netdev_priv(dev);
+ struct dpaa2_eth_channel *ch = priv->channel[qid];
+
+ if (!priv->link_state.up)
+ return -ENETDOWN;
+
+ if (!ch->xsk_zc)
+ return -EOPNOTSUPP;
+
+ if (!priv->xdp_prog)
+ return -ENXIO;
+
+ /* If NAPI is already scheduled, mark a miss so it will run again. This
+ * way we ensure that no wakeup calls are lost, even though it can lead
+ * to NAPI being rescheduled when the previous run did not consume the
+ * entire budget.
+ */
+ if (!napi_if_scheduled_mark_missed(&ch->napi))
+ napi_schedule(&ch->napi);
+
return 0;
}
+
+bool dpaa2_xsk_tx(struct dpaa2_eth_priv *priv,
+ struct dpaa2_eth_channel *ch)
+{
+ struct xdp_desc *xdp_descs = ch->xsk_pool->tx_descs;
+ int store_cleaned = 0, total_enqueued, enqueued;
+ struct dpaa2_eth_drv_stats *percpu_extras;
+ struct rtnl_link_stats64 *percpu_stats;
+ int bytes_sent = 0, batch, i, err;
+ struct dpaa2_eth_swa *swa;
+ bool work_done_zc = false;
+ int retries, max_retries;
+ struct dpaa2_eth_fq *fq;
+ struct dpaa2_fd *fds;
+ bool flush = false;
+ dma_addr_t addr;
+ void *vaddr;
+ u8 prio = 0;
+
+ percpu_stats = this_cpu_ptr(priv->percpu_stats);
+ percpu_extras = this_cpu_ptr(priv->percpu_extras);
+ fds = (this_cpu_ptr(priv->fd))->array;
+
+ /* Use the FQ with the same idx as the affine CPU */
+ fq = &priv->fq[ch->nctx.desired_cpu];
+
+ while (!work_done_zc) {
+ batch = xsk_tx_peek_release_desc_batch(ch->xsk_pool,
+ DPAA2_ETH_TX_ZC_PER_NAPI - store_cleaned);
+ if (!batch)
+ break;
+
+ for (i = 0; i < batch; i++) {
+ addr = xsk_buff_raw_get_dma(ch->xsk_pool, xdp_descs[i].addr);
+ vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
+ xsk_buff_raw_dma_sync_for_device(ch->xsk_pool, addr, xdp_descs[i].len);
+
+ /* Store the buffer type at the beginning of the frame
+ * (in the private data area) such that we can release it
+ * on Tx confirm
+ */
+ swa = (struct dpaa2_eth_swa *)vaddr;
+ swa->type = DPAA2_ETH_SWA_XSK;
+
+ /* Initialize FD fields */
+ memset(&fds[i], 0, sizeof(struct dpaa2_fd));
+ dpaa2_fd_set_addr(&fds[i], addr);
+ dpaa2_fd_set_offset(&fds[i], ch->xsk_pool->tx_headroom);
+ dpaa2_fd_set_len(&fds[i], xdp_descs[i].len);
+ dpaa2_fd_set_format(&fds[i], dpaa2_fd_single);
+ dpaa2_fd_set_ctrl(&fds[i], FD_CTRL_PTA);
+ bytes_sent += xdp_descs[i].len;
+
+ /* tracing point */
+ trace_dpaa2_tx_xsk_fd(priv->net_dev, &fds[i]);
+ }
+
+ /* Enqueue frames */
+ max_retries = batch * DPAA2_ETH_ENQUEUE_RETRIES;
+ total_enqueued = 0;
+ enqueued = 0;
+ retries = 0;
+ while (total_enqueued < batch && retries < max_retries) {
+ err = priv->enqueue(priv, fq, &fds[total_enqueued], prio,
+ batch - total_enqueued, &enqueued);
+ if (err == -EBUSY) {
+ retries++;
+ continue;
+ }
+
+ total_enqueued += enqueued;
+ }
+ percpu_extras->tx_portal_busy += retries;
+ store_cleaned += total_enqueued;
+
+ if (unlikely(err < 0)) {
+ for (i = total_enqueued; i < batch; i++)
+ dpaa2_eth_free_tx_fd(priv, ch, fq, &fds[i], false);
+ percpu_stats->tx_errors++;
+ } else {
+ percpu_stats->tx_packets += total_enqueued;
+ percpu_stats->tx_bytes += bytes_sent;
+ flush = true;
+ }
+
+ if (store_cleaned == DPAA2_ETH_TX_ZC_PER_NAPI)
+ work_done_zc = true;
+ }
+
+ if (flush)
+ xsk_tx_release(ch->xsk_pool);
+
+ return work_done_zc;
+}
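For context, and not part of this patch: a hedged sketch of the userspace half of this Tx path. The application places descriptors on the AF_XDP Tx ring and kicks the kernel with sendto(); that wakeup lands in dpaa2_xsk_wakeup() above, which schedules NAPI (or marks it missed), so dpaa2_xsk_tx() can drain the pool's Tx descriptors, and the completions signalled through xsk_tx_completed() show up on the completion ring. The helper name xsk_send_one() and the assumption that the socket, rings and a frame already written at UMEM offset 'addr' exist are purely illustrative.

/* Illustrative sketch only, not part of this patch. */
#include <sys/socket.h>
#include <xdp/xsk.h>		/* or <bpf/xsk.h> with older libbpf */

static void xsk_send_one(struct xsk_socket *xsk, struct xsk_ring_prod *tx,
			 struct xsk_ring_cons *comp, __u64 addr, __u32 len)
{
	__u32 idx;

	/* Place one descriptor on the Tx ring */
	if (xsk_ring_prod__reserve(tx, 1, &idx) != 1)
		return;
	xsk_ring_prod__tx_desc(tx, idx)->addr = addr;
	xsk_ring_prod__tx_desc(tx, idx)->len = len;
	xsk_ring_prod__submit(tx, 1);

	/* Kick the kernel; this ends up in dpaa2_xsk_wakeup(), which
	 * schedules (or marks missed) the channel's NAPI so that
	 * dpaa2_xsk_tx() can pick the descriptor up.
	 */
	if (xsk_ring_prod__needs_wakeup(tx))
		sendto(xsk_socket__fd(xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);

	/* Reap the completion the driver reports via xsk_tx_completed() */
	if (xsk_ring_cons__peek(comp, 1, &idx) == 1) {
		/* completed frame: *xsk_ring_cons__comp_addr(comp, idx) */
		xsk_ring_cons__release(comp, 1);
	}
}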