summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/amso1100/Kconfig2
-rw-r--r--drivers/infiniband/hw/amso1100/c2_qp.c1
-rw-r--r--drivers/infiniband/hw/amso1100/c2_vq.c2
-rw-r--r--drivers/infiniband/hw/cxgb3/Kconfig2
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c8
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_wr.h3
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c128
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.h1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c7
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c7
-rw-r--r--drivers/infiniband/hw/ehca/Kconfig2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_av.c10
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h164
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes_pSeries.h156
-rw-r--r--drivers/infiniband/hw/ehca/ehca_cq.c62
-rw-r--r--drivers/infiniband/hw/ehca/ehca_eq.c11
-rw-r--r--drivers/infiniband/hw/ehca/ehca_hca.c97
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c250
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.h1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_iverbs.h25
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c181
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mrmw.c1356
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mrmw.h23
-rw-r--r--drivers/infiniband/hw/ehca/ehca_pd.c26
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qes.h22
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qp.c876
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c100
-rw-r--r--drivers/infiniband/hw/ehca/ehca_tools.h45
-rw-r--r--drivers/infiniband/hw/ehca/ehca_uverbs.c25
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.c111
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.h1
-rw-r--r--drivers/infiniband/hw/ehca/hcp_phyp.c2
-rw-r--r--drivers/infiniband/hw/ehca/hipz_fns_core.h4
-rw-r--r--drivers/infiniband/hw/ehca/hipz_hw.h41
-rw-r--r--drivers/infiniband/hw/ehca/ipz_pt_fn.c222
-rw-r--r--drivers/infiniband/hw/ehca/ipz_pt_fn.h60
-rw-r--r--drivers/infiniband/hw/ipath/Kconfig2
-rw-r--r--drivers/infiniband/hw/ipath/Makefile1
-rw-r--r--drivers/infiniband/hw/ipath/ipath_common.h36
-rw-r--r--drivers/infiniband/hw/ipath/ipath_cq.c7
-rw-r--r--drivers/infiniband/hw/ipath/ipath_debug.h2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_diag.c42
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c199
-rw-r--r--drivers/infiniband/hw/ipath/ipath_eeprom.c303
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c205
-rw-r--r--drivers/infiniband/hw/ipath/ipath_fs.c9
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6110.c101
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6120.c108
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c31
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c196
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h98
-rw-r--r--drivers/infiniband/hw/ipath/ipath_keys.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_layer.c365
-rw-r--r--drivers/infiniband/hw/ipath/ipath_layer.h71
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mad.c11
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mmap.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mr.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c19
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c116
-rw-r--r--drivers/infiniband/hw/ipath/ipath_registers.h2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c38
-rw-r--r--drivers/infiniband/hw/ipath/ipath_srq.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_stats.c79
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sysfs.c43
-rw-r--r--drivers/infiniband/hw/ipath/ipath_uc.c9
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ud.c6
-rw-r--r--drivers/infiniband/hw/ipath/ipath_user_pages.c28
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c31
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h9
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs_mcast.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_wc_ppc64.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_wc_x86_64.c29
-rw-r--r--drivers/infiniband/hw/mlx4/Kconfig1
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c2
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c2
-rw-r--r--drivers/infiniband/hw/mlx4/main.c6
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h4
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c305
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c18
-rw-r--r--drivers/infiniband/hw/mthca/Kconfig2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_allocator.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_eq.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c22
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c221
-rw-r--r--drivers/infiniband/hw/mthca/mthca_srq.c28
-rw-r--r--drivers/infiniband/hw/mthca/mthca_wqe.h15
87 files changed, 4162 insertions, 2715 deletions
diff --git a/drivers/infiniband/hw/amso1100/Kconfig b/drivers/infiniband/hw/amso1100/Kconfig
index 809cb14ac6de..e6ce5f209e47 100644
--- a/drivers/infiniband/hw/amso1100/Kconfig
+++ b/drivers/infiniband/hw/amso1100/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_AMSO1100
tristate "Ammasso 1100 HCA support"
- depends on PCI && INET && INFINIBAND
+ depends on PCI && INET
---help---
This is a low-level driver for the Ammasso 1100 host
channel adapter (HCA).
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
index 420c1380f5c3..01d07862ea86 100644
--- a/drivers/infiniband/hw/amso1100/c2_qp.c
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -506,6 +506,7 @@ int c2_alloc_qp(struct c2_dev *c2dev,
qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
+ init_waitqueue_head(&qp->wait);
/* Initialize the SQ MQ */
q_size = be32_to_cpu(reply->sq_depth);
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c
index 36620a22413c..cfdacb1ec279 100644
--- a/drivers/infiniband/hw/amso1100/c2_vq.c
+++ b/drivers/infiniband/hw/amso1100/c2_vq.c
@@ -85,7 +85,7 @@ int vq_init(struct c2_dev *c2dev)
(char) ('0' + c2dev->devnum));
c2dev->host_msg_cache =
kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0,
- SLAB_HWCACHE_ALIGN, NULL, NULL);
+ SLAB_HWCACHE_ALIGN, NULL);
if (c2dev->host_msg_cache == NULL) {
return -ENOMEM;
}
diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig
index 77977f55dca3..2acec3fadf69 100644
--- a/drivers/infiniband/hw/cxgb3/Kconfig
+++ b/drivers/infiniband/hw/cxgb3/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_CXGB3
tristate "Chelsio RDMA Driver"
- depends on CHELSIO_T3 && INFINIBAND && INET
+ depends on CHELSIO_T3 && INET
select GENERIC_ALLOCATOR
---help---
This is an iWARP/RDMA driver for the Chelsio T3 1GbE and
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 76049afc7655..beb2a381467f 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -144,7 +144,7 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
}
wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
memset(wqe, 0, sizeof(*wqe));
- build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 1, qpid, 7);
+ build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7);
wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
sge_cmd = qpid << 8 | 3;
wqe->sge_cmd = cpu_to_be64(sge_cmd);
@@ -548,7 +548,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
memset(wqe, 0, sizeof(*wqe));
- build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 1,
+ build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
T3_CTL_QP_TID, 7);
wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
@@ -833,7 +833,7 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
wqe->ird = cpu_to_be32(attr->ird);
wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
- wqe->rsvd = 0;
+ wqe->irs = cpu_to_be32(attr->irs);
skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */
return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
}
@@ -916,7 +916,7 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p)
PDBG("%s opening rnic dev %s\n", __FUNCTION__, rdev_p->dev_name);
memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
if (!rdev_p->t3cdev_p)
- rdev_p->t3cdev_p = T3CDEV(netdev_p);
+ rdev_p->t3cdev_p = dev2t3cdev(netdev_p);
rdev_p->t3cdev_p->ulp = (void *) rdev_p;
err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
&(rdev_p->rnic_info));
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index ff7290eacefb..c84d4ac49355 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -294,6 +294,7 @@ struct t3_rdma_init_attr {
u64 qp_dma_addr;
u32 qp_dma_size;
u32 flags;
+ u32 irs;
};
struct t3_rdma_init_wr {
@@ -314,7 +315,7 @@ struct t3_rdma_init_wr {
__be32 ird;
__be64 qp_dma_addr; /* 7 */
__be32 qp_dma_size; /* 8 */
- u32 rsvd;
+ u32 irs;
};
struct t3_genbit {
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index b2faff5abce8..1cdfcd43b0bc 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -139,7 +139,7 @@ static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
skb->priority = CPL_PRIORITY_SETUP;
- tdev->send(tdev, skb);
+ cxgb3_ofld_send(tdev, skb);
return;
}
@@ -161,7 +161,7 @@ int iwch_quiesce_tid(struct iwch_ep *ep)
req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
skb->priority = CPL_PRIORITY_DATA;
- ep->com.tdev->send(ep->com.tdev, skb);
+ cxgb3_ofld_send(ep->com.tdev, skb);
return 0;
}
@@ -183,7 +183,7 @@ int iwch_resume_tid(struct iwch_ep *ep)
req->val = 0;
skb->priority = CPL_PRIORITY_DATA;
- ep->com.tdev->send(ep->com.tdev, skb);
+ cxgb3_ofld_send(ep->com.tdev, skb);
return 0;
}
@@ -229,9 +229,8 @@ static void *alloc_ep(int size, gfp_t gfp)
{
struct iwch_ep_common *epc;
- epc = kmalloc(size, gfp);
+ epc = kzalloc(size, gfp);
if (epc) {
- memset(epc, 0, size);
kref_init(&epc->kref);
spin_lock_init(&epc->lock);
init_waitqueue_head(&epc->waitq);
@@ -254,8 +253,6 @@ static void release_ep_resources(struct iwch_ep *ep)
cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
dst_release(ep->dst);
l2t_release(L2DATA(ep->com.tdev), ep->l2t);
- if (ep->com.tdev->type == T3B)
- release_tid(ep->com.tdev, ep->hwtid, NULL);
put_ep(&ep->com);
}
@@ -515,7 +512,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
req->len = htonl(len);
req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
V_TX_SNDBUF(snd_win>>15));
- req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT);
+ req->flags = htonl(F_TX_INIT);
req->sndseq = htonl(ep->snd_seq);
BUG_ON(ep->mpa_skb);
ep->mpa_skb = skb;
@@ -566,7 +563,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
req->len = htonl(mpalen);
req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
V_TX_SNDBUF(snd_win>>15));
- req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT);
+ req->flags = htonl(F_TX_INIT);
req->sndseq = htonl(ep->snd_seq);
BUG_ON(ep->mpa_skb);
ep->mpa_skb = skb;
@@ -618,7 +615,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
req->len = htonl(len);
req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
V_TX_SNDBUF(snd_win>>15));
- req->flags = htonl(F_TX_MORE | F_TX_IMM_ACK | F_TX_INIT);
+ req->flags = htonl(F_TX_INIT);
req->sndseq = htonl(ep->snd_seq);
ep->mpa_skb = skb;
state_set(&ep->com, MPA_REP_SENT);
@@ -641,6 +638,7 @@ static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);
ep->snd_seq = ntohl(req->snd_isn);
+ ep->rcv_seq = ntohl(req->rcv_isn);
set_emss(ep, ntohs(req->tcp_opt));
@@ -786,7 +784,7 @@ static int update_rx_credits(struct iwch_ep *ep, u32 credits)
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
skb->priority = CPL_PRIORITY_ACK;
- ep->com.tdev->send(ep->com.tdev, skb);
+ cxgb3_ofld_send(ep->com.tdev, skb);
return credits;
}
@@ -1023,6 +1021,9 @@ static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
skb_pull(skb, sizeof(*hdr));
skb_trim(skb, dlen);
+ ep->rcv_seq += dlen;
+ BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));
+
switch (state_read(&ep->com)) {
case MPA_REQ_SENT:
process_mpa_reply(ep, skb);
@@ -1060,7 +1061,6 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct iwch_ep *ep = ctx;
struct cpl_wr_ack *hdr = cplhdr(skb);
unsigned int credits = ntohs(hdr->credits);
- enum iwch_qp_attr_mask mask;
PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits);
@@ -1072,30 +1072,6 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
ep->mpa_skb = NULL;
dst_confirm(ep->dst);
if (state_read(&ep->com) == MPA_REP_SENT) {
- struct iwch_qp_attributes attrs;
-
- /* bind QP to EP and move to RTS */
- attrs.mpa_attr = ep->mpa_attr;
- attrs.max_ird = ep->ord;
- attrs.max_ord = ep->ord;
- attrs.llp_stream_handle = ep;
- attrs.next_state = IWCH_QP_STATE_RTS;
-
- /* bind QP and TID with INIT_WR */
- mask = IWCH_QP_ATTR_NEXT_STATE |
- IWCH_QP_ATTR_LLP_STREAM_HANDLE |
- IWCH_QP_ATTR_MPA_ATTR |
- IWCH_QP_ATTR_MAX_IRD |
- IWCH_QP_ATTR_MAX_ORD;
-
- ep->com.rpl_err = iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, mask, &attrs, 1);
-
- if (!ep->com.rpl_err) {
- state_set(&ep->com, FPDU_MODE);
- established_upcall(ep);
- }
-
ep->com.rpl_done = 1;
PDBG("waking up ep %p\n", ep);
wake_up(&ep->com.waitq);
@@ -1124,6 +1100,15 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
return CPL_RET_BUF_DONE;
}
+/*
+ * Return whether a failed active open has allocated a TID
+ */
+static inline int act_open_has_tid(int status)
+{
+ return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
+ status != CPL_ERR_ARP_MISS;
+}
+
static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
{
struct iwch_ep *ep = ctx;
@@ -1133,7 +1118,7 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
status2errno(rpl->status));
connect_reply_upcall(ep, status2errno(rpl->status));
state_set(&ep->com, DEAD);
- if (ep->com.tdev->type == T3B)
+ if (ep->com.tdev->type == T3B && act_open_has_tid(rpl->status))
release_tid(ep->com.tdev, GET_TID(rpl), NULL);
cxgb3_free_atid(ep->com.tdev, ep->atid);
dst_release(ep->dst);
@@ -1167,7 +1152,7 @@ static int listen_start(struct iwch_listen_ep *ep)
req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
skb->priority = 1;
- ep->com.tdev->send(ep->com.tdev, skb);
+ cxgb3_ofld_send(ep->com.tdev, skb);
return 0;
}
@@ -1201,7 +1186,7 @@ static int listen_stop(struct iwch_listen_ep *ep)
req->cpu_idx = 0;
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
skb->priority = 1;
- ep->com.tdev->send(ep->com.tdev, skb);
+ cxgb3_ofld_send(ep->com.tdev, skb);
return 0;
}
@@ -1279,7 +1264,7 @@ static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
rpl->opt2 = 0;
rpl->rsvd = rpl->opt2;
- tdev->send(tdev, skb);
+ cxgb3_ofld_send(tdev, skb);
}
}
@@ -1378,6 +1363,7 @@ static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
PDBG("%s ep %p\n", __FUNCTION__, ep);
ep->snd_seq = ntohl(req->snd_isn);
+ ep->rcv_seq = ntohl(req->rcv_isn);
set_emss(ep, ntohs(req->tcp_opt));
@@ -1485,6 +1471,13 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
int ret;
int state;
+ if (is_neg_adv_abort(req->status)) {
+ PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep,
+ ep->hwtid);
+ t3_l2t_send_event(ep->com.tdev, ep->l2t);
+ return CPL_RET_BUF_DONE;
+ }
+
/*
* We get 2 peer aborts from the HW. The first one must
* be ignored except for scribbling that we need one more.
@@ -1494,13 +1487,6 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
return CPL_RET_BUF_DONE;
}
- if (is_neg_adv_abort(req->status)) {
- PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep,
- ep->hwtid);
- t3_l2t_send_event(ep->com.tdev, ep->l2t);
- return CPL_RET_BUF_DONE;
- }
-
state = state_read(&ep->com);
PDBG("%s ep %p state %u\n", __FUNCTION__, ep, state);
switch (state) {
@@ -1571,7 +1557,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
rpl->cmd = CPL_ABORT_NO_RST;
- ep->com.tdev->send(ep->com.tdev, rpl_skb);
+ cxgb3_ofld_send(ep->com.tdev, rpl_skb);
if (state != ABORTING) {
state_set(&ep->com, DEAD);
release_ep_resources(ep);
@@ -1732,10 +1718,8 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid);
- if (state_read(&ep->com) == DEAD) {
- put_ep(&ep->com);
+ if (state_read(&ep->com) == DEAD)
return -ECONNRESET;
- }
BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
BUG_ON(!qp);
@@ -1755,17 +1739,8 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->ird = conn_param->ird;
ep->ord = conn_param->ord;
PDBG("%s %d ird %d ord %d\n", __FUNCTION__, __LINE__, ep->ird, ep->ord);
+
get_ep(&ep->com);
- err = send_mpa_reply(ep, conn_param->private_data,
- conn_param->private_data_len);
- if (err) {
- ep->com.cm_id = NULL;
- ep->com.qp = NULL;
- cm_id->rem_ref(cm_id);
- abort_connection(ep, NULL, GFP_KERNEL);
- put_ep(&ep->com);
- return err;
- }
/* bind QP to EP and move to RTS */
attrs.mpa_attr = ep->mpa_attr;
@@ -1783,16 +1758,28 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
err = iwch_modify_qp(ep->com.qp->rhp,
ep->com.qp, mask, &attrs, 1);
+ if (err)
+ goto err;
- if (err) {
- ep->com.cm_id = NULL;
- ep->com.qp = NULL;
- cm_id->rem_ref(cm_id);
- abort_connection(ep, NULL, GFP_KERNEL);
- } else {
- state_set(&ep->com, FPDU_MODE);
- established_upcall(ep);
- }
+ err = send_mpa_reply(ep, conn_param->private_data,
+ conn_param->private_data_len);
+ if (err)
+ goto err;
+
+ /* wait for wr_ack */
+ wait_event(ep->com.waitq, ep->com.rpl_done);
+ err = ep->com.rpl_err;
+ if (err)
+ goto err;
+
+ state_set(&ep->com, FPDU_MODE);
+ established_upcall(ep);
+ put_ep(&ep->com);
+ return 0;
+err:
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ cm_id->rem_ref(cm_id);
put_ep(&ep->com);
return err;
}
@@ -1926,6 +1913,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
fail3:
cxgb3_free_stid(ep->com.tdev, ep->stid);
fail2:
+ cm_id->rem_ref(cm_id);
put_ep(&ep->com);
fail1:
out:
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index 21a388c313cf..6107e7cd9b57 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -175,6 +175,7 @@ struct iwch_ep {
unsigned int atid;
u32 hwtid;
u32 snd_seq;
+ u32 rcv_seq;
struct l2t_entry *l2t;
struct dst_entry *dst;
struct sk_buff *mpa_skb;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index e7c2c3948037..f0c777589374 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1163,9 +1163,10 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.post_recv = iwch_post_receive;
- dev->ibdev.iwcm =
- (struct iw_cm_verbs *) kmalloc(sizeof(struct iw_cm_verbs),
- GFP_KERNEL);
+ dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
+ if (!dev->ibdev.iwcm)
+ return -ENOMEM;
+
dev->ibdev.iwcm->connect = iwch_connect;
dev->ibdev.iwcm->accept = iwch_accept_cr;
dev->ibdev.iwcm->reject = iwch_reject_cr;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 714dddbc9a98..dd89b6b91f9c 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -628,9 +628,9 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
/* immediate data starts here. */
term = (struct terminate_message *)wqe->send.sgl;
build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
- build_fw_riwrh((void *)wqe, T3_WR_SEND,
- T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 1,
- qhp->ep->hwtid, 5);
+ wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) |
+ V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
+ wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid));
skb->priority = CPL_PRIORITY_DATA;
return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
}
@@ -732,6 +732,7 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
init_attr.qp_dma_addr = qhp->wq.dma_addr;
init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
+ init_attr.irs = qhp->ep->rcv_seq;
PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
"flags 0x%x qpcaps 0x%x\n", __FUNCTION__,
init_attr.rq_addr, init_attr.rq_size,
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig
index 1a854598e0e6..59f807d8d58e 100644
--- a/drivers/infiniband/hw/ehca/Kconfig
+++ b/drivers/infiniband/hw/ehca/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_EHCA
tristate "eHCA support"
- depends on IBMEBUS && INFINIBAND
+ depends on IBMEBUS
---help---
This driver supports the IBM pSeries eHCA InfiniBand adapter.
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c
index 0d6e2c4bb245..97d108634c58 100644
--- a/drivers/infiniband/hw/ehca/ehca_av.c
+++ b/drivers/infiniband/hw/ehca/ehca_av.c
@@ -79,7 +79,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
av->av.ipd = (ah_mult > 0) ?
((ehca_mult - 1) / ah_mult) : 0;
} else
- av->av.ipd = ehca_static_rate;
+ av->av.ipd = ehca_static_rate;
av->av.lnh = ah_attr->ah_flags;
av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6);
@@ -118,7 +118,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
}
memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
}
- av->av.pmtu = EHCA_MAX_MTU;
+ av->av.pmtu = shca->max_mtu;
/* dgid comes in grh.word_3 */
memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
@@ -137,6 +137,8 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
struct ehca_av *av;
struct ehca_ud_av new_ehca_av;
struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+ struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca,
+ ib_device);
u32 cur_pid = current->tgid;
if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
@@ -192,7 +194,7 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
}
- new_ehca_av.pmtu = EHCA_MAX_MTU;
+ new_ehca_av.pmtu = shca->max_mtu;
memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
sizeof(ah_attr->grh.dgid));
@@ -257,7 +259,7 @@ int ehca_init_av_cache(void)
av_cache = kmem_cache_create("ehca_cache_av",
sizeof(struct ehca_av), 0,
SLAB_HWCACHE_ALIGN,
- NULL, NULL);
+ NULL);
if (!av_cache)
return -ENOMEM;
return 0;
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 1d286d3cc2d5..b5e960305316 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -5,6 +5,7 @@
*
* Authors: Heiko J Schick <schickhj@de.ibm.com>
* Christoph Raisch <raisch@de.ibm.com>
+ * Joachim Fenkes <fenkes@de.ibm.com>
*
* Copyright (c) 2005 IBM Corporation
*
@@ -42,7 +43,6 @@
#ifndef __EHCA_CLASSES_H__
#define __EHCA_CLASSES_H__
-
struct ehca_module;
struct ehca_qp;
struct ehca_cq;
@@ -86,13 +86,24 @@ struct ehca_eq {
struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
};
+struct ehca_sma_attr {
+ u16 lid, lmc, sm_sl, sm_lid;
+ u16 pkey_tbl_len, pkeys[16];
+};
+
struct ehca_sport {
struct ib_cq *ibcq_aqp1;
struct ib_qp *ibqp_aqp1;
enum ib_rate rate;
enum ib_port_state port_state;
+ struct ehca_sma_attr saved_attr;
};
+#define HCA_CAP_MR_PGSIZE_4K 1
+#define HCA_CAP_MR_PGSIZE_64K 2
+#define HCA_CAP_MR_PGSIZE_1M 4
+#define HCA_CAP_MR_PGSIZE_16M 8
+
struct ehca_shca {
struct ib_device ib_device;
struct ibmebus_dev *ibmebus_dev;
@@ -107,17 +118,36 @@ struct ehca_shca {
struct ehca_pd *pd;
struct h_galpas galpas;
struct mutex modify_mutex;
+ u64 hca_cap;
+ /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
+ u32 hca_cap_mr_pgsize;
+ int max_mtu;
};
struct ehca_pd {
struct ib_pd ib_pd;
struct ipz_pd fw_pd;
u32 ownpid;
+ /* small queue mgmt */
+ struct mutex lock;
+ struct list_head free[2];
+ struct list_head full[2];
+};
+
+enum ehca_ext_qp_type {
+ EQPT_NORMAL = 0,
+ EQPT_LLQP = 1,
+ EQPT_SRQBASE = 2,
+ EQPT_SRQ = 3,
};
struct ehca_qp {
- struct ib_qp ib_qp;
+ union {
+ struct ib_qp ib_qp;
+ struct ib_srq ib_srq;
+ };
u32 qp_type;
+ enum ehca_ext_qp_type ext_type;
struct ipz_queue ipz_squeue;
struct ipz_queue ipz_rqueue;
struct h_galpas galpas;
@@ -140,6 +170,10 @@ struct ehca_qp {
u32 mm_count_galpa;
};
+#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
+#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ)
+#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE)
+
/* must be power of 2 */
#define QP_HASHTAB_LEN 8
@@ -156,8 +190,8 @@ struct ehca_cq {
spinlock_t cb_lock;
struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
struct list_head entry;
- u32 nr_callbacks; /* #events assigned to cpu by scaling code */
- u32 nr_events; /* #events seen */
+ u32 nr_callbacks; /* #events assigned to cpu by scaling code */
+ atomic_t nr_events; /* #events seen */
wait_queue_head_t wait_completion;
spinlock_t task_lock;
u32 ownpid;
@@ -180,11 +214,12 @@ struct ehca_mr {
spinlock_t mrlock;
enum ehca_mr_flag flags;
- u32 num_pages; /* number of MR pages */
- u32 num_4k; /* number of 4k "page" portions to form MR */
+ u32 num_kpages; /* number of kernel pages */
+ u32 num_hwpages; /* number of hw pages to form MR */
+ u64 hwpage_size; /* hw page size used for this MR */
int acl; /* ACL (stored here for usage in reregister) */
u64 *start; /* virtual start address (stored here for */
- /* usage in reregister) */
+ /* usage in reregister) */
u64 size; /* size (stored here for usage in reregister) */
u32 fmr_page_size; /* page size for FMR */
u32 fmr_max_pages; /* max pages for FMR */
@@ -193,9 +228,6 @@ struct ehca_mr {
/* fw specific data */
struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */
struct h_galpas galpas;
- /* data for userspace bridge */
- u32 nr_of_pages;
- void *pagearray;
};
struct ehca_mw {
@@ -217,26 +249,30 @@ enum ehca_mr_pgi_type {
struct ehca_mr_pginfo {
enum ehca_mr_pgi_type type;
- u64 num_pages;
- u64 page_cnt;
- u64 num_4k; /* number of 4k "page" portions */
- u64 page_4k_cnt; /* counter for 4k "page" portions */
- u64 next_4k; /* next 4k "page" portion in buffer/chunk/listelem */
-
- /* type EHCA_MR_PGI_PHYS section */
- int num_phys_buf;
- struct ib_phys_buf *phys_buf_array;
- u64 next_buf;
-
- /* type EHCA_MR_PGI_USER section */
- struct ib_umem *region;
- struct ib_umem_chunk *next_chunk;
- u64 next_nmap;
-
- /* type EHCA_MR_PGI_FMR section */
- u64 *page_list;
- u64 next_listelem;
- /* next_4k also used within EHCA_MR_PGI_FMR */
+ u64 num_kpages;
+ u64 kpage_cnt;
+ u64 hwpage_size; /* hw page size used for this MR */
+ u64 num_hwpages; /* number of hw pages */
+ u64 hwpage_cnt; /* counter for hw pages */
+ u64 next_hwpage; /* next hw page in buffer/chunk/listelem */
+
+ union {
+ struct { /* type EHCA_MR_PGI_PHYS section */
+ int num_phys_buf;
+ struct ib_phys_buf *phys_buf_array;
+ u64 next_buf;
+ } phy;
+ struct { /* type EHCA_MR_PGI_USER section */
+ struct ib_umem *region;
+ struct ib_umem_chunk *next_chunk;
+ u64 next_nmap;
+ } usr;
+ struct { /* type EHCA_MR_PGI_FMR section */
+ u64 fmr_pgsize;
+ u64 *page_list;
+ u64 next_listelem;
+ } fmr;
+ } u;
};
/* output parameters for MR/FMR hipz calls */
@@ -274,10 +310,11 @@ int ehca_init_av_cache(void);
void ehca_cleanup_av_cache(void);
int ehca_init_mrmw_cache(void);
void ehca_cleanup_mrmw_cache(void);
+int ehca_init_small_qp_cache(void);
+void ehca_cleanup_small_qp_cache(void);
-extern spinlock_t ehca_qp_idr_lock;
-extern spinlock_t ehca_cq_idr_lock;
-extern spinlock_t hcall_lock;
+extern rwlock_t ehca_qp_idr_lock;
+extern rwlock_t ehca_cq_idr_lock;
extern struct idr ehca_qp_idr;
extern struct idr ehca_cq_idr;
@@ -285,6 +322,7 @@ extern int ehca_static_rate;
extern int ehca_port_act_time;
extern int ehca_use_hp_mr;
extern int ehca_scaling_code;
+extern int ehca_mr_largepage;
struct ipzu_queue_resp {
u32 qe_size; /* queue entry size */
@@ -292,7 +330,7 @@ struct ipzu_queue_resp {
u32 queue_length; /* queue length allocated in bytes */
u32 pagesize;
u32 toggle_state;
- u32 dummy; /* padding for 8 byte alignment */
+ u32 offset; /* save offset within a page for small_qp */
};
struct ehca_create_cq_resp {
@@ -305,6 +343,7 @@ struct ehca_create_qp_resp {
u32 qp_num;
u32 token;
u32 qp_type;
+ u32 ext_type;
u32 qkey;
/* qp_num assigned by ehca: sqp0/1 may have got different numbers */
u32 real_qp_num;
@@ -320,28 +359,59 @@ struct ehca_alloc_cq_parms {
struct ipz_eq_handle eq_handle;
};
+enum ehca_service_type {
+ ST_RC = 0,
+ ST_UC = 1,
+ ST_RD = 2,
+ ST_UD = 3,
+};
+
+enum ehca_ll_comp_flags {
+ LLQP_SEND_COMP = 0x20,
+ LLQP_RECV_COMP = 0x40,
+ LLQP_COMP_MASK = 0x60,
+};
+
+struct ehca_alloc_queue_parms {
+ /* input parameters */
+ int max_wr;
+ int max_sge;
+ int page_size;
+ int is_small;
+
+ /* output parameters */
+ u16 act_nr_wqes;
+ u8 act_nr_sges;
+ u32 queue_size; /* bytes for small queues, pages otherwise */
+};
+
struct ehca_alloc_qp_parms {
- int servicetype;
+ struct ehca_alloc_queue_parms squeue;
+ struct ehca_alloc_queue_parms rqueue;
+
+ /* input parameters */
+ enum ehca_service_type servicetype;
+ int qp_storage;
int sigtype;
- int daqp_ctrl;
- int max_send_sge;
- int max_recv_sge;
+ enum ehca_ext_qp_type ext_type;
+ enum ehca_ll_comp_flags ll_comp_flags;
int ud_av_l_key_ctl;
- u16 act_nr_send_wqes;
- u16 act_nr_recv_wqes;
- u8 act_nr_recv_sges;
- u8 act_nr_send_sges;
+ u32 token;
+ struct ipz_eq_handle eq_handle;
+ struct ipz_pd pd;
+ struct ipz_cq_handle send_cq_handle, recv_cq_handle;
- u32 nr_rq_pages;
- u32 nr_sq_pages;
+ u32 srq_qpn, srq_token, srq_limit;
- struct ipz_eq_handle ipz_eq_handle;
- struct ipz_pd pd;
+ /* output parameters */
+ u32 real_qp_num;
+ struct ipz_qp_handle qp_handle;
+ struct h_galpas galpas;
};
int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
-struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
+struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
index 5665f213b81a..1798e6466bd0 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
@@ -154,83 +154,83 @@ struct hcp_modify_qp_control_block {
u32 reserved_70_127[58]; /* 70 */
};
-#define MQPCB_MASK_QKEY EHCA_BMASK_IBM(0,0)
-#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM(2,2)
-#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM(3,3)
-#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM(4,4)
-#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM(5,5)
-#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM(6,6)
-#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM(7,7)
-#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM(8,8)
-#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM(9,9)
-#define MQPCB_QP_STATE EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11,11)
-#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12,12)
-#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13,13)
-#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14,14)
-#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15,15)
-#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16,16)
-#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17,17)
-#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18,18)
-#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19,19)
-#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20,20)
-#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21,21)
-#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22,22)
-#define MQPCB_DLID EHCA_BMASK_IBM(16,31)
-#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23,23)
-#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29,31)
-#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24,24)
-#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25,31)
-#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25,25)
-#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26,26)
-#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27,27)
-#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28,28)
-#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12,31)
-#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30,30)
-#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31,31)
-#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28,31)
-#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32,32)
-#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31,31)
-#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33,33)
-#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31)
-#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34,34)
-#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27,31)
-#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35,35)
-#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36,36)
-#define MQPCB_DLID_AL EHCA_BMASK_IBM(16,31)
-#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37,37)
-#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31)
-#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38,38)
-#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25,31)
-#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39,39)
-#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40,40)
-#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41,41)
-#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24,31)
-#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42,42)
-#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12,31)
-#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44,44)
-#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45,45)
-#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16,31)
-#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46,46)
-#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16,31)
-#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47,47)
-#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31,31)
-#define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31)
-#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48)
-#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31)
-#define MQPCB_MASK_CURR_SQR_LIMIT EHCA_BMASK_IBM(49,49)
-#define MQPCB_CURR_SQR_LIMIT EHCA_BMASK_IBM(15,31)
-#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50)
-#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51)
+#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0)
+#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2)
+#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3)
+#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4)
+#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5)
+#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6)
+#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7)
+#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8)
+#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9)
+#define MQPCB_QP_STATE EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11)
+#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12)
+#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13)
+#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14)
+#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15)
+#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16)
+#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17)
+#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18)
+#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19)
+#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20)
+#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21)
+#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22)
+#define MQPCB_DLID EHCA_BMASK_IBM(16, 31)
+#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23)
+#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29, 31)
+#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24)
+#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25, 31)
+#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25)
+#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26)
+#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27)
+#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28)
+#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12, 31)
+#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30)
+#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31)
+#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28, 31)
+#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32)
+#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31, 31)
+#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33)
+#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31)
+#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34)
+#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27, 31)
+#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35)
+#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36)
+#define MQPCB_DLID_AL EHCA_BMASK_IBM(16, 31)
+#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37)
+#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31)
+#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38)
+#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25, 31)
+#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39)
+#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40)
+#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41)
+#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42)
+#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12, 31)
+#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44)
+#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45)
+#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31)
+#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46)
+#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31)
+#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47)
+#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31, 31)
+#define MQPCB_QP_NUMBER EHCA_BMASK_IBM( 8, 31)
+#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48)
+#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31, 31)
+#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49)
+#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16, 31)
+#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50)
+#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51)
#endif /* __EHCA_CLASSES_PSERIES_H__ */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 67f0670fe3b1..81aff36101ba 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -56,11 +56,11 @@ int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
{
unsigned int qp_num = qp->real_qp_num;
unsigned int key = qp_num & (QP_HASHTAB_LEN-1);
- unsigned long spl_flags;
+ unsigned long flags;
- spin_lock_irqsave(&cq->spinlock, spl_flags);
+ spin_lock_irqsave(&cq->spinlock, flags);
hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]);
- spin_unlock_irqrestore(&cq->spinlock, spl_flags);
+ spin_unlock_irqrestore(&cq->spinlock, flags);
ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
cq->cq_number, qp_num);
@@ -74,9 +74,9 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
struct hlist_node *iter;
struct ehca_qp *qp;
- unsigned long spl_flags;
+ unsigned long flags;
- spin_lock_irqsave(&cq->spinlock, spl_flags);
+ spin_lock_irqsave(&cq->spinlock, flags);
hlist_for_each(iter, &cq->qp_hashtab[key]) {
qp = hlist_entry(iter, struct ehca_qp, list_entries);
if (qp->real_qp_num == real_qp_num) {
@@ -88,7 +88,7 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
break;
}
}
- spin_unlock_irqrestore(&cq->spinlock, spl_flags);
+ spin_unlock_irqrestore(&cq->spinlock, flags);
if (ret)
ehca_err(cq->ib_cq.device,
"qp not found cq_num=%x real_qp_num=%x",
@@ -97,7 +97,7 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
return ret;
}
-struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
+struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
{
struct ehca_qp *ret = NULL;
unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
@@ -146,6 +146,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
spin_lock_init(&my_cq->spinlock);
spin_lock_init(&my_cq->cb_lock);
spin_lock_init(&my_cq->task_lock);
+ atomic_set(&my_cq->nr_events, 0);
init_waitqueue_head(&my_cq->wait_completion);
my_cq->ownpid = current->tgid;
@@ -162,9 +163,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
goto create_cq_exit1;
}
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ write_lock_irqsave(&ehca_cq_idr_lock, flags);
ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
} while (ret == -EAGAIN);
@@ -189,8 +190,8 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
goto create_cq_exit2;
}
- ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages,
- EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0);
+ ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages,
+ EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0);
if (!ipz_rc) {
ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p",
ipz_rc, device);
@@ -284,7 +285,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
return cq;
create_cq_exit4:
- ipz_queue_dtor(&my_cq->ipz_queue);
+ ipz_queue_dtor(NULL, &my_cq->ipz_queue);
create_cq_exit3:
h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
@@ -293,9 +294,9 @@ create_cq_exit3:
"cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret);
create_cq_exit2:
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ write_lock_irqsave(&ehca_cq_idr_lock, flags);
idr_remove(&ehca_cq_idr, my_cq->token);
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
create_cq_exit1:
kmem_cache_free(cq_cache, my_cq);
@@ -303,16 +304,6 @@ create_cq_exit1:
return cq;
}
-static int get_cq_nr_events(struct ehca_cq *my_cq)
-{
- int ret;
- unsigned long flags;
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
- ret = my_cq->nr_events;
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
- return ret;
-}
-
int ehca_destroy_cq(struct ib_cq *cq)
{
u64 h_ret;
@@ -339,17 +330,18 @@ int ehca_destroy_cq(struct ib_cq *cq)
}
}
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
- while (my_cq->nr_events) {
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
- wait_event(my_cq->wait_completion, !get_cq_nr_events(my_cq));
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
- /* recheck nr_events to assure no cqe has just arrived */
- }
-
+ /*
+ * remove the CQ from the idr first to make sure
+ * no more interrupt tasklets will touch this CQ
+ */
+ write_lock_irqsave(&ehca_cq_idr_lock, flags);
idr_remove(&ehca_cq_idr, my_cq->token);
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+ /* now wait until all pending events have completed */
+ wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events));
+ /* nobody's using our CQ any longer -- we can destroy it */
h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
if (h_ret == H_R_STATE) {
/* cq in err: read err data and destroy it forcibly */
@@ -367,7 +359,7 @@ int ehca_destroy_cq(struct ib_cq *cq)
"ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
return ehca2ib_return_code(h_ret);
}
- ipz_queue_dtor(&my_cq->ipz_queue);
+ ipz_queue_dtor(NULL, &my_cq->ipz_queue);
kmem_cache_free(cq_cache, my_cq);
return 0;
@@ -395,7 +387,7 @@ int ehca_init_cq_cache(void)
cq_cache = kmem_cache_create("ehca_cache_cq",
sizeof(struct ehca_cq), 0,
SLAB_HWCACHE_ALIGN,
- NULL, NULL);
+ NULL);
if (!cq_cache)
return -ENOMEM;
return 0;
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 4961eb88827c..1d41faa7a337 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -86,8 +86,8 @@ int ehca_create_eq(struct ehca_shca *shca,
return -EINVAL;
}
- ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages,
- EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0);
+ ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages,
+ EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0);
if (!ret) {
ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
goto create_eq_exit1;
@@ -96,7 +96,8 @@ int ehca_create_eq(struct ehca_shca *shca,
for (i = 0; i < nr_pages; i++) {
u64 rpage;
- if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) {
+ vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
+ if (!vpage) {
ret = H_RESOURCE;
goto create_eq_exit2;
}
@@ -144,7 +145,7 @@ int ehca_create_eq(struct ehca_shca *shca,
return 0;
create_eq_exit2:
- ipz_queue_dtor(&eq->ipz_queue);
+ ipz_queue_dtor(NULL, &eq->ipz_queue);
create_eq_exit1:
hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
@@ -180,7 +181,7 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
ehca_err(&shca->ib_device, "Can't free EQ resources.");
return -EINVAL;
}
- ipz_queue_dtor(&eq->ipz_queue);
+ ipz_queue_dtor(NULL, &eq->ipz_queue);
return 0;
}
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 32b55a4f0e5b..cf22472d9414 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -45,11 +45,25 @@
int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
{
- int ret = 0;
+ int i, ret = 0;
struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
ib_device);
struct hipz_query_hca *rblock;
+ static const u32 cap_mapping[] = {
+ IB_DEVICE_RESIZE_MAX_WR, HCA_CAP_WQE_RESIZE,
+ IB_DEVICE_BAD_PKEY_CNTR, HCA_CAP_BAD_P_KEY_CTR,
+ IB_DEVICE_BAD_QKEY_CNTR, HCA_CAP_Q_KEY_VIOL_CTR,
+ IB_DEVICE_RAW_MULTI, HCA_CAP_RAW_PACKET_MCAST,
+ IB_DEVICE_AUTO_PATH_MIG, HCA_CAP_AUTO_PATH_MIG,
+ IB_DEVICE_CHANGE_PHY_PORT, HCA_CAP_SQD_RTS_PORT_CHANGE,
+ IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK,
+ IB_DEVICE_CURR_QP_STATE_MOD, HCA_CAP_CUR_QP_STATE_MOD,
+ IB_DEVICE_SHUTDOWN_PORT, HCA_CAP_SHUTDOWN_PORT,
+ IB_DEVICE_INIT_TYPE, HCA_CAP_INIT_TYPE,
+ IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT,
+ };
+
rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
@@ -79,9 +93,13 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
props->max_pd = min_t(int, rblock->max_pd, INT_MAX);
props->max_ah = min_t(int, rblock->max_ah, INT_MAX);
props->max_fmr = min_t(int, rblock->max_mr, INT_MAX);
- props->max_srq = 0;
- props->max_srq_wr = 0;
- props->max_srq_sge = 0;
+
+ if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
+ props->max_srq = props->max_qp;
+ props->max_srq_wr = props->max_qp_wr;
+ props->max_srq_sge = 3;
+ }
+
props->max_pkeys = 16;
props->local_ca_ack_delay
= rblock->local_ca_ack_delay;
@@ -96,6 +114,13 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
props->max_total_mcast_qp_attach
= min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX);
+ /* translate device capabilities */
+ props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID |
+ IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ;
+ for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2)
+ if (rblock->hca_cap_indicators & cap_mapping[i + 1])
+ props->device_cap_flags |= cap_mapping[i];
+
query_device1:
ehca_free_fw_ctrlblock(rblock);
@@ -106,6 +131,7 @@ int ehca_query_port(struct ib_device *ibdev,
u8 port, struct ib_port_attr *props)
{
int ret = 0;
+ u64 h_ret;
struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
ib_device);
struct hipz_query_port *rblock;
@@ -116,7 +142,8 @@ int ehca_query_port(struct ib_device *ibdev,
return -ENOMEM;
}
- if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+ h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
+ if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "Can't query port properties");
ret = -EINVAL;
goto query_port1;
@@ -172,12 +199,50 @@ query_port1:
return ret;
}
+int ehca_query_sma_attr(struct ehca_shca *shca,
+ u8 port, struct ehca_sma_attr *attr)
+{
+ int ret = 0;
+ u64 h_ret;
+ struct hipz_query_port *rblock;
+
+ rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
+ if (!rblock) {
+ ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+ return -ENOMEM;
+ }
+
+ h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "Can't query port properties");
+ ret = -EINVAL;
+ goto query_sma_attr1;
+ }
+
+ memset(attr, 0, sizeof(struct ehca_sma_attr));
+
+ attr->lid = rblock->lid;
+ attr->lmc = rblock->lmc;
+ attr->sm_sl = rblock->sm_sl;
+ attr->sm_lid = rblock->sm_lid;
+
+ attr->pkey_tbl_len = rblock->pkey_tbl_len;
+ memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys));
+
+query_sma_attr1:
+ ehca_free_fw_ctrlblock(rblock);
+
+ return ret;
+}
+
int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
int ret = 0;
- struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device);
+ u64 h_ret;
+ struct ehca_shca *shca;
struct hipz_query_port *rblock;
+ shca = container_of(ibdev, struct ehca_shca, ib_device);
if (index > 16) {
ehca_err(&shca->ib_device, "Invalid index: %x.", index);
return -EINVAL;
@@ -189,7 +254,8 @@ int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
return -ENOMEM;
}
- if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+ h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
+ if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "Can't query port properties");
ret = -EINVAL;
goto query_pkey1;
@@ -207,6 +273,7 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port,
int index, union ib_gid *gid)
{
int ret = 0;
+ u64 h_ret;
struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
ib_device);
struct hipz_query_port *rblock;
@@ -222,7 +289,8 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port,
return -ENOMEM;
}
- if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+ h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
+ if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "Can't query port properties");
ret = -EINVAL;
goto query_gid1;
@@ -247,11 +315,12 @@ int ehca_modify_port(struct ib_device *ibdev,
struct ib_port_modify *props)
{
int ret = 0;
- struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device);
+ struct ehca_shca *shca;
struct hipz_query_port *rblock;
u32 cap;
u64 hret;
+ shca = container_of(ibdev, struct ehca_shca, ib_device);
if ((props->set_port_cap_mask | props->clr_port_cap_mask)
& ~allowed_port_caps) {
ehca_err(&shca->ib_device, "Non-changeable bits set in masks "
@@ -261,7 +330,7 @@ int ehca_modify_port(struct ib_device *ibdev,
}
if (mutex_lock_interruptible(&shca->modify_mutex))
- return -ERESTARTSYS;
+ return -ERESTARTSYS;
rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
@@ -270,7 +339,8 @@ int ehca_modify_port(struct ib_device *ibdev,
goto modify_port1;
}
- if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+ hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
+ if (hret != H_SUCCESS) {
ehca_err(&shca->ib_device, "Can't query port properties");
ret = -EINVAL;
goto modify_port2;
@@ -282,7 +352,8 @@ int ehca_modify_port(struct ib_device *ibdev,
hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
cap, props->init_type, port_modify_mask);
if (hret != H_SUCCESS) {
- ehca_err(&shca->ib_device, "Modify port failed hret=%lx", hret);
+ ehca_err(&shca->ib_device, "Modify port failed hret=%lx",
+ hret);
ret = -EINVAL;
}
@@ -290,7 +361,7 @@ modify_port2:
ehca_free_fw_ctrlblock(rblock);
modify_port1:
- mutex_unlock(&shca->modify_mutex);
+ mutex_unlock(&shca->modify_mutex);
return ret;
}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 100329ba3343..a925ea52443f 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -5,6 +5,8 @@
*
* Authors: Heiko J Schick <schickhj@de.ibm.com>
* Khadija Souissi <souissi@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Joachim Fenkes <fenkes@de.ibm.com>
*
* Copyright (c) 2005 IBM Corporation
*
@@ -47,25 +49,26 @@
#include "hipz_fns.h"
#include "ipz_pt_fn.h"
-#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1)
-#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM(8,31)
-#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM(2,7)
-#define EQE_CQ_NUMBER EHCA_BMASK_IBM(8,31)
-#define EQE_QP_NUMBER EHCA_BMASK_IBM(8,31)
-#define EQE_QP_TOKEN EHCA_BMASK_IBM(32,63)
-#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32,63)
+#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1)
+#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31)
+#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7)
+#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31)
+#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31)
+#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63)
+#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63)
-#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1)
-#define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7)
-#define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15)
-#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16)
+#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1)
+#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7)
+#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15)
+#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
+#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16)
-#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63)
-#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7)
+#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63)
+#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7)
static void queue_comp_task(struct ehca_cq *__cq);
-static struct ehca_comp_pool* pool;
+static struct ehca_comp_pool *pool;
#ifdef CONFIG_HOTPLUG_CPU
static struct notifier_block comp_pool_callback_nb;
#endif
@@ -82,8 +85,8 @@ static inline void comp_event_callback(struct ehca_cq *cq)
return;
}
-static void print_error_data(struct ehca_shca * shca, void* data,
- u64* rblock, int length)
+static void print_error_data(struct ehca_shca *shca, void *data,
+ u64 *rblock, int length)
{
u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
u64 resource = rblock[1];
@@ -91,7 +94,7 @@ static void print_error_data(struct ehca_shca * shca, void* data,
switch (type) {
case 0x1: /* Queue Pair */
{
- struct ehca_qp *qp = (struct ehca_qp*)data;
+ struct ehca_qp *qp = (struct ehca_qp *)data;
/* only print error data if AER is set */
if (rblock[6] == 0)
@@ -104,7 +107,7 @@ static void print_error_data(struct ehca_shca * shca, void* data,
}
case 0x4: /* Completion Queue */
{
- struct ehca_cq *cq = (struct ehca_cq*)data;
+ struct ehca_cq *cq = (struct ehca_cq *)data;
ehca_err(&shca->ib_device,
"CQ 0x%x (resource=%lx) has errors.",
@@ -113,7 +116,7 @@ static void print_error_data(struct ehca_shca * shca, void* data,
}
default:
ehca_err(&shca->ib_device,
- "Unknown errror type: %lx on %s.",
+ "Unknown error type: %lx on %s.",
type, shca->ib_device.name);
break;
}
@@ -172,33 +175,55 @@ error_data1:
}
-static void qp_event_callback(struct ehca_shca *shca,
- u64 eqe,
+static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
enum ib_event_type event_type)
{
struct ib_event event;
+
+ event.device = &shca->ib_device;
+ event.event = event_type;
+
+ if (qp->ext_type == EQPT_SRQ) {
+ if (!qp->ib_srq.event_handler)
+ return;
+
+ event.element.srq = &qp->ib_srq;
+ qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
+ } else {
+ if (!qp->ib_qp.event_handler)
+ return;
+
+ event.element.qp = &qp->ib_qp;
+ qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+ }
+}
+
+static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
+ enum ib_event_type event_type, int fatal)
+{
struct ehca_qp *qp;
- unsigned long flags;
u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
- spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+ read_lock(&ehca_qp_idr_lock);
qp = idr_find(&ehca_qp_idr, token);
- spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-
+ read_unlock(&ehca_qp_idr_lock);
if (!qp)
return;
- ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
+ if (fatal)
+ ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
- if (!qp->ib_qp.event_handler)
- return;
+ dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ?
+ IB_EVENT_SRQ_ERR : event_type);
- event.device = &shca->ib_device;
- event.event = event_type;
- event.element.qp = &qp->ib_qp;
-
- qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+ /*
+ * eHCA only processes one WQE at a time for SRQ base QPs,
+ * so the last WQE has been processed as soon as the QP enters
+ * error state.
+ */
+ if (fatal && qp->ext_type == EQPT_SRQBASE)
+ dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
return;
}
@@ -207,18 +232,22 @@ static void cq_event_callback(struct ehca_shca *shca,
u64 eqe)
{
struct ehca_cq *cq;
- unsigned long flags;
u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ read_lock(&ehca_cq_idr_lock);
cq = idr_find(&ehca_cq_idr, token);
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ if (cq)
+ atomic_inc(&cq->nr_events);
+ read_unlock(&ehca_cq_idr_lock);
if (!cq)
return;
ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
+ if (atomic_dec_and_test(&cq->nr_events))
+ wake_up(&cq->wait_completion);
+
return;
}
@@ -228,17 +257,17 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe)
switch (identifier) {
case 0x02: /* path migrated */
- qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
+ qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
break;
case 0x03: /* communication established */
- qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
+ qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
break;
case 0x04: /* send queue drained */
- qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
+ qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
break;
case 0x05: /* QP error */
case 0x06: /* QP error */
- qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
+ qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
break;
case 0x07: /* CQ error */
case 0x08: /* CQ error */
@@ -272,6 +301,11 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe)
ehca_err(&shca->ib_device, "Interface trace stopped.");
break;
case 0x14: /* first error capture info available */
+ ehca_info(&shca->ib_device, "First error capture available");
+ break;
+ case 0x15: /* SRQ limit reached */
+ qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
+ break;
default:
ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
identifier, shca->ib_device.name);
@@ -281,30 +315,61 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe)
return;
}
-static void parse_ec(struct ehca_shca *shca, u64 eqe)
+static void dispatch_port_event(struct ehca_shca *shca, int port_num,
+ enum ib_event_type type, const char *msg)
{
struct ib_event event;
+
+ ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
+ event.device = &shca->ib_device;
+ event.event = type;
+ event.element.port_num = port_num;
+ ib_dispatch_event(&event);
+}
+
+static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
+{
+ struct ehca_sma_attr new_attr;
+ struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;
+
+ ehca_query_sma_attr(shca, port_num, &new_attr);
+
+ if (new_attr.sm_sl != old_attr->sm_sl ||
+ new_attr.sm_lid != old_attr->sm_lid)
+ dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
+ "SM changed");
+
+ if (new_attr.lid != old_attr->lid ||
+ new_attr.lmc != old_attr->lmc)
+ dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
+ "LID changed");
+
+ if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
+ memcmp(new_attr.pkeys, old_attr->pkeys,
+ sizeof(u16) * new_attr.pkey_tbl_len))
+ dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
+ "P_Key changed");
+
+ *old_attr = new_attr;
+}
+
+static void parse_ec(struct ehca_shca *shca, u64 eqe)
+{
u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
switch (ec) {
case 0x30: /* port availability change */
if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
- ehca_info(&shca->ib_device,
- "port %x is active.", port);
- event.device = &shca->ib_device;
- event.event = IB_EVENT_PORT_ACTIVE;
- event.element.port_num = port;
shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
- ib_dispatch_event(&event);
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+ "is active");
+ ehca_query_sma_attr(shca, port,
+ &shca->sport[port - 1].saved_attr);
} else {
- ehca_info(&shca->ib_device,
- "port %x is inactive.", port);
- event.device = &shca->ib_device;
- event.event = IB_EVENT_PORT_ERR;
- event.element.port_num = port;
shca->sport[port - 1].port_state = IB_PORT_DOWN;
- ib_dispatch_event(&event);
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+ "is inactive");
}
break;
case 0x31:
@@ -312,24 +377,19 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
* disruptive change is caused by
* LID, PKEY or SM change
*/
- ehca_warn(&shca->ib_device,
- "disruptive port %x configuration change", port);
-
- ehca_info(&shca->ib_device,
- "port %x is inactive.", port);
- event.device = &shca->ib_device;
- event.event = IB_EVENT_PORT_ERR;
- event.element.port_num = port;
- shca->sport[port - 1].port_state = IB_PORT_DOWN;
- ib_dispatch_event(&event);
-
- ehca_info(&shca->ib_device,
- "port %x is active.", port);
- event.device = &shca->ib_device;
- event.event = IB_EVENT_PORT_ACTIVE;
- event.element.port_num = port;
- shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
- ib_dispatch_event(&event);
+ if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
+ ehca_warn(&shca->ib_device, "disruptive port "
+ "%d configuration change", port);
+
+ shca->sport[port - 1].port_state = IB_PORT_DOWN;
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+ "is inactive");
+
+ shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+ "is active");
+ } else
+ notify_port_conf_change(shca, port);
break;
case 0x32: /* adapter malfunction */
ehca_err(&shca->ib_device, "Adapter malfunction.");
@@ -404,7 +464,6 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
{
u64 eqe_value;
u32 token;
- unsigned long flags;
struct ehca_cq *cq;
eqe_value = eqe->entry;
@@ -412,27 +471,24 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
ehca_dbg(&shca->ib_device, "Got completion event");
token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ read_lock(&ehca_cq_idr_lock);
cq = idr_find(&ehca_cq_idr, token);
+ if (cq)
+ atomic_inc(&cq->nr_events);
+ read_unlock(&ehca_cq_idr_lock);
if (cq == NULL) {
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
ehca_err(&shca->ib_device,
"Invalid eqe for non-existing cq token=%x",
token);
return;
}
reset_eq_pending(cq);
- cq->nr_events++;
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
if (ehca_scaling_code)
queue_comp_task(cq);
else {
comp_event_callback(cq);
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
- cq->nr_events--;
- if (!cq->nr_events)
+ if (atomic_dec_and_test(&cq->nr_events))
wake_up(&cq->wait_completion);
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
}
} else {
ehca_dbg(&shca->ib_device, "Got non completion event");
@@ -476,17 +532,17 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
eqe_value = eqe_cache[eqe_cnt].eqe->entry;
if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
- spin_lock(&ehca_cq_idr_lock);
+ read_lock(&ehca_cq_idr_lock);
eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
+ if (eqe_cache[eqe_cnt].cq)
+ atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
+ read_unlock(&ehca_cq_idr_lock);
if (!eqe_cache[eqe_cnt].cq) {
- spin_unlock(&ehca_cq_idr_lock);
ehca_err(&shca->ib_device,
"Invalid eqe for non-existing cq "
"token=%x", token);
continue;
}
- eqe_cache[eqe_cnt].cq->nr_events++;
- spin_unlock(&ehca_cq_idr_lock);
} else
eqe_cache[eqe_cnt].cq = NULL;
eqe_cnt++;
@@ -517,11 +573,8 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
else {
struct ehca_cq *cq = eq->eqe_cache[i].cq;
comp_event_callback(cq);
- spin_lock(&ehca_cq_idr_lock);
- cq->nr_events--;
- if (!cq->nr_events)
+ if (atomic_dec_and_test(&cq->nr_events))
wake_up(&cq->wait_completion);
- spin_unlock(&ehca_cq_idr_lock);
}
} else {
ehca_dbg(&shca->ib_device, "Got non completion event");
@@ -547,7 +600,7 @@ void ehca_tasklet_eq(unsigned long data)
ehca_process_eq((struct ehca_shca*)data, 1);
}
-static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
+static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
{
int cpu;
unsigned long flags;
@@ -611,7 +664,7 @@ static void queue_comp_task(struct ehca_cq *__cq)
__queue_comp_task(__cq, cct);
}
-static void run_comp_task(struct ehca_cpu_comp_task* cct)
+static void run_comp_task(struct ehca_cpu_comp_task *cct)
{
struct ehca_cq *cq;
unsigned long flags;
@@ -621,13 +674,10 @@ static void run_comp_task(struct ehca_cpu_comp_task* cct)
while (!list_empty(&cct->cq_list)) {
cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
spin_unlock_irqrestore(&cct->task_lock, flags);
- comp_event_callback(cq);
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
- cq->nr_events--;
- if (!cq->nr_events)
+ comp_event_callback(cq);
+ if (atomic_dec_and_test(&cq->nr_events))
wake_up(&cq->wait_completion);
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
spin_lock_irqsave(&cct->task_lock, flags);
spin_lock(&cq->task_lock);
@@ -644,12 +694,12 @@ static void run_comp_task(struct ehca_cpu_comp_task* cct)
static int comp_task(void *__cct)
{
- struct ehca_cpu_comp_task* cct = __cct;
+ struct ehca_cpu_comp_task *cct = __cct;
int cql_empty;
DECLARE_WAITQUEUE(wait, current);
set_current_state(TASK_INTERRUPTIBLE);
- while(!kthread_should_stop()) {
+ while (!kthread_should_stop()) {
add_wait_queue(&cct->wait_queue, &wait);
spin_lock_irq(&cct->task_lock);
@@ -723,7 +773,7 @@ static void take_over_work(struct ehca_comp_pool *pool,
list_splice_init(&cct->cq_list, &list);
- while(!list_empty(&list)) {
+ while (!list_empty(&list)) {
cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
list_del(&cq->entry);
@@ -746,7 +796,7 @@ static int comp_pool_callback(struct notifier_block *nfb,
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
- if(!create_comp_task(pool, cpu)) {
+ if (!create_comp_task(pool, cpu)) {
ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
return NOTIFY_BAD;
}
@@ -816,7 +866,7 @@ int ehca_create_comp_pool(void)
#ifdef CONFIG_HOTPLUG_CPU
comp_pool_callback_nb.notifier_call = comp_pool_callback;
- comp_pool_callback_nb.priority =0;
+ comp_pool_callback_nb.priority = 0;
register_cpu_notifier(&comp_pool_callback_nb);
#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
index 6ed06ee033ed..3346cb06cea6 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.h
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -47,7 +47,6 @@ struct ehca_shca;
#include <linux/interrupt.h>
#include <linux/types.h>
-#include <asm/atomic.h>
int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index 37e7fe0908cf..dce503bb7d6b 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -49,6 +49,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props);
int ehca_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props);
+int ehca_query_sma_attr(struct ehca_shca *shca, u8 port,
+ struct ehca_sma_attr *attr);
+
int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
@@ -78,8 +81,9 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
int num_phys_buf,
int mr_access_flags, u64 *iova_start);
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
- int mr_access_flags, struct ib_udata *udata);
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int mr_access_flags,
+ struct ib_udata *udata);
int ehca_rereg_phys_mr(struct ib_mr *mr,
int mr_rereg_mask,
@@ -154,6 +158,21 @@ int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr);
+int ehca_post_srq_recv(struct ib_srq *srq,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr);
+
+struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata);
+
+int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+
+int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+
+int ehca_destroy_srq(struct ib_srq *srq);
+
u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
struct ib_qp_init_attr *qp_init_attr);
@@ -174,7 +193,7 @@ void ehca_poll_eqs(unsigned long data);
void *ehca_alloc_fw_ctrlblock(gfp_t flags);
void ehca_free_fw_ctrlblock(void *ptr);
#else
-#define ehca_alloc_fw_ctrlblock(flags) ((void *) get_zeroed_page(flags))
+#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags))
#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index c3f99f33b49c..99036b65bb84 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -63,6 +63,7 @@ int ehca_port_act_time = 30;
int ehca_poll_all_eqs = 1;
int ehca_static_rate = -1;
int ehca_scaling_code = 0;
+int ehca_mr_largepage = 0;
module_param_named(open_aqp1, ehca_open_aqp1, int, 0);
module_param_named(debug_level, ehca_debug_level, int, 0);
@@ -72,7 +73,8 @@ module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0);
module_param_named(port_act_time, ehca_port_act_time, int, 0);
module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0);
module_param_named(static_rate, ehca_static_rate, int, 0);
-module_param_named(scaling_code, ehca_scaling_code, int, 0);
+module_param_named(scaling_code, ehca_scaling_code, int, 0);
+module_param_named(mr_largepage, ehca_mr_largepage, int, 0);
MODULE_PARM_DESC(open_aqp1,
"AQP1 on startup (0: no (default), 1: yes)");
@@ -94,22 +96,23 @@ MODULE_PARM_DESC(poll_all_eqs,
MODULE_PARM_DESC(static_rate,
"set permanent static rate (default: disabled)");
MODULE_PARM_DESC(scaling_code,
- "set scaling code (0: disabled, 1: enabled/default)");
+ "set scaling code (0: disabled/default, 1: enabled)");
+MODULE_PARM_DESC(mr_largepage,
+ "use large page for MR (0: use PAGE_SIZE (default), "
+ "1: use large page depending on MR size");
-spinlock_t ehca_qp_idr_lock;
-spinlock_t ehca_cq_idr_lock;
-spinlock_t hcall_lock;
+DEFINE_RWLOCK(ehca_qp_idr_lock);
+DEFINE_RWLOCK(ehca_cq_idr_lock);
DEFINE_IDR(ehca_qp_idr);
DEFINE_IDR(ehca_cq_idr);
-
-static struct list_head shca_list; /* list of all registered ehcas */
-static spinlock_t shca_list_lock;
+static LIST_HEAD(shca_list); /* list of all registered ehcas */
+static DEFINE_SPINLOCK(shca_list_lock);
static struct timer_list poll_eqs_timer;
#ifdef CONFIG_PPC_64K_PAGES
-static struct kmem_cache *ctblk_cache = NULL;
+static struct kmem_cache *ctblk_cache;
void *ehca_alloc_fw_ctrlblock(gfp_t flags)
{
@@ -127,6 +130,23 @@ void ehca_free_fw_ctrlblock(void *ptr)
}
#endif
+int ehca2ib_return_code(u64 ehca_rc)
+{
+ switch (ehca_rc) {
+ case H_SUCCESS:
+ return 0;
+ case H_RESOURCE: /* Resource in use */
+ case H_BUSY:
+ return -EBUSY;
+ case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
+ case H_CONSTRAINED: /* resource constraint */
+ case H_NO_MEM:
+ return -ENOMEM;
+ default:
+ return -EINVAL;
+ }
+}
+
static int ehca_create_slab_caches(void)
{
int ret;
@@ -161,19 +181,28 @@ static int ehca_create_slab_caches(void)
goto create_slab_caches5;
}
+ ret = ehca_init_small_qp_cache();
+ if (ret) {
+ ehca_gen_err("Cannot create small queue SLAB cache.");
+ goto create_slab_caches6;
+ }
+
#ifdef CONFIG_PPC_64K_PAGES
ctblk_cache = kmem_cache_create("ehca_cache_ctblk",
EHCA_PAGESIZE, H_CB_ALIGNMENT,
SLAB_HWCACHE_ALIGN,
- NULL, NULL);
+ NULL);
if (!ctblk_cache) {
ehca_gen_err("Cannot create ctblk SLAB cache.");
- ehca_cleanup_mrmw_cache();
- goto create_slab_caches5;
+ ehca_cleanup_small_qp_cache();
+ goto create_slab_caches6;
}
#endif
return 0;
+create_slab_caches6:
+ ehca_cleanup_mrmw_cache();
+
create_slab_caches5:
ehca_cleanup_av_cache();
@@ -191,6 +220,7 @@ create_slab_caches2:
static void ehca_destroy_slab_caches(void)
{
+ ehca_cleanup_small_qp_cache();
ehca_cleanup_mrmw_cache();
ehca_cleanup_av_cache();
ehca_cleanup_qp_cache();
@@ -202,14 +232,38 @@ static void ehca_destroy_slab_caches(void)
#endif
}
-#define EHCA_HCAAVER EHCA_BMASK_IBM(32,39)
-#define EHCA_REVID EHCA_BMASK_IBM(40,63)
+#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39)
+#define EHCA_REVID EHCA_BMASK_IBM(40, 63)
+
+static struct cap_descr {
+ u64 mask;
+ char *descr;
+} hca_cap_descr[] = {
+ { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" },
+ { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" },
+ { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" },
+ { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" },
+ { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" },
+ { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" },
+ { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" },
+ { HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" },
+ { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" },
+ { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" },
+ { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" },
+ { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" },
+ { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" },
+ { HCA_CAP_SRQ, "HCA_CAP_SRQ" },
+ { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" },
+ { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" },
+ { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
+};
int ehca_sense_attributes(struct ehca_shca *shca)
{
- int ret = 0;
+ int i, ret = 0;
u64 h_ret;
struct hipz_query_hca *rblock;
+ struct hipz_query_port *port;
rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!rblock) {
@@ -222,7 +276,7 @@ int ehca_sense_attributes(struct ehca_shca *shca)
ehca_gen_err("Cannot query device properties. h_ret=%lx",
h_ret);
ret = -EPERM;
- goto num_ports1;
+ goto sense_attributes1;
}
if (ehca_nr_ports == 1)
@@ -241,19 +295,52 @@ int ehca_sense_attributes(struct ehca_shca *shca)
ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
- if ((hcaaver == 1) && (revid == 0))
- shca->hw_level = 0;
- else if ((hcaaver == 1) && (revid == 1))
- shca->hw_level = 1;
- else if ((hcaaver == 1) && (revid == 2))
- shca->hw_level = 2;
- }
+ if (hcaaver == 1) {
+ if (revid <= 3)
+ shca->hw_level = 0x10 | (revid + 1);
+ else
+ shca->hw_level = 0x14;
+ } else if (hcaaver == 2) {
+ if (revid == 0)
+ shca->hw_level = 0x21;
+ else if (revid == 0x10)
+ shca->hw_level = 0x22;
+ else if (revid == 0x20 || revid == 0x21)
+ shca->hw_level = 0x23;
+ }
+
+ if (!shca->hw_level) {
+ ehca_gen_warn("unknown hardware version"
+ " - assuming default level");
+ shca->hw_level = 0x22;
+ }
+ } else
+ shca->hw_level = ehca_hw_level;
ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
shca->sport[0].rate = IB_RATE_30_GBPS;
shca->sport[1].rate = IB_RATE_30_GBPS;
-num_ports1:
+ shca->hca_cap = rblock->hca_cap_indicators;
+ ehca_gen_dbg(" ... HCA capabilities:");
+ for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++)
+ if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
+ ehca_gen_dbg(" %s", hca_cap_descr[i].descr);
+
+ shca->hca_cap_mr_pgsize = rblock->memory_page_size_supported;
+
+ port = (struct hipz_query_port *)rblock;
+ h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
+ if (h_ret != H_SUCCESS) {
+ ehca_gen_err("Cannot query port properties. h_ret=%lx",
+ h_ret);
+ ret = -EPERM;
+ goto sense_attributes1;
+ }
+
+ shca->max_mtu = port->max_mtu;
+
+sense_attributes1:
ehca_free_fw_ctrlblock(rblock);
return ret;
}
@@ -293,7 +380,7 @@ int ehca_init_device(struct ehca_shca *shca)
strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
shca->ib_device.owner = THIS_MODULE;
- shca->ib_device.uverbs_abi_ver = 6;
+ shca->ib_device.uverbs_abi_ver = 7;
shca->ib_device.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -361,6 +448,20 @@ int ehca_init_device(struct ehca_shca *shca)
/* shca->ib_device.process_mad = ehca_process_mad; */
shca->ib_device.mmap = ehca_mmap;
+ if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
+ shca->ib_device.uverbs_cmd_mask |=
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+
+ shca->ib_device.create_srq = ehca_create_srq;
+ shca->ib_device.modify_srq = ehca_modify_srq;
+ shca->ib_device.query_srq = ehca_query_srq;
+ shca->ib_device.destroy_srq = ehca_destroy_srq;
+ shca->ib_device.post_srq_recv = ehca_post_srq_recv;
+ }
+
return ret;
}
@@ -377,7 +478,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
return -EPERM;
}
- ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10, 0);
+ ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), 10, 0);
if (IS_ERR(ibcq)) {
ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
return PTR_ERR(ibcq);
@@ -523,6 +624,14 @@ static ssize_t ehca_show_adapter_handle(struct device *dev,
}
static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
+static ssize_t ehca_show_mr_largepage(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", ehca_mr_largepage);
+}
+static DEVICE_ATTR(mr_largepage, S_IRUGO, ehca_show_mr_largepage, NULL);
+
static struct attribute *ehca_dev_attrs[] = {
&dev_attr_adapter_handle.attr,
&dev_attr_num_ports.attr,
@@ -539,6 +648,7 @@ static struct attribute *ehca_dev_attrs[] = {
&dev_attr_cur_mw.attr,
&dev_attr_max_pd.attr,
&dev_attr_max_ah.attr,
+ &dev_attr_mr_largepage.attr,
NULL
};
@@ -604,7 +714,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
}
/* create internal protection domain */
- ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL);
+ ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL);
if (IS_ERR(ibpd)) {
ehca_err(&shca->ib_device, "Cannot create internal PD.");
ret = PTR_ERR(ibpd);
@@ -800,27 +910,22 @@ int __init ehca_module_init(void)
printk(KERN_INFO "eHCA Infiniband Device Driver "
"(Rel.: SVNEHCA_0023)\n");
- idr_init(&ehca_qp_idr);
- idr_init(&ehca_cq_idr);
- spin_lock_init(&ehca_qp_idr_lock);
- spin_lock_init(&ehca_cq_idr_lock);
- spin_lock_init(&hcall_lock);
- INIT_LIST_HEAD(&shca_list);
- spin_lock_init(&shca_list_lock);
-
- if ((ret = ehca_create_comp_pool())) {
+ ret = ehca_create_comp_pool();
+ if (ret) {
ehca_gen_err("Cannot create comp pool.");
return ret;
}
- if ((ret = ehca_create_slab_caches())) {
+ ret = ehca_create_slab_caches();
+ if (ret) {
ehca_gen_err("Cannot create SLAB caches");
ret = -ENOMEM;
goto module_init1;
}
- if ((ret = ibmebus_register_driver(&ehca_driver))) {
+ ret = ibmebus_register_driver(&ehca_driver);
+ if (ret) {
ehca_gen_err("Cannot register eHCA device driver");
ret = -EINVAL;
goto module_init2;
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index add79bd44e39..d97eda3e1da0 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -5,6 +5,7 @@
*
* Authors: Dietmar Decker <ddecker@de.ibm.com>
* Christoph Raisch <raisch@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
*
* Copyright (c) 2005 IBM Corporation
*
@@ -39,26 +40,60 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include <rdma/ib_umem.h>
-
#include <asm/current.h>
+#include <rdma/ib_umem.h>
+
#include "ehca_iverbs.h"
#include "ehca_mrmw.h"
#include "hcp_if.h"
#include "hipz_hw.h"
+#define NUM_CHUNKS(length, chunk_size) \
+ (((length) + (chunk_size - 1)) / (chunk_size))
+/* max number of rpages (per hcall register_rpages) */
+#define MAX_RPAGES 512
+
static struct kmem_cache *mr_cache;
static struct kmem_cache *mw_cache;
+enum ehca_mr_pgsize {
+ EHCA_MR_PGSIZE4K = 0x1000L,
+ EHCA_MR_PGSIZE64K = 0x10000L,
+ EHCA_MR_PGSIZE1M = 0x100000L,
+ EHCA_MR_PGSIZE16M = 0x1000000L
+};
+
+static u32 ehca_encode_hwpage_size(u32 pgsize)
+{
+ u32 idx = 0;
+ pgsize >>= 12;
+ /*
+ * map mr page size into hw code:
+ * 0, 1, 2, 3 for 4K, 64K, 1M, 64M
+ */
+ while (!(pgsize & 1)) {
+ idx++;
+ pgsize >>= 4;
+ }
+ return idx;
+}
+
+static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
+{
+ if (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)
+ return EHCA_MR_PGSIZE16M;
+ return EHCA_MR_PGSIZE4K;
+}
+
static struct ehca_mr *ehca_mr_new(void)
{
struct ehca_mr *me;
me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
- if (me) {
+ if (me)
spin_lock_init(&me->mrlock);
- } else
+ else
ehca_gen_err("alloc failed");
return me;
@@ -74,9 +109,9 @@ static struct ehca_mw *ehca_mw_new(void)
struct ehca_mw *me;
me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
- if (me) {
+ if (me)
spin_lock_init(&me->mwlock);
- } else
+ else
ehca_gen_err("alloc failed");
return me;
@@ -106,11 +141,12 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
goto get_dma_mr_exit0;
}
- ret = ehca_reg_maxmr(shca, e_maxmr, (u64*)KERNELBASE,
+ ret = ehca_reg_maxmr(shca, e_maxmr, (u64 *)KERNELBASE,
mr_access_flags, e_pd,
&e_maxmr->ib.ib_mr.lkey,
&e_maxmr->ib.ib_mr.rkey);
if (ret) {
+ ehca_mr_delete(e_maxmr);
ib_mr = ERR_PTR(ret);
goto get_dma_mr_exit0;
}
@@ -144,9 +180,6 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
u64 size;
- struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
- u32 num_pages_mr;
- u32 num_pages_4k; /* 4k portion "pages" */
if ((num_phys_buf <= 0) || !phys_buf_array) {
ehca_err(pd->device, "bad input values: num_phys_buf=%x "
@@ -190,12 +223,6 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
goto reg_phys_mr_exit0;
}
- /* determine number of MR pages */
- num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size +
- PAGE_SIZE - 1) / PAGE_SIZE);
- num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size +
- EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
-
/* register MR on HCA */
if (ehca_mr_is_maxmr(size, iova_start)) {
e_mr->flags |= EHCA_MR_FLAG_MAXMR;
@@ -207,13 +234,26 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
goto reg_phys_mr_exit1;
}
} else {
- pginfo.type = EHCA_MR_PGI_PHYS;
- pginfo.num_pages = num_pages_mr;
- pginfo.num_4k = num_pages_4k;
- pginfo.num_phys_buf = num_phys_buf;
- pginfo.phys_buf_array = phys_buf_array;
- pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) /
- EHCA_PAGESIZE);
+ struct ehca_mr_pginfo pginfo;
+ u32 num_kpages;
+ u32 num_hwpages;
+ u64 hw_pgsize;
+
+ num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
+ PAGE_SIZE);
+ /* for kernel space we try most possible pgsize */
+ hw_pgsize = ehca_get_max_hwpage_size(shca);
+ num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
+ hw_pgsize);
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_PHYS;
+ pginfo.num_kpages = num_kpages;
+ pginfo.hwpage_size = hw_pgsize;
+ pginfo.num_hwpages = num_hwpages;
+ pginfo.u.phy.num_phys_buf = num_phys_buf;
+ pginfo.u.phy.phys_buf_array = phys_buf_array;
+ pginfo.next_hwpage =
+ ((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize;
ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
@@ -240,18 +280,20 @@ reg_phys_mr_exit0:
/*----------------------------------------------------------------------*/
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
- int mr_access_flags, struct ib_udata *udata)
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int mr_access_flags,
+ struct ib_udata *udata)
{
struct ib_mr *ib_mr;
struct ehca_mr *e_mr;
struct ehca_shca *shca =
container_of(pd->device, struct ehca_shca, ib_device);
struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
- struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+ struct ehca_mr_pginfo pginfo;
int ret;
- u32 num_pages_mr;
- u32 num_pages_4k; /* 4k portion "pages" */
+ u32 num_kpages;
+ u32 num_hwpages;
+ u64 hwpage_size;
if (!pd) {
ehca_gen_err("bad pd=%p", pd);
@@ -289,7 +331,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt
e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
mr_access_flags);
if (IS_ERR(e_mr->umem)) {
- ib_mr = (void *) e_mr->umem;
+ ib_mr = (void *)e_mr->umem;
goto reg_user_mr_exit1;
}
@@ -301,23 +343,52 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt
}
/* determine number of MR pages */
- num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) /
- PAGE_SIZE);
- num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) /
- EHCA_PAGESIZE);
+ num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
+ /* select proper hw_pgsize */
+ if (ehca_mr_largepage &&
+ (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) {
+ if (length <= EHCA_MR_PGSIZE4K
+ && PAGE_SIZE == EHCA_MR_PGSIZE4K)
+ hwpage_size = EHCA_MR_PGSIZE4K;
+ else if (length <= EHCA_MR_PGSIZE64K)
+ hwpage_size = EHCA_MR_PGSIZE64K;
+ else if (length <= EHCA_MR_PGSIZE1M)
+ hwpage_size = EHCA_MR_PGSIZE1M;
+ else
+ hwpage_size = EHCA_MR_PGSIZE16M;
+ } else
+ hwpage_size = EHCA_MR_PGSIZE4K;
+ ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size);
+reg_user_mr_fallback:
+ num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
/* register MR on HCA */
- pginfo.type = EHCA_MR_PGI_USER;
- pginfo.num_pages = num_pages_mr;
- pginfo.num_4k = num_pages_4k;
- pginfo.region = e_mr->umem;
- pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE;
- pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
- (&e_mr->umem->chunk_list),
- list);
-
- ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd,
- &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_USER;
+ pginfo.hwpage_size = hwpage_size;
+ pginfo.num_kpages = num_kpages;
+ pginfo.num_hwpages = num_hwpages;
+ pginfo.u.usr.region = e_mr->umem;
+ pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
+ pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk,
+ (&e_mr->umem->chunk_list),
+ list);
+
+ ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
+ e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
+ &e_mr->ib.ib_mr.rkey);
+ if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
+ ehca_warn(pd->device, "failed to register mr "
+ "with hwpage_size=%lx", hwpage_size);
+ ehca_info(pd->device, "try to register mr with "
+ "kpage_size=%lx", PAGE_SIZE);
+ /*
+ * this means kpages are not contiguous for a hw page
+ * try kernel page size as fallback solution
+ */
+ hwpage_size = PAGE_SIZE;
+ goto reg_user_mr_fallback;
+ }
if (ret) {
ib_mr = ERR_PTR(ret);
goto reg_user_mr_exit2;
@@ -360,9 +431,9 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
struct ehca_pd *new_pd;
u32 tmp_lkey, tmp_rkey;
unsigned long sl_flags;
- u32 num_pages_mr = 0;
- u32 num_pages_4k = 0; /* 4k portion "pages" */
- struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+ u32 num_kpages = 0;
+ u32 num_hwpages = 0;
+ struct ehca_mr_pginfo pginfo;
u32 cur_pid = current->tgid;
if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
@@ -414,7 +485,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
goto rereg_phys_mr_exit0;
}
if (!phys_buf_array || num_phys_buf <= 0) {
- ehca_err(mr->device, "bad input values: mr_rereg_mask=%x"
+ ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
" phys_buf_array=%p num_phys_buf=%x",
mr_rereg_mask, phys_buf_array, num_phys_buf);
ret = -EINVAL;
@@ -438,12 +509,14 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
/* set requested values dependent on rereg request */
spin_lock_irqsave(&e_mr->mrlock, sl_flags);
- new_start = e_mr->start; /* new == old address */
- new_size = e_mr->size; /* new == old length */
- new_acl = e_mr->acl; /* new == old access control */
- new_pd = container_of(mr->pd,struct ehca_pd,ib_pd); /*new == old PD*/
+ new_start = e_mr->start;
+ new_size = e_mr->size;
+ new_acl = e_mr->acl;
+ new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+ u64 hw_pgsize = ehca_get_max_hwpage_size(shca);
+
new_start = iova_start; /* change address */
/* check physical buffer list and calculate size */
ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
@@ -458,17 +531,19 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
ret = -EINVAL;
goto rereg_phys_mr_exit1;
}
- num_pages_mr = ((((u64)new_start % PAGE_SIZE) + new_size +
- PAGE_SIZE - 1) / PAGE_SIZE);
- num_pages_4k = ((((u64)new_start % EHCA_PAGESIZE) + new_size +
- EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
- pginfo.type = EHCA_MR_PGI_PHYS;
- pginfo.num_pages = num_pages_mr;
- pginfo.num_4k = num_pages_4k;
- pginfo.num_phys_buf = num_phys_buf;
- pginfo.phys_buf_array = phys_buf_array;
- pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) /
- EHCA_PAGESIZE);
+ num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
+ new_size, PAGE_SIZE);
+ num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
+ new_size, hw_pgsize);
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_PHYS;
+ pginfo.num_kpages = num_kpages;
+ pginfo.hwpage_size = hw_pgsize;
+ pginfo.num_hwpages = num_hwpages;
+ pginfo.u.phy.num_phys_buf = num_phys_buf;
+ pginfo.u.phy.phys_buf_array = phys_buf_array;
+ pginfo.next_hwpage =
+ ((u64)iova_start & ~(hw_pgsize - 1)) / hw_pgsize;
}
if (mr_rereg_mask & IB_MR_REREG_ACCESS)
new_acl = mr_access_flags;
@@ -510,7 +585,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
u32 cur_pid = current->tgid;
unsigned long sl_flags;
- struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+ struct ehca_mr_hipzout_parms hipzout;
if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
(my_pd->ownpid != cur_pid)) {
@@ -536,14 +611,14 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
"hca_hndl=%lx mr_hndl=%lx lkey=%x",
h_ret, mr, shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle, mr->lkey);
- ret = ehca_mrmw_map_hrc_query_mr(h_ret);
+ ret = ehca2ib_return_code(h_ret);
goto query_mr_exit1;
}
- mr_attr->pd = mr->pd;
+ mr_attr->pd = mr->pd;
mr_attr->device_virt_addr = hipzout.vaddr;
- mr_attr->size = hipzout.len;
- mr_attr->lkey = hipzout.lkey;
- mr_attr->rkey = hipzout.rkey;
+ mr_attr->size = hipzout.len;
+ mr_attr->lkey = hipzout.lkey;
+ mr_attr->rkey = hipzout.rkey;
ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
query_mr_exit1:
@@ -596,7 +671,7 @@ int ehca_dereg_mr(struct ib_mr *mr)
"e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x",
h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle, mr->lkey);
- ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+ ret = ehca2ib_return_code(h_ret);
goto dereg_mr_exit0;
}
@@ -622,7 +697,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
struct ehca_shca *shca =
container_of(pd->device, struct ehca_shca, ib_device);
- struct ehca_mw_hipzout_parms hipzout = {{0},0};
+ struct ehca_mw_hipzout_parms hipzout;
e_mw = ehca_mw_new();
if (!e_mw) {
@@ -636,7 +711,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx "
"shca=%p hca_hndl=%lx mw=%p",
h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
- ib_mw = ERR_PTR(ehca_mrmw_map_hrc_alloc(h_ret));
+ ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
goto alloc_mw_exit1;
}
/* successful MW allocation */
@@ -679,7 +754,7 @@ int ehca_dealloc_mw(struct ib_mw *mw)
"mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx",
h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
e_mw->ipz_mw_handle.handle);
- return ehca_mrmw_map_hrc_free_mw(h_ret);
+ return ehca2ib_return_code(h_ret);
}
/* successful deallocation */
ehca_mw_delete(e_mw);
@@ -699,7 +774,8 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
struct ehca_mr *e_fmr;
int ret;
u32 tmp_lkey, tmp_rkey;
- struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+ struct ehca_mr_pginfo pginfo;
+ u64 hw_pgsize;
/* check other parameters */
if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
@@ -729,8 +805,8 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
ib_fmr = ERR_PTR(-EINVAL);
goto alloc_fmr_exit0;
}
- if (((1 << fmr_attr->page_shift) != EHCA_PAGESIZE) &&
- ((1 << fmr_attr->page_shift) != PAGE_SIZE)) {
+ hw_pgsize = ehca_get_max_hwpage_size(shca);
+ if ((1 << fmr_attr->page_shift) != hw_pgsize) {
ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
fmr_attr->page_shift);
ib_fmr = ERR_PTR(-EINVAL);
@@ -745,6 +821,11 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
e_fmr->flags |= EHCA_MR_FLAG_FMR;
/* register MR on HCA */
+ memset(&pginfo, 0, sizeof(pginfo));
+ /*
+ * pginfo.num_hwpages==0, ie register_rpages() will not be called
+ * but deferred to map_phys_fmr()
+ */
ret = ehca_reg_mr(shca, e_fmr, NULL,
fmr_attr->max_pages * (1 << fmr_attr->page_shift),
mr_access_flags, e_pd, &pginfo,
@@ -755,6 +836,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
}
/* successful */
+ e_fmr->hwpage_size = hw_pgsize;
e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
e_fmr->fmr_max_pages = fmr_attr->max_pages;
e_fmr->fmr_max_maps = fmr_attr->max_maps;
@@ -783,7 +865,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr,
container_of(fmr->device, struct ehca_shca, ib_device);
struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
- struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+ struct ehca_mr_pginfo pginfo;
u32 tmp_lkey, tmp_rkey;
if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
@@ -809,14 +891,18 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr,
fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
}
- pginfo.type = EHCA_MR_PGI_FMR;
- pginfo.num_pages = list_len;
- pginfo.num_4k = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE);
- pginfo.page_list = page_list;
- pginfo.next_4k = ((iova & (e_fmr->fmr_page_size-1)) /
- EHCA_PAGESIZE);
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_FMR;
+ pginfo.num_kpages = list_len;
+ pginfo.hwpage_size = e_fmr->hwpage_size;
+ pginfo.num_hwpages =
+ list_len * e_fmr->fmr_page_size / pginfo.hwpage_size;
+ pginfo.u.fmr.page_list = page_list;
+ pginfo.next_hwpage =
+ (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size;
+ pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size;
- ret = ehca_rereg_mr(shca, e_fmr, (u64*)iova,
+ ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova,
list_len * e_fmr->fmr_page_size,
e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
if (ret)
@@ -831,8 +917,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr,
map_phys_fmr_exit0:
if (ret)
ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x "
- "iova=%lx",
- ret, fmr, page_list, list_len, iova);
+ "iova=%lx", ret, fmr, page_list, list_len, iova);
return ret;
} /* end ehca_map_phys_fmr() */
@@ -922,7 +1007,7 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr)
"hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x",
h_ret, e_fmr, shca->ipz_hca_handle.handle,
e_fmr->ipz_mr_handle.handle, fmr->lkey);
- ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+ ret = ehca2ib_return_code(h_ret);
goto free_fmr_exit0;
}
/* successful deregistration */
@@ -950,12 +1035,12 @@ int ehca_reg_mr(struct ehca_shca *shca,
int ret;
u64 h_ret;
u32 hipz_acl;
- struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+ struct ehca_mr_hipzout_parms hipzout;
ehca_mrmw_map_acl(acl, &hipz_acl);
- ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+ ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
if (ehca_use_hp_mr == 1)
- hipz_acl |= 0x00000001;
+ hipz_acl |= 0x00000001;
h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
(u64)iova_start, size, hipz_acl,
@@ -963,7 +1048,7 @@ int ehca_reg_mr(struct ehca_shca *shca,
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx "
"hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle);
- ret = ehca_mrmw_map_hrc_alloc(h_ret);
+ ret = ehca2ib_return_code(h_ret);
goto ehca_reg_mr_exit0;
}
@@ -974,11 +1059,12 @@ int ehca_reg_mr(struct ehca_shca *shca,
goto ehca_reg_mr_exit1;
/* successful registration */
- e_mr->num_pages = pginfo->num_pages;
- e_mr->num_4k = pginfo->num_4k;
- e_mr->start = iova_start;
- e_mr->size = size;
- e_mr->acl = acl;
+ e_mr->num_kpages = pginfo->num_kpages;
+ e_mr->num_hwpages = pginfo->num_hwpages;
+ e_mr->hwpage_size = pginfo->hwpage_size;
+ e_mr->start = iova_start;
+ e_mr->size = size;
+ e_mr->acl = acl;
*lkey = hipzout.lkey;
*rkey = hipzout.rkey;
return 0;
@@ -988,10 +1074,10 @@ ehca_reg_mr_exit1:
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p "
"iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x "
- "pginfo=%p num_pages=%lx num_4k=%lx ret=%x",
+ "pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%x",
h_ret, shca, e_mr, iova_start, size, acl, e_pd,
- hipzout.lkey, pginfo, pginfo->num_pages,
- pginfo->num_4k, ret);
+ hipzout.lkey, pginfo, pginfo->num_kpages,
+ pginfo->num_hwpages, ret);
ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
"not recoverable");
}
@@ -999,9 +1085,9 @@ ehca_reg_mr_exit0:
if (ret)
ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
"iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
- "num_pages=%lx num_4k=%lx",
+ "num_kpages=%lx num_hwpages=%lx",
ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
- pginfo->num_pages, pginfo->num_4k);
+ pginfo->num_kpages, pginfo->num_hwpages);
return ret;
} /* end ehca_reg_mr() */
@@ -1018,6 +1104,9 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
u32 i;
u64 *kpage;
+ if (!pginfo->num_hwpages) /* in case of fmr */
+ return 0;
+
kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!kpage) {
ehca_err(&shca->ib_device, "kpage alloc failed");
@@ -1025,25 +1114,25 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
goto ehca_reg_mr_rpages_exit0;
}
- /* max 512 pages per shot */
- for (i = 0; i < ((pginfo->num_4k + 512 - 1) / 512); i++) {
+ /* max MAX_RPAGES ehca mr pages per register call */
+ for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) {
- if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) {
- rnum = pginfo->num_4k % 512; /* last shot */
+ if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
+ rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */
if (rnum == 0)
- rnum = 512; /* last shot is full */
+ rnum = MAX_RPAGES; /* last shot is full */
} else
- rnum = 512;
+ rnum = MAX_RPAGES;
+
+ ret = ehca_set_pagebuf(pginfo, rnum, kpage);
+ if (ret) {
+ ehca_err(&shca->ib_device, "ehca_set_pagebuf "
+ "bad rc, ret=%x rnum=%x kpage=%p",
+ ret, rnum, kpage);
+ goto ehca_reg_mr_rpages_exit1;
+ }
if (rnum > 1) {
- ret = ehca_set_pagebuf(e_mr, pginfo, rnum, kpage);
- if (ret) {
- ehca_err(&shca->ib_device, "ehca_set_pagebuf "
- "bad rc, ret=%x rnum=%x kpage=%p",
- ret, rnum, kpage);
- ret = -EFAULT;
- goto ehca_reg_mr_rpages_exit1;
- }
rpage = virt_to_abs(kpage);
if (!rpage) {
ehca_err(&shca->ib_device, "kpage=%p i=%x",
@@ -1051,21 +1140,15 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
ret = -EFAULT;
goto ehca_reg_mr_rpages_exit1;
}
- } else { /* rnum==1 */
- ret = ehca_set_pagebuf_1(e_mr, pginfo, &rpage);
- if (ret) {
- ehca_err(&shca->ib_device, "ehca_set_pagebuf_1 "
- "bad rc, ret=%x i=%x", ret, i);
- ret = -EFAULT;
- goto ehca_reg_mr_rpages_exit1;
- }
- }
+ } else
+ rpage = *kpage;
- h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr,
- 0, /* pagesize 4k */
- 0, rpage, rnum);
+ h_ret = hipz_h_register_rpage_mr(
+ shca->ipz_hca_handle, e_mr,
+ ehca_encode_hwpage_size(pginfo->hwpage_size),
+ 0, rpage, rnum);
- if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) {
+ if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
/*
* check for 'registration complete'==H_SUCCESS
* and for 'page registered'==H_PAGE_REGISTERED
@@ -1078,7 +1161,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle,
e_mr->ib.ib_mr.lkey);
- ret = ehca_mrmw_map_hrc_rrpg_last(h_ret);
+ ret = ehca2ib_return_code(h_ret);
break;
} else
ret = 0;
@@ -1089,7 +1172,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
e_mr->ib.ib_mr.lkey,
shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle);
- ret = ehca_mrmw_map_hrc_rrpg_notlast(h_ret);
+ ret = ehca2ib_return_code(h_ret);
break;
} else
ret = 0;
@@ -1101,8 +1184,8 @@ ehca_reg_mr_rpages_exit1:
ehca_reg_mr_rpages_exit0:
if (ret)
ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p "
- "num_pages=%lx num_4k=%lx", ret, shca, e_mr, pginfo,
- pginfo->num_pages, pginfo->num_4k);
+ "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr,
+ pginfo, pginfo->num_kpages, pginfo->num_hwpages);
return ret;
} /* end ehca_reg_mr_rpages() */
@@ -1124,10 +1207,10 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
u64 *kpage;
u64 rpage;
struct ehca_mr_pginfo pginfo_save;
- struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+ struct ehca_mr_hipzout_parms hipzout;
ehca_mrmw_map_acl(acl, &hipz_acl);
- ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+ ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!kpage) {
@@ -1137,12 +1220,12 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
}
pginfo_save = *pginfo;
- ret = ehca_set_pagebuf(e_mr, pginfo, pginfo->num_4k, kpage);
+ ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
if (ret) {
ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
- "pginfo=%p type=%x num_pages=%lx num_4k=%lx kpage=%p",
- e_mr, pginfo, pginfo->type, pginfo->num_pages,
- pginfo->num_4k,kpage);
+ "pginfo=%p type=%x num_kpages=%lx num_hwpages=%lx "
+ "kpage=%p", e_mr, pginfo, pginfo->type,
+ pginfo->num_kpages, pginfo->num_hwpages, kpage);
goto ehca_rereg_mr_rereg1_exit1;
}
rpage = virt_to_abs(kpage);
@@ -1164,7 +1247,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
"(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr);
*pginfo = pginfo_save;
ret = -EAGAIN;
- } else if ((u64*)hipzout.vaddr != iova_start) {
+ } else if ((u64 *)hipzout.vaddr != iova_start) {
ehca_err(&shca->ib_device, "PHYP changed iova_start in "
"rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p "
"mr_handle=%lx lkey=%x lkey_out=%x", iova_start,
@@ -1176,11 +1259,12 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
* successful reregistration
* note: start and start_out are identical for eServer HCAs
*/
- e_mr->num_pages = pginfo->num_pages;
- e_mr->num_4k = pginfo->num_4k;
- e_mr->start = iova_start;
- e_mr->size = size;
- e_mr->acl = acl;
+ e_mr->num_kpages = pginfo->num_kpages;
+ e_mr->num_hwpages = pginfo->num_hwpages;
+ e_mr->hwpage_size = pginfo->hwpage_size;
+ e_mr->start = iova_start;
+ e_mr->size = size;
+ e_mr->acl = acl;
*lkey = hipzout.lkey;
*rkey = hipzout.rkey;
}
@@ -1190,9 +1274,9 @@ ehca_rereg_mr_rereg1_exit1:
ehca_rereg_mr_rereg1_exit0:
if ( ret && (ret != -EAGAIN) )
ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x "
- "pginfo=%p num_pages=%lx num_4k=%lx",
- ret, *lkey, *rkey, pginfo, pginfo->num_pages,
- pginfo->num_4k);
+ "pginfo=%p num_kpages=%lx num_hwpages=%lx",
+ ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
+ pginfo->num_hwpages);
return ret;
} /* end ehca_rereg_mr_rereg1() */
@@ -1214,10 +1298,12 @@ int ehca_rereg_mr(struct ehca_shca *shca,
int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
/* first determine reregistration hCall(s) */
- if ((pginfo->num_4k > 512) || (e_mr->num_4k > 512) ||
- (pginfo->num_4k > e_mr->num_4k)) {
- ehca_dbg(&shca->ib_device, "Rereg3 case, pginfo->num_4k=%lx "
- "e_mr->num_4k=%x", pginfo->num_4k, e_mr->num_4k);
+ if ((pginfo->num_hwpages > MAX_RPAGES) ||
+ (e_mr->num_hwpages > MAX_RPAGES) ||
+ (pginfo->num_hwpages > e_mr->num_hwpages)) {
+ ehca_dbg(&shca->ib_device, "Rereg3 case, "
+ "pginfo->num_hwpages=%lx e_mr->num_hwpages=%x",
+ pginfo->num_hwpages, e_mr->num_hwpages);
rereg_1_hcall = 0;
rereg_3_hcall = 1;
}
@@ -1253,7 +1339,7 @@ int ehca_rereg_mr(struct ehca_shca *shca,
h_ret, e_mr, shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle,
e_mr->ib.ib_mr.lkey);
- ret = ehca_mrmw_map_hrc_free_mr(h_ret);
+ ret = ehca2ib_return_code(h_ret);
goto ehca_rereg_mr_exit0;
}
/* clean ehca_mr_t, without changing struct ib_mr and lock */
@@ -1262,13 +1348,14 @@ int ehca_rereg_mr(struct ehca_shca *shca,
/* set some MR values */
e_mr->flags = save_mr.flags;
+ e_mr->hwpage_size = save_mr.hwpage_size;
e_mr->fmr_page_size = save_mr.fmr_page_size;
e_mr->fmr_max_pages = save_mr.fmr_max_pages;
e_mr->fmr_max_maps = save_mr.fmr_max_maps;
e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
- e_pd, pginfo, lkey, rkey);
+ e_pd, pginfo, lkey, rkey);
if (ret) {
u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
memcpy(&e_mr->flags, &(save_mr.flags),
@@ -1281,9 +1368,9 @@ ehca_rereg_mr_exit0:
if (ret)
ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
"iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
- "num_pages=%lx lkey=%x rkey=%x rereg_1_hcall=%x "
+ "num_kpages=%lx lkey=%x rkey=%x rereg_1_hcall=%x "
"rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
- acl, e_pd, pginfo, pginfo->num_pages, *lkey, *rkey,
+ acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
rereg_1_hcall, rereg_3_hcall);
return ret;
} /* end ehca_rereg_mr() */
@@ -1295,97 +1382,84 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca,
{
int ret = 0;
u64 h_ret;
- int rereg_1_hcall = 1; /* 1: use hipz_mr_reregister directly */
- int rereg_3_hcall = 0; /* 1: use 3 hipz calls for unmapping */
struct ehca_pd *e_pd =
container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
struct ehca_mr save_fmr;
u32 tmp_lkey, tmp_rkey;
- struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
- struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
-
- /* first check if reregistration hCall can be used for unmap */
- if (e_fmr->fmr_max_pages > 512) {
- rereg_1_hcall = 0;
- rereg_3_hcall = 1;
- }
+ struct ehca_mr_pginfo pginfo;
+ struct ehca_mr_hipzout_parms hipzout;
+ struct ehca_mr save_mr;
- if (rereg_1_hcall) {
+ if (e_fmr->fmr_max_pages <= MAX_RPAGES) {
/*
* note: after using rereg hcall with len=0,
* rereg hcall must be used again for registering pages
*/
h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
0, 0, e_pd->fw_pd, 0, &hipzout);
- if (h_ret != H_SUCCESS) {
- /*
- * should not happen, because length checked above,
- * FMRs are not shared and no MW bound to FMRs
- */
- ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
- "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx "
- "mr_hndl=%lx lkey=%x lkey_out=%x",
- h_ret, e_fmr, shca->ipz_hca_handle.handle,
- e_fmr->ipz_mr_handle.handle,
- e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
- rereg_3_hcall = 1;
- } else {
+ if (h_ret == H_SUCCESS) {
/* successful reregistration */
e_fmr->start = NULL;
e_fmr->size = 0;
tmp_lkey = hipzout.lkey;
tmp_rkey = hipzout.rkey;
+ return 0;
}
+ /*
+ * should not happen, because length checked above,
+ * FMRs are not shared and no MW bound to FMRs
+ */
+ ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
+ "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx "
+ "mr_hndl=%lx lkey=%x lkey_out=%x",
+ h_ret, e_fmr, shca->ipz_hca_handle.handle,
+ e_fmr->ipz_mr_handle.handle,
+ e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
+ /* try free and rereg */
}
- if (rereg_3_hcall) {
- struct ehca_mr save_mr;
-
- /* first free old FMR */
- h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
- if (h_ret != H_SUCCESS) {
- ehca_err(&shca->ib_device, "hipz_free_mr failed, "
- "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
- "lkey=%x",
- h_ret, e_fmr, shca->ipz_hca_handle.handle,
- e_fmr->ipz_mr_handle.handle,
- e_fmr->ib.ib_fmr.lkey);
- ret = ehca_mrmw_map_hrc_free_mr(h_ret);
- goto ehca_unmap_one_fmr_exit0;
- }
- /* clean ehca_mr_t, without changing lock */
- save_fmr = *e_fmr;
- ehca_mr_deletenew(e_fmr);
-
- /* set some MR values */
- e_fmr->flags = save_fmr.flags;
- e_fmr->fmr_page_size = save_fmr.fmr_page_size;
- e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
- e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
- e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
- e_fmr->acl = save_fmr.acl;
-
- pginfo.type = EHCA_MR_PGI_FMR;
- pginfo.num_pages = 0;
- pginfo.num_4k = 0;
- ret = ehca_reg_mr(shca, e_fmr, NULL,
- (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
- e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
- &tmp_rkey);
- if (ret) {
- u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
- memcpy(&e_fmr->flags, &(save_mr.flags),
- sizeof(struct ehca_mr) - offset);
- goto ehca_unmap_one_fmr_exit0;
- }
+ /* first free old FMR */
+ h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "hipz_free_mr failed, "
+ "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
+ "lkey=%x",
+ h_ret, e_fmr, shca->ipz_hca_handle.handle,
+ e_fmr->ipz_mr_handle.handle,
+ e_fmr->ib.ib_fmr.lkey);
+ ret = ehca2ib_return_code(h_ret);
+ goto ehca_unmap_one_fmr_exit0;
+ }
+ /* clean ehca_mr_t, without changing lock */
+ save_fmr = *e_fmr;
+ ehca_mr_deletenew(e_fmr);
+
+ /* set some MR values */
+ e_fmr->flags = save_fmr.flags;
+ e_fmr->hwpage_size = save_fmr.hwpage_size;
+ e_fmr->fmr_page_size = save_fmr.fmr_page_size;
+ e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
+ e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
+ e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
+ e_fmr->acl = save_fmr.acl;
+
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_FMR;
+ ret = ehca_reg_mr(shca, e_fmr, NULL,
+ (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
+ e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
+ &tmp_rkey);
+ if (ret) {
+ u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
+ memcpy(&e_fmr->flags, &(save_mr.flags),
+ sizeof(struct ehca_mr) - offset);
}
ehca_unmap_one_fmr_exit0:
if (ret)
ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x "
- "fmr_max_pages=%x rereg_1_hcall=%x rereg_3_hcall=%x",
- ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages,
- rereg_1_hcall, rereg_3_hcall);
+ "fmr_max_pages=%x",
+ ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
return ret;
} /* end ehca_unmap_one_fmr() */
@@ -1403,10 +1477,10 @@ int ehca_reg_smr(struct ehca_shca *shca,
int ret = 0;
u64 h_ret;
u32 hipz_acl;
- struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+ struct ehca_mr_hipzout_parms hipzout;
ehca_mrmw_map_acl(acl, &hipz_acl);
- ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+ ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
(u64)iova_start, hipz_acl, e_pd->fw_pd,
@@ -1419,15 +1493,16 @@ int ehca_reg_smr(struct ehca_shca *shca,
shca->ipz_hca_handle.handle,
e_origmr->ipz_mr_handle.handle,
e_origmr->ib.ib_mr.lkey);
- ret = ehca_mrmw_map_hrc_reg_smr(h_ret);
+ ret = ehca2ib_return_code(h_ret);
goto ehca_reg_smr_exit0;
}
/* successful registration */
- e_newmr->num_pages = e_origmr->num_pages;
- e_newmr->num_4k = e_origmr->num_4k;
- e_newmr->start = iova_start;
- e_newmr->size = e_origmr->size;
- e_newmr->acl = acl;
+ e_newmr->num_kpages = e_origmr->num_kpages;
+ e_newmr->num_hwpages = e_origmr->num_hwpages;
+ e_newmr->hwpage_size = e_origmr->hwpage_size;
+ e_newmr->start = iova_start;
+ e_newmr->size = e_origmr->size;
+ e_newmr->acl = acl;
e_newmr->ipz_mr_handle = hipzout.handle;
*lkey = hipzout.lkey;
*rkey = hipzout.rkey;
@@ -1453,10 +1528,11 @@ int ehca_reg_internal_maxmr(
struct ehca_mr *e_mr;
u64 *iova_start;
u64 size_maxmr;
- struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
+ struct ehca_mr_pginfo pginfo;
struct ib_phys_buf ib_pbuf;
- u32 num_pages_mr;
- u32 num_pages_4k; /* 4k portion "pages" */
+ u32 num_kpages;
+ u32 num_hwpages;
+ u64 hw_pgsize;
e_mr = ehca_mr_new();
if (!e_mr) {
@@ -1468,28 +1544,31 @@ int ehca_reg_internal_maxmr(
/* register internal max-MR on HCA */
size_maxmr = (u64)high_memory - PAGE_OFFSET;
- iova_start = (u64*)KERNELBASE;
+ iova_start = (u64 *)KERNELBASE;
ib_pbuf.addr = 0;
ib_pbuf.size = size_maxmr;
- num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size_maxmr +
- PAGE_SIZE - 1) / PAGE_SIZE);
- num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size_maxmr +
- EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
-
- pginfo.type = EHCA_MR_PGI_PHYS;
- pginfo.num_pages = num_pages_mr;
- pginfo.num_4k = num_pages_4k;
- pginfo.num_phys_buf = 1;
- pginfo.phys_buf_array = &ib_pbuf;
+ num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
+ PAGE_SIZE);
+ hw_pgsize = ehca_get_max_hwpage_size(shca);
+ num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr,
+ hw_pgsize);
+
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_PHYS;
+ pginfo.num_kpages = num_kpages;
+ pginfo.num_hwpages = num_hwpages;
+ pginfo.hwpage_size = hw_pgsize;
+ pginfo.u.phy.num_phys_buf = 1;
+ pginfo.u.phy.phys_buf_array = &ib_pbuf;
ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
&pginfo, &e_mr->ib.ib_mr.lkey,
&e_mr->ib.ib_mr.rkey);
if (ret) {
ehca_err(&shca->ib_device, "reg of internal max MR failed, "
- "e_mr=%p iova_start=%p size_maxmr=%lx num_pages_mr=%x "
- "num_pages_4k=%x", e_mr, iova_start, size_maxmr,
- num_pages_mr, num_pages_4k);
+ "e_mr=%p iova_start=%p size_maxmr=%lx num_kpages=%x "
+ "num_hwpages=%x", e_mr, iova_start, size_maxmr,
+ num_kpages, num_hwpages);
goto ehca_reg_internal_maxmr_exit1;
}
@@ -1524,10 +1603,10 @@ int ehca_reg_maxmr(struct ehca_shca *shca,
u64 h_ret;
struct ehca_mr *e_origmr = shca->maxmr;
u32 hipz_acl;
- struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
+ struct ehca_mr_hipzout_parms hipzout;
ehca_mrmw_map_acl(acl, &hipz_acl);
- ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
+ ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
(u64)iova_start, hipz_acl, e_pd->fw_pd,
@@ -1538,14 +1617,15 @@ int ehca_reg_maxmr(struct ehca_shca *shca,
h_ret, e_origmr, shca->ipz_hca_handle.handle,
e_origmr->ipz_mr_handle.handle,
e_origmr->ib.ib_mr.lkey);
- return ehca_mrmw_map_hrc_reg_smr(h_ret);
+ return ehca2ib_return_code(h_ret);
}
/* successful registration */
- e_newmr->num_pages = e_origmr->num_pages;
- e_newmr->num_4k = e_origmr->num_4k;
- e_newmr->start = iova_start;
- e_newmr->size = e_origmr->size;
- e_newmr->acl = acl;
+ e_newmr->num_kpages = e_origmr->num_kpages;
+ e_newmr->num_hwpages = e_origmr->num_hwpages;
+ e_newmr->hwpage_size = e_origmr->hwpage_size;
+ e_newmr->start = iova_start;
+ e_newmr->size = e_origmr->size;
+ e_newmr->acl = acl;
e_newmr->ipz_mr_handle = hipzout.handle;
*lkey = hipzout.lkey;
*rkey = hipzout.rkey;
@@ -1677,299 +1757,352 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
/*----------------------------------------------------------------------*/
-/* setup page buffer from page info */
-int ehca_set_pagebuf(struct ehca_mr *e_mr,
- struct ehca_mr_pginfo *pginfo,
- u32 number,
- u64 *kpage)
+/* PAGE_SIZE >= pginfo->hwpage_size */
+static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
+ u32 number,
+ u64 *kpage)
{
int ret = 0;
struct ib_umem_chunk *prev_chunk;
struct ib_umem_chunk *chunk;
- struct ib_phys_buf *pbuf;
- u64 *fmrlist;
- u64 num4k, pgaddr, offs4k;
+ u64 pgaddr;
u32 i = 0;
u32 j = 0;
-
- if (pginfo->type == EHCA_MR_PGI_PHYS) {
- /* loop over desired phys_buf_array entries */
- while (i < number) {
- pbuf = pginfo->phys_buf_array + pginfo->next_buf;
- num4k = ((pbuf->addr % EHCA_PAGESIZE) + pbuf->size +
- EHCA_PAGESIZE - 1) / EHCA_PAGESIZE;
- offs4k = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE;
- while (pginfo->next_4k < offs4k + num4k) {
- /* sanity check */
- if ((pginfo->page_cnt >= pginfo->num_pages) ||
- (pginfo->page_4k_cnt >= pginfo->num_4k)) {
- ehca_gen_err("page_cnt >= num_pages, "
- "page_cnt=%lx "
- "num_pages=%lx "
- "page_4k_cnt=%lx "
- "num_4k=%lx i=%x",
- pginfo->page_cnt,
- pginfo->num_pages,
- pginfo->page_4k_cnt,
- pginfo->num_4k, i);
- ret = -EFAULT;
- goto ehca_set_pagebuf_exit0;
- }
- *kpage = phys_to_abs(
- (pbuf->addr & EHCA_PAGEMASK)
- + (pginfo->next_4k * EHCA_PAGESIZE));
- if ( !(*kpage) && pbuf->addr ) {
- ehca_gen_err("pbuf->addr=%lx "
- "pbuf->size=%lx "
- "next_4k=%lx", pbuf->addr,
- pbuf->size,
- pginfo->next_4k);
- ret = -EFAULT;
- goto ehca_set_pagebuf_exit0;
- }
- (pginfo->page_4k_cnt)++;
- (pginfo->next_4k)++;
- if (pginfo->next_4k %
- (PAGE_SIZE / EHCA_PAGESIZE) == 0)
- (pginfo->page_cnt)++;
- kpage++;
- i++;
- if (i >= number) break;
+ int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
+
+ /* loop over desired chunk entries */
+ chunk = pginfo->u.usr.next_chunk;
+ prev_chunk = pginfo->u.usr.next_chunk;
+ list_for_each_entry_continue(
+ chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
+ for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
+ pgaddr = page_to_pfn(chunk->page_list[i].page)
+ << PAGE_SHIFT ;
+ *kpage = phys_to_abs(pgaddr +
+ (pginfo->next_hwpage *
+ pginfo->hwpage_size));
+ if ( !(*kpage) ) {
+ ehca_gen_err("pgaddr=%lx "
+ "chunk->page_list[i]=%lx "
+ "i=%x next_hwpage=%lx",
+ pgaddr, (u64)sg_dma_address(
+ &chunk->page_list[i]),
+ i, pginfo->next_hwpage);
+ return -EFAULT;
}
- if (pginfo->next_4k >= offs4k + num4k) {
- (pginfo->next_buf)++;
- pginfo->next_4k = 0;
+ (pginfo->hwpage_cnt)++;
+ (pginfo->next_hwpage)++;
+ kpage++;
+ if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
+ (pginfo->kpage_cnt)++;
+ (pginfo->u.usr.next_nmap)++;
+ pginfo->next_hwpage = 0;
+ i++;
}
+ j++;
+ if (j >= number) break;
}
- } else if (pginfo->type == EHCA_MR_PGI_USER) {
- /* loop over desired chunk entries */
- chunk = pginfo->next_chunk;
- prev_chunk = pginfo->next_chunk;
- list_for_each_entry_continue(chunk,
- (&(pginfo->region->chunk_list)),
- list) {
- for (i = pginfo->next_nmap; i < chunk->nmap; ) {
+ if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
+ (j >= number)) {
+ pginfo->u.usr.next_nmap = 0;
+ prev_chunk = chunk;
+ break;
+ } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
+ pginfo->u.usr.next_nmap = 0;
+ prev_chunk = chunk;
+ } else if (j >= number)
+ break;
+ else
+ prev_chunk = chunk;
+ }
+ pginfo->u.usr.next_chunk =
+ list_prepare_entry(prev_chunk,
+ (&(pginfo->u.usr.region->chunk_list)),
+ list);
+ return ret;
+}
+
+/*
+ * check given pages for contiguous layout
+ * last page addr is returned in prev_pgaddr for further check
+ */
+static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
+ int start_idx, int end_idx,
+ u64 *prev_pgaddr)
+{
+ int t;
+ for (t = start_idx; t <= end_idx; t++) {
+ u64 pgaddr = page_to_pfn(page_list[t].page) << PAGE_SHIFT;
+ ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr,
+ *(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
+ if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
+ ehca_gen_err("uncontiguous page found pgaddr=%lx "
+ "prev_pgaddr=%lx page_list_i=%x",
+ pgaddr, *prev_pgaddr, t);
+ return -EINVAL;
+ }
+ *prev_pgaddr = pgaddr;
+ }
+ return 0;
+}
+
+/* PAGE_SIZE < pginfo->hwpage_size */
+static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
+ u32 number,
+ u64 *kpage)
+{
+ int ret = 0;
+ struct ib_umem_chunk *prev_chunk;
+ struct ib_umem_chunk *chunk;
+ u64 pgaddr, prev_pgaddr;
+ u32 i = 0;
+ u32 j = 0;
+ int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
+ int nr_kpages = kpages_per_hwpage;
+
+ /* loop over desired chunk entries */
+ chunk = pginfo->u.usr.next_chunk;
+ prev_chunk = pginfo->u.usr.next_chunk;
+ list_for_each_entry_continue(
+ chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
+ for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
+ if (nr_kpages == kpages_per_hwpage) {
pgaddr = ( page_to_pfn(chunk->page_list[i].page)
<< PAGE_SHIFT );
- *kpage = phys_to_abs(pgaddr +
- (pginfo->next_4k *
- EHCA_PAGESIZE));
+ *kpage = phys_to_abs(pgaddr);
if ( !(*kpage) ) {
- ehca_gen_err("pgaddr=%lx "
- "chunk->page_list[i]=%lx "
- "i=%x next_4k=%lx mr=%p",
- pgaddr,
- (u64)sg_dma_address(
- &chunk->
- page_list[i]),
- i, pginfo->next_4k, e_mr);
+ ehca_gen_err("pgaddr=%lx i=%x",
+ pgaddr, i);
ret = -EFAULT;
- goto ehca_set_pagebuf_exit0;
+ return ret;
}
- (pginfo->page_4k_cnt)++;
- (pginfo->next_4k)++;
- kpage++;
- if (pginfo->next_4k %
- (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
- (pginfo->page_cnt)++;
- (pginfo->next_nmap)++;
- pginfo->next_4k = 0;
- i++;
+ /*
+ * The first page in a hwpage must be aligned;
+ * the first MR page is exempt from this rule.
+ */
+ if (pgaddr & (pginfo->hwpage_size - 1)) {
+ if (pginfo->hwpage_cnt) {
+ ehca_gen_err(
+ "invalid alignment "
+ "pgaddr=%lx i=%x "
+ "mr_pgsize=%lx",
+ pgaddr, i,
+ pginfo->hwpage_size);
+ ret = -EFAULT;
+ return ret;
+ }
+ /* first MR page */
+ pginfo->kpage_cnt =
+ (pgaddr &
+ (pginfo->hwpage_size - 1)) >>
+ PAGE_SHIFT;
+ nr_kpages -= pginfo->kpage_cnt;
+ *kpage = phys_to_abs(
+ pgaddr &
+ ~(pginfo->hwpage_size - 1));
}
- j++;
- if (j >= number) break;
+ ehca_gen_dbg("kpage=%lx chunk_page=%lx "
+ "value=%016lx", *kpage, pgaddr,
+ *(u64 *)abs_to_virt(
+ phys_to_abs(pgaddr)));
+ prev_pgaddr = pgaddr;
+ i++;
+ pginfo->kpage_cnt++;
+ pginfo->u.usr.next_nmap++;
+ nr_kpages--;
+ if (!nr_kpages)
+ goto next_kpage;
+ continue;
}
- if ((pginfo->next_nmap >= chunk->nmap) &&
- (j >= number)) {
- pginfo->next_nmap = 0;
- prev_chunk = chunk;
+ if (i + nr_kpages > chunk->nmap) {
+ ret = ehca_check_kpages_per_ate(
+ chunk->page_list, i,
+ chunk->nmap - 1, &prev_pgaddr);
+ if (ret) return ret;
+ pginfo->kpage_cnt += chunk->nmap - i;
+ pginfo->u.usr.next_nmap += chunk->nmap - i;
+ nr_kpages -= chunk->nmap - i;
break;
- } else if (pginfo->next_nmap >= chunk->nmap) {
- pginfo->next_nmap = 0;
- prev_chunk = chunk;
- } else if (j >= number)
- break;
- else
- prev_chunk = chunk;
- }
- pginfo->next_chunk =
- list_prepare_entry(prev_chunk,
- (&(pginfo->region->chunk_list)),
- list);
- } else if (pginfo->type == EHCA_MR_PGI_FMR) {
- /* loop over desired page_list entries */
- fmrlist = pginfo->page_list + pginfo->next_listelem;
- for (i = 0; i < number; i++) {
- *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
- pginfo->next_4k * EHCA_PAGESIZE);
- if ( !(*kpage) ) {
- ehca_gen_err("*fmrlist=%lx fmrlist=%p "
- "next_listelem=%lx next_4k=%lx",
- *fmrlist, fmrlist,
- pginfo->next_listelem,
- pginfo->next_4k);
- ret = -EFAULT;
- goto ehca_set_pagebuf_exit0;
}
- (pginfo->page_4k_cnt)++;
- (pginfo->next_4k)++;
+
+ ret = ehca_check_kpages_per_ate(chunk->page_list, i,
+ i + nr_kpages - 1,
+ &prev_pgaddr);
+ if (ret) return ret;
+ i += nr_kpages;
+ pginfo->kpage_cnt += nr_kpages;
+ pginfo->u.usr.next_nmap += nr_kpages;
+next_kpage:
+ nr_kpages = kpages_per_hwpage;
+ (pginfo->hwpage_cnt)++;
kpage++;
- if (pginfo->next_4k %
- (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) {
- (pginfo->page_cnt)++;
- (pginfo->next_listelem)++;
- fmrlist++;
- pginfo->next_4k = 0;
- }
+ j++;
+ if (j >= number) break;
}
- } else {
- ehca_gen_err("bad pginfo->type=%x", pginfo->type);
- ret = -EFAULT;
- goto ehca_set_pagebuf_exit0;
+ if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
+ (j >= number)) {
+ pginfo->u.usr.next_nmap = 0;
+ prev_chunk = chunk;
+ break;
+ } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
+ pginfo->u.usr.next_nmap = 0;
+ prev_chunk = chunk;
+ } else if (j >= number)
+ break;
+ else
+ prev_chunk = chunk;
}
-
-ehca_set_pagebuf_exit0:
- if (ret)
- ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx "
- "num_4k=%lx next_buf=%lx next_4k=%lx number=%x "
- "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x "
- "next_listelem=%lx region=%p next_chunk=%p "
- "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type,
- pginfo->num_pages, pginfo->num_4k,
- pginfo->next_buf, pginfo->next_4k, number, kpage,
- pginfo->page_cnt, pginfo->page_4k_cnt, i,
- pginfo->next_listelem, pginfo->region,
- pginfo->next_chunk, pginfo->next_nmap);
+ pginfo->u.usr.next_chunk =
+ list_prepare_entry(prev_chunk,
+ (&(pginfo->u.usr.region->chunk_list)),
+ list);
return ret;
-} /* end ehca_set_pagebuf() */
-
-/*----------------------------------------------------------------------*/
+}
-/* setup 1 page from page info page buffer */
-int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
- struct ehca_mr_pginfo *pginfo,
- u64 *rpage)
+int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
+ u32 number,
+ u64 *kpage)
{
int ret = 0;
- struct ib_phys_buf *tmp_pbuf;
- u64 *fmrlist;
- struct ib_umem_chunk *chunk;
- struct ib_umem_chunk *prev_chunk;
- u64 pgaddr, num4k, offs4k;
-
- if (pginfo->type == EHCA_MR_PGI_PHYS) {
- /* sanity check */
- if ((pginfo->page_cnt >= pginfo->num_pages) ||
- (pginfo->page_4k_cnt >= pginfo->num_4k)) {
- ehca_gen_err("page_cnt >= num_pages, page_cnt=%lx "
- "num_pages=%lx page_4k_cnt=%lx num_4k=%lx",
- pginfo->page_cnt, pginfo->num_pages,
- pginfo->page_4k_cnt, pginfo->num_4k);
- ret = -EFAULT;
- goto ehca_set_pagebuf_1_exit0;
- }
- tmp_pbuf = pginfo->phys_buf_array + pginfo->next_buf;
- num4k = ((tmp_pbuf->addr % EHCA_PAGESIZE) + tmp_pbuf->size +
- EHCA_PAGESIZE - 1) / EHCA_PAGESIZE;
- offs4k = (tmp_pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE;
- *rpage = phys_to_abs((tmp_pbuf->addr & EHCA_PAGEMASK) +
- (pginfo->next_4k * EHCA_PAGESIZE));
- if ( !(*rpage) && tmp_pbuf->addr ) {
- ehca_gen_err("tmp_pbuf->addr=%lx"
- " tmp_pbuf->size=%lx next_4k=%lx",
- tmp_pbuf->addr, tmp_pbuf->size,
- pginfo->next_4k);
- ret = -EFAULT;
- goto ehca_set_pagebuf_1_exit0;
- }
- (pginfo->page_4k_cnt)++;
- (pginfo->next_4k)++;
- if (pginfo->next_4k % (PAGE_SIZE / EHCA_PAGESIZE) == 0)
- (pginfo->page_cnt)++;
- if (pginfo->next_4k >= offs4k + num4k) {
- (pginfo->next_buf)++;
- pginfo->next_4k = 0;
- }
- } else if (pginfo->type == EHCA_MR_PGI_USER) {
- chunk = pginfo->next_chunk;
- prev_chunk = pginfo->next_chunk;
- list_for_each_entry_continue(chunk,
- (&(pginfo->region->chunk_list)),
- list) {
- pgaddr = ( page_to_pfn(chunk->page_list[
- pginfo->next_nmap].page)
- << PAGE_SHIFT);
- *rpage = phys_to_abs(pgaddr +
- (pginfo->next_4k * EHCA_PAGESIZE));
- if ( !(*rpage) ) {
- ehca_gen_err("pgaddr=%lx chunk->page_list[]=%lx"
- " next_nmap=%lx next_4k=%lx mr=%p",
- pgaddr, (u64)sg_dma_address(
- &chunk->page_list[
- pginfo->
- next_nmap]),
- pginfo->next_nmap, pginfo->next_4k,
- e_mr);
- ret = -EFAULT;
- goto ehca_set_pagebuf_1_exit0;
- }
- (pginfo->page_4k_cnt)++;
- (pginfo->next_4k)++;
- if (pginfo->next_4k %
- (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
- (pginfo->page_cnt)++;
- (pginfo->next_nmap)++;
- pginfo->next_4k = 0;
+ struct ib_phys_buf *pbuf;
+ u64 num_hw, offs_hw;
+ u32 i = 0;
+
+ /* loop over desired phys_buf_array entries */
+ while (i < number) {
+ pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
+ num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) +
+ pbuf->size, pginfo->hwpage_size);
+ offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) /
+ pginfo->hwpage_size;
+ while (pginfo->next_hwpage < offs_hw + num_hw) {
+ /* sanity check */
+ if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
+ (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
+ ehca_gen_err("kpage_cnt >= num_kpages, "
+ "kpage_cnt=%lx num_kpages=%lx "
+ "hwpage_cnt=%lx "
+ "num_hwpages=%lx i=%x",
+ pginfo->kpage_cnt,
+ pginfo->num_kpages,
+ pginfo->hwpage_cnt,
+ pginfo->num_hwpages, i);
+ return -EFAULT;
}
- if (pginfo->next_nmap >= chunk->nmap) {
- pginfo->next_nmap = 0;
- prev_chunk = chunk;
+ *kpage = phys_to_abs(
+ (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
+ (pginfo->next_hwpage * pginfo->hwpage_size));
+ if ( !(*kpage) && pbuf->addr ) {
+ ehca_gen_err("pbuf->addr=%lx pbuf->size=%lx "
+ "next_hwpage=%lx", pbuf->addr,
+ pbuf->size, pginfo->next_hwpage);
+ return -EFAULT;
}
- break;
+ (pginfo->hwpage_cnt)++;
+ (pginfo->next_hwpage)++;
+ if (PAGE_SIZE >= pginfo->hwpage_size) {
+ if (pginfo->next_hwpage %
+ (PAGE_SIZE / pginfo->hwpage_size) == 0)
+ (pginfo->kpage_cnt)++;
+ } else
+ pginfo->kpage_cnt += pginfo->hwpage_size /
+ PAGE_SIZE;
+ kpage++;
+ i++;
+ if (i >= number) break;
}
- pginfo->next_chunk =
- list_prepare_entry(prev_chunk,
- (&(pginfo->region->chunk_list)),
- list);
- } else if (pginfo->type == EHCA_MR_PGI_FMR) {
- fmrlist = pginfo->page_list + pginfo->next_listelem;
- *rpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
- pginfo->next_4k * EHCA_PAGESIZE);
- if ( !(*rpage) ) {
+ if (pginfo->next_hwpage >= offs_hw + num_hw) {
+ (pginfo->u.phy.next_buf)++;
+ pginfo->next_hwpage = 0;
+ }
+ }
+ return ret;
+}
+
+int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
+ u32 number,
+ u64 *kpage)
+{
+ int ret = 0;
+ u64 *fmrlist;
+ u32 i;
+
+ /* loop over desired page_list entries */
+ fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
+ for (i = 0; i < number; i++) {
+ *kpage = phys_to_abs((*fmrlist & ~(pginfo->hwpage_size - 1)) +
+ pginfo->next_hwpage * pginfo->hwpage_size);
+ if ( !(*kpage) ) {
ehca_gen_err("*fmrlist=%lx fmrlist=%p "
- "next_listelem=%lx next_4k=%lx",
- *fmrlist, fmrlist, pginfo->next_listelem,
- pginfo->next_4k);
- ret = -EFAULT;
- goto ehca_set_pagebuf_1_exit0;
+ "next_listelem=%lx next_hwpage=%lx",
+ *fmrlist, fmrlist,
+ pginfo->u.fmr.next_listelem,
+ pginfo->next_hwpage);
+ return -EFAULT;
}
- (pginfo->page_4k_cnt)++;
- (pginfo->next_4k)++;
- if (pginfo->next_4k %
- (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) {
- (pginfo->page_cnt)++;
- (pginfo->next_listelem)++;
- pginfo->next_4k = 0;
+ (pginfo->hwpage_cnt)++;
+ if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) {
+ if (pginfo->next_hwpage %
+ (pginfo->u.fmr.fmr_pgsize /
+ pginfo->hwpage_size) == 0) {
+ (pginfo->kpage_cnt)++;
+ (pginfo->u.fmr.next_listelem)++;
+ fmrlist++;
+ pginfo->next_hwpage = 0;
+ } else
+ (pginfo->next_hwpage)++;
+ } else {
+ unsigned int cnt_per_hwpage = pginfo->hwpage_size /
+ pginfo->u.fmr.fmr_pgsize;
+ unsigned int j;
+ u64 prev = *kpage;
+ /* check if adrs are contiguous */
+ for (j = 1; j < cnt_per_hwpage; j++) {
+ u64 p = phys_to_abs(fmrlist[j] &
+ ~(pginfo->hwpage_size - 1));
+ if (prev + pginfo->u.fmr.fmr_pgsize != p) {
+ ehca_gen_err("uncontiguous fmr pages "
+ "found prev=%lx p=%lx "
+ "idx=%x", prev, p, i + j);
+ return -EINVAL;
+ }
+ prev = p;
+ }
+ pginfo->kpage_cnt += cnt_per_hwpage;
+ pginfo->u.fmr.next_listelem += cnt_per_hwpage;
+ fmrlist += cnt_per_hwpage;
}
- } else {
+ kpage++;
+ }
+ return ret;
+}
+
+/* setup page buffer from page info */
+int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
+ u32 number,
+ u64 *kpage)
+{
+ int ret;
+
+ switch (pginfo->type) {
+ case EHCA_MR_PGI_PHYS:
+ ret = ehca_set_pagebuf_phys(pginfo, number, kpage);
+ break;
+ case EHCA_MR_PGI_USER:
+ ret = PAGE_SIZE >= pginfo->hwpage_size ?
+ ehca_set_pagebuf_user1(pginfo, number, kpage) :
+ ehca_set_pagebuf_user2(pginfo, number, kpage);
+ break;
+ case EHCA_MR_PGI_FMR:
+ ret = ehca_set_pagebuf_fmr(pginfo, number, kpage);
+ break;
+ default:
ehca_gen_err("bad pginfo->type=%x", pginfo->type);
ret = -EFAULT;
- goto ehca_set_pagebuf_1_exit0;
+ break;
}
-
-ehca_set_pagebuf_1_exit0:
- if (ret)
- ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx "
- "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p "
- "page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx "
- "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr,
- pginfo, pginfo->type, pginfo->num_pages,
- pginfo->num_4k, pginfo->next_buf, pginfo->next_4k,
- rpage, pginfo->page_cnt, pginfo->page_4k_cnt,
- pginfo->next_listelem, pginfo->region,
- pginfo->next_chunk, pginfo->next_nmap);
return ret;
-} /* end ehca_set_pagebuf_1() */
+} /* end ehca_set_pagebuf() */
/*----------------------------------------------------------------------*/
@@ -1982,7 +2115,7 @@ int ehca_mr_is_maxmr(u64 size,
{
/* a MR is treated as max-MR only if it fits following: */
if ((size == ((u64)high_memory - PAGE_OFFSET)) &&
- (iova_start == (void*)KERNELBASE)) {
+ (iova_start == (void *)KERNELBASE)) {
ehca_gen_dbg("this is a max-MR");
return 1;
} else
@@ -2011,9 +2144,9 @@ void ehca_mrmw_map_acl(int ib_acl,
/*----------------------------------------------------------------------*/
/* sets page size in hipz access control for MR/MW. */
-void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl) /*INOUT*/
+void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/
{
- return; /* HCA supports only 4k */
+ *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24);
} /* end ehca_mrmw_set_pgsize_hipz_acl() */
/*----------------------------------------------------------------------*/
@@ -2042,196 +2175,23 @@ void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
/*----------------------------------------------------------------------*/
/*
- * map HIPZ rc to IB retcodes for MR/MW allocations
- * Used for hipz_mr_reg_alloc and hipz_mw_alloc.
- */
-int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc)
-{
- switch (hipz_rc) {
- case H_SUCCESS: /* successful completion */
- return 0;
- case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
- case H_CONSTRAINED: /* resource constraint */
- case H_NO_MEM:
- return -ENOMEM;
- case H_BUSY: /* long busy */
- return -EBUSY;
- default:
- return -EINVAL;
- }
-} /* end ehca_mrmw_map_hrc_alloc() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * map HIPZ rc to IB retcodes for MR register rpage
- * Used for hipz_h_register_rpage_mr at registering last page
- */
-int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc)
-{
- switch (hipz_rc) {
- case H_SUCCESS: /* registration complete */
- return 0;
- case H_PAGE_REGISTERED: /* page registered */
- case H_ADAPTER_PARM: /* invalid adapter handle */
- case H_RH_PARM: /* invalid resource handle */
-/* case H_QT_PARM: invalid queue type */
- case H_PARAMETER: /*
- * invalid logical address,
- * or count zero or greater 512
- */
- case H_TABLE_FULL: /* page table full */
- case H_HARDWARE: /* HCA not operational */
- return -EINVAL;
- case H_BUSY: /* long busy */
- return -EBUSY;
- default:
- return -EINVAL;
- }
-} /* end ehca_mrmw_map_hrc_rrpg_last() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * map HIPZ rc to IB retcodes for MR register rpage
- * Used for hipz_h_register_rpage_mr at registering one page, but not last page
- */
-int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc)
-{
- switch (hipz_rc) {
- case H_PAGE_REGISTERED: /* page registered */
- return 0;
- case H_SUCCESS: /* registration complete */
- case H_ADAPTER_PARM: /* invalid adapter handle */
- case H_RH_PARM: /* invalid resource handle */
-/* case H_QT_PARM: invalid queue type */
- case H_PARAMETER: /*
- * invalid logical address,
- * or count zero or greater 512
- */
- case H_TABLE_FULL: /* page table full */
- case H_HARDWARE: /* HCA not operational */
- return -EINVAL;
- case H_BUSY: /* long busy */
- return -EBUSY;
- default:
- return -EINVAL;
- }
-} /* end ehca_mrmw_map_hrc_rrpg_notlast() */
-
-/*----------------------------------------------------------------------*/
-
-/* map HIPZ rc to IB retcodes for MR query. Used for hipz_mr_query. */
-int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc)
-{
- switch (hipz_rc) {
- case H_SUCCESS: /* successful completion */
- return 0;
- case H_ADAPTER_PARM: /* invalid adapter handle */
- case H_RH_PARM: /* invalid resource handle */
- return -EINVAL;
- case H_BUSY: /* long busy */
- return -EBUSY;
- default:
- return -EINVAL;
- }
-} /* end ehca_mrmw_map_hrc_query_mr() */
-
-/*----------------------------------------------------------------------*/
-/*----------------------------------------------------------------------*/
-
-/*
- * map HIPZ rc to IB retcodes for freeing MR resource
- * Used for hipz_h_free_resource_mr
- */
-int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc)
-{
- switch (hipz_rc) {
- case H_SUCCESS: /* resource freed */
- return 0;
- case H_ADAPTER_PARM: /* invalid adapter handle */
- case H_RH_PARM: /* invalid resource handle */
- case H_R_STATE: /* invalid resource state */
- case H_HARDWARE: /* HCA not operational */
- return -EINVAL;
- case H_RESOURCE: /* Resource in use */
- case H_BUSY: /* long busy */
- return -EBUSY;
- default:
- return -EINVAL;
- }
-} /* end ehca_mrmw_map_hrc_free_mr() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * map HIPZ rc to IB retcodes for freeing MW resource
- * Used for hipz_h_free_resource_mw
- */
-int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc)
-{
- switch (hipz_rc) {
- case H_SUCCESS: /* resource freed */
- return 0;
- case H_ADAPTER_PARM: /* invalid adapter handle */
- case H_RH_PARM: /* invalid resource handle */
- case H_R_STATE: /* invalid resource state */
- case H_HARDWARE: /* HCA not operational */
- return -EINVAL;
- case H_RESOURCE: /* Resource in use */
- case H_BUSY: /* long busy */
- return -EBUSY;
- default:
- return -EINVAL;
- }
-} /* end ehca_mrmw_map_hrc_free_mw() */
-
-/*----------------------------------------------------------------------*/
-
-/*
- * map HIPZ rc to IB retcodes for SMR registrations
- * Used for hipz_h_register_smr.
- */
-int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc)
-{
- switch (hipz_rc) {
- case H_SUCCESS: /* successful completion */
- return 0;
- case H_ADAPTER_PARM: /* invalid adapter handle */
- case H_RH_PARM: /* invalid resource handle */
- case H_MEM_PARM: /* invalid MR virtual address */
- case H_MEM_ACCESS_PARM: /* invalid access controls */
- case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
- return -EINVAL;
- case H_BUSY: /* long busy */
- return -EBUSY;
- default:
- return -EINVAL;
- }
-} /* end ehca_mrmw_map_hrc_reg_smr() */
-
-/*----------------------------------------------------------------------*/
-
-/*
* MR destructor and constructor
* used in Reregister MR verb, sets all fields in ehca_mr_t to 0,
* except struct ib_mr and spinlock
*/
void ehca_mr_deletenew(struct ehca_mr *mr)
{
- mr->flags = 0;
- mr->num_pages = 0;
- mr->num_4k = 0;
- mr->acl = 0;
- mr->start = NULL;
+ mr->flags = 0;
+ mr->num_kpages = 0;
+ mr->num_hwpages = 0;
+ mr->acl = 0;
+ mr->start = NULL;
mr->fmr_page_size = 0;
mr->fmr_max_pages = 0;
- mr->fmr_max_maps = 0;
- mr->fmr_map_cnt = 0;
+ mr->fmr_max_maps = 0;
+ mr->fmr_map_cnt = 0;
memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
memset(&mr->galpas, 0, sizeof(mr->galpas));
- mr->nr_of_pages = 0;
- mr->pagearray = NULL;
} /* end ehca_mr_deletenew() */
int ehca_init_mrmw_cache(void)
@@ -2239,13 +2199,13 @@ int ehca_init_mrmw_cache(void)
mr_cache = kmem_cache_create("ehca_cache_mr",
sizeof(struct ehca_mr), 0,
SLAB_HWCACHE_ALIGN,
- NULL, NULL);
+ NULL);
if (!mr_cache)
return -ENOMEM;
mw_cache = kmem_cache_create("ehca_cache_mw",
sizeof(struct ehca_mw), 0,
SLAB_HWCACHE_ALIGN,
- NULL, NULL);
+ NULL);
if (!mw_cache) {
kmem_cache_destroy(mr_cache);
mr_cache = NULL;
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h
index d936e40a5748..bc8f4e31c123 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.h
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h
@@ -101,40 +101,21 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
u64 *page_list,
int list_len);
-int ehca_set_pagebuf(struct ehca_mr *e_mr,
- struct ehca_mr_pginfo *pginfo,
+int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
u32 number,
u64 *kpage);
-int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
- struct ehca_mr_pginfo *pginfo,
- u64 *rpage);
-
int ehca_mr_is_maxmr(u64 size,
u64 *iova_start);
void ehca_mrmw_map_acl(int ib_acl,
u32 *hipz_acl);
-void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl);
+void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl);
void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
int *ib_acl);
-int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc);
-
-int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc);
-
-int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc);
-
-int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc);
-
-int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc);
-
-int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc);
-
-int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc);
-
void ehca_mr_deletenew(struct ehca_mr *mr);
#endif /*_EHCA_MRMW_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c
index 79d0591a8043..43bcf085fcf2 100644
--- a/drivers/infiniband/hw/ehca/ehca_pd.c
+++ b/drivers/infiniband/hw/ehca/ehca_pd.c
@@ -49,6 +49,7 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device,
struct ib_ucontext *context, struct ib_udata *udata)
{
struct ehca_pd *pd;
+ int i;
pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL);
if (!pd) {
@@ -58,6 +59,11 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device,
}
pd->ownpid = current->tgid;
+ for (i = 0; i < 2; i++) {
+ INIT_LIST_HEAD(&pd->free[i]);
+ INIT_LIST_HEAD(&pd->full[i]);
+ }
+ mutex_init(&pd->lock);
/*
* Kernel PD: when device = -1, 0
@@ -81,6 +87,8 @@ int ehca_dealloc_pd(struct ib_pd *pd)
{
u32 cur_pid = current->tgid;
struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
+ int i, leftovers = 0;
+ struct ipz_small_queue_page *page, *tmp;
if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
my_pd->ownpid != cur_pid) {
@@ -89,8 +97,20 @@ int ehca_dealloc_pd(struct ib_pd *pd)
return -EINVAL;
}
- kmem_cache_free(pd_cache,
- container_of(pd, struct ehca_pd, ib_pd));
+ for (i = 0; i < 2; i++) {
+ list_splice(&my_pd->full[i], &my_pd->free[i]);
+ list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) {
+ leftovers = 1;
+ free_page(page->page);
+ kmem_cache_free(small_qp_cache, page);
+ }
+ }
+
+ if (leftovers)
+ ehca_warn(pd->device,
+ "Some small queue pages were not freed");
+
+ kmem_cache_free(pd_cache, my_pd);
return 0;
}
@@ -100,7 +120,7 @@ int ehca_init_pd_cache(void)
pd_cache = kmem_cache_create("ehca_cache_pd",
sizeof(struct ehca_pd), 0,
SLAB_HWCACHE_ALIGN,
- NULL, NULL);
+ NULL);
if (!pd_cache)
return -ENOMEM;
return 0;
diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h
index 8707d297ce4c..818803057ebf 100644
--- a/drivers/infiniband/hw/ehca/ehca_qes.h
+++ b/drivers/infiniband/hw/ehca/ehca_qes.h
@@ -53,13 +53,13 @@ struct ehca_vsgentry {
u32 length;
};
-#define GRH_FLAG_MASK EHCA_BMASK_IBM(7,7)
-#define GRH_IPVERSION_MASK EHCA_BMASK_IBM(0,3)
-#define GRH_TCLASS_MASK EHCA_BMASK_IBM(4,12)
-#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13,31)
-#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32,47)
-#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48,55)
-#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56,63)
+#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7)
+#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3)
+#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12)
+#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31)
+#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47)
+#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55)
+#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63)
/*
* Unreliable Datagram Address Vector Format
@@ -206,10 +206,10 @@ struct ehca_wqe {
};
-#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0)
-#define WC_IMM_DATA EHCA_BMASK_IBM(1,1)
-#define WC_GRH_PRESENT EHCA_BMASK_IBM(2,2)
-#define WC_SE_BIT EHCA_BMASK_IBM(3,3)
+#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0)
+#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1)
+#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2)
+#define WC_SE_BIT EHCA_BMASK_IBM(3, 3)
#define WC_STATUS_ERROR_BIT 0x80000000
#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
#define WC_STATUS_PURGE_BIT 0x10
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index b5bc787c77b6..84d435a5ee11 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -3,7 +3,9 @@
*
* QP functions
*
- * Authors: Waleri Fomin <fomin@de.ibm.com>
+ * Authors: Joachim Fenkes <fenkes@de.ibm.com>
+ * Stefan Roscher <stefan.roscher@de.ibm.com>
+ * Waleri Fomin <fomin@de.ibm.com>
* Hoang-Nam Nguyen <hnguyen@de.ibm.com>
* Reinhard Ernst <rernst@de.ibm.com>
* Heiko J Schick <schickhj@de.ibm.com>
@@ -234,13 +236,6 @@ static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
return index;
}
-enum ehca_service_type {
- ST_RC = 0,
- ST_UC = 1,
- ST_RD = 2,
- ST_UD = 3
-};
-
/*
* ibqptype2servicetype returns hcp service type corresponding to given
* ib qp type used by create_qp()
@@ -268,143 +263,169 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
}
/*
- * init_qp_queues initializes/constructs r/squeue and registers queue pages.
+ * init userspace queue info from ipz_queue data
+ */
+static inline void queue2resp(struct ipzu_queue_resp *resp,
+ struct ipz_queue *queue)
+{
+ resp->qe_size = queue->qe_size;
+ resp->act_nr_of_sg = queue->act_nr_of_sg;
+ resp->queue_length = queue->queue_length;
+ resp->pagesize = queue->pagesize;
+ resp->toggle_state = queue->toggle_state;
+}
+
+/*
+ * init_qp_queue initializes/constructs r/squeue and registers queue pages.
*/
-static inline int init_qp_queues(struct ehca_shca *shca,
- struct ehca_qp *my_qp,
- int nr_sq_pages,
- int nr_rq_pages,
- int swqe_size,
- int rwqe_size,
- int nr_send_sges, int nr_receive_sges)
+static inline int init_qp_queue(struct ehca_shca *shca,
+ struct ehca_pd *pd,
+ struct ehca_qp *my_qp,
+ struct ipz_queue *queue,
+ int q_type,
+ u64 expected_hret,
+ struct ehca_alloc_queue_parms *parms,
+ int wqe_size)
{
- int ret, cnt, ipz_rc;
+ int ret, cnt, ipz_rc, nr_q_pages;
void *vpage;
u64 rpage, h_ret;
struct ib_device *ib_dev = &shca->ib_device;
struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
- ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue,
- nr_sq_pages,
- EHCA_PAGESIZE, swqe_size, nr_send_sges);
- if (!ipz_rc) {
- ehca_err(ib_dev,"Cannot allocate page for squeue. ipz_rc=%x",
- ipz_rc);
- return -EBUSY;
+ if (!parms->queue_size)
+ return 0;
+
+ if (parms->is_small) {
+ nr_q_pages = 1;
+ ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
+ 128 << parms->page_size,
+ wqe_size, parms->act_nr_sges, 1);
+ } else {
+ nr_q_pages = parms->queue_size;
+ ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
+ EHCA_PAGESIZE, wqe_size,
+ parms->act_nr_sges, 0);
}
- ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue,
- nr_rq_pages,
- EHCA_PAGESIZE, rwqe_size, nr_receive_sges);
if (!ipz_rc) {
- ehca_err(ib_dev, "Cannot allocate page for rqueue. ipz_rc=%x",
+ ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%x",
ipz_rc);
- ret = -EBUSY;
- goto init_qp_queues0;
+ return -EBUSY;
}
- /* register SQ pages */
- for (cnt = 0; cnt < nr_sq_pages; cnt++) {
- vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue);
+
+ /* register queue pages */
+ for (cnt = 0; cnt < nr_q_pages; cnt++) {
+ vpage = ipz_qpageit_get_inc(queue);
if (!vpage) {
- ehca_err(ib_dev, "SQ ipz_qpageit_get_inc() "
+ ehca_err(ib_dev, "ipz_qpageit_get_inc() "
"failed p_vpage= %p", vpage);
ret = -EINVAL;
- goto init_qp_queues1;
+ goto init_qp_queue1;
}
rpage = virt_to_abs(vpage);
h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
my_qp->ipz_qp_handle,
- &my_qp->pf, 0, 0,
- rpage, 1,
+ NULL, 0, q_type,
+ rpage, parms->is_small ? 0 : 1,
my_qp->galpas.kernel);
- if (h_ret < H_SUCCESS) {
- ehca_err(ib_dev, "SQ hipz_qp_register_rpage()"
- " failed rc=%lx", h_ret);
- ret = ehca2ib_return_code(h_ret);
- goto init_qp_queues1;
- }
- }
-
- ipz_qeit_reset(&my_qp->ipz_squeue);
-
- /* register RQ pages */
- for (cnt = 0; cnt < nr_rq_pages; cnt++) {
- vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
- if (!vpage) {
- ehca_err(ib_dev, "RQ ipz_qpageit_get_inc() "
- "failed p_vpage = %p", vpage);
- ret = -EINVAL;
- goto init_qp_queues1;
- }
-
- rpage = virt_to_abs(vpage);
-
- h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
- my_qp->ipz_qp_handle,
- &my_qp->pf, 0, 1,
- rpage, 1,my_qp->galpas.kernel);
- if (h_ret < H_SUCCESS) {
- ehca_err(ib_dev, "RQ hipz_qp_register_rpage() failed "
- "rc=%lx", h_ret);
- ret = ehca2ib_return_code(h_ret);
- goto init_qp_queues1;
- }
- if (cnt == (nr_rq_pages - 1)) { /* last page! */
- if (h_ret != H_SUCCESS) {
- ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
+ if (cnt == (nr_q_pages - 1)) { /* last page! */
+ if (h_ret != expected_hret) {
+ ehca_err(ib_dev, "hipz_qp_register_rpage() "
"h_ret= %lx ", h_ret);
ret = ehca2ib_return_code(h_ret);
- goto init_qp_queues1;
+ goto init_qp_queue1;
}
vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
if (vpage) {
ehca_err(ib_dev, "ipz_qpageit_get_inc() "
"should not succeed vpage=%p", vpage);
ret = -EINVAL;
- goto init_qp_queues1;
+ goto init_qp_queue1;
}
} else {
if (h_ret != H_PAGE_REGISTERED) {
- ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
+ ehca_err(ib_dev, "hipz_qp_register_rpage() "
"h_ret= %lx ", h_ret);
ret = ehca2ib_return_code(h_ret);
- goto init_qp_queues1;
+ goto init_qp_queue1;
}
}
}
- ipz_qeit_reset(&my_qp->ipz_rqueue);
+ ipz_qeit_reset(queue);
return 0;
-init_qp_queues1:
- ipz_queue_dtor(&my_qp->ipz_rqueue);
-init_qp_queues0:
- ipz_queue_dtor(&my_qp->ipz_squeue);
+init_qp_queue1:
+ ipz_queue_dtor(pd, queue);
return ret;
}
-struct ib_qp *ehca_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp)
+{
+ if (is_llqp)
+ return 128 << act_nr_sge;
+ else
+ return offsetof(struct ehca_wqe,
+ u.nud.sg_list[act_nr_sge]);
+}
+
+static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
+ int req_nr_sge, int is_llqp)
+{
+ u32 wqe_size, q_size;
+ int act_nr_sge = req_nr_sge;
+
+ if (!is_llqp)
+ /* round up #SGEs so WQE size is a power of 2 */
+ for (act_nr_sge = 4; act_nr_sge <= 252;
+ act_nr_sge = 4 + 2 * act_nr_sge)
+ if (act_nr_sge >= req_nr_sge)
+ break;
+
+ wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp);
+ q_size = wqe_size * (queue->max_wr + 1);
+
+ if (q_size <= 512)
+ queue->page_size = 2;
+ else if (q_size <= 1024)
+ queue->page_size = 3;
+ else
+ queue->page_size = 0;
+
+ queue->is_small = (queue->page_size != 0);
+}
+
+/*
+ * Create an ib_qp struct that is either a QP or an SRQ, depending on
+ * the value of the is_srq parameter. If init_attr and srq_init_attr share
+ * fields, the field out of init_attr is used.
+ */
+static struct ehca_qp *internal_create_qp(
+ struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata, int is_srq)
{
- static int da_rc_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 };
- static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 };
struct ehca_qp *my_qp;
struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
ib_device);
struct ib_ucontext *context = NULL;
u64 h_ret;
- int max_send_sge, max_recv_sge, ret;
+ int is_llqp = 0, has_srq = 0;
+ int qp_type, max_send_sge, max_recv_sge, ret;
/* h_call's out parameters */
struct ehca_alloc_qp_parms parms;
- u32 swqe_size = 0, rwqe_size = 0;
- u8 daqp_completion, isdaqp;
+ u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
unsigned long flags;
+ memset(&parms, 0, sizeof(parms));
+ qp_type = init_attr->qp_type;
+
if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
@@ -412,41 +433,98 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
}
- /* save daqp completion bits */
- daqp_completion = init_attr->qp_type & 0x60;
- /* save daqp bit */
- isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0;
- init_attr->qp_type = init_attr->qp_type & 0x1F;
+ /* save LLQP info */
+ if (qp_type & 0x80) {
+ is_llqp = 1;
+ parms.ext_type = EQPT_LLQP;
+ parms.ll_comp_flags = qp_type & LLQP_COMP_MASK;
+ }
+ qp_type &= 0x1F;
+ init_attr->qp_type &= 0x1F;
- if (init_attr->qp_type != IB_QPT_UD &&
- init_attr->qp_type != IB_QPT_SMI &&
- init_attr->qp_type != IB_QPT_GSI &&
- init_attr->qp_type != IB_QPT_UC &&
- init_attr->qp_type != IB_QPT_RC) {
- ehca_err(pd->device, "wrong QP Type=%x", init_attr->qp_type);
- return ERR_PTR(-EINVAL);
+ /* handle SRQ base QPs */
+ if (init_attr->srq) {
+ struct ehca_qp *my_srq =
+ container_of(init_attr->srq, struct ehca_qp, ib_srq);
+
+ has_srq = 1;
+ parms.ext_type = EQPT_SRQBASE;
+ parms.srq_qpn = my_srq->real_qp_num;
+ parms.srq_token = my_srq->token;
}
- if ((init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD)
- && isdaqp) {
- ehca_err(pd->device, "unsupported LL QP Type=%x",
- init_attr->qp_type);
+
+ if (is_llqp && has_srq) {
+ ehca_err(pd->device, "LLQPs can't have an SRQ");
return ERR_PTR(-EINVAL);
- } else if (init_attr->qp_type == IB_QPT_RC && isdaqp &&
- (init_attr->cap.max_send_wr > 255 ||
- init_attr->cap.max_recv_wr > 255 )) {
- ehca_err(pd->device, "Invalid Number of max_sq_wr =%x "
- "or max_rq_wr=%x for QP Type=%x",
- init_attr->cap.max_send_wr,
- init_attr->cap.max_recv_wr,init_attr->qp_type);
- return ERR_PTR(-EINVAL);
- } else if (init_attr->qp_type == IB_QPT_UD && isdaqp &&
- init_attr->cap.max_send_wr > 255) {
- ehca_err(pd->device,
- "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x",
- init_attr->cap.max_send_wr, init_attr->qp_type);
+ }
+
+ /* handle SRQs */
+ if (is_srq) {
+ parms.ext_type = EQPT_SRQ;
+ parms.srq_limit = srq_init_attr->attr.srq_limit;
+ if (init_attr->cap.max_recv_sge > 3) {
+ ehca_err(pd->device, "no more than three SGEs "
+ "supported for SRQ pd=%p max_sge=%x",
+ pd, init_attr->cap.max_recv_sge);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ /* check QP type */
+ if (qp_type != IB_QPT_UD &&
+ qp_type != IB_QPT_UC &&
+ qp_type != IB_QPT_RC &&
+ qp_type != IB_QPT_SMI &&
+ qp_type != IB_QPT_GSI) {
+ ehca_err(pd->device, "wrong QP Type=%x", qp_type);
return ERR_PTR(-EINVAL);
}
+ if (is_llqp) {
+ switch (qp_type) {
+ case IB_QPT_RC:
+ if ((init_attr->cap.max_send_wr > 255) ||
+ (init_attr->cap.max_recv_wr > 255)) {
+ ehca_err(pd->device,
+ "Invalid Number of max_sq_wr=%x "
+ "or max_rq_wr=%x for RC LLQP",
+ init_attr->cap.max_send_wr,
+ init_attr->cap.max_recv_wr);
+ return ERR_PTR(-EINVAL);
+ }
+ break;
+ case IB_QPT_UD:
+ if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
+ ehca_err(pd->device, "UD LLQP not supported "
+ "by this adapter");
+ return ERR_PTR(-ENOSYS);
+ }
+ if (!(init_attr->cap.max_send_sge <= 5
+ && init_attr->cap.max_send_sge >= 1
+ && init_attr->cap.max_recv_sge <= 5
+ && init_attr->cap.max_recv_sge >= 1)) {
+ ehca_err(pd->device,
+ "Invalid Number of max_send_sge=%x "
+ "or max_recv_sge=%x for UD LLQP",
+ init_attr->cap.max_send_sge,
+ init_attr->cap.max_recv_sge);
+ return ERR_PTR(-EINVAL);
+ } else if (init_attr->cap.max_send_wr > 255) {
+ ehca_err(pd->device,
+ "Invalid Number of "
+ "ax_send_wr=%x for UD QP_TYPE=%x",
+ init_attr->cap.max_send_wr, qp_type);
+ return ERR_PTR(-EINVAL);
+ }
+ break;
+ default:
+ ehca_err(pd->device, "unsupported LL QP Type=%x",
+ qp_type);
+ return ERR_PTR(-EINVAL);
+ break;
+ }
+ }
+
if (pd->uobject && udata)
context = pd->uobject->context;
@@ -456,16 +534,17 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
return ERR_PTR(-ENOMEM);
}
- memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms));
spin_lock_init(&my_qp->spinlock_s);
spin_lock_init(&my_qp->spinlock_r);
+ my_qp->qp_type = qp_type;
+ my_qp->ext_type = parms.ext_type;
- my_qp->recv_cq =
- container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
- my_qp->send_cq =
- container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
-
- my_qp->init_attr = *init_attr;
+ if (init_attr->recv_cq)
+ my_qp->recv_cq =
+ container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
+ if (init_attr->send_cq)
+ my_qp->send_cq =
+ container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
do {
if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) {
@@ -474,9 +553,9 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
goto create_qp_exit0;
}
- spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+ write_lock_irqsave(&ehca_qp_idr_lock, flags);
ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
- spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
} while (ret == -EAGAIN);
@@ -486,10 +565,10 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
goto create_qp_exit0;
}
- parms.servicetype = ibqptype2servicetype(init_attr->qp_type);
+ parms.servicetype = ibqptype2servicetype(qp_type);
if (parms.servicetype < 0) {
ret = -EINVAL;
- ehca_err(pd->device, "Invalid qp_type=%x", init_attr->qp_type);
+ ehca_err(pd->device, "Invalid qp_type=%x", qp_type);
goto create_qp_exit0;
}
@@ -501,21 +580,37 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
/* UD_AV CIRCUMVENTION */
max_send_sge = init_attr->cap.max_send_sge;
max_recv_sge = init_attr->cap.max_recv_sge;
- if (IB_QPT_UD == init_attr->qp_type ||
- IB_QPT_GSI == init_attr->qp_type ||
- IB_QPT_SMI == init_attr->qp_type) {
+ if (parms.servicetype == ST_UD && !is_llqp) {
max_send_sge += 2;
max_recv_sge += 2;
}
- parms.ipz_eq_handle = shca->eq.ipz_eq_handle;
- parms.daqp_ctrl = isdaqp | daqp_completion;
+ parms.token = my_qp->token;
+ parms.eq_handle = shca->eq.ipz_eq_handle;
parms.pd = my_pd->fw_pd;
- parms.max_recv_sge = max_recv_sge;
- parms.max_send_sge = max_send_sge;
-
- h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms);
-
+ if (my_qp->send_cq)
+ parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle;
+ if (my_qp->recv_cq)
+ parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle;
+
+ parms.squeue.max_wr = init_attr->cap.max_send_wr;
+ parms.rqueue.max_wr = init_attr->cap.max_recv_wr;
+ parms.squeue.max_sge = max_send_sge;
+ parms.rqueue.max_sge = max_recv_sge;
+
+ if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)
+ && !(context && udata)) { /* no small QP support in userspace ATM */
+ if (HAS_SQ(my_qp))
+ ehca_determine_small_queue(
+ &parms.squeue, max_send_sge, is_llqp);
+ if (HAS_RQ(my_qp))
+ ehca_determine_small_queue(
+ &parms.rqueue, max_recv_sge, is_llqp);
+ parms.qp_storage =
+ (parms.squeue.is_small || parms.rqueue.is_small);
+ }
+
+ h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
if (h_ret != H_SUCCESS) {
ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx",
h_ret);
@@ -523,55 +618,38 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
goto create_qp_exit1;
}
- my_qp->ib_qp.qp_num = my_qp->real_qp_num;
+ ib_qp_num = my_qp->real_qp_num = parms.real_qp_num;
+ my_qp->ipz_qp_handle = parms.qp_handle;
+ my_qp->galpas = parms.galpas;
- switch (init_attr->qp_type) {
+ swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp);
+ rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp);
+
+ switch (qp_type) {
case IB_QPT_RC:
- if (isdaqp == 0) {
- swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
- (parms.act_nr_send_sges)]);
- rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
- (parms.act_nr_recv_sges)]);
- } else { /* for daqp we need to use msg size, not wqe size */
- swqe_size = da_rc_msg_size[max_send_sge];
- rwqe_size = da_rc_msg_size[max_recv_sge];
- parms.act_nr_send_sges = 1;
- parms.act_nr_recv_sges = 1;
+ if (is_llqp) {
+ parms.squeue.act_nr_sges = 1;
+ parms.rqueue.act_nr_sges = 1;
}
break;
- case IB_QPT_UC:
- swqe_size = offsetof(struct ehca_wqe,
- u.nud.sg_list[parms.act_nr_send_sges]);
- rwqe_size = offsetof(struct ehca_wqe,
- u.nud.sg_list[parms.act_nr_recv_sges]);
- break;
-
case IB_QPT_UD:
case IB_QPT_GSI:
case IB_QPT_SMI:
/* UD circumvention */
- parms.act_nr_recv_sges -= 2;
- parms.act_nr_send_sges -= 2;
- if (isdaqp) {
- swqe_size = da_ud_sq_msg_size[max_send_sge];
- rwqe_size = da_rc_msg_size[max_recv_sge];
- parms.act_nr_send_sges = 1;
- parms.act_nr_recv_sges = 1;
+ if (is_llqp) {
+ parms.squeue.act_nr_sges = 1;
+ parms.rqueue.act_nr_sges = 1;
} else {
- swqe_size = offsetof(struct ehca_wqe,
- u.ud_av.sg_list[parms.act_nr_send_sges]);
- rwqe_size = offsetof(struct ehca_wqe,
- u.ud_av.sg_list[parms.act_nr_recv_sges]);
+ parms.squeue.act_nr_sges -= 2;
+ parms.rqueue.act_nr_sges -= 2;
}
- if (IB_QPT_GSI == init_attr->qp_type ||
- IB_QPT_SMI == init_attr->qp_type) {
- parms.act_nr_send_wqes = init_attr->cap.max_send_wr;
- parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr;
- parms.act_nr_send_sges = init_attr->cap.max_send_sge;
- parms.act_nr_recv_sges = init_attr->cap.max_recv_sge;
- my_qp->ib_qp.qp_num =
- (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1;
+ if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) {
+ parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr;
+ parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr;
+ parms.squeue.act_nr_sges = init_attr->cap.max_send_sge;
+ parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge;
+ ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1;
}
break;
@@ -580,108 +658,234 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
break;
}
- /* initializes r/squeue and registers queue pages */
- ret = init_qp_queues(shca, my_qp,
- parms.nr_sq_pages, parms.nr_rq_pages,
- swqe_size, rwqe_size,
- parms.act_nr_send_sges, parms.act_nr_recv_sges);
- if (ret) {
- ehca_err(pd->device,
- "Couldn't initialize r/squeue and pages ret=%x", ret);
- goto create_qp_exit2;
+ /* initialize r/squeue and register queue pages */
+ if (HAS_SQ(my_qp)) {
+ ret = init_qp_queue(
+ shca, my_pd, my_qp, &my_qp->ipz_squeue, 0,
+ HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS,
+ &parms.squeue, swqe_size);
+ if (ret) {
+ ehca_err(pd->device, "Couldn't initialize squeue "
+ "and pages ret=%x", ret);
+ goto create_qp_exit2;
+ }
}
- my_qp->ib_qp.pd = &my_pd->ib_pd;
- my_qp->ib_qp.device = my_pd->ib_pd.device;
+ if (HAS_RQ(my_qp)) {
+ ret = init_qp_queue(
+ shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1,
+ H_SUCCESS, &parms.rqueue, rwqe_size);
+ if (ret) {
+ ehca_err(pd->device, "Couldn't initialize rqueue "
+ "and pages ret=%x", ret);
+ goto create_qp_exit3;
+ }
+ }
+
+ if (is_srq) {
+ my_qp->ib_srq.pd = &my_pd->ib_pd;
+ my_qp->ib_srq.device = my_pd->ib_pd.device;
- my_qp->ib_qp.recv_cq = init_attr->recv_cq;
- my_qp->ib_qp.send_cq = init_attr->send_cq;
+ my_qp->ib_srq.srq_context = init_attr->qp_context;
+ my_qp->ib_srq.event_handler = init_attr->event_handler;
+ } else {
+ my_qp->ib_qp.qp_num = ib_qp_num;
+ my_qp->ib_qp.pd = &my_pd->ib_pd;
+ my_qp->ib_qp.device = my_pd->ib_pd.device;
- my_qp->ib_qp.qp_type = init_attr->qp_type;
+ my_qp->ib_qp.recv_cq = init_attr->recv_cq;
+ my_qp->ib_qp.send_cq = init_attr->send_cq;
- my_qp->qp_type = init_attr->qp_type;
- my_qp->ib_qp.srq = init_attr->srq;
+ my_qp->ib_qp.qp_type = qp_type;
+ my_qp->ib_qp.srq = init_attr->srq;
- my_qp->ib_qp.qp_context = init_attr->qp_context;
- my_qp->ib_qp.event_handler = init_attr->event_handler;
+ my_qp->ib_qp.qp_context = init_attr->qp_context;
+ my_qp->ib_qp.event_handler = init_attr->event_handler;
+ }
init_attr->cap.max_inline_data = 0; /* not supported yet */
- init_attr->cap.max_recv_sge = parms.act_nr_recv_sges;
- init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes;
- init_attr->cap.max_send_sge = parms.act_nr_send_sges;
- init_attr->cap.max_send_wr = parms.act_nr_send_wqes;
+ init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges;
+ init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes;
+ init_attr->cap.max_send_sge = parms.squeue.act_nr_sges;
+ init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes;
+ my_qp->init_attr = *init_attr;
/* NOTE: define_apq0() not supported yet */
- if (init_attr->qp_type == IB_QPT_GSI) {
+ if (qp_type == IB_QPT_GSI) {
h_ret = ehca_define_sqp(shca, my_qp, init_attr);
if (h_ret != H_SUCCESS) {
ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx",
h_ret);
ret = ehca2ib_return_code(h_ret);
- goto create_qp_exit3;
+ goto create_qp_exit4;
}
}
- if (init_attr->send_cq) {
- struct ehca_cq *cq = container_of(init_attr->send_cq,
- struct ehca_cq, ib_cq);
- ret = ehca_cq_assign_qp(cq, my_qp);
+
+ if (my_qp->send_cq) {
+ ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
if (ret) {
- ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x",
- ret);
- goto create_qp_exit3;
+ ehca_err(pd->device,
+ "Couldn't assign qp to send_cq ret=%x", ret);
+ goto create_qp_exit4;
}
- my_qp->send_cq = cq;
}
+
/* copy queues, galpa data to user space */
if (context && udata) {
- struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue;
- struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue;
struct ehca_create_qp_resp resp;
memset(&resp, 0, sizeof(resp));
resp.qp_num = my_qp->real_qp_num;
resp.token = my_qp->token;
resp.qp_type = my_qp->qp_type;
+ resp.ext_type = my_qp->ext_type;
resp.qkey = my_qp->qkey;
resp.real_qp_num = my_qp->real_qp_num;
- /* rqueue properties */
- resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size;
- resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg;
- resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length;
- resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize;
- resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state;
- /* squeue properties */
- resp.ipz_squeue.qe_size = ipz_squeue->qe_size;
- resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg;
- resp.ipz_squeue.queue_length = ipz_squeue->queue_length;
- resp.ipz_squeue.pagesize = ipz_squeue->pagesize;
- resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state;
+ resp.ipz_rqueue.offset = my_qp->ipz_rqueue.offset;
+ resp.ipz_squeue.offset = my_qp->ipz_squeue.offset;
+ if (HAS_SQ(my_qp))
+ queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
+ if (HAS_RQ(my_qp))
+ queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
+
if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
ehca_err(pd->device, "Copy to udata failed");
ret = -EINVAL;
- goto create_qp_exit3;
+ goto create_qp_exit4;
}
}
- return &my_qp->ib_qp;
+ return my_qp;
+
+create_qp_exit4:
+ if (HAS_RQ(my_qp))
+ ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
create_qp_exit3:
- ipz_queue_dtor(&my_qp->ipz_rqueue);
- ipz_queue_dtor(&my_qp->ipz_squeue);
+ if (HAS_SQ(my_qp))
+ ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
create_qp_exit2:
hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
create_qp_exit1:
- spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+ write_lock_irqsave(&ehca_qp_idr_lock, flags);
idr_remove(&ehca_qp_idr, my_qp->token);
- spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
create_qp_exit0:
kmem_cache_free(qp_cache, my_qp);
return ERR_PTR(ret);
}
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata)
+{
+ struct ehca_qp *ret;
+
+ ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0);
+ return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp;
+}
+
+static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+ struct ib_uobject *uobject);
+
+struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata)
+{
+ struct ib_qp_init_attr qp_init_attr;
+ struct ehca_qp *my_qp;
+ struct ib_srq *ret;
+ struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+ ib_device);
+ struct hcp_modify_qp_control_block *mqpcb;
+ u64 hret, update_mask;
+
+ /* For common attributes, internal_create_qp() takes its info
+ * out of qp_init_attr, so copy all common attrs there.
+ */
+ memset(&qp_init_attr, 0, sizeof(qp_init_attr));
+ qp_init_attr.event_handler = srq_init_attr->event_handler;
+ qp_init_attr.qp_context = srq_init_attr->srq_context;
+ qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ qp_init_attr.qp_type = IB_QPT_RC;
+ qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr;
+ qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge;
+
+ my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1);
+ if (IS_ERR(my_qp))
+ return (struct ib_srq *)my_qp;
+
+ /* copy back return values */
+ srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr;
+ srq_init_attr->attr.max_sge = qp_init_attr.cap.max_recv_sge;
+
+ /* drive SRQ into RTR state */
+ mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!mqpcb) {
+ ehca_err(pd->device, "Could not get zeroed page for mqpcb "
+ "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
+ ret = ERR_PTR(-ENOMEM);
+ goto create_srq1;
+ }
+
+ mqpcb->qp_state = EHCA_QPS_INIT;
+ mqpcb->prim_phys_port = 1;
+ update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+ hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+ my_qp->ipz_qp_handle,
+ &my_qp->pf,
+ update_mask,
+ mqpcb, my_qp->galpas.kernel);
+ if (hret != H_SUCCESS) {
+ ehca_err(pd->device, "Could not modify SRQ to INIT"
+ "ehca_qp=%p qp_num=%x hret=%lx",
+ my_qp, my_qp->real_qp_num, hret);
+ goto create_srq2;
+ }
+
+ mqpcb->qp_enable = 1;
+ update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
+ hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+ my_qp->ipz_qp_handle,
+ &my_qp->pf,
+ update_mask,
+ mqpcb, my_qp->galpas.kernel);
+ if (hret != H_SUCCESS) {
+ ehca_err(pd->device, "Could not enable SRQ"
+ "ehca_qp=%p qp_num=%x hret=%lx",
+ my_qp, my_qp->real_qp_num, hret);
+ goto create_srq2;
+ }
+
+ mqpcb->qp_state = EHCA_QPS_RTR;
+ update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+ hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+ my_qp->ipz_qp_handle,
+ &my_qp->pf,
+ update_mask,
+ mqpcb, my_qp->galpas.kernel);
+ if (hret != H_SUCCESS) {
+ ehca_err(pd->device, "Could not modify SRQ to RTR"
+ "ehca_qp=%p qp_num=%x hret=%lx",
+ my_qp, my_qp->real_qp_num, hret);
+ goto create_srq2;
+ }
+
+ return &my_qp->ib_srq;
+
+create_srq2:
+ ret = ERR_PTR(ehca2ib_return_code(hret));
+ ehca_free_fw_ctrlblock(mqpcb);
+
+create_srq1:
+ internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject);
+
+ return ret;
+}
+
/*
* prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts
* set purge bit of bad wqe and subsequent wqes to avoid reentering sqe
@@ -707,7 +911,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
my_qp, qp_num, h_ret);
return ehca2ib_return_code(h_ret);
}
- bad_send_wqe_p = (void*)((u64)bad_send_wqe_p & (~(1L<<63)));
+ bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63)));
ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
qp_num, bad_send_wqe_p);
/* convert wqe pointer to vadr */
@@ -722,7 +926,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
}
/* loop sets wqe's purge bit */
- wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs);
+ wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
*bad_wqe_cnt = 0;
while (wqe->optype != 0xff && wqe->wqef != 0xff) {
if (ehca_debug_level)
@@ -730,7 +934,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
wqe->nr_of_data_seg = 0; /* suppress data access */
wqe->wqef = WQEF_PURGE; /* WQE to be purged */
q_ofs = ipz_queue_advance_offset(squeue, q_ofs);
- wqe = (struct ehca_wqe*)ipz_qeit_calc(squeue, q_ofs);
+ wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
*bad_wqe_cnt = (*bad_wqe_cnt)+1;
}
/*
@@ -765,7 +969,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
u64 h_ret;
int bad_wqe_cnt = 0;
int squeue_locked = 0;
- unsigned long spl_flags = 0;
+ unsigned long flags = 0;
/* do query_qp to obtain current attr values */
mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
@@ -835,7 +1039,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
goto modify_qp_exit1;
}
- ehca_dbg(ibqp->device,"ehca_qp=%p qp_num=%x current qp_state=%x "
+ ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x "
"new qp_state=%x attribute_mask=%x",
my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask);
@@ -851,7 +1055,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
goto modify_qp_exit1;
}
- if ((mqpcb->qp_state = ib2ehca_qp_state(qp_new_state)))
+ mqpcb->qp_state = ib2ehca_qp_state(qp_new_state);
+ if (mqpcb->qp_state)
update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
else {
ret = -EINVAL;
@@ -886,6 +1091,17 @@ static int internal_modify_qp(struct ib_qp *ibqp,
"ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
my_qp, ibqp->qp_num, statetrans);
+ /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set
+ * in non-LL UD QPs.
+ */
+ if ((my_qp->qp_type == IB_QPT_UD) &&
+ (my_qp->ext_type != EQPT_LLQP) &&
+ (statetrans == IB_QPST_INIT2RTR) &&
+ (shca->hw_level >= 0x22)) {
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
+ mqpcb->send_grh_flag = 1;
+ }
+
/* sqe -> rts: set purge bit of bad wqe before actual trans */
if ((my_qp->qp_type == IB_QPT_UD ||
my_qp->qp_type == IB_QPT_GSI ||
@@ -895,10 +1111,10 @@ static int internal_modify_qp(struct ib_qp *ibqp,
if (!ibqp->uobject) {
struct ehca_wqe *wqe;
/* lock send queue */
- spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+ spin_lock_irqsave(&my_qp->spinlock_s, flags);
squeue_locked = 1;
/* mark next free wqe */
- wqe = (struct ehca_wqe*)
+ wqe = (struct ehca_wqe *)
ipz_qeit_get(&my_qp->ipz_squeue);
wqe->optype = wqe->wqef = 0xff;
ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
@@ -1133,7 +1349,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
if (h_ret != H_SUCCESS) {
ret = ehca2ib_return_code(h_ret);
ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx "
- "ehca_qp=%p qp_num=%x",h_ret, my_qp, ibqp->qp_num);
+ "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num);
goto modify_qp_exit2;
}
@@ -1181,7 +1397,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
modify_qp_exit2:
if (squeue_locked) { /* this means: sqe -> rts */
- spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
+ spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
my_qp->sqerr_purgeflag = 1;
}
@@ -1232,7 +1448,7 @@ int ehca_query_qp(struct ib_qp *qp,
}
if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) {
- ehca_err(qp->device,"Invalid attribute mask "
+ ehca_err(qp->device, "Invalid attribute mask "
"ehca_qp=%p qp_num=%x qp_attr_mask=%x ",
my_qp, qp->qp_num, qp_attr_mask);
return -EINVAL;
@@ -1240,7 +1456,7 @@ int ehca_query_qp(struct ib_qp *qp,
qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
if (!qpcb) {
- ehca_err(qp->device,"Out of memory for qpcb "
+ ehca_err(qp->device, "Out of memory for qpcb "
"ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
return -ENOMEM;
}
@@ -1252,7 +1468,7 @@ int ehca_query_qp(struct ib_qp *qp,
if (h_ret != H_SUCCESS) {
ret = ehca2ib_return_code(h_ret);
- ehca_err(qp->device,"hipz_h_query_qp() failed "
+ ehca_err(qp->device, "hipz_h_query_qp() failed "
"ehca_qp=%p qp_num=%x h_ret=%lx",
my_qp, qp->qp_num, h_ret);
goto query_qp_exit1;
@@ -1263,7 +1479,7 @@ int ehca_query_qp(struct ib_qp *qp,
if (qp_attr->cur_qp_state == -EINVAL) {
ret = -EINVAL;
- ehca_err(qp->device,"Got invalid ehca_qp_state=%x "
+ ehca_err(qp->device, "Got invalid ehca_qp_state=%x "
"ehca_qp=%p qp_num=%x",
qpcb->qp_state, my_qp, qp->qp_num);
goto query_qp_exit1;
@@ -1312,6 +1528,9 @@ int ehca_query_qp(struct ib_qp *qp,
qp_attr->alt_port_num = qpcb->alt_phys_port;
qp_attr->alt_timeout = qpcb->timeout_al;
+ qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res;
+ qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp;
+
/* primary av */
qp_attr->ah_attr.sl = qpcb->service_level;
@@ -1367,53 +1586,170 @@ query_qp_exit1:
return ret;
}
-int ehca_destroy_qp(struct ib_qp *ibqp)
+int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
{
- struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
- struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+ struct ehca_qp *my_qp =
+ container_of(ibsrq, struct ehca_qp, ib_srq);
+ struct ehca_pd *my_pd =
+ container_of(ibsrq->pd, struct ehca_pd, ib_pd);
+ struct ehca_shca *shca =
+ container_of(ibsrq->pd->device, struct ehca_shca, ib_device);
+ struct hcp_modify_qp_control_block *mqpcb;
+ u64 update_mask;
+ u64 h_ret;
+ int ret = 0;
+
+ u32 cur_pid = current->tgid;
+ if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+ my_pd->ownpid != cur_pid) {
+ ehca_err(ibsrq->pd->device, "Invalid caller pid=%x ownpid=%x",
+ cur_pid, my_pd->ownpid);
+ return -EINVAL;
+ }
+
+ mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!mqpcb) {
+ ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb "
+ "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
+ return -ENOMEM;
+ }
+
+ update_mask = 0;
+ if (attr_mask & IB_SRQ_LIMIT) {
+ attr_mask &= ~IB_SRQ_LIMIT;
+ update_mask |=
+ EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1);
+ mqpcb->curr_srq_limit =
+ EHCA_BMASK_SET(MQPCB_CURR_SRQ_LIMIT, attr->srq_limit);
+ mqpcb->qp_aff_asyn_ev_log_reg =
+ EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1);
+ }
+
+ /* by now, all bits in attr_mask should have been cleared */
+ if (attr_mask) {
+ ehca_err(ibsrq->device, "invalid attribute mask bits set "
+ "attr_mask=%x", attr_mask);
+ ret = -EINVAL;
+ goto modify_srq_exit0;
+ }
+
+ if (ehca_debug_level)
+ ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
+
+ h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle,
+ NULL, update_mask, mqpcb,
+ my_qp->galpas.kernel);
+
+ if (h_ret != H_SUCCESS) {
+ ret = ehca2ib_return_code(h_ret);
+ ehca_err(ibsrq->device, "hipz_h_modify_qp() failed rc=%lx "
+ "ehca_qp=%p qp_num=%x",
+ h_ret, my_qp, my_qp->real_qp_num);
+ }
+
+modify_srq_exit0:
+ ehca_free_fw_ctrlblock(mqpcb);
+
+ return ret;
+}
+
+int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
+{
+ struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq);
+ struct ehca_pd *my_pd = container_of(srq->pd, struct ehca_pd, ib_pd);
+ struct ehca_shca *shca = container_of(srq->device, struct ehca_shca,
ib_device);
+ struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
+ struct hcp_modify_qp_control_block *qpcb;
+ u32 cur_pid = current->tgid;
+ int ret = 0;
+ u64 h_ret;
+
+ if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+ my_pd->ownpid != cur_pid) {
+ ehca_err(srq->device, "Invalid caller pid=%x ownpid=%x",
+ cur_pid, my_pd->ownpid);
+ return -EINVAL;
+ }
+
+ qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!qpcb) {
+ ehca_err(srq->device, "Out of memory for qpcb "
+ "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num);
+ return -ENOMEM;
+ }
+
+ h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle,
+ NULL, qpcb, my_qp->galpas.kernel);
+
+ if (h_ret != H_SUCCESS) {
+ ret = ehca2ib_return_code(h_ret);
+ ehca_err(srq->device, "hipz_h_query_qp() failed "
+ "ehca_qp=%p qp_num=%x h_ret=%lx",
+ my_qp, my_qp->real_qp_num, h_ret);
+ goto query_srq_exit1;
+ }
+
+ srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
+ srq_attr->srq_limit = EHCA_BMASK_GET(
+ MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit);
+
+ if (ehca_debug_level)
+ ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
+
+query_srq_exit1:
+ ehca_free_fw_ctrlblock(qpcb);
+
+ return ret;
+}
+
+static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+ struct ib_uobject *uobject)
+{
+ struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
ib_pd);
u32 cur_pid = current->tgid;
- u32 qp_num = ibqp->qp_num;
+ u32 qp_num = my_qp->real_qp_num;
int ret;
u64 h_ret;
u8 port_num;
enum ib_qp_type qp_type;
unsigned long flags;
- if (ibqp->uobject) {
+ if (uobject) {
if (my_qp->mm_count_galpa ||
my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
- ehca_err(ibqp->device, "Resources still referenced in "
- "user space qp_num=%x", ibqp->qp_num);
+ ehca_err(dev, "Resources still referenced in "
+ "user space qp_num=%x", qp_num);
return -EINVAL;
}
if (my_pd->ownpid != cur_pid) {
- ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x",
+ ehca_err(dev, "Invalid caller pid=%x ownpid=%x",
cur_pid, my_pd->ownpid);
return -EINVAL;
}
}
if (my_qp->send_cq) {
- ret = ehca_cq_unassign_qp(my_qp->send_cq,
- my_qp->real_qp_num);
+ ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num);
if (ret) {
- ehca_err(ibqp->device, "Couldn't unassign qp from "
+ ehca_err(dev, "Couldn't unassign qp from "
"send_cq ret=%x qp_num=%x cq_num=%x", ret,
- my_qp->ib_qp.qp_num, my_qp->send_cq->cq_number);
+ qp_num, my_qp->send_cq->cq_number);
return ret;
}
}
- spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+ write_lock_irqsave(&ehca_qp_idr_lock, flags);
idr_remove(&ehca_qp_idr, my_qp->token);
- spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
if (h_ret != H_SUCCESS) {
- ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx "
+ ehca_err(dev, "hipz_h_destroy_qp() failed rc=%lx "
"ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
return ehca2ib_return_code(h_ret);
}
@@ -1424,7 +1760,7 @@ int ehca_destroy_qp(struct ib_qp *ibqp)
/* no support for IB_QPT_SMI yet */
if (qp_type == IB_QPT_GSI) {
struct ib_event event;
- ehca_info(ibqp->device, "device %s: port %x is inactive.",
+ ehca_info(dev, "device %s: port %x is inactive.",
shca->ib_device.name, port_num);
event.device = &shca->ib_device;
event.event = IB_EVENT_PORT_ERR;
@@ -1433,18 +1769,34 @@ int ehca_destroy_qp(struct ib_qp *ibqp)
ib_dispatch_event(&event);
}
- ipz_queue_dtor(&my_qp->ipz_rqueue);
- ipz_queue_dtor(&my_qp->ipz_squeue);
+ if (HAS_RQ(my_qp))
+ ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+ if (HAS_SQ(my_qp))
+ ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
kmem_cache_free(qp_cache, my_qp);
return 0;
}
+int ehca_destroy_qp(struct ib_qp *qp)
+{
+ return internal_destroy_qp(qp->device,
+ container_of(qp, struct ehca_qp, ib_qp),
+ qp->uobject);
+}
+
+int ehca_destroy_srq(struct ib_srq *srq)
+{
+ return internal_destroy_qp(srq->device,
+ container_of(srq, struct ehca_qp, ib_srq),
+ srq->uobject);
+}
+
int ehca_init_qp_cache(void)
{
qp_cache = kmem_cache_create("ehca_cache_qp",
sizeof(struct ehca_qp), 0,
SLAB_HWCACHE_ALIGN,
- NULL, NULL);
+ NULL);
if (!qp_cache)
return -ENOMEM;
return 0;
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index caec9dee09e1..94eed70fedf5 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -3,8 +3,9 @@
*
* post_send/recv, poll_cq, req_notify
*
- * Authors: Waleri Fomin <fomin@de.ibm.com>
- * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Waleri Fomin <fomin@de.ibm.com>
+ * Joachim Fenkes <fenkes@de.ibm.com>
* Reinhard Ernst <rernst@de.ibm.com>
*
* Copyright (c) 2005 IBM Corporation
@@ -78,7 +79,8 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
}
if (ehca_debug_level) {
- ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue);
+ ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
+ ipz_rqueue);
ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
}
@@ -98,7 +100,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr;
struct ib_sge *sge = send_wr->sg_list;
ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x "
- "send_flags=%x opcode=%x",idx, send_wr->wr_id,
+ "send_flags=%x opcode=%x", idx, send_wr->wr_id,
send_wr->num_sge, send_wr->send_flags,
send_wr->opcode);
if (mad_hdr) {
@@ -115,7 +117,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
mad_hdr->attr_mod);
}
for (j = 0; j < send_wr->num_sge; j++) {
- u8 *data = (u8 *) abs_to_virt(sge->addr);
+ u8 *data = (u8 *)abs_to_virt(sge->addr);
ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
"lkey=%x",
idx, j, data, sge->length, sge->lkey);
@@ -362,10 +364,10 @@ int ehca_post_send(struct ib_qp *qp,
struct ehca_wqe *wqe_p;
int wqe_cnt = 0;
int ret = 0;
- unsigned long spl_flags;
+ unsigned long flags;
/* LOCK the QUEUE */
- spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+ spin_lock_irqsave(&my_qp->spinlock_s, flags);
/* loop processes list of send reqs */
for (cur_send_wr = send_wr; cur_send_wr != NULL;
@@ -406,26 +408,31 @@ int ehca_post_send(struct ib_qp *qp,
} /* eof for cur_send_wr */
post_send_exit0:
- /* UNLOCK the QUEUE */
- spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
iosync(); /* serialize GAL register access */
hipz_update_sqa(my_qp, wqe_cnt);
+ spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
return ret;
}
-int ehca_post_recv(struct ib_qp *qp,
- struct ib_recv_wr *recv_wr,
- struct ib_recv_wr **bad_recv_wr)
+static int internal_post_recv(struct ehca_qp *my_qp,
+ struct ib_device *dev,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr)
{
- struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
struct ib_recv_wr *cur_recv_wr;
struct ehca_wqe *wqe_p;
int wqe_cnt = 0;
int ret = 0;
- unsigned long spl_flags;
+ unsigned long flags;
+
+ if (unlikely(!HAS_RQ(my_qp))) {
+ ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
+ my_qp, my_qp->real_qp_num, my_qp->ext_type);
+ return -ENODEV;
+ }
/* LOCK the QUEUE */
- spin_lock_irqsave(&my_qp->spinlock_r, spl_flags);
+ spin_lock_irqsave(&my_qp->spinlock_r, flags);
/* loop processes list of send reqs */
for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
@@ -439,8 +446,8 @@ int ehca_post_recv(struct ib_qp *qp,
*bad_recv_wr = cur_recv_wr;
if (wqe_cnt == 0) {
ret = -ENOMEM;
- ehca_err(qp->device, "Too many posted WQEs "
- "qp_num=%x", qp->qp_num);
+ ehca_err(dev, "Too many posted WQEs "
+ "qp_num=%x", my_qp->real_qp_num);
}
goto post_recv_exit0;
}
@@ -455,23 +462,39 @@ int ehca_post_recv(struct ib_qp *qp,
*bad_recv_wr = cur_recv_wr;
if (wqe_cnt == 0) {
ret = -EINVAL;
- ehca_err(qp->device, "Could not write WQE "
- "qp_num=%x", qp->qp_num);
+ ehca_err(dev, "Could not write WQE "
+ "qp_num=%x", my_qp->real_qp_num);
}
goto post_recv_exit0;
}
wqe_cnt++;
- ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d",
- my_qp, qp->qp_num, wqe_cnt);
+ ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d",
+ my_qp, my_qp->real_qp_num, wqe_cnt);
} /* eof for cur_recv_wr */
post_recv_exit0:
- spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags);
iosync(); /* serialize GAL register access */
hipz_update_rqa(my_qp, wqe_cnt);
+ spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
return ret;
}
+int ehca_post_recv(struct ib_qp *qp,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr)
+{
+ return internal_post_recv(container_of(qp, struct ehca_qp, ib_qp),
+ qp->device, recv_wr, bad_recv_wr);
+}
+
+int ehca_post_srq_recv(struct ib_srq *srq,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr)
+{
+ return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq),
+ srq->device, recv_wr, bad_recv_wr);
+}
+
/*
* ib_wc_opcode table converts ehca wc opcode to ib
* Since we use zero to indicate invalid opcode, the actual ib opcode must
@@ -494,6 +517,7 @@ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
int ret = 0;
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
struct ehca_cqe *cqe;
+ struct ehca_qp *my_qp;
int cqe_count = 0;
poll_cq_one_read_cqe:
@@ -511,9 +535,11 @@ poll_cq_one_read_cqe:
cqe_count++;
if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
- struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number);
+ struct ehca_qp *qp;
int purgeflag;
- unsigned long spl_flags;
+ unsigned long flags;
+
+ qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number);
if (!qp) {
ehca_err(cq->device, "cq_num=%x qp_num=%x "
"could not find qp -> ignore cqe",
@@ -523,13 +549,13 @@ poll_cq_one_read_cqe:
/* ignore this purged cqe */
goto poll_cq_one_read_cqe;
}
- spin_lock_irqsave(&qp->spinlock_s, spl_flags);
+ spin_lock_irqsave(&qp->spinlock_s, flags);
purgeflag = qp->sqerr_purgeflag;
- spin_unlock_irqrestore(&qp->spinlock_s, spl_flags);
+ spin_unlock_irqrestore(&qp->spinlock_s, flags);
if (purgeflag) {
- ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x "
- "src_qp=%x",
+ ehca_dbg(cq->device,
+ "Got CQE with purged bit qp_num=%x src_qp=%x",
cqe->local_qp_number, cqe->remote_qp_number);
if (ehca_debug_level)
ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
@@ -545,7 +571,7 @@ poll_cq_one_read_cqe:
}
/* tracing cqe */
- if (ehca_debug_level) {
+ if (unlikely(ehca_debug_level)) {
ehca_dbg(cq->device,
"Received COMPLETION ehca_cq=%p cq_num=%x -----",
my_cq, my_cq->cq_number);
@@ -579,7 +605,11 @@ poll_cq_one_read_cqe:
} else
wc->status = IB_WC_SUCCESS;
- wc->qp = NULL;
+ read_lock(&ehca_qp_idr_lock);
+ my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
+ wc->qp = &my_qp->ib_qp;
+ read_unlock(&ehca_qp_idr_lock);
+
wc->byte_len = cqe->nr_bytes_transferred;
wc->pkey_index = cqe->pkey_index;
wc->slid = cqe->rlid;
@@ -589,7 +619,7 @@ poll_cq_one_read_cqe:
wc->imm_data = cpu_to_be32(cqe->immediate_data);
wc->sl = cqe->service_level;
- if (wc->status != IB_WC_SUCCESS)
+ if (unlikely(wc->status != IB_WC_SUCCESS))
ehca_dbg(cq->device,
"ehca_cq=%p cq_num=%x WARNING unsuccessful cqe "
"OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx "
@@ -610,7 +640,7 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
int nr;
struct ib_wc *current_wc = wc;
int ret = 0;
- unsigned long spl_flags;
+ unsigned long flags;
if (num_entries < 1) {
ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
@@ -619,14 +649,14 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
goto poll_cq_exit0;
}
- spin_lock_irqsave(&my_cq->spinlock, spl_flags);
+ spin_lock_irqsave(&my_cq->spinlock, flags);
for (nr = 0; nr < num_entries; nr++) {
ret = ehca_poll_cq_one(cq, current_wc);
if (ret)
break;
current_wc++;
} /* eof for nr */
- spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
+ spin_unlock_irqrestore(&my_cq->spinlock, flags);
if (ret == -EAGAIN || !ret)
ret = nr;
@@ -637,7 +667,6 @@ poll_cq_exit0:
int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
{
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
- unsigned long spl_flags;
int ret = 0;
switch (notify_flags & IB_CQ_SOLICITED_MASK) {
@@ -652,6 +681,7 @@ int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
}
if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
+ unsigned long spl_flags;
spin_lock_irqsave(&my_cq->spinlock, spl_flags);
ret = ipz_qeit_is_valid(&my_cq->ipz_queue);
spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h
index 973c4b591545..57c77a715f46 100644
--- a/drivers/infiniband/hw/ehca/ehca_tools.h
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -59,6 +59,7 @@
#include <linux/cpu.h>
#include <linux/device.h>
+#include <asm/atomic.h>
#include <asm/abs_addr.h>
#include <asm/ibmebus.h>
#include <asm/io.h>
@@ -92,14 +93,14 @@ extern int ehca_debug_level;
#define ehca_gen_dbg(format, arg...) \
do { \
if (unlikely(ehca_debug_level)) \
- printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n",\
+ printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \
get_paca()->paca_index, __FUNCTION__, ## arg); \
} while (0)
#define ehca_gen_warn(format, arg...) \
do { \
if (unlikely(ehca_debug_level)) \
- printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n",\
+ printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \
get_paca()->paca_index, __FUNCTION__, ## arg); \
} while (0)
@@ -113,12 +114,12 @@ extern int ehca_debug_level;
* <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex>
*/
#define ehca_dmp(adr, len, format, args...) \
- do { \
- unsigned int x; \
+ do { \
+ unsigned int x; \
unsigned int l = (unsigned int)(len); \
- unsigned char *deb = (unsigned char*)(adr); \
+ unsigned char *deb = (unsigned char *)(adr); \
for (x = 0; x < l; x += 16) { \
- printk("EHCA_DMP:%s " format \
+ printk(KERN_INFO "EHCA_DMP:%s " format \
" adr=%p ofs=%04x %016lx %016lx\n", \
__FUNCTION__, ##args, deb, x, \
*((u64 *)&deb[0]), *((u64 *)&deb[8])); \
@@ -127,16 +128,16 @@ extern int ehca_debug_level;
} while (0)
/* define a bitmask, little endian version */
-#define EHCA_BMASK(pos,length) (((pos)<<16)+(length))
+#define EHCA_BMASK(pos, length) (((pos) << 16) + (length))
/* define a bitmask, the ibm way... */
-#define EHCA_BMASK_IBM(from,to) (((63-to)<<16)+((to)-(from)+1))
+#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1))
/* internal function, don't use */
-#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff)
+#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff)
/* internal function, don't use */
-#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff))
+#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff))
/**
* EHCA_BMASK_SET - return value shifted and masked by mask
@@ -144,30 +145,16 @@ extern int ehca_debug_level;
* variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask
* in variable
*/
-#define EHCA_BMASK_SET(mask,value) \
- ((EHCA_BMASK_MASK(mask) & ((u64)(value)))<<EHCA_BMASK_SHIFTPOS(mask))
+#define EHCA_BMASK_SET(mask, value) \
+ ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask))
/**
* EHCA_BMASK_GET - extract a parameter from value by mask
*/
-#define EHCA_BMASK_GET(mask,value) \
- (EHCA_BMASK_MASK(mask)& (((u64)(value))>>EHCA_BMASK_SHIFTPOS(mask)))
-
+#define EHCA_BMASK_GET(mask, value) \
+ (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask)))
/* Converts ehca to ib return code */
-static inline int ehca2ib_return_code(u64 ehca_rc)
-{
- switch (ehca_rc) {
- case H_SUCCESS:
- return 0;
- case H_BUSY:
- return -EBUSY;
- case H_NO_MEM:
- return -ENOMEM;
- default:
- return -EINVAL;
- }
-}
-
+int ehca2ib_return_code(u64 ehca_rc);
#endif /* EHCA_TOOLS_H */
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index 73db920b6945..4bc687fdf531 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -70,7 +70,7 @@ int ehca_dealloc_ucontext(struct ib_ucontext *context)
static void ehca_mm_open(struct vm_area_struct *vma)
{
- u32 *count = (u32*)vma->vm_private_data;
+ u32 *count = (u32 *)vma->vm_private_data;
if (!count) {
ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
vma->vm_start, vma->vm_end);
@@ -86,7 +86,7 @@ static void ehca_mm_open(struct vm_area_struct *vma)
static void ehca_mm_close(struct vm_area_struct *vma)
{
- u32 *count = (u32*)vma->vm_private_data;
+ u32 *count = (u32 *)vma->vm_private_data;
if (!count) {
ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
vma->vm_start, vma->vm_end);
@@ -149,7 +149,7 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
ehca_gen_err("vm_insert_page() failed rc=%x", ret);
return ret;
}
- start += PAGE_SIZE;
+ start += PAGE_SIZE;
}
vma->vm_private_data = mm_count;
(*mm_count)++;
@@ -215,7 +215,8 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
case 2: /* qp rqueue_addr */
ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue",
qp->ib_qp.qp_num);
- ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, &qp->mm_count_rqueue);
+ ret = ehca_mmap_queue(vma, &qp->ipz_rqueue,
+ &qp->mm_count_rqueue);
if (unlikely(ret)) {
ehca_err(qp->ib_qp.device,
"ehca_mmap_queue(rq) failed rc=%x qp_num=%x",
@@ -227,7 +228,8 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
case 3: /* qp squeue_addr */
ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue",
qp->ib_qp.qp_num);
- ret = ehca_mmap_queue(vma, &qp->ipz_squeue, &qp->mm_count_squeue);
+ ret = ehca_mmap_queue(vma, &qp->ipz_squeue,
+ &qp->mm_count_squeue);
if (unlikely(ret)) {
ehca_err(qp->ib_qp.device,
"ehca_mmap_queue(sq) failed rc=%x qp_num=%x",
@@ -253,16 +255,16 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
u32 cur_pid = current->tgid;
u32 ret;
- unsigned long flags;
struct ehca_cq *cq;
struct ehca_qp *qp;
struct ehca_pd *pd;
+ struct ib_uobject *uobject;
switch (q_type) {
case 1: /* CQ */
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ read_lock(&ehca_cq_idr_lock);
cq = idr_find(&ehca_cq_idr, idr_handle);
- spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ read_unlock(&ehca_cq_idr_lock);
/* make sure this mmap really belongs to the authorized user */
if (!cq)
@@ -288,9 +290,9 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
break;
case 2: /* QP */
- spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+ read_lock(&ehca_qp_idr_lock);
qp = idr_find(&ehca_qp_idr, idr_handle);
- spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ read_unlock(&ehca_qp_idr_lock);
/* make sure this mmap really belongs to the authorized user */
if (!qp)
@@ -304,7 +306,8 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
return -ENOMEM;
}
- if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context)
+ uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject;
+ if (!uobject || uobject->context != context)
return -EINVAL;
ret = ehca_mmap_qp(vma, qp, rsrc_type);
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 5766ae3a2029..24f454162f24 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -5,6 +5,7 @@
*
* Authors: Christoph Raisch <raisch@de.ibm.com>
* Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Joachim Fenkes <fenkes@de.ibm.com>
* Gerd Bayer <gerd.bayer@de.ibm.com>
* Waleri Fomin <fomin@de.ibm.com>
*
@@ -51,10 +52,13 @@
#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11)
#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12)
#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15)
+#define H_ALL_RES_QP_STORAGE EHCA_BMASK_IBM(16, 17)
#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18)
#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21)
#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23)
#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31)
+#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35)
+#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39)
#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63)
#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15)
@@ -62,6 +66,12 @@
#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39)
#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47)
+#define H_ALL_RES_QP_UD_AV_LKEY EHCA_BMASK_IBM(32, 63)
+#define H_ALL_RES_QP_SRQ_QP_TOKEN EHCA_BMASK_IBM(0, 31)
+#define H_ALL_RES_QP_SRQ_QP_HANDLE EHCA_BMASK_IBM(0, 64)
+#define H_ALL_RES_QP_SRQ_LIMIT EHCA_BMASK_IBM(48, 63)
+#define H_ALL_RES_QP_SRQ_QPN EHCA_BMASK_IBM(40, 63)
+
#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31)
#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63)
#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15)
@@ -74,10 +84,7 @@
#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48)
#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49)
-/* direct access qp controls */
-#define DAQP_CTRL_ENABLE 0x01
-#define DAQP_CTRL_SEND_COMP 0x20
-#define DAQP_CTRL_RECV_COMP 0x40
+static DEFINE_SPINLOCK(hcall_lock);
static u32 get_longbusy_msecs(int longbusy_rc)
{
@@ -155,7 +162,7 @@ static long ehca_plpar_hcall9(unsigned long opcode,
{
long ret;
int i, sleep_msecs, lock_is_set = 0;
- unsigned long flags;
+ unsigned long flags = 0;
ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
"arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
@@ -284,68 +291,73 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
}
u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
- struct ehca_qp *qp,
struct ehca_alloc_qp_parms *parms)
{
u64 ret;
- u64 allocate_controls;
- u64 max_r10_reg;
+ u64 allocate_controls, max_r10_reg, r11, r12;
u64 outs[PLPAR_HCALL9_BUFSIZE];
- u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1;
- u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1;
- int daqp_ctrl = parms->daqp_ctrl;
allocate_controls =
- EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS,
- (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0)
+ EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type)
| EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
| EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
| EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE,
+ parms->squeue.page_size)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE,
+ parms->rqueue.page_size)
| EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
- (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0)
+ !!(parms->ll_comp_flags & LLQP_RECV_COMP))
| EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
- (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0)
+ !!(parms->ll_comp_flags & LLQP_SEND_COMP))
| EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
parms->ud_av_l_key_ctl)
| EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
max_r10_reg =
EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
- max_nr_send_wqes)
+ parms->squeue.max_wr + 1)
| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
- max_nr_receive_wqes)
+ parms->rqueue.max_wr + 1)
| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
- parms->max_send_sge)
+ parms->squeue.max_sge)
| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
- parms->max_recv_sge);
+ parms->rqueue.max_sge);
+
+ r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token);
+
+ if (parms->ext_type == EQPT_SRQ)
+ r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit);
+ else
+ r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn);
ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
adapter_handle.handle, /* r4 */
allocate_controls, /* r5 */
- qp->send_cq->ipz_cq_handle.handle,
- qp->recv_cq->ipz_cq_handle.handle,
- parms->ipz_eq_handle.handle,
- ((u64)qp->token << 32) | parms->pd.value,
- max_r10_reg, /* r10 */
- parms->ud_av_l_key_ctl, /* r11 */
- 0);
- qp->ipz_qp_handle.handle = outs[0];
- qp->real_qp_num = (u32)outs[1];
- parms->act_nr_send_wqes =
+ parms->send_cq_handle.handle,
+ parms->recv_cq_handle.handle,
+ parms->eq_handle.handle,
+ ((u64)parms->token << 32) | parms->pd.value,
+ max_r10_reg, r11, r12);
+
+ parms->qp_handle.handle = outs[0];
+ parms->real_qp_num = (u32)outs[1];
+ parms->squeue.act_nr_wqes =
(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
- parms->act_nr_recv_wqes =
+ parms->rqueue.act_nr_wqes =
(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
- parms->act_nr_send_sges =
+ parms->squeue.act_nr_sges =
(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
- parms->act_nr_recv_sges =
+ parms->rqueue.act_nr_sges =
(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
- parms->nr_sq_pages =
+ parms->squeue.queue_size =
(u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
- parms->nr_rq_pages =
+ parms->rqueue.queue_size =
(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
if (ret == H_SUCCESS)
- hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]);
+ hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]);
if (ret == H_NOT_ENOUGH_RESOURCES)
ehca_gen_err("Not enough resources. ret=%lx", ret);
@@ -423,7 +435,8 @@ u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
{
return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
adapter_handle.handle, /* r4 */
- queue_type | pagesize << 8, /* r5 */
+ (u64)queue_type | ((u64)pagesize) << 8,
+ /* r5 */
resource_handle, /* r6 */
logical_address_of_page, /* r7 */
count, /* r8 */
@@ -492,13 +505,13 @@ u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
const u64 count,
const struct h_galpa galpa)
{
- if (count != 1) {
+ if (count > 1) {
ehca_gen_err("Page counter=%lx", count);
return H_PARAMETER;
}
- return hipz_h_register_rpage(adapter_handle,pagesize,queue_type,
- qp_handle.handle,logical_address_of_page,
+ return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
+ qp_handle.handle, logical_address_of_page,
count);
}
@@ -518,9 +531,9 @@ u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
qp_handle.handle, /* r6 */
0, 0, 0, 0, 0, 0);
if (log_addr_next_sq_wqe2processed)
- *log_addr_next_sq_wqe2processed = (void*)outs[0];
+ *log_addr_next_sq_wqe2processed = (void *)outs[0];
if (log_addr_next_rq_wqe2processed)
- *log_addr_next_rq_wqe2processed = (void*)outs[1];
+ *log_addr_next_rq_wqe2processed = (void *)outs[1];
return ret;
}
@@ -720,6 +733,9 @@ u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
u64 ret;
u64 outs[PLPAR_HCALL9_BUFSIZE];
+ ehca_gen_dbg("kernel PAGE_SIZE=%x access_ctrl=%016x "
+ "vaddr=%lx length=%lx",
+ (u32)PAGE_SIZE, access_ctrl, vaddr, length);
ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
adapter_handle.handle, /* r4 */
5, /* r5 */
@@ -744,6 +760,19 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
{
u64 ret;
+ if (unlikely(ehca_debug_level >= 2)) {
+ if (count > 1) {
+ u64 *kpage;
+ int i;
+ kpage = (u64 *)abs_to_virt(logical_address_of_page);
+ for (i = 0; i < count; i++)
+ ehca_gen_dbg("kpage[%d]=%p",
+ i, (void *)kpage[i]);
+ } else
+ ehca_gen_dbg("kpage=%p",
+ (void *)logical_address_of_page);
+ }
+
if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) {
ehca_gen_err("logical_address_of_page not on a 4k boundary "
"adapter_handle=%lx mr=%p mr_handle=%lx "
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
index 2869f7dd6196..60ce02b70663 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.h
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -78,7 +78,6 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
* initialize resources, create empty QPPTs (2 rings).
*/
u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
- struct ehca_qp *qp,
struct ehca_alloc_qp_parms *parms);
u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c
index 0b1a4772c78a..214821095cb1 100644
--- a/drivers/infiniband/hw/ehca/hcp_phyp.c
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.c
@@ -50,7 +50,7 @@ int hcall_map_page(u64 physaddr, u64 *mapaddr)
int hcall_unmap_page(u64 mapaddr)
{
- iounmap((volatile void __iomem*)mapaddr);
+ iounmap((volatile void __iomem *) mapaddr);
return 0;
}
diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h
index 20898a153446..868735fd3187 100644
--- a/drivers/infiniband/hw/ehca/hipz_fns_core.h
+++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h
@@ -53,10 +53,10 @@
#define hipz_galpa_load_cq(gal, offset) \
hipz_galpa_load(gal, CQTEMM_OFFSET(offset))
-#define hipz_galpa_store_qp(gal,offset, value) \
+#define hipz_galpa_store_qp(gal, offset, value) \
hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value)
#define hipz_galpa_load_qp(gal, offset) \
- hipz_galpa_load(gal,QPTEMM_OFFSET(offset))
+ hipz_galpa_load(gal, QPTEMM_OFFSET(offset))
static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes)
{
diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h
index fad91368dc5a..d9739e554515 100644
--- a/drivers/infiniband/hw/ehca/hipz_hw.h
+++ b/drivers/infiniband/hw/ehca/hipz_hw.h
@@ -161,10 +161,11 @@ struct hipz_qptemm {
/* 0x1000 */
};
-#define QPX_SQADDER EHCA_BMASK_IBM(48,63)
-#define QPX_RQADDER EHCA_BMASK_IBM(48,63)
+#define QPX_SQADDER EHCA_BMASK_IBM(48, 63)
+#define QPX_RQADDER EHCA_BMASK_IBM(48, 63)
+#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3)
-#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x)
+#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x)
/* MRMWPT Entry Memory Map */
struct hipz_mrmwmm {
@@ -186,7 +187,7 @@ struct hipz_mrmwmm {
};
-#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x)
+#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x)
struct hipz_qpedmm {
/* 0x00 */
@@ -237,7 +238,7 @@ struct hipz_qpedmm {
u64 qpedx_rrva3;
};
-#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm,x)
+#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x)
/* CQ Table Entry Memory Map */
struct hipz_cqtemm {
@@ -262,12 +263,12 @@ struct hipz_cqtemm {
/* 0x1000 */
};
-#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32,63)
-#define CQX_FECADDER EHCA_BMASK_IBM(32,63)
-#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0,0)
-#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0,0)
+#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63)
+#define CQX_FECADDER EHCA_BMASK_IBM(32, 63)
+#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0)
+#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0)
-#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm,x)
+#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x)
/* EQ Table Entry Memory Map */
struct hipz_eqtemm {
@@ -292,7 +293,7 @@ struct hipz_eqtemm {
};
-#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm,x)
+#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x)
/* access control defines for MR/MW */
#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000
@@ -360,6 +361,24 @@ struct hipz_query_hca {
u32 max_neq;
} __attribute__ ((packed));
+#define HCA_CAP_AH_PORT_NR_CHECK EHCA_BMASK_IBM( 0, 0)
+#define HCA_CAP_ATOMIC EHCA_BMASK_IBM( 1, 1)
+#define HCA_CAP_AUTO_PATH_MIG EHCA_BMASK_IBM( 2, 2)
+#define HCA_CAP_BAD_P_KEY_CTR EHCA_BMASK_IBM( 3, 3)
+#define HCA_CAP_SQD_RTS_PORT_CHANGE EHCA_BMASK_IBM( 4, 4)
+#define HCA_CAP_CUR_QP_STATE_MOD EHCA_BMASK_IBM( 5, 5)
+#define HCA_CAP_INIT_TYPE EHCA_BMASK_IBM( 6, 6)
+#define HCA_CAP_PORT_ACTIVE_EVENT EHCA_BMASK_IBM( 7, 7)
+#define HCA_CAP_Q_KEY_VIOL_CTR EHCA_BMASK_IBM( 8, 8)
+#define HCA_CAP_WQE_RESIZE EHCA_BMASK_IBM( 9, 9)
+#define HCA_CAP_RAW_PACKET_MCAST EHCA_BMASK_IBM(10, 10)
+#define HCA_CAP_SHUTDOWN_PORT EHCA_BMASK_IBM(11, 11)
+#define HCA_CAP_RC_LL_QP EHCA_BMASK_IBM(12, 12)
+#define HCA_CAP_SRQ EHCA_BMASK_IBM(13, 13)
+#define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16)
+#define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17)
+#define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18)
+
/* query port response block */
struct hipz_query_port {
u32 state;
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
index bf7a40088f61..29bd476fbd54 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -40,6 +40,11 @@
#include "ehca_tools.h"
#include "ipz_pt_fn.h"
+#include "ehca_classes.h"
+
+#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT)
+
+struct kmem_cache *small_qp_cache;
void *ipz_qpageit_get_inc(struct ipz_queue *queue)
{
@@ -49,7 +54,7 @@ void *ipz_qpageit_get_inc(struct ipz_queue *queue)
queue->current_q_offset -= queue->pagesize;
ret = NULL;
}
- if (((u64)ret) % EHCA_PAGESIZE) {
+ if (((u64)ret) % queue->pagesize) {
ehca_gen_err("ERROR!! not at PAGE-Boundary");
return NULL;
}
@@ -83,80 +88,195 @@ int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset)
return -EINVAL;
}
-int ipz_queue_ctor(struct ipz_queue *queue,
- const u32 nr_of_pages,
- const u32 pagesize, const u32 qe_size, const u32 nr_of_sg)
+#if PAGE_SHIFT < EHCA_PAGESHIFT
+#error Kernel pages must be at least as large than eHCA pages (4K) !
+#endif
+
+/*
+ * allocate pages for queue:
+ * outer loop allocates whole kernel pages (page aligned) and
+ * inner loop divides a kernel page into smaller hca queue pages
+ */
+static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages)
{
- int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
- int f;
+ int k, f = 0;
+ u8 *kpage;
- if (pagesize > PAGE_SIZE) {
- ehca_gen_err("FATAL ERROR: pagesize=%x is greater "
- "than kernel page size", pagesize);
- return 0;
- }
- if (!pages_per_kpage) {
- ehca_gen_err("FATAL ERROR: invalid kernel page size. "
- "pages_per_kpage=%x", pages_per_kpage);
- return 0;
- }
- queue->queue_length = nr_of_pages * pagesize;
- queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
- if (!queue->queue_pages) {
- ehca_gen_err("ERROR!! didn't get the memory");
- return 0;
- }
- memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
- /*
- * allocate pages for queue:
- * outer loop allocates whole kernel pages (page aligned) and
- * inner loop divides a kernel page into smaller hca queue pages
- */
- f = 0;
while (f < nr_of_pages) {
- u8 *kpage = (u8*)get_zeroed_page(GFP_KERNEL);
- int k;
+ kpage = (u8 *)get_zeroed_page(GFP_KERNEL);
if (!kpage)
- goto ipz_queue_ctor_exit0; /*NOMEM*/
- for (k = 0; k < pages_per_kpage && f < nr_of_pages; k++) {
- (queue->queue_pages)[f] = (struct ipz_page *)kpage;
+ goto out;
+
+ for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) {
+ queue->queue_pages[f] = (struct ipz_page *)kpage;
kpage += EHCA_PAGESIZE;
f++;
}
}
+ return 1;
- queue->current_q_offset = 0;
+out:
+ for (f = 0; f < nr_of_pages && queue->queue_pages[f];
+ f += PAGES_PER_KPAGE)
+ free_page((unsigned long)(queue->queue_pages)[f]);
+ return 0;
+}
+
+static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
+{
+ int order = ilog2(queue->pagesize) - 9;
+ struct ipz_small_queue_page *page;
+ unsigned long bit;
+
+ mutex_lock(&pd->lock);
+
+ if (!list_empty(&pd->free[order]))
+ page = list_entry(pd->free[order].next,
+ struct ipz_small_queue_page, list);
+ else {
+ page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL);
+ if (!page)
+ goto out;
+
+ page->page = get_zeroed_page(GFP_KERNEL);
+ if (!page->page) {
+ kmem_cache_free(small_qp_cache, page);
+ goto out;
+ }
+
+ list_add(&page->list, &pd->free[order]);
+ }
+
+ bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order);
+ __set_bit(bit, page->bitmap);
+ page->fill++;
+
+ if (page->fill == IPZ_SPAGE_PER_KPAGE >> order)
+ list_move(&page->list, &pd->full[order]);
+
+ mutex_unlock(&pd->lock);
+
+ queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9)));
+ queue->small_page = page;
+ return 1;
+
+out:
+ ehca_err(pd->ib_pd.device, "failed to allocate small queue page");
+ return 0;
+}
+
+static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
+{
+ int order = ilog2(queue->pagesize) - 9;
+ struct ipz_small_queue_page *page = queue->small_page;
+ unsigned long bit;
+ int free_page = 0;
+
+ bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK)
+ >> (order + 9);
+
+ mutex_lock(&pd->lock);
+
+ __clear_bit(bit, page->bitmap);
+ page->fill--;
+
+ if (page->fill == 0) {
+ list_del(&page->list);
+ free_page = 1;
+ }
+
+ if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1)
+ /* the page was full until we freed the chunk */
+ list_move_tail(&page->list, &pd->free[order]);
+
+ mutex_unlock(&pd->lock);
+
+ if (free_page) {
+ free_page(page->page);
+ kmem_cache_free(small_qp_cache, page);
+ }
+}
+
+int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
+ const u32 nr_of_pages, const u32 pagesize,
+ const u32 qe_size, const u32 nr_of_sg,
+ int is_small)
+{
+ if (pagesize > PAGE_SIZE) {
+ ehca_gen_err("FATAL ERROR: pagesize=%x "
+ "is greater than kernel page size", pagesize);
+ return 0;
+ }
+
+ /* init queue fields */
+ queue->queue_length = nr_of_pages * pagesize;
+ queue->pagesize = pagesize;
queue->qe_size = qe_size;
queue->act_nr_of_sg = nr_of_sg;
- queue->pagesize = pagesize;
+ queue->current_q_offset = 0;
queue->toggle_state = 1;
- return 1;
+ queue->small_page = NULL;
- ipz_queue_ctor_exit0:
- ehca_gen_err("Couldn't get alloc pages queue=%p f=%x nr_of_pages=%x",
- queue, f, nr_of_pages);
- for (f = 0; f < nr_of_pages; f += pages_per_kpage) {
- if (!(queue->queue_pages)[f])
- break;
- free_page((unsigned long)(queue->queue_pages)[f]);
+ /* allocate queue page pointers */
+ queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+ if (!queue->queue_pages) {
+ ehca_gen_err("Couldn't allocate queue page list");
+ return 0;
}
+ memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
+
+ /* allocate actual queue pages */
+ if (is_small) {
+ if (!alloc_small_queue_page(queue, pd))
+ goto ipz_queue_ctor_exit0;
+ } else
+ if (!alloc_queue_pages(queue, nr_of_pages))
+ goto ipz_queue_ctor_exit0;
+
+ return 1;
+
+ipz_queue_ctor_exit0:
+ ehca_gen_err("Couldn't alloc pages queue=%p "
+ "nr_of_pages=%x", queue, nr_of_pages);
+ vfree(queue->queue_pages);
+
return 0;
}
-int ipz_queue_dtor(struct ipz_queue *queue)
+int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue)
{
- int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
- int g;
- int nr_pages;
+ int i, nr_pages;
if (!queue || !queue->queue_pages) {
ehca_gen_dbg("queue or queue_pages is NULL");
return 0;
}
- nr_pages = queue->queue_length / queue->pagesize;
- for (g = 0; g < nr_pages; g += pages_per_kpage)
- free_page((unsigned long)(queue->queue_pages)[g]);
+
+ if (queue->small_page)
+ free_small_queue_page(queue, pd);
+ else {
+ nr_pages = queue->queue_length / queue->pagesize;
+ for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE)
+ free_page((unsigned long)queue->queue_pages[i]);
+ }
+
vfree(queue->queue_pages);
return 1;
}
+
+int ehca_init_small_qp_cache(void)
+{
+ small_qp_cache = kmem_cache_create("ehca_cache_small_qp",
+ sizeof(struct ipz_small_queue_page),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!small_qp_cache)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void ehca_cleanup_small_qp_cache(void)
+{
+ kmem_cache_destroy(small_qp_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
index 57f141a36bce..a801274ea337 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -51,11 +51,27 @@
#include "ehca_tools.h"
#include "ehca_qes.h"
+struct ehca_pd;
+struct ipz_small_queue_page;
+
+extern struct kmem_cache *small_qp_cache;
+
/* struct generic ehca page */
struct ipz_page {
u8 entries[EHCA_PAGESIZE];
};
+#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512)
+
+struct ipz_small_queue_page {
+ unsigned long page;
+ unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG];
+ int fill;
+ void *mapped_addr;
+ u32 mmap_count;
+ struct list_head list;
+};
+
/* struct generic queue in linux kernel virtual memory (kv) */
struct ipz_queue {
u64 current_q_offset; /* current queue entry */
@@ -66,7 +82,8 @@ struct ipz_queue {
u32 queue_length; /* queue length allocated in bytes */
u32 pagesize;
u32 toggle_state; /* toggle flag - per page */
- u32 dummy3; /* 64 bit alignment */
+ u32 offset; /* save offset within page for small_qp */
+ struct ipz_small_queue_page *small_page;
};
/*
@@ -105,7 +122,6 @@ void *ipz_qpageit_get_inc(struct ipz_queue *queue);
* step in struct ipz_queue, will wrap in ringbuffer
* returns address (kv) of Queue Entry BEFORE increment
* warning don't use in parallel with ipz_qpageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
*/
static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
{
@@ -121,31 +137,24 @@ static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
}
/*
+ * return a bool indicating whether current Queue Entry is valid
+ */
+static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
+{
+ struct ehca_cqe *cqe = ipz_qeit_get(queue);
+ return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1));
+}
+
+/*
* return current Queue Entry, increment Queue Entry iterator by one
* step in struct ipz_queue, will wrap in ringbuffer
* returns address (kv) of Queue Entry BEFORE increment
* returns 0 and does not increment, if wrong valid state
* warning don't use in parallel with ipz_qpageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
*/
static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
{
- struct ehca_cqe *cqe = ipz_qeit_get(queue);
- u32 cqe_flags = cqe->cqe_flags;
-
- if ((cqe_flags >> 7) != (queue->toggle_state & 1))
- return NULL;
-
- ipz_qeit_get_inc(queue);
- return cqe;
-}
-
-static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
-{
- struct ehca_cqe *cqe = ipz_qeit_get(queue);
- u32 cqe_flags = cqe->cqe_flags;
-
- return cqe_flags >> 7 == (queue->toggle_state & 1);
+ return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL;
}
/*
@@ -196,9 +205,10 @@ struct ipz_qpt {
* see ipz_qpt_ctor()
* returns true if ok, false if out of memory
*/
-int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages,
- const u32 pagesize, const u32 qe_size,
- const u32 nr_of_sg);
+int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
+ const u32 nr_of_pages, const u32 pagesize,
+ const u32 qe_size, const u32 nr_of_sg,
+ int is_small);
/*
* destructor for a ipz_queue_t
@@ -206,7 +216,7 @@ int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages,
* see ipz_queue_ctor()
* returns true if ok, false if queue was NULL-ptr of free failed
*/
-int ipz_queue_dtor(struct ipz_queue *queue);
+int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue);
/*
* constructor for a ipz_qpt_t,
@@ -248,7 +258,7 @@ void *ipz_qeit_eq_get_inc(struct ipz_queue *queue);
static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
{
void *ret = ipz_qeit_get(queue);
- u32 qe = *(u8 *) ret;
+ u32 qe = *(u8 *)ret;
if ((qe >> 7) != (queue->toggle_state & 1))
return NULL;
ipz_qeit_eq_get_inc(queue); /* this is a good one */
@@ -258,7 +268,7 @@ static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue)
{
void *ret = ipz_qeit_get(queue);
- u32 qe = *(u8 *) ret;
+ u32 qe = *(u8 *)ret;
if ((qe >> 7) != (queue->toggle_state & 1))
return NULL;
return ret;
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig
index 90c14543677d..044da5828a78 100644
--- a/drivers/infiniband/hw/ipath/Kconfig
+++ b/drivers/infiniband/hw/ipath/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_IPATH
tristate "QLogic InfiniPath Driver"
- depends on (PCI_MSI || HT_IRQ) && 64BIT && INFINIBAND && NET
+ depends on (PCI_MSI || HT_IRQ) && 64BIT && NET
---help---
This is a driver for QLogic InfiniPath host channel adapters,
including InfiniBand verbs support. This driver allows these
diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
index ec2e603ea241..fe6738826865 100644
--- a/drivers/infiniband/hw/ipath/Makefile
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -14,7 +14,6 @@ ib_ipath-y := \
ipath_init_chip.o \
ipath_intr.o \
ipath_keys.o \
- ipath_layer.o \
ipath_mad.o \
ipath_mmap.o \
ipath_mr.o \
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 10c008f22ba6..6ad822c35930 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -100,8 +100,7 @@ struct infinipath_stats {
__u64 sps_hwerrs;
/* number of times IB link changed state unexpectedly */
__u64 sps_iblink;
- /* kernel receive interrupts that didn't read intstat */
- __u64 sps_fastrcvint;
+ __u64 sps_unused; /* was fastrcvint, no longer implemented */
/* number of kernel (port0) packets received */
__u64 sps_port0pkts;
/* number of "ethernet" packets sent by driver */
@@ -189,8 +188,7 @@ typedef enum _ipath_ureg {
#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4
#define IPATH_RUNTIME_RCVHDR_COPY 0x8
#define IPATH_RUNTIME_MASTER 0x10
-#define IPATH_RUNTIME_PBC_REWRITE 0x20
-#define IPATH_RUNTIME_LOOSE_DMA_ALIGN 0x40
+/* 0x20 and 0x40 are no longer used, but are reserved for ABI compatibility */
/*
* This structure is returned by ipath_userinit() immediately after
@@ -432,8 +430,15 @@ struct ipath_user_info {
#define IPATH_CMD_UNUSED_1 25
#define IPATH_CMD_UNUSED_2 26
#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */
+#define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */
-#define IPATH_CMD_MAX 27
+#define IPATH_CMD_MAX 28
+
+/*
+ * Poll types
+ */
+#define IPATH_POLL_TYPE_URGENT 0x01
+#define IPATH_POLL_TYPE_OVERFLOW 0x02
struct ipath_port_info {
__u32 num_active; /* number of active units */
@@ -474,6 +479,8 @@ struct ipath_cmd {
__u16 part_key;
/* user address of __u32 bitmask of active slaves */
__u64 slave_mask_addr;
+ /* type of polling we want */
+ __u16 poll_type;
} cmd;
};
@@ -502,13 +509,30 @@ struct __ipath_sendpkt {
struct ipath_iovec sps_iov[4];
};
-/* Passed into diag data special file's ->write method. */
+/*
+ * diagnostics can send a packet by "writing" one of the following
+ * two structs to diag data special file
+ * The first is the legacy version for backward compatibility
+ */
struct ipath_diag_pkt {
__u32 unit;
__u64 data;
__u32 len;
};
+/* The second diag_pkt struct is the expanded version that allows
+ * more control over the packet, specifically, by allowing a custom
+ * pbc (+ extra) qword, so that special modes and deliberate
+ * changes to CRCs can be used. The elements were also re-ordered
+ * for better alignment and to avoid padding issues.
+ */
+struct ipath_diag_xpkt {
+ __u64 data;
+ __u64 pbc_wd;
+ __u32 unit;
+ __u32 len;
+};
+
/*
* Data layout in I2C flash (for GUID, etc.)
* All fields are little-endian binary unless otherwise stated
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 3e9241badba0..a6f04d27ec57 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -90,6 +90,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
wc->queue[head].sl = entry->sl;
wc->queue[head].dlid_path_bits = entry->dlid_path_bits;
wc->queue[head].port_num = entry->port_num;
+ /* Make sure queue entry is written before the head index. */
+ smp_wmb();
wc->head = next;
if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -139,7 +141,8 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
if (tail == wc->head)
break;
-
+ /* Make sure entry is read after head index is read. */
+ smp_rmb();
qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table,
wc->queue[tail].qp_num);
entry->qp = &qp->ibqp;
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index 42bfbdb0d3e6..19c56e6491eb 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 63e8368b0e95..cf25cdab02f9 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -44,6 +44,7 @@
#include <linux/io.h>
#include <linux/pci.h>
#include <linux/vmalloc.h>
+#include <linux/fs.h>
#include <asm/uaccess.h>
#include "ipath_kernel.h"
@@ -323,13 +324,14 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
{
u32 __iomem *piobuf;
u32 plen, clen, pbufn;
- struct ipath_diag_pkt dp;
+ struct ipath_diag_pkt odp;
+ struct ipath_diag_xpkt dp;
u32 *tmpbuf = NULL;
struct ipath_devdata *dd;
ssize_t ret = 0;
u64 val;
- if (count < sizeof(dp)) {
+ if (count != sizeof(dp)) {
ret = -EINVAL;
goto bail;
}
@@ -339,6 +341,29 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
goto bail;
}
+ /*
+ * Due to padding/alignment issues (lessened with new struct)
+ * the old and new structs are the same length. We need to
+ * disambiguate them, which we can do because odp.len has never
+ * been less than the total of LRH+BTH+DETH so far, while
+ * dp.unit (same offset) unit is unlikely to get that high.
+ * Similarly, dp.data, the pointer to user at the same offset
+ * as odp.unit, is almost certainly at least one (512byte)page
+ * "above" NULL. The if-block below can be omitted if compatibility
+ * between a new driver and older diagnostic code is unimportant.
+ * compatibility the other direction (new diags, old driver) is
+ * handled in the diagnostic code, with a warning.
+ */
+ if (dp.unit >= 20 && dp.data < 512) {
+ /* very probable version mismatch. Fix it up */
+ memcpy(&odp, &dp, sizeof(odp));
+ /* We got a legacy dp, copy elements to dp */
+ dp.unit = odp.unit;
+ dp.data = odp.data;
+ dp.len = odp.len;
+ dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */
+ }
+
/* send count must be an exact number of dwords */
if (dp.len & 3) {
ret = -EINVAL;
@@ -371,9 +396,10 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
ret = -ENODEV;
goto bail;
}
+ /* Check link state, but not if we have custom PBC */
val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
- if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM &&
- val != IPATH_IBSTATE_ACTIVE) {
+ if (!dp.pbc_wd && val != IPATH_IBSTATE_INIT &&
+ val != IPATH_IBSTATE_ARM && val != IPATH_IBSTATE_ACTIVE) {
ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
dd->ipath_unit, (unsigned long long) val);
ret = -EINVAL;
@@ -419,9 +445,13 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
dd->ipath_unit, plen - 1, pbufn);
+ if (dp.pbc_wd == 0)
+ /* Legacy operation, use computed pbc_wd */
+ dp.pbc_wd = plen;
+
/* we have to flush after the PBC for correctness on some cpus
* or WC buffer can be written out of order */
- writeq(plen, piobuf);
+ writeq(dp.pbc_wd, piobuf);
ipath_flush_wc();
/* copy all by the trigger word, then flush, so it's written
* to chip before trigger word, then write trigger word, then
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index e3a223209710..6ccba365a24c 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -104,6 +104,9 @@ static int __devinit ipath_init_one(struct pci_dev *,
#define PCI_DEVICE_ID_INFINIPATH_HT 0xd
#define PCI_DEVICE_ID_INFINIPATH_PE800 0x10
+/* Number of seconds before our card status check... */
+#define STATUS_TIMEOUT 60
+
static const struct pci_device_id ipath_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) },
@@ -119,6 +122,18 @@ static struct pci_driver ipath_driver = {
.id_table = ipath_pci_tbl,
};
+static void ipath_check_status(struct work_struct *work)
+{
+ struct ipath_devdata *dd = container_of(work, struct ipath_devdata,
+ status_work.work);
+
+ /*
+ * If we don't have any interrupts, let the user know and
+ * don't bother checking again.
+ */
+ if (dd->ipath_int_counter == 0)
+ dev_err(&dd->pcidev->dev, "No interrupts detected.\n");
+}
static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
u32 *bar0, u32 *bar1)
@@ -187,6 +202,8 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
dd->pcidev = pdev;
pci_set_drvdata(pdev, dd);
+ INIT_DELAYED_WORK(&dd->status_work, ipath_check_status);
+
list_add(&dd->ipath_list, &ipath_dev_list);
bail_unlock:
@@ -270,7 +287,6 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
struct ipath_devdata *dd;
unsigned long long addr;
u32 bar0 = 0, bar1 = 0;
- u8 rev;
dd = ipath_alloc_devdata(pdev);
if (IS_ERR(dd)) {
@@ -432,13 +448,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
dd->ipath_deviceid = ent->device; /* save for later use */
dd->ipath_vendorid = ent->vendor;
- ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev);
- if (ret) {
- ipath_dev_err(dd, "Failed to read PCI revision ID unit "
- "%u: err %d\n", dd->ipath_unit, -ret);
- goto bail_regions; /* shouldn't ever happen */
- }
- dd->ipath_pcirev = rev;
+ dd->ipath_pcirev = pdev->revision;
#if defined(__powerpc__)
/* There isn't a generic way to specify writethrough mappings */
@@ -511,6 +521,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
ipath_diag_add(dd);
ipath_register_ib_device(dd);
+ /* Check that card status in STATUS_TIMEOUT seconds. */
+ schedule_delayed_work(&dd->status_work, HZ * STATUS_TIMEOUT);
+
goto bail;
bail_irqsetup:
@@ -638,6 +651,9 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
*/
ipath_shutdown_device(dd);
+ cancel_delayed_work(&dd->status_work);
+ flush_scheduled_work();
+
if (dd->verbs_dev)
ipath_unregister_ib_device(dd->verbs_dev);
@@ -706,9 +722,9 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
u64 sendctrl, sendorig;
ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
- sendorig = dd->ipath_sendctrl | INFINIPATH_S_DISARM;
+ sendorig = dd->ipath_sendctrl;
for (i = first; i < last; i++) {
- sendctrl = sendorig |
+ sendctrl = sendorig | INFINIPATH_S_DISARM |
(i << INFINIPATH_S_DISARMPIOBUF_SHIFT);
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
sendctrl);
@@ -719,12 +735,12 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
* while we were looping; no critical bits that would require
* locking.
*
- * Write a 0, and then the original value, reading scratch in
+ * disable PIOAVAILUPD, then re-enable, reading scratch in
* between. This seems to avoid a chip timing race that causes
* pioavail updates to memory to stop.
*/
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- 0);
+ sendorig & ~INFINIPATH_S_PIOBUFAVAILUPD);
sendorig = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl);
@@ -1021,14 +1037,10 @@ void ipath_kreceive(struct ipath_devdata *dd)
goto bail;
}
- /* There is already a thread processing this queue. */
- if (test_and_set_bit(0, &dd->ipath_rcv_pending))
- goto bail;
-
l = dd->ipath_port0head;
hdrqtail = (u32) le64_to_cpu(*dd->ipath_hdrqtailptr);
if (l == hdrqtail)
- goto done;
+ goto bail;
reloop:
for (i = 0; l != hdrqtail; i++) {
@@ -1163,10 +1175,6 @@ reloop:
ipath_stats.sps_avgpkts_call =
ipath_stats.sps_port0pkts / ++totcalls;
-done:
- clear_bit(0, &dd->ipath_rcv_pending);
- smp_mb__after_clear_bit();
-
bail:;
}
@@ -1596,6 +1604,38 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
return ret;
}
+
+/*
+ * Flush all sends that might be in the ready to send state, as well as any
+ * that are in the process of being sent. Used whenever we need to be
+ * sure the send side is idle. Cleans up all buffer state by canceling
+ * all pio buffers, and issuing an abort, which cleans up anything in the
+ * launch fifo. The cancel is superfluous on some chip versions, but
+ * it's safer to always do it.
+ * PIOAvail bits are updated by the chip as if normal send had happened.
+ */
+void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
+{
+ ipath_dbg("Cancelling all in-progress send buffers\n");
+ dd->ipath_lastcancel = jiffies+HZ/2; /* skip armlaunch errs a bit */
+ /*
+ * the abort bit is auto-clearing. We read scratch to be sure
+ * that cancels and the abort have taken effect in the chip.
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ INFINIPATH_S_ABORT);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ ipath_disarm_piobufs(dd, 0,
+ (unsigned)(dd->ipath_piobcnt2k + dd->ipath_piobcnt4k));
+ if (restore_sendctrl) /* else done by caller later */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+
+ /* and again, be sure all have hit the chip */
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+}
+
+
static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
{
static const char *what[4] = {
@@ -1617,14 +1657,8 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
/* flush all queued sends when going to DOWN or INIT, to be sure that
* they don't block MAD packets */
- if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) {
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- INFINIPATH_S_ABORT);
- ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
- (unsigned)(dd->ipath_piobcnt2k +
- dd->ipath_piobcnt4k) -
- dd->ipath_lastport_piobuf);
- }
+ if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT)
+ ipath_cancel_sends(dd, 1);
ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
dd->ipath_ibcctrl | which);
@@ -1846,6 +1880,87 @@ void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
ipath_write_kreg(dd, where, value);
}
+/*
+ * Following deal with the "obviously simple" task of overriding the state
+ * of the LEDS, which normally indicate link physical and logical status.
+ * The complications arise in dealing with different hardware mappings
+ * and the board-dependent routine being called from interrupts.
+ * and then there's the requirement to _flash_ them.
+ */
+#define LED_OVER_FREQ_SHIFT 8
+#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
+/* Below is "non-zero" to force override, but both actual LEDs are off */
+#define LED_OVER_BOTH_OFF (8)
+
+static void ipath_run_led_override(unsigned long opaque)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+ int timeoff;
+ int pidx;
+ u64 lstate, ltstate, val;
+
+ if (!(dd->ipath_flags & IPATH_INITTED))
+ return;
+
+ pidx = dd->ipath_led_override_phase++ & 1;
+ dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
+ timeoff = dd->ipath_led_override_timeoff;
+
+ /*
+ * below potentially restores the LED values per current status,
+ * should also possibly setup the traffic-blink register,
+ * but leave that to per-chip functions.
+ */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+ ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
+ INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
+ lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
+ INFINIPATH_IBCS_LINKSTATE_MASK;
+
+ dd->ipath_f_setextled(dd, lstate, ltstate);
+ mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
+}
+
+void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
+{
+ int timeoff, freq;
+
+ if (!(dd->ipath_flags & IPATH_INITTED))
+ return;
+
+ /* First check if we are blinking. If not, use 1HZ polling */
+ timeoff = HZ;
+ freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
+
+ if (freq) {
+ /* For blink, set each phase from one nybble of val */
+ dd->ipath_led_override_vals[0] = val & 0xF;
+ dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
+ timeoff = (HZ << 4)/freq;
+ } else {
+ /* Non-blink set both phases the same. */
+ dd->ipath_led_override_vals[0] = val & 0xF;
+ dd->ipath_led_override_vals[1] = val & 0xF;
+ }
+ dd->ipath_led_override_timeoff = timeoff;
+
+ /*
+ * If the timer has not already been started, do so. Use a "quick"
+ * timeout so the function will be called soon, to look at our request.
+ */
+ if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
+ /* Need to start timer */
+ init_timer(&dd->ipath_led_override_timer);
+ dd->ipath_led_override_timer.function =
+ ipath_run_led_override;
+ dd->ipath_led_override_timer.data = (unsigned long) dd;
+ dd->ipath_led_override_timer.expires = jiffies + 1;
+ add_timer(&dd->ipath_led_override_timer);
+ } else {
+ atomic_dec(&dd->ipath_led_override_timer_active);
+ }
+}
+
/**
* ipath_shutdown_device - shut down a device
* @dd: the infinipath device
@@ -1886,17 +2001,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
*/
udelay(5);
- /*
- * abort any armed or launched PIO buffers that didn't go. (self
- * clearing). Will cause any packet currently being transmitted to
- * go out with an EBP, and may also cause a short packet error on
- * the receiver.
- */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- INFINIPATH_S_ABORT);
-
ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+ ipath_cancel_sends(dd, 0);
/* disable IBC */
dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
@@ -1909,7 +2016,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
* Turn the LEDs off explictly for the same reason.
*/
dd->ipath_f_quiet_serdes(dd);
- dd->ipath_f_setextled(dd, 0, 0);
if (dd->ipath_stats_timer_active) {
del_timer_sync(&dd->ipath_stats_timer);
@@ -1925,6 +2031,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
+
+ ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
+ ipath_update_eeprom_log(dd);
}
/**
@@ -2085,6 +2194,16 @@ int ipath_reset_device(int unit)
goto bail;
}
+ if (atomic_read(&dd->ipath_led_override_timer_active)) {
+ /* Need to stop LED timer, _then_ shut off LEDs */
+ del_timer_sync(&dd->ipath_led_override_timer);
+ atomic_set(&dd->ipath_led_override_timer_active, 0);
+ }
+
+ /* Shut off LEDs after we are sure timer is not running */
+ dd->ipath_led_override = LED_OVER_BOTH_OFF;
+ dd->ipath_f_setextled(dd, 0, 0);
+
dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index 030185f90ee2..b4503e9c1e95 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -95,39 +95,37 @@ static int i2c_gpio_set(struct ipath_devdata *dd,
enum i2c_type line,
enum i2c_state new_line_state)
{
- u64 read_val, write_val, mask, *gpioval;
+ u64 out_mask, dir_mask, *gpioval;
+ unsigned long flags = 0;
gpioval = &dd->ipath_gpio_out;
- read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
- if (line == i2c_line_scl)
- mask = dd->ipath_gpio_scl;
- else
- mask = dd->ipath_gpio_sda;
- if (new_line_state == i2c_line_high)
+ if (line == i2c_line_scl) {
+ dir_mask = dd->ipath_gpio_scl;
+ out_mask = (1UL << dd->ipath_gpio_scl_num);
+ } else {
+ dir_mask = dd->ipath_gpio_sda;
+ out_mask = (1UL << dd->ipath_gpio_sda_num);
+ }
+
+ spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+ if (new_line_state == i2c_line_high) {
/* tri-state the output rather than force high */
- write_val = read_val & ~mask;
- else
+ dd->ipath_extctrl &= ~dir_mask;
+ } else {
/* config line to be an output */
- write_val = read_val | mask;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
+ dd->ipath_extctrl |= dir_mask;
+ }
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
- /* set high and verify */
+ /* set output as well (no real verify) */
if (new_line_state == i2c_line_high)
- write_val = 0x1UL;
+ *gpioval |= out_mask;
else
- write_val = 0x0UL;
+ *gpioval &= ~out_mask;
- if (line == i2c_line_scl) {
- write_val <<= dd->ipath_gpio_scl_num;
- *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_scl_num);
- *gpioval |= write_val;
- } else {
- write_val <<= dd->ipath_gpio_sda_num;
- *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_sda_num);
- *gpioval |= write_val;
- }
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
+ spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
return 0;
}
@@ -145,8 +143,9 @@ static int i2c_gpio_get(struct ipath_devdata *dd,
enum i2c_type line,
enum i2c_state *curr_statep)
{
- u64 read_val, write_val, mask;
+ u64 read_val, mask;
int ret;
+ unsigned long flags = 0;
/* check args */
if (curr_statep == NULL) {
@@ -154,15 +153,21 @@ static int i2c_gpio_get(struct ipath_devdata *dd,
goto bail;
}
- read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
/* config line to be an input */
if (line == i2c_line_scl)
mask = dd->ipath_gpio_scl;
else
mask = dd->ipath_gpio_sda;
- write_val = read_val & ~mask;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
+
+ spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+ dd->ipath_extctrl &= ~mask;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
+ /*
+ * Below is very unlikely to reflect true input state if Output
+ * Enable actually changed.
+ */
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
+ spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
if (read_val & mask)
*curr_statep = i2c_line_high;
@@ -192,6 +197,7 @@ static void i2c_wait_for_writes(struct ipath_devdata *dd)
static void scl_out(struct ipath_devdata *dd, u8 bit)
{
+ udelay(1);
i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low);
i2c_wait_for_writes(dd);
@@ -314,12 +320,18 @@ static int eeprom_reset(struct ipath_devdata *dd)
int clock_cycles_left = 9;
u64 *gpioval = &dd->ipath_gpio_out;
int ret;
+ unsigned long flags;
- eeprom_init = 1;
+ spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+ /* Make sure shadows are consistent */
+ dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
*gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out);
+ spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
+
ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
"is %llx\n", (unsigned long long) *gpioval);
+ eeprom_init = 1;
/*
* This is to get the i2c into a known state, by first going low,
* then tristate sda (and then tristate scl as first thing
@@ -355,8 +367,8 @@ bail:
* @len: number of bytes to receive
*/
-int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
- void *buffer, int len)
+static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
+ u8 eeprom_offset, void *buffer, int len)
{
/* compiler complains unless initialized */
u8 single_byte = 0;
@@ -406,6 +418,7 @@ bail:
return ret;
}
+
/**
* ipath_eeprom_write - writes data to the eeprom via I2C
* @dd: the infinipath device
@@ -413,8 +426,8 @@ bail:
* @buffer: data to write
* @len: number of bytes to write
*/
-int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
- const void *buffer, int len)
+static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
+ const void *buffer, int len)
{
u8 single_byte;
int sub_len;
@@ -488,6 +501,38 @@ bail:
return ret;
}
+/*
+ * The public entry-points ipath_eeprom_read() and ipath_eeprom_write()
+ * are now just wrappers around the internal functions.
+ */
+int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
+ void *buff, int len)
+{
+ int ret;
+
+ ret = down_interruptible(&dd->ipath_eep_sem);
+ if (!ret) {
+ ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
+ up(&dd->ipath_eep_sem);
+ }
+
+ return ret;
+}
+
+int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
+ const void *buff, int len)
+{
+ int ret;
+
+ ret = down_interruptible(&dd->ipath_eep_sem);
+ if (!ret) {
+ ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
+ up(&dd->ipath_eep_sem);
+ }
+
+ return ret;
+}
+
static u8 flash_csum(struct ipath_flash *ifp, int adjust)
{
u8 *ip = (u8 *) ifp;
@@ -515,7 +560,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
void *buf;
struct ipath_flash *ifp;
__be64 guid;
- int len;
+ int len, eep_stat;
u8 csum, *bguid;
int t = dd->ipath_unit;
struct ipath_devdata *dd0 = ipath_lookup(0);
@@ -559,7 +604,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
goto bail;
}
- if (ipath_eeprom_read(dd, 0, buf, len)) {
+ down(&dd->ipath_eep_sem);
+ eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
+ up(&dd->ipath_eep_sem);
+
+ if (eep_stat) {
ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
goto done;
}
@@ -634,8 +683,192 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
(unsigned long long) be64_to_cpu(dd->ipath_guid));
+ memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
+ /*
+ * Power-on (actually "active") hours are kept as little-endian value
+ * in EEPROM, but as seconds in a (possibly as small as 24-bit)
+ * atomic_t while running.
+ */
+ atomic_set(&dd->ipath_active_time, 0);
+ dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
+
done:
vfree(buf);
bail:;
}
+
+/**
+ * ipath_update_eeprom_log - copy active-time and error counters to eeprom
+ * @dd: the infinipath device
+ *
+ * Although the time is kept as seconds in the ipath_devdata struct, it is
+ * rounded to hours for re-write, as we have only 16 bits in EEPROM.
+ * First-cut code reads whole (expected) struct ipath_flash, modifies,
+ * re-writes. Future direction: read/write only what we need, assuming
+ * that the EEPROM had to have been "good enough" for driver init, and
+ * if not, we aren't making it worse.
+ *
+ */
+
+int ipath_update_eeprom_log(struct ipath_devdata *dd)
+{
+ void *buf;
+ struct ipath_flash *ifp;
+ int len, hi_water;
+ uint32_t new_time, new_hrs;
+ u8 csum;
+ int ret, idx;
+ unsigned long flags;
+
+ /* first, check if we actually need to do anything. */
+ ret = 0;
+ for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+ if (dd->ipath_eep_st_new_errs[idx]) {
+ ret = 1;
+ break;
+ }
+ }
+ new_time = atomic_read(&dd->ipath_active_time);
+
+ if (ret == 0 && new_time < 3600)
+ return 0;
+
+ /*
+ * The quick-check above determined that there is something worthy
+ * of logging, so get current contents and do a more detailed idea.
+ */
+ len = offsetof(struct ipath_flash, if_future);
+ buf = vmalloc(len);
+ ret = 1;
+ if (!buf) {
+ ipath_dev_err(dd, "Couldn't allocate memory to read %u "
+ "bytes from eeprom for logging\n", len);
+ goto bail;
+ }
+
+ /* Grab semaphore and read current EEPROM. If we get an
+ * error, let go, but if not, keep it until we finish write.
+ */
+ ret = down_interruptible(&dd->ipath_eep_sem);
+ if (ret) {
+ ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
+ goto free_bail;
+ }
+ ret = ipath_eeprom_internal_read(dd, 0, buf, len);
+ if (ret) {
+ up(&dd->ipath_eep_sem);
+ ipath_dev_err(dd, "Unable read EEPROM for logging\n");
+ goto free_bail;
+ }
+ ifp = (struct ipath_flash *)buf;
+
+ csum = flash_csum(ifp, 0);
+ if (csum != ifp->if_csum) {
+ up(&dd->ipath_eep_sem);
+ ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
+ csum, ifp->if_csum);
+ ret = 1;
+ goto free_bail;
+ }
+ hi_water = 0;
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+ int new_val = dd->ipath_eep_st_new_errs[idx];
+ if (new_val) {
+ /*
+ * If we have seen any errors, add to EEPROM values
+ * We need to saturate at 0xFF (255) and we also
+ * would need to adjust the checksum if we were
+ * trying to minimize EEPROM traffic
+ * Note that we add to actual current count in EEPROM,
+ * in case it was altered while we were running.
+ */
+ new_val += ifp->if_errcntp[idx];
+ if (new_val > 0xFF)
+ new_val = 0xFF;
+ if (ifp->if_errcntp[idx] != new_val) {
+ ifp->if_errcntp[idx] = new_val;
+ hi_water = offsetof(struct ipath_flash,
+ if_errcntp) + idx;
+ }
+ /*
+ * update our shadow (used to minimize EEPROM
+ * traffic), to match what we are about to write.
+ */
+ dd->ipath_eep_st_errs[idx] = new_val;
+ dd->ipath_eep_st_new_errs[idx] = 0;
+ }
+ }
+ /*
+ * now update active-time. We would like to round to the nearest hour
+ * but unless atomic_t are sure to be proper signed ints we cannot,
+ * because we need to account for what we "transfer" to EEPROM and
+ * if we log an hour at 31 minutes, then we would need to set
+ * active_time to -29 to accurately count the _next_ hour.
+ */
+ if (new_time > 3600) {
+ new_hrs = new_time / 3600;
+ atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
+ new_hrs += dd->ipath_eep_hrs;
+ if (new_hrs > 0xFFFF)
+ new_hrs = 0xFFFF;
+ dd->ipath_eep_hrs = new_hrs;
+ if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
+ ifp->if_powerhour[0] = new_hrs & 0xFF;
+ hi_water = offsetof(struct ipath_flash, if_powerhour);
+ }
+ if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
+ ifp->if_powerhour[1] = new_hrs >> 8;
+ hi_water = offsetof(struct ipath_flash, if_powerhour)
+ + 1;
+ }
+ }
+ /*
+ * There is a tiny possibility that we could somehow fail to write
+ * the EEPROM after updating our shadows, but problems from holding
+ * the spinlock too long are a much bigger issue.
+ */
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+ if (hi_water) {
+ /* we made some change to the data, uopdate cksum and write */
+ csum = flash_csum(ifp, 1);
+ ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
+ }
+ up(&dd->ipath_eep_sem);
+ if (ret)
+ ipath_dev_err(dd, "Failed updating EEPROM\n");
+
+free_bail:
+ vfree(buf);
+bail:
+ return ret;
+
+}
+
+/**
+ * ipath_inc_eeprom_err - increment one of the four error counters
+ * that are logged to EEPROM.
+ * @dd: the infinipath device
+ * @eidx: 0..3, the counter to increment
+ * @incr: how much to add
+ *
+ * Each counter is 8-bits, and saturates at 255 (0xFF). They
+ * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
+ * is called, but it can only be called in a context that allows sleep.
+ * This function can be called even at interrupt level.
+ */
+
+void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
+{
+ uint new_val;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
+ if (new_val > 255)
+ new_val = 255;
+ dd->ipath_eep_st_new_errs[eidx] = new_val;
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+ return;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 1272aaf2a785..33ab0d6b80ff 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -396,7 +396,8 @@ static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
"TID %u, vaddr %lx, physaddr %llx pgp %p\n",
tid, vaddr, (unsigned long long) physaddr,
pagep[i]);
- dd->ipath_f_put_tid(dd, &tidbase[tid], 1, physaddr);
+ dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
+ physaddr);
/*
* don't check this tid in ipath_portshadow, since we
* just filled it in; start with the next one.
@@ -422,7 +423,8 @@ static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
if (dd->ipath_pageshadow[porttid + tid]) {
ipath_cdbg(VERBOSE, "Freeing TID %u\n",
tid);
- dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
+ dd->ipath_f_put_tid(dd, &tidbase[tid],
+ RCVHQ_RCV_TYPE_EXPECTED,
dd->ipath_tidinvalid);
pci_unmap_page(dd->pcidev,
dd->ipath_physshadow[porttid + tid],
@@ -538,7 +540,8 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
if (dd->ipath_pageshadow[porttid + tid]) {
ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
pd->port_pid, tid);
- dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
+ dd->ipath_f_put_tid(dd, &tidbase[tid],
+ RCVHQ_RCV_TYPE_EXPECTED,
dd->ipath_tidinvalid);
pci_unmap_page(dd->pcidev,
dd->ipath_physshadow[porttid + tid],
@@ -921,7 +924,8 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
(u64 __iomem *)
((char __iomem *)
dd->ipath_kregbase +
- dd->ipath_rcvegrbase), 0, pa);
+ dd->ipath_rcvegrbase),
+ RCVHQ_RCV_TYPE_EAGER, pa);
pa += egrsize;
}
cond_resched(); /* don't hog the cpu */
@@ -1337,68 +1341,133 @@ bail:
return ret;
}
-static unsigned int ipath_poll(struct file *fp,
- struct poll_table_struct *pt)
+static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
+ struct file *fp,
+ struct poll_table_struct *pt)
{
- struct ipath_portdata *pd;
- u32 head, tail;
- int bit;
unsigned pollflag = 0;
struct ipath_devdata *dd;
- pd = port_fp(fp);
- if (!pd)
- goto bail;
dd = pd->port_dd;
- bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT;
- set_bit(bit, &dd->ipath_rcvctrl);
+ if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) {
+ pollflag |= POLLERR;
+ clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag);
+ }
- /*
- * Before blocking, make sure that head is still == tail,
- * reading from the chip, so we can be sure the interrupt
- * enable has made it to the chip. If not equal, disable
- * interrupt again and return immediately. This avoids races,
- * and the overhead of the chip read doesn't matter much at
- * this point, since we are waiting for something anyway.
- */
+ if (test_bit(IPATH_PORT_WAITING_URG, &pd->int_flag)) {
+ pollflag |= POLLIN | POLLRDNORM;
+ clear_bit(IPATH_PORT_WAITING_URG, &pd->int_flag);
+ }
- ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
- dd->ipath_rcvctrl);
+ if (!pollflag) {
+ set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
+ if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW)
+ set_bit(IPATH_PORT_WAITING_OVERFLOW,
+ &pd->port_flag);
+
+ poll_wait(fp, &pd->port_wait, pt);
+ }
+
+ return pollflag;
+}
+
+static unsigned int ipath_poll_next(struct ipath_portdata *pd,
+ struct file *fp,
+ struct poll_table_struct *pt)
+{
+ u32 head, tail;
+ unsigned pollflag = 0;
+ struct ipath_devdata *dd;
+
+ dd = pd->port_dd;
head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
- tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
+ tail = *(volatile u64 *)pd->port_rcvhdrtail_kvaddr;
- if (tail == head) {
+ if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) {
+ pollflag |= POLLERR;
+ clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag);
+ }
+
+ if (tail != head ||
+ test_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag)) {
+ pollflag |= POLLIN | POLLRDNORM;
+ clear_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag);
+ }
+
+ if (!pollflag) {
set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
+ if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW)
+ set_bit(IPATH_PORT_WAITING_OVERFLOW,
+ &pd->port_flag);
+
+ set_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT,
+ &dd->ipath_rcvctrl);
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+
if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
- (void)ipath_write_ureg(dd, ur_rcvhdrhead,
- dd->ipath_rhdrhead_intr_off
- | head, pd->port_port);
- poll_wait(fp, &pd->port_wait, pt);
+ ipath_write_ureg(dd, ur_rcvhdrhead,
+ dd->ipath_rhdrhead_intr_off | head,
+ pd->port_port);
- if (test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
- /* timed out, no packets received */
- clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
- pd->port_rcvwait_to++;
- }
- else
- pollflag = POLLIN | POLLRDNORM;
- }
- else {
- /* it's already happened; don't do wait_event overhead */
- pollflag = POLLIN | POLLRDNORM;
- pd->port_rcvnowait++;
+ poll_wait(fp, &pd->port_wait, pt);
}
- clear_bit(bit, &dd->ipath_rcvctrl);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
- dd->ipath_rcvctrl);
+ return pollflag;
+}
+
+static unsigned int ipath_poll(struct file *fp,
+ struct poll_table_struct *pt)
+{
+ struct ipath_portdata *pd;
+ unsigned pollflag;
+
+ pd = port_fp(fp);
+ if (!pd)
+ pollflag = 0;
+ else if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
+ pollflag = ipath_poll_urgent(pd, fp, pt);
+ else
+ pollflag = ipath_poll_next(pd, fp, pt);
-bail:
return pollflag;
}
+static int ipath_supports_subports(int user_swmajor, int user_swminor)
+{
+ /* no subport implementation prior to software version 1.3 */
+ return (user_swmajor > 1) || (user_swminor >= 3);
+}
+
+static int ipath_compatible_subports(int user_swmajor, int user_swminor)
+{
+ /* this code is written long-hand for clarity */
+ if (IPATH_USER_SWMAJOR != user_swmajor) {
+ /* no promise of compatibility if major mismatch */
+ return 0;
+ }
+ if (IPATH_USER_SWMAJOR == 1) {
+ switch (IPATH_USER_SWMINOR) {
+ case 0:
+ case 1:
+ case 2:
+ /* no subport implementation so cannot be compatible */
+ return 0;
+ case 3:
+ /* 3 is only compatible with itself */
+ return user_swminor == 3;
+ default:
+ /* >= 4 are compatible (or are expected to be) */
+ return user_swminor >= 4;
+ }
+ }
+ /* make no promises yet for future major versions */
+ return 0;
+}
+
static int init_subports(struct ipath_devdata *dd,
struct ipath_portdata *pd,
const struct ipath_user_info *uinfo)
@@ -1408,20 +1477,32 @@ static int init_subports(struct ipath_devdata *dd,
size_t size;
/*
- * If the user is requesting zero or one port,
+ * If the user is requesting zero subports,
* skip the subport allocation.
*/
- if (uinfo->spu_subport_cnt <= 1)
+ if (uinfo->spu_subport_cnt <= 0)
+ goto bail;
+
+ /* Self-consistency check for ipath_compatible_subports() */
+ if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
+ !ipath_compatible_subports(IPATH_USER_SWMAJOR,
+ IPATH_USER_SWMINOR)) {
+ dev_info(&dd->pcidev->dev,
+ "Inconsistent ipath_compatible_subports()\n");
goto bail;
+ }
- /* Old user binaries don't know about new subport implementation */
- if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) {
+ /* Check for subport compatibility */
+ if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
+ uinfo->spu_userversion & 0xffff)) {
dev_info(&dd->pcidev->dev,
- "Mismatched user minor version (%d) and driver "
- "minor version (%d) while port sharing. Ensure "
+ "Mismatched user version (%d.%d) and driver "
+ "version (%d.%d) while port sharing. Ensure "
"that driver and library are from the same "
"release.\n",
+ (int) (uinfo->spu_userversion >> 16),
(int) (uinfo->spu_userversion & 0xffff),
+ IPATH_USER_SWMAJOR,
IPATH_USER_SWMINOR);
goto bail;
}
@@ -1725,14 +1806,13 @@ static int ipath_open(struct inode *in, struct file *fp)
return fp->private_data ? 0 : -ENOMEM;
}
-
/* Get port early, so can set affinity prior to memory allocation */
static int ipath_assign_port(struct file *fp,
const struct ipath_user_info *uinfo)
{
int ret;
int i_minor;
- unsigned swminor;
+ unsigned swmajor, swminor;
/* Check to be sure we haven't already initialized this file */
if (port_fp(fp)) {
@@ -1741,7 +1821,8 @@ static int ipath_assign_port(struct file *fp,
}
/* for now, if major version is different, bail */
- if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
+ swmajor = uinfo->spu_userversion >> 16;
+ if (swmajor != IPATH_USER_SWMAJOR) {
ipath_dbg("User major version %d not same as driver "
"major %d\n", uinfo->spu_userversion >> 16,
IPATH_USER_SWMAJOR);
@@ -1756,7 +1837,8 @@ static int ipath_assign_port(struct file *fp,
mutex_lock(&ipath_mutex);
- if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt &&
+ if (ipath_compatible_subports(swmajor, swminor) &&
+ uinfo->spu_subport_cnt &&
(ret = find_shared_port(fp, uinfo))) {
mutex_unlock(&ipath_mutex);
if (ret > 0)
@@ -2020,7 +2102,8 @@ static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
info.port = pd->port_port;
info.subport = subport;
/* Don't return new fields if old library opened the port. */
- if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) {
+ if (ipath_supports_subports(pd->userversion >> 16,
+ pd->userversion & 0xffff)) {
/* Number of user ports available for this device. */
info.num_ports = pd->port_dd->ipath_cfgports - 1;
info.num_subports = pd->port_subport_cnt;
@@ -2123,6 +2206,11 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
src = NULL;
dest = NULL;
break;
+ case IPATH_CMD_POLL_TYPE:
+ copy = sizeof(cmd.cmd.poll_type);
+ dest = &cmd.cmd.poll_type;
+ src = &ucmd->cmd.poll_type;
+ break;
default:
ret = -EINVAL;
goto bail;
@@ -2195,6 +2283,9 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
case IPATH_CMD_PIOAVAILUPD:
ret = ipath_force_pio_avail_update(pd->port_dd);
break;
+ case IPATH_CMD_POLL_TYPE:
+ pd->poll_type = cmd.cmd.poll_type;
+ break;
}
if (ret >= 0)
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index ebd5c7bd2cdb..2e689b974e1f 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -257,9 +257,14 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
/* Notimpl InitType (actually, an SMA decision) */
/* VLHighLimit is 0 (only one VL) */
; /* VLArbitrationHighCap is 0 (only one VL) */
+ /*
+ * Note: the chips support a maximum MTU of 4096, but the driver
+ * hasn't implemented this feature yet, so set the maximum
+ * to 2048.
+ */
portinfo[10] = /* VLArbitrationLowCap is 0 (only one VL) */
/* InitTypeReply is SMA decision */
- (5 << 16) /* MTUCap 4096 */
+ (4 << 16) /* MTUCap 2048 */
| (7 << 13) /* VLStallCount */
| (0x1f << 8) /* HOQLife */
| (1 << 4)
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 4171198fc202..650745d83fac 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -36,6 +36,7 @@
* HT chip.
*/
+#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/htirq.h>
@@ -439,6 +440,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
u32 bits, ctrl;
int isfatal = 0;
char bitsmsg[64];
+ int log_idx;
hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
@@ -467,6 +469,11 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
hwerrs &= dd->ipath_hwerrmask;
+ /* We log some errors to EEPROM, check if we have any of those. */
+ for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
+ if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
+ ipath_inc_eeprom_err(dd, log_idx, 1);
+
/*
* make sure we get this much out, unless told to be quiet,
* it's a parity error we may recover from,
@@ -502,9 +509,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
if (!hwerrs) {
ipath_dbg("Clearing freezemode on ignored or "
"recovered hardware error\n");
- ctrl &= ~INFINIPATH_C_FREEZEMODE;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
- ctrl);
+ ipath_clear_freeze(dd);
}
}
@@ -672,10 +677,16 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
if (n)
snprintf(name, namelen, "%s", n);
+ if (dd->ipath_boardrev != 6 && dd->ipath_boardrev != 7 &&
+ dd->ipath_boardrev != 11) {
+ ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name);
+ ret = 1;
+ goto bail;
+ }
if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
- dd->ipath_minrev > 3)) {
+ dd->ipath_minrev > 4)) {
/*
- * This version of the driver only supports Rev 3.2 and 3.3
+ * This version of the driver only supports Rev 3.2 - 3.4
*/
ipath_dev_err(dd,
"Unsupported InfiniPath hardware revision %u.%u!\n",
@@ -689,36 +700,11 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
* copies
*/
dd->ipath_flags |= IPATH_32BITCOUNTERS;
+ dd->ipath_flags |= IPATH_GPIO_INTR;
if (dd->ipath_htspeed != 800)
ipath_dev_err(dd,
"Incorrectly configured for HT @ %uMHz\n",
dd->ipath_htspeed);
- if (dd->ipath_boardrev == 7 || dd->ipath_boardrev == 11 ||
- dd->ipath_boardrev == 6)
- dd->ipath_flags |= IPATH_GPIO_INTR;
- else
- dd->ipath_flags |= IPATH_POLL_RX_INTR;
- if (dd->ipath_boardrev == 8) { /* LS/X-1 */
- u64 val;
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
- if (val & INFINIPATH_EXTS_SERDESSEL) {
- /*
- * hardware disabled
- *
- * This means that the chip is hardware disabled,
- * and will not be able to bring up the link,
- * in any case. We special case this and abort
- * early, to avoid later messages. We also set
- * the DISABLED status bit
- */
- ipath_dbg("Unit %u is hardware-disabled\n",
- dd->ipath_unit);
- *dd->ipath_statusp |= IPATH_STATUS_DISABLED;
- /* this value is handled differently */
- ret = 2;
- goto bail;
- }
- }
ret = 0;
bail:
@@ -1058,12 +1044,24 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
u64 lst, u64 ltst)
{
u64 extctl;
+ unsigned long flags = 0;
/* the diags use the LED to indicate diag info, so we leave
* the external LED alone when the diags are running */
if (ipath_diag_inuse)
return;
+ /* Allow override of LED display for, e.g. Locating system in rack */
+ if (dd->ipath_led_override) {
+ ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
+ ? INFINIPATH_IBCS_LT_STATE_LINKUP
+ : INFINIPATH_IBCS_LT_STATE_DISABLED;
+ lst = (dd->ipath_led_override & IPATH_LED_LOG)
+ ? INFINIPATH_IBCS_L_STATE_ACTIVE
+ : INFINIPATH_IBCS_L_STATE_DOWN;
+ }
+
+ spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
/*
* start by setting both LED control bits to off, then turn
* on the appropriate bit(s).
@@ -1092,6 +1090,7 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
}
dd->ipath_extctrl = extctl;
ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
+ spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
}
static void ipath_init_ht_variables(struct ipath_devdata *dd)
@@ -1157,6 +1156,22 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd)
dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+
+ /*
+ * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
+ * 2 is Some Misc, 3 is reserved for future.
+ */
+ dd->ipath_eep_st_masks[0].hwerrs_to_log =
+ INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
+
+ dd->ipath_eep_st_masks[1].hwerrs_to_log =
+ INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
+
+ dd->ipath_eep_st_masks[2].errs_to_log =
+ INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
+
}
/**
@@ -1372,7 +1387,7 @@ static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
* ipath_pe_put_tid - write a TID in chip
* @dd: the infinipath device
* @tidptr: pointer to the expected TID (in chip) to udpate
- * @tidtype: 0 for eager, 1 for expected
+ * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
* @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
*
* This exists as a separate routine to allow for special locking etc.
@@ -1393,7 +1408,7 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
"40 bits, using only 40!!!\n", pa);
pa &= INFINIPATH_RT_ADDR_MASK;
}
- if (type == 0)
+ if (type == RCVHQ_RCV_TYPE_EAGER)
pa |= dd->ipath_tidtemplate;
else {
/* in words (fixed, full page). */
@@ -1433,7 +1448,8 @@ static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
port * dd->ipath_rcvtidcnt *
sizeof(*tidbase));
for (i = 0; i < dd->ipath_rcvtidcnt; i++)
- ipath_ht_put_tid(dd, &tidbase[i], 1, dd->ipath_tidinvalid);
+ ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
+ dd->ipath_tidinvalid);
tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
dd->ipath_rcvegrbase +
@@ -1441,7 +1457,8 @@ static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
sizeof(*tidbase));
for (i = 0; i < dd->ipath_rcvegrcnt; i++)
- ipath_ht_put_tid(dd, &tidbase[i], 0, dd->ipath_tidinvalid);
+ ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
+ dd->ipath_tidinvalid);
}
/**
@@ -1528,11 +1545,6 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
writel(16, piobuf);
piobuf += pioincr;
}
- /*
- * self-clearing
- */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- INFINIPATH_S_ABORT);
ipath_get_eeprom_info(dd);
if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
@@ -1543,8 +1555,10 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
* with 128, rather than 112.
*/
dd->ipath_flags |= IPATH_GPIO_INTR;
- dd->ipath_flags &= ~IPATH_POLL_RX_INTR;
- }
+ } else
+ ipath_dev_err(dd, "Unsupported InfiniPath serial "
+ "number %.16s!\n", dd->ipath_serial);
+
return 0;
}
@@ -1561,7 +1575,6 @@ static int ipath_ht_txe_recover(struct ipath_devdata *dd)
}
dev_info(&dd->pcidev->dev,
"Recovering from TXE PIO parity error\n");
- ipath_disarm_senderrbufs(dd, 1);
return 1;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 4e2e3dfeb2c8..5b6ac9a1a709 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -296,13 +296,6 @@ static const struct ipath_cregs ipath_pe_cregs = {
#define IPATH_GPIO_SCL (1ULL << \
(_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-/*
- * Rev2 silicon allows suppressing check for ArmLaunch errors.
- * this can speed up short packet sends on systems that do
- * not guaranteee write-order.
- */
-#define INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR (1ULL<<63)
-
/* 6120 specific hardware errors... */
static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
@@ -328,6 +321,8 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
static int ipath_pe_txe_recover(struct ipath_devdata *);
+static void ipath_pe_put_tid_2(struct ipath_devdata *, u64 __iomem *,
+ u32, unsigned long);
/**
* ipath_pe_handle_hwerrors - display hardware errors.
@@ -347,6 +342,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
u32 bits, ctrl;
int isfatal = 0;
char bitsmsg[64];
+ int log_idx;
hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
if (!hwerrs) {
@@ -374,6 +370,11 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
hwerrs &= dd->ipath_hwerrmask;
+ /* We log some errors to EEPROM, check if we have any of those. */
+ for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
+ if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
+ ipath_inc_eeprom_err(dd, log_idx, 1);
+
/*
* make sure we get this much out, unless told to be quiet,
* or it's occurred within the last 5 seconds
@@ -431,10 +432,12 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
dd->ipath_flags &= ~IPATH_INITTED;
} else {
- ipath_dbg("Clearing freezemode on ignored hardware "
- "error\n");
- ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
- dd->ipath_control);
+ static u32 freeze_cnt;
+
+ freeze_cnt++;
+ ipath_dbg("Clearing freezemode on ignored or recovered "
+ "hardware error (%u)\n", freeze_cnt);
+ ipath_clear_freeze(dd);
}
}
@@ -554,8 +557,11 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n",
dd->ipath_majrev, dd->ipath_minrev);
ret = 1;
- } else
+ } else {
ret = 0;
+ if (dd->ipath_minrev >= 2)
+ dd->ipath_f_put_tid = ipath_pe_put_tid_2;
+ }
return ret;
}
@@ -680,17 +686,6 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
val |= dd->ipath_rx_pol_inv <<
INFINIPATH_XGXS_RX_POL_SHIFT;
}
- if (dd->ipath_minrev >= 2) {
- /* Rev 2. can tolerate multiple writes to PBC, and
- * allowing them can provide lower latency on some
- * CPUs, but this feature is off by default, only
- * turned on by setting D63 of XGXSconfig reg.
- * May want to make this conditional more
- * fine-grained in future. This is not exactly
- * related to XGXS, but where the bit ended up.
- */
- val |= INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR;
- }
if (val != prev_val)
ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
@@ -791,12 +786,24 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
u64 ltst)
{
u64 extctl;
+ unsigned long flags = 0;
/* the diags use the LED to indicate diag info, so we leave
* the external LED alone when the diags are running */
if (ipath_diag_inuse)
return;
+ /* Allow override of LED display for, e.g. Locating system in rack */
+ if (dd->ipath_led_override) {
+ ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
+ ? INFINIPATH_IBCS_LT_STATE_LINKUP
+ : INFINIPATH_IBCS_LT_STATE_DISABLED;
+ lst = (dd->ipath_led_override & IPATH_LED_LOG)
+ ? INFINIPATH_IBCS_L_STATE_ACTIVE
+ : INFINIPATH_IBCS_L_STATE_DOWN;
+ }
+
+ spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
INFINIPATH_EXTC_LED2PRIPORT_ON);
@@ -806,6 +813,7 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
dd->ipath_extctrl = extctl;
ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
+ spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
}
/**
@@ -955,6 +963,27 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd)
dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+
+ /*
+ * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
+ * 2 is Some Misc, 3 is reserved for future.
+ */
+ dd->ipath_eep_st_masks[0].hwerrs_to_log =
+ INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
+
+ /* Ignore errors in PIO/PBC on systems with unordered write-combining */
+ if (ipath_unordered_wc())
+ dd->ipath_eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY;
+
+ dd->ipath_eep_st_masks[1].hwerrs_to_log =
+ INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
+
+ dd->ipath_eep_st_masks[2].errs_to_log =
+ INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
+
+
}
/* setup the MSI stuff again after a reset. I'd like to just call
@@ -1082,7 +1111,7 @@ bail:
* ipath_pe_put_tid - write a TID in chip
* @dd: the infinipath device
* @tidptr: pointer to the expected TID (in chip) to udpate
- * @tidtype: 0 for eager, 1 for expected
+ * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
* @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
*
* This exists as a separate routine to allow for special locking etc.
@@ -1108,7 +1137,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
"BUG: Physical page address 0x%lx "
"has bits set in 31-29\n", pa);
- if (type == 0)
+ if (type == RCVHQ_RCV_TYPE_EAGER)
pa |= dd->ipath_tidtemplate;
else /* for now, always full 4KB page */
pa |= 2 << 29;
@@ -1132,7 +1161,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
* ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher
* @dd: the infinipath device
* @tidptr: pointer to the expected TID (in chip) to udpate
- * @tidtype: 0 for eager, 1 for expected
+ * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
* @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
*
* This exists as a separate routine to allow for selection of the
@@ -1157,7 +1186,7 @@ static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr,
"BUG: Physical page address 0x%lx "
"has bits set in 31-29\n", pa);
- if (type == 0)
+ if (type == RCVHQ_RCV_TYPE_EAGER)
pa |= dd->ipath_tidtemplate;
else /* for now, always full 4KB page */
pa |= 2 << 29;
@@ -1196,7 +1225,8 @@ static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port)
port * dd->ipath_rcvtidcnt * sizeof(*tidbase));
for (i = 0; i < dd->ipath_rcvtidcnt; i++)
- ipath_pe_put_tid(dd, &tidbase[i], 0, tidinv);
+ dd->ipath_f_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
+ tidinv);
tidbase = (u64 __iomem *)
((char __iomem *)(dd->ipath_kregbase) +
@@ -1204,7 +1234,8 @@ static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port)
port * dd->ipath_rcvegrcnt * sizeof(*tidbase));
for (i = 0; i < dd->ipath_rcvegrcnt; i++)
- ipath_pe_put_tid(dd, &tidbase[i], 1, tidinv);
+ dd->ipath_f_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
+ tidinv);
}
/**
@@ -1311,13 +1342,6 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
dd = pd->port_dd;
- if (dd != NULL && dd->ipath_minrev >= 2) {
- ipath_cdbg(PROC, "IBA6120 Rev2, allow multiple PBC write\n");
- kinfo->spi_runtime_flags |= IPATH_RUNTIME_PBC_REWRITE;
- ipath_cdbg(PROC, "IBA6120 Rev2, allow loose DMA alignment\n");
- kinfo->spi_runtime_flags |= IPATH_RUNTIME_LOOSE_DMA_ALIGN;
- }
-
done:
kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
return 0;
@@ -1354,7 +1378,6 @@ static int ipath_pe_txe_recover(struct ipath_devdata *dd)
dev_info(&dd->pcidev->dev,
"Recovering from TXE PIO parity error\n");
}
- ipath_disarm_senderrbufs(dd, 1);
return 1;
}
@@ -1377,10 +1400,11 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes;
dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes;
dd->ipath_f_clear_tids = ipath_pe_clear_tids;
- if (dd->ipath_minrev >= 2)
- dd->ipath_f_put_tid = ipath_pe_put_tid_2;
- else
- dd->ipath_f_put_tid = ipath_pe_put_tid;
+ /*
+ * this may get changed after we read the chip revision,
+ * but we start with the safe version for all revs
+ */
+ dd->ipath_f_put_tid = ipath_pe_put_tid;
dd->ipath_f_cleanup = ipath_setup_pe_cleanup;
dd->ipath_f_setextled = ipath_setup_pe_setextled;
dd->ipath_f_get_base_info = ipath_pe_get_base_info;
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 7045ba689494..9dd0bacf8461 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -133,7 +133,8 @@ static int create_port0_egr(struct ipath_devdata *dd)
dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
((char __iomem *) dd->ipath_kregbase +
- dd->ipath_rcvegrbase), 0,
+ dd->ipath_rcvegrbase),
+ RCVHQ_RCV_TYPE_EAGER,
dd->ipath_port0_skbinfo[e].phys);
}
@@ -310,7 +311,12 @@ static int init_chip_first(struct ipath_devdata *dd,
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
dd->ipath_piosize2k = val & ~0U;
dd->ipath_piosize4k = val >> 32;
- dd->ipath_ibmtu = 4096; /* default to largest legal MTU */
+ /*
+ * Note: the chips support a maximum MTU of 4096, but the driver
+ * hasn't implemented this feature yet, so set the initial value
+ * to 2048.
+ */
+ dd->ipath_ibmtu = 2048;
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
dd->ipath_piobcnt2k = val & ~0U;
dd->ipath_piobcnt4k = val >> 32;
@@ -340,6 +346,10 @@ static int init_chip_first(struct ipath_devdata *dd,
spin_lock_init(&dd->ipath_tid_lock);
+ spin_lock_init(&dd->ipath_gpio_lock);
+ spin_lock_init(&dd->ipath_eep_st_lock);
+ sema_init(&dd->ipath_eep_sem, 1);
+
done:
*pdp = pd;
return ret;
@@ -646,7 +656,7 @@ static int init_housekeeping(struct ipath_devdata *dd,
ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn);
snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion),
- "Driver %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
+ "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
"SW Compat %u\n",
IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn,
(unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) &
@@ -727,7 +737,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
if (ipath_kpiobufs == 0) {
/* not set by user (this is default) */
- if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32)
+ if (piobufs > 144)
kpiobufs = 32;
else
kpiobufs = 16;
@@ -767,6 +777,12 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
piobufs, dd->ipath_pbufsport, uports);
dd->ipath_f_early_init(dd);
+ /*
+ * cancel any possible active sends from early driver load.
+ * Follows early_init because some chips have to initialize
+ * PIO buffers in early_init to avoid false parity errors.
+ */
+ ipath_cancel_sends(dd, 0);
/* early_init sets rcvhdrentsize and rcvhdrsize, so this must be
* done after early_init */
@@ -835,13 +851,14 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
dd->ipath_hwerrmask);
- dd->ipath_maskederrs = dd->ipath_ignorederrs;
/* clear all */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
/* enable errors that are masked, at least this first time. */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
~dd->ipath_maskederrs);
- /* clear any interrups up to this point (ints still not enabled) */
+ dd->ipath_errormask = ipath_read_kreg64(dd,
+ dd->ipath_kregs->kr_errormask);
+ /* clear any interrupts up to this point (ints still not enabled) */
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
/*
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index a90d3b5699c4..b29fe7e9b11a 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -70,7 +70,7 @@ static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
* If rewrite is true, and bits are set in the sendbufferror registers,
* we'll write to the buffer, for error recovery on parity errors.
*/
-void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
+static void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
{
u32 piobcnt;
unsigned long sbuf[4];
@@ -93,7 +93,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
int i;
- if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) {
+ if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
+ dd->ipath_lastcancel > jiffies) {
__IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
"SendbufErrs %lx %lx", sbuf[0],
sbuf[1]);
@@ -108,7 +109,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
ipath_clrpiobuf(dd, i);
ipath_disarm_piobufs(dd, i, 1);
}
- dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
+ /* ignore armlaunch errs for a bit */
+ dd->ipath_lastcancel = jiffies+3;
}
}
@@ -131,6 +133,17 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
INFINIPATH_E_INVALIDADDR)
/*
+ * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore
+ * errors not related to freeze and cancelling buffers. Can't ignore
+ * armlaunch because could get more while still cleaning up, and need
+ * to cancel those as they happen.
+ */
+#define E_SPKT_ERRS_IGNORE \
+ (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
+ INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \
+ INFINIPATH_E_SPKTLEN)
+
+/*
* these are errors that can occur when the link changes state while
* a packet is being sent or received. This doesn't cover things
* like EBP or VCRC that can be the result of a sending having the
@@ -290,12 +303,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
* Flush all queued sends when link went to DOWN or INIT,
* to be sure that they don't block SMA and other MAD packets
*/
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- INFINIPATH_S_ABORT);
- ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
- (unsigned)(dd->ipath_piobcnt2k +
- dd->ipath_piobcnt4k) -
- dd->ipath_lastport_piobuf);
+ ipath_cancel_sends(dd, 1);
}
else if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
lstate == IPATH_IBSTATE_ACTIVE) {
@@ -505,19 +513,24 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
int i, iserr = 0;
int chkerrpkts = 0, noprint = 0;
unsigned supp_msgs;
+ int log_idx;
supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);
- /*
- * don't report errors that are masked (includes those always
- * ignored)
- */
+ /* don't report errors that are masked */
errs &= ~dd->ipath_maskederrs;
/* do these first, they are most important */
if (errs & INFINIPATH_E_HARDWARE) {
/* reuse same msg buf */
dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
+ } else {
+ u64 mask;
+ for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
+ mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
+ if (errs & mask)
+ ipath_inc_eeprom_err(dd, log_idx, 1);
+ }
}
if (!noprint && (errs & ~dd->ipath_e_bitsextant))
@@ -550,19 +563,19 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* ones on this particular interrupt, which also isn't great
*/
dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
+ dd->ipath_errormask &= ~dd->ipath_maskederrs;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
- ~dd->ipath_maskederrs);
+ dd->ipath_errormask);
s_iserr = ipath_decode_err(msg, sizeof msg,
- (dd->ipath_maskederrs & ~dd->
- ipath_ignorederrs));
+ dd->ipath_maskederrs);
- if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
+ if (dd->ipath_maskederrs &
~(INFINIPATH_E_RRCVEGRFULL |
INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
ipath_dev_err(dd, "Temporarily disabling "
"error(s) %llx reporting; too frequent (%s)\n",
- (unsigned long long) (dd->ipath_maskederrs &
- ~dd->ipath_ignorederrs), msg);
+ (unsigned long long)dd->ipath_maskederrs,
+ msg);
else {
/*
* rcvegrfull and rcvhdrqfull are "normal",
@@ -675,6 +688,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
chkerrpkts = 1;
dd->ipath_lastrcvhdrqtails[i] = tl;
pd->port_hdrqfull++;
+ if (test_bit(IPATH_PORT_WAITING_OVERFLOW,
+ &pd->port_flag)) {
+ clear_bit(
+ IPATH_PORT_WAITING_OVERFLOW,
+ &pd->port_flag);
+ set_bit(
+ IPATH_PORT_WAITING_OVERFLOW,
+ &pd->int_flag);
+ wake_up_interruptible(
+ &pd->port_wait);
+ }
}
}
}
@@ -744,6 +768,76 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
return chkerrpkts;
}
+
+/*
+ * try to cleanup as much as possible for anything that might have gone
+ * wrong while in freeze mode, such as pio buffers being written by user
+ * processes (causing armlaunch), send errors due to going into freeze mode,
+ * etc., and try to avoid causing extra interrupts while doing so.
+ * Forcibly update the in-memory pioavail register copies after cleanup
+ * because the chip won't do it for anything changing while in freeze mode
+ * (we don't want to wait for the next pio buffer state change).
+ * Make sure that we don't lose any important interrupts by using the chip
+ * feature that says that writing 0 to a bit in *clear that is set in
+ * *status will cause an interrupt to be generated again (if allowed by
+ * the *mask value).
+ */
+void ipath_clear_freeze(struct ipath_devdata *dd)
+{
+ int i, im;
+ __le64 val;
+
+ /* disable error interrupts, to avoid confusion */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
+
+ /* also disable interrupts; errormask is sometimes overwriten */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
+
+ /*
+ * clear all sends, because they have may been
+ * completed by usercode while in freeze mode, and
+ * therefore would not be sent, and eventually
+ * might cause the process to run out of bufs
+ */
+ ipath_cancel_sends(dd, 0);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+
+ /* ensure pio avail updates continue */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+
+ /*
+ * We just enabled pioavailupdate, so dma copy is almost certainly
+ * not yet right, so read the registers directly. Similar to init
+ */
+ for (i = 0; i < dd->ipath_pioavregs; i++) {
+ /* deal with 6110 chip bug */
+ im = i > 3 ? ((i&1) ? i-1 : i+1) : i;
+ val = ipath_read_kreg64(dd, (0x1000/sizeof(u64))+im);
+ dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i]
+ = le64_to_cpu(val);
+ }
+
+ /*
+ * force new interrupt if any hwerr, error or interrupt bits are
+ * still set, and clear "safe" send packet errors related to freeze
+ * and cancelling sends. Re-enable error interrupts before possible
+ * force of re-interrupt on pending interrupts.
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
+ E_SPKT_ERRS_IGNORE);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+ dd->ipath_errormask);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
+}
+
+
/* this is separate to allow for better optimization of ipath_intr() */
static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp)
@@ -872,14 +966,25 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
dd->ipath_i_rcvurg_mask);
for (i = 1; i < dd->ipath_cfgports; i++) {
struct ipath_portdata *pd = dd->ipath_pd[i];
- if (portr & (1 << i) && pd && pd->port_cnt &&
- test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
- clear_bit(IPATH_PORT_WAITING_RCV,
- &pd->port_flag);
- clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
- &dd->ipath_rcvctrl);
- wake_up_interruptible(&pd->port_wait);
- rcvdint = 1;
+ if (portr & (1 << i) && pd && pd->port_cnt) {
+ if (test_bit(IPATH_PORT_WAITING_RCV,
+ &pd->port_flag)) {
+ clear_bit(IPATH_PORT_WAITING_RCV,
+ &pd->port_flag);
+ set_bit(IPATH_PORT_WAITING_RCV,
+ &pd->int_flag);
+ clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
+ &dd->ipath_rcvctrl);
+ wake_up_interruptible(&pd->port_wait);
+ rcvdint = 1;
+ } else if (test_bit(IPATH_PORT_WAITING_URG,
+ &pd->port_flag)) {
+ clear_bit(IPATH_PORT_WAITING_URG,
+ &pd->port_flag);
+ set_bit(IPATH_PORT_WAITING_URG,
+ &pd->int_flag);
+ wake_up_interruptible(&pd->port_wait);
+ }
}
}
if (rcvdint) {
@@ -898,13 +1003,15 @@ irqreturn_t ipath_intr(int irq, void *data)
u32 istat, chk0rcv = 0;
ipath_err_t estat = 0;
irqreturn_t ret;
- u32 oldhead, curtail;
static unsigned unexpected = 0;
static const u32 port0rbits = (1U<<INFINIPATH_I_RCVAVAIL_SHIFT) |
(1U<<INFINIPATH_I_RCVURG_SHIFT);
ipath_stats.sps_ints++;
+ if (dd->ipath_int_counter != (u32) -1)
+ dd->ipath_int_counter++;
+
if (!(dd->ipath_flags & IPATH_PRESENT)) {
/*
* This return value is not great, but we do not want the
@@ -928,36 +1035,6 @@ irqreturn_t ipath_intr(int irq, void *data)
goto bail;
}
- /*
- * We try to avoid reading the interrupt status register, since
- * that's a PIO read, and stalls the processor for up to about
- * ~0.25 usec. The idea is that if we processed a port0 packet,
- * we blindly clear the port 0 receive interrupt bits, and nothing
- * else, then return. If other interrupts are pending, the chip
- * will re-interrupt us as soon as we write the intclear register.
- * We then won't process any more kernel packets (if not the 2nd
- * time, then the 3rd or 4th) and we'll then handle the other
- * interrupts. We clear the interrupts first so that we don't
- * lose intr for later packets that arrive while we are processing.
- */
- oldhead = dd->ipath_port0head;
- curtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr);
- if (oldhead != curtail) {
- if (dd->ipath_flags & IPATH_GPIO_INTR) {
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
- (u64) (1 << IPATH_GPIO_PORT0_BIT));
- istat = port0rbits | INFINIPATH_I_GPIO;
- }
- else
- istat = port0rbits;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
- ipath_kreceive(dd);
- if (oldhead != dd->ipath_port0head) {
- ipath_stats.sps_fastrcvint++;
- goto done;
- }
- }
-
istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus);
if (unlikely(!istat)) {
@@ -1118,7 +1195,6 @@ irqreturn_t ipath_intr(int irq, void *data)
handle_layer_pioavail(dd);
}
-done:
ret = IRQ_HANDLED;
bail:
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 12194f3dd8cc..7a7966f7e4ff 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -1,7 +1,7 @@
#ifndef _IPATH_KERNEL_H
#define _IPATH_KERNEL_H
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -57,6 +57,24 @@
extern struct infinipath_stats ipath_stats;
#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
+/*
+ * First-cut critierion for "device is active" is
+ * two thousand dwords combined Tx, Rx traffic per
+ * 5-second interval. SMA packets are 64 dwords,
+ * and occur "a few per second", presumably each way.
+ */
+#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000)
+/*
+ * Struct used to indicate which errors are logged in each of the
+ * error-counters that are logged to EEPROM. A counter is incremented
+ * _once_ (saturating at 255) for each event with any bits set in
+ * the error or hwerror register masks below.
+ */
+#define IPATH_EEP_LOG_CNT (4)
+struct ipath_eep_log_mask {
+ u64 errs_to_log;
+ u64 hwerrs_to_log;
+};
struct ipath_portdata {
void **port_rcvegrbuf;
@@ -109,6 +127,8 @@ struct ipath_portdata {
u32 port_tidcursor;
/* next expected TID to check */
unsigned long port_flag;
+ /* what happened */
+ unsigned long int_flag;
/* WAIT_RCV that timed out, no interrupt */
u32 port_rcvwait_to;
/* WAIT_PIO that timed out, no interrupt */
@@ -137,6 +157,8 @@ struct ipath_portdata {
u32 userversion;
/* Bitmask of active slaves */
u32 active_slaves;
+ /* Type of packets or conditions we want to poll for */
+ u16 poll_type;
};
struct sk_buff;
@@ -239,18 +261,10 @@ struct ipath_devdata {
* limiting of hwerror reporting
*/
ipath_err_t ipath_lasthwerror;
- /*
- * errors masked because they occur too fast, also includes errors
- * that are always ignored (ipath_ignorederrs)
- */
+ /* errors masked because they occur too fast */
ipath_err_t ipath_maskederrs;
/* time in jiffies at which to re-enable maskederrs */
unsigned long ipath_unmasktime;
- /*
- * errors always ignored (masked), at least for a given
- * chip/device, because they are wrong or not useful
- */
- ipath_err_t ipath_ignorederrs;
/* count of egrfull errors, combined for all ports */
u64 ipath_last_tidfull;
/* for ipath_qcheck() */
@@ -275,6 +289,8 @@ struct ipath_devdata {
u32 ipath_lastport_piobuf;
/* is a stats timer active */
u32 ipath_stats_timer_active;
+ /* number of interrupts for this device -- saturates... */
+ u32 ipath_int_counter;
/* dwords sent read from counter */
u32 ipath_lastsword;
/* dwords received read from counter */
@@ -369,9 +385,6 @@ struct ipath_devdata {
struct class_device *diag_class_dev;
/* timer used to prevent stats overflow, error throttling, etc. */
struct timer_list ipath_stats_timer;
- /* check for stale messages in rcv queue */
- /* only allow one intr at a time. */
- unsigned long ipath_rcv_pending;
void *ipath_dummy_hdrq; /* used after port close */
dma_addr_t ipath_dummy_hdrq_phys;
@@ -399,6 +412,8 @@ struct ipath_devdata {
u64 ipath_gpio_out;
/* shadow the gpio mask register */
u64 ipath_gpio_mask;
+ /* shadow the gpio output enable, etc... */
+ u64 ipath_extctrl;
/* kr_revision shadow */
u64 ipath_revision;
/*
@@ -413,6 +428,7 @@ struct ipath_devdata {
u64 ipath_lastibcstat;
/* hwerrmask shadow */
ipath_err_t ipath_hwerrmask;
+ ipath_err_t ipath_errormask; /* errormask shadow */
/* interrupt config reg shadow */
u64 ipath_intconfig;
/* kr_sendpiobufbase value */
@@ -473,8 +489,6 @@ struct ipath_devdata {
u32 ipath_cregbase;
/* shadow the control register contents */
u32 ipath_control;
- /* shadow the gpio output contents */
- u32 ipath_extctrl;
/* PCI revision register (HTC rev on FPGA) */
u32 ipath_pcirev;
@@ -552,6 +566,9 @@ struct ipath_devdata {
u32 ipath_overrun_thresh_errs;
u32 ipath_lli_errs;
+ /* status check work */
+ struct delayed_work status_work;
+
/*
* Not all devices managed by a driver instance are the same
* type, so these fields must be per-device.
@@ -575,6 +592,37 @@ struct ipath_devdata {
u16 ipath_gpio_scl_num;
u64 ipath_gpio_sda;
u64 ipath_gpio_scl;
+
+ /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */
+ spinlock_t ipath_gpio_lock;
+
+ /* used to override LED behavior */
+ u8 ipath_led_override; /* Substituted for normal value, if non-zero */
+ u16 ipath_led_override_timeoff; /* delta to next timer event */
+ u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */
+ u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */
+ atomic_t ipath_led_override_timer_active;
+ /* Used to flash LEDs in override mode */
+ struct timer_list ipath_led_override_timer;
+
+ /* Support (including locks) for EEPROM logging of errors and time */
+ /* control access to actual counters, timer */
+ spinlock_t ipath_eep_st_lock;
+ /* control high-level access to EEPROM */
+ struct semaphore ipath_eep_sem;
+ /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
+ uint64_t ipath_traffic_wds;
+ /* active time is kept in seconds, but logged in hours */
+ atomic_t ipath_active_time;
+ /* Below are nominal shadow of EEPROM, new since last EEPROM update */
+ uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT];
+ uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT];
+ uint16_t ipath_eep_hrs;
+ /*
+ * masks for which bits of errs, hwerrs that cause
+ * each of the counters to increment.
+ */
+ struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
};
/* Private data for file operations */
@@ -592,6 +640,7 @@ int ipath_enable_wc(struct ipath_devdata *dd);
void ipath_disable_wc(struct ipath_devdata *dd);
int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
void ipath_shutdown_device(struct ipath_devdata *);
+void ipath_clear_freeze(struct ipath_devdata *);
struct file_operations;
int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
@@ -627,6 +676,7 @@ int ipath_unordered_wc(void);
void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
unsigned cnt);
+void ipath_cancel_sends(struct ipath_devdata *, int);
int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
@@ -685,7 +735,6 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
* are 64bit */
#define IPATH_32BITCOUNTERS 0x20000
/* can miss port0 rx interrupts */
-#define IPATH_POLL_RX_INTR 0x40000
#define IPATH_DISABLED 0x80000 /* administratively disabled */
/* Use GPIO interrupts for new counters */
#define IPATH_GPIO_ERRINTRS 0x100000
@@ -704,6 +753,10 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
#define IPATH_PORT_WAITING_PIO 3
/* master has not finished initializing */
#define IPATH_PORT_MASTER_UNINIT 4
+ /* waiting for an urgent packet to arrive */
+#define IPATH_PORT_WAITING_URG 5
+ /* waiting for a header overflow */
+#define IPATH_PORT_WAITING_OVERFLOW 6
/* free up any allocated data at closes */
void ipath_free_data(struct ipath_portdata *dd);
@@ -713,8 +766,18 @@ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
void ipath_init_iba6120_funcs(struct ipath_devdata *);
void ipath_init_iba6110_funcs(struct ipath_devdata *);
void ipath_get_eeprom_info(struct ipath_devdata *);
+int ipath_update_eeprom_log(struct ipath_devdata *dd);
+void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
-void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
+
+/*
+ * Set LED override, only the two LSBs have "public" meaning, but
+ * any non-zero value substitutes them for the Link and LinkTrain
+ * LED states.
+ */
+#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
+#define IPATH_LED_LOG 2 /* Logical (link) YELLOW LED */
+void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val);
/*
* number of words used for protocol header if not set by ipath_userinit();
@@ -749,7 +812,6 @@ static inline u64 ipath_mdio_req(int cmd, int dev, int reg, int data)
#define IPATH_MDIO_CTRL_8355_REG_10 0x1D
int ipath_get_user_pages(unsigned long, size_t, struct page **);
-int ipath_get_user_pages_nocopy(unsigned long, struct page **);
void ipath_release_user_pages(struct page **, size_t);
void ipath_release_user_pages_on_close(struct page **, size_t);
int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int);
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index dd487c100f5b..85a4aefc6c03 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c
deleted file mode 100644
index 05a1d2b01d9d..000000000000
--- a/drivers/infiniband/hw/ipath/ipath_layer.c
+++ /dev/null
@@ -1,365 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * These are the routines used by layered drivers, currently just the
- * layered ethernet driver and verbs layer.
- */
-
-#include <linux/io.h>
-#include <asm/byteorder.h>
-
-#include "ipath_kernel.h"
-#include "ipath_layer.h"
-#include "ipath_verbs.h"
-#include "ipath_common.h"
-
-/* Acquire before ipath_devs_lock. */
-static DEFINE_MUTEX(ipath_layer_mutex);
-
-u16 ipath_layer_rcv_opcode;
-
-static int (*layer_intr)(void *, u32);
-static int (*layer_rcv)(void *, void *, struct sk_buff *);
-static int (*layer_rcv_lid)(void *, void *);
-
-static void *(*layer_add_one)(int, struct ipath_devdata *);
-static void (*layer_remove_one)(void *);
-
-int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg)
-{
- int ret = -ENODEV;
-
- if (dd->ipath_layer.l_arg && layer_intr)
- ret = layer_intr(dd->ipath_layer.l_arg, arg);
-
- return ret;
-}
-
-int ipath_layer_intr(struct ipath_devdata *dd, u32 arg)
-{
- int ret;
-
- mutex_lock(&ipath_layer_mutex);
-
- ret = __ipath_layer_intr(dd, arg);
-
- mutex_unlock(&ipath_layer_mutex);
-
- return ret;
-}
-
-int __ipath_layer_rcv(struct ipath_devdata *dd, void *hdr,
- struct sk_buff *skb)
-{
- int ret = -ENODEV;
-
- if (dd->ipath_layer.l_arg && layer_rcv)
- ret = layer_rcv(dd->ipath_layer.l_arg, hdr, skb);
-
- return ret;
-}
-
-int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr)
-{
- int ret = -ENODEV;
-
- if (dd->ipath_layer.l_arg && layer_rcv_lid)
- ret = layer_rcv_lid(dd->ipath_layer.l_arg, hdr);
-
- return ret;
-}
-
-void ipath_layer_lid_changed(struct ipath_devdata *dd)
-{
- mutex_lock(&ipath_layer_mutex);
-
- if (dd->ipath_layer.l_arg && layer_intr)
- layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID);
-
- mutex_unlock(&ipath_layer_mutex);
-}
-
-void ipath_layer_add(struct ipath_devdata *dd)
-{
- mutex_lock(&ipath_layer_mutex);
-
- if (layer_add_one)
- dd->ipath_layer.l_arg =
- layer_add_one(dd->ipath_unit, dd);
-
- mutex_unlock(&ipath_layer_mutex);
-}
-
-void ipath_layer_remove(struct ipath_devdata *dd)
-{
- mutex_lock(&ipath_layer_mutex);
-
- if (dd->ipath_layer.l_arg && layer_remove_one) {
- layer_remove_one(dd->ipath_layer.l_arg);
- dd->ipath_layer.l_arg = NULL;
- }
-
- mutex_unlock(&ipath_layer_mutex);
-}
-
-int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
- void (*l_remove)(void *),
- int (*l_intr)(void *, u32),
- int (*l_rcv)(void *, void *, struct sk_buff *),
- u16 l_rcv_opcode,
- int (*l_rcv_lid)(void *, void *))
-{
- struct ipath_devdata *dd, *tmp;
- unsigned long flags;
-
- mutex_lock(&ipath_layer_mutex);
-
- layer_add_one = l_add;
- layer_remove_one = l_remove;
- layer_intr = l_intr;
- layer_rcv = l_rcv;
- layer_rcv_lid = l_rcv_lid;
- ipath_layer_rcv_opcode = l_rcv_opcode;
-
- spin_lock_irqsave(&ipath_devs_lock, flags);
-
- list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
- if (!(dd->ipath_flags & IPATH_INITTED))
- continue;
-
- if (dd->ipath_layer.l_arg)
- continue;
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
- dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd);
- spin_lock_irqsave(&ipath_devs_lock, flags);
- }
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
- mutex_unlock(&ipath_layer_mutex);
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_register);
-
-void ipath_layer_unregister(void)
-{
- struct ipath_devdata *dd, *tmp;
- unsigned long flags;
-
- mutex_lock(&ipath_layer_mutex);
- spin_lock_irqsave(&ipath_devs_lock, flags);
-
- list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
- if (dd->ipath_layer.l_arg && layer_remove_one) {
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
- layer_remove_one(dd->ipath_layer.l_arg);
- spin_lock_irqsave(&ipath_devs_lock, flags);
- dd->ipath_layer.l_arg = NULL;
- }
- }
-
- spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
- layer_add_one = NULL;
- layer_remove_one = NULL;
- layer_intr = NULL;
- layer_rcv = NULL;
- layer_rcv_lid = NULL;
-
- mutex_unlock(&ipath_layer_mutex);
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_unregister);
-
-int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax)
-{
- int ret;
- u32 intval = 0;
-
- mutex_lock(&ipath_layer_mutex);
-
- if (!dd->ipath_layer.l_arg) {
- ret = -EINVAL;
- goto bail;
- }
-
- ret = ipath_setrcvhdrsize(dd, IPATH_HEADER_QUEUE_WORDS);
-
- if (ret < 0)
- goto bail;
-
- *pktmax = dd->ipath_ibmaxlen;
-
- if (*dd->ipath_statusp & IPATH_STATUS_IB_READY)
- intval |= IPATH_LAYER_INT_IF_UP;
- if (dd->ipath_lid)
- intval |= IPATH_LAYER_INT_LID;
- if (dd->ipath_mlid)
- intval |= IPATH_LAYER_INT_BCAST;
- /*
- * do this on open, in case low level is already up and
- * just layered driver was reloaded, etc.
- */
- if (intval)
- layer_intr(dd->ipath_layer.l_arg, intval);
-
- ret = 0;
-bail:
- mutex_unlock(&ipath_layer_mutex);
-
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_open);
-
-u16 ipath_layer_get_lid(struct ipath_devdata *dd)
-{
- return dd->ipath_lid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_lid);
-
-/**
- * ipath_layer_get_mac - get the MAC address
- * @dd: the infinipath device
- * @mac: the MAC is put here
- *
- * This is the EUID-64 OUI octets (top 3), then
- * skip the next 2 (which should both be zero or 0xff).
- * The returned MAC is in network order
- * mac points to at least 6 bytes of buffer
- * We assume that by the time the LID is set, that the GUID is as valid
- * as it's ever going to be, rather than adding yet another status bit.
- */
-
-int ipath_layer_get_mac(struct ipath_devdata *dd, u8 * mac)
-{
- u8 *guid;
-
- guid = (u8 *) &dd->ipath_guid;
-
- mac[0] = guid[0];
- mac[1] = guid[1];
- mac[2] = guid[2];
- mac[3] = guid[5];
- mac[4] = guid[6];
- mac[5] = guid[7];
- if ((guid[3] || guid[4]) && !(guid[3] == 0xff && guid[4] == 0xff))
- ipath_dbg("Warning, guid bytes 3 and 4 not 0 or 0xffff: "
- "%x %x\n", guid[3], guid[4]);
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_mac);
-
-u16 ipath_layer_get_bcast(struct ipath_devdata *dd)
-{
- return dd->ipath_mlid;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_get_bcast);
-
-int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr)
-{
- int ret = 0;
- u32 __iomem *piobuf;
- u32 plen, *uhdr;
- size_t count;
- __be16 vlsllnh;
-
- if (!(dd->ipath_flags & IPATH_RCVHDRSZ_SET)) {
- ipath_dbg("send while not open\n");
- ret = -EINVAL;
- } else
- if ((dd->ipath_flags & (IPATH_LINKUNK | IPATH_LINKDOWN)) ||
- dd->ipath_lid == 0) {
- /*
- * lid check is for when sma hasn't yet configured
- */
- ret = -ENETDOWN;
- ipath_cdbg(VERBOSE, "send while not ready, "
- "mylid=%u, flags=0x%x\n",
- dd->ipath_lid, dd->ipath_flags);
- }
-
- vlsllnh = *((__be16 *) hdr);
- if (vlsllnh != htons(IPATH_LRH_BTH)) {
- ipath_dbg("Warning: lrh[0] wrong (%x, not %x); "
- "not sending\n", be16_to_cpu(vlsllnh),
- IPATH_LRH_BTH);
- ret = -EINVAL;
- }
- if (ret)
- goto done;
-
- /* Get a PIO buffer to use. */
- piobuf = ipath_getpiobuf(dd, NULL);
- if (piobuf == NULL) {
- ret = -EBUSY;
- goto done;
- }
-
- plen = (sizeof(*hdr) >> 2); /* actual length */
- ipath_cdbg(EPKT, "0x%x+1w pio %p\n", plen, piobuf);
-
- writeq(plen+1, piobuf); /* len (+1 for pad) to pbc, no flags */
- ipath_flush_wc();
- piobuf += 2;
- uhdr = (u32 *)hdr;
- count = plen-1; /* amount we can copy before trigger word */
- __iowrite32_copy(piobuf, uhdr, count);
- ipath_flush_wc();
- __raw_writel(uhdr[count], piobuf + count);
- ipath_flush_wc(); /* ensure it's sent, now */
-
- ipath_stats.sps_ether_spkts++; /* ether packet sent */
-
-done:
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_send_hdr);
-
-int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd)
-{
- set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
-
- ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
- dd->ipath_sendctrl);
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int);
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h
deleted file mode 100644
index 3854a4eae684..000000000000
--- a/drivers/infiniband/hw/ipath/ipath_layer.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _IPATH_LAYER_H
-#define _IPATH_LAYER_H
-
-/*
- * This header file is for symbols shared between the infinipath driver
- * and drivers layered upon it (such as ipath).
- */
-
-struct sk_buff;
-struct ipath_devdata;
-struct ether_header;
-
-int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
- void (*l_remove)(void *),
- int (*l_intr)(void *, u32),
- int (*l_rcv)(void *, void *,
- struct sk_buff *),
- u16 rcv_opcode,
- int (*l_rcv_lid)(void *, void *));
-void ipath_layer_unregister(void);
-int ipath_layer_open(struct ipath_devdata *, u32 * pktmax);
-u16 ipath_layer_get_lid(struct ipath_devdata *dd);
-int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *);
-u16 ipath_layer_get_bcast(struct ipath_devdata *dd);
-int ipath_layer_send_hdr(struct ipath_devdata *dd,
- struct ether_header *hdr);
-int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd);
-
-/* ipath_ether interrupt values */
-#define IPATH_LAYER_INT_IF_UP 0x2
-#define IPATH_LAYER_INT_IF_DOWN 0x4
-#define IPATH_LAYER_INT_LID 0x8
-#define IPATH_LAYER_INT_SEND_CONTINUE 0x10
-#define IPATH_LAYER_INT_BCAST 0x40
-
-extern unsigned ipath_debug; /* debugging bit mask */
-
-#endif /* _IPATH_LAYER_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 25908b02fbe5..d61c03044545 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -103,7 +103,7 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
/* This is already in network order */
nip->sys_guid = to_idev(ibdev)->sys_image_guid;
nip->node_guid = dd->ipath_guid;
- nip->port_guid = nip->sys_guid;
+ nip->port_guid = dd->ipath_guid;
nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
nip->device_id = cpu_to_be16(dd->ipath_deviceid);
majrev = dd->ipath_majrev;
@@ -292,7 +292,12 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
/* pip->vl_arb_high_cap; // only one VL */
/* pip->vl_arb_low_cap; // only one VL */
/* InitTypeReply = 0 */
- pip->inittypereply_mtucap = IB_MTU_4096;
+ /*
+ * Note: the chips support a maximum MTU of 4096, but the driver
+ * hasn't implemented this feature yet, so set the maximum value
+ * to 2048.
+ */
+ pip->inittypereply_mtucap = IB_MTU_2048;
// HCAs ignore VLStallCount and HOQLife
/* pip->vlstallcnt_hoqlife; */
pip->operationalvl_pei_peo_fpi_fpo = 0x10; /* OVLs = 1 */
diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c
index 937bc3396b53..fa830e22002f 100644
--- a/drivers/infiniband/hw/ipath/ipath_mmap.c
+++ b/drivers/infiniband/hw/ipath/ipath_mmap.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index bdeef8d4f279..e442470a2375 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index bfef08ecd342..1324b35ff1f8 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -336,7 +336,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->qkey = 0;
qp->qp_access_flags = 0;
qp->s_busy = 0;
- qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR;
+ qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
qp->s_hdrwords = 0;
qp->s_psn = 0;
qp->r_psn = 0;
@@ -507,16 +507,13 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->port_num > ibqp->device->phys_port_cnt)
goto inval;
+ /*
+ * Note: the chips support a maximum MTU of 4096, but the driver
+ * hasn't implemented this feature yet, so don't allow Path MTU
+ * values greater than 2048.
+ */
if (attr_mask & IB_QP_PATH_MTU)
- if (attr->path_mtu > IB_MTU_4096)
- goto inval;
-
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- if (attr->max_dest_rd_atomic > 1)
- goto inval;
-
- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
- if (attr->max_rd_atomic > 1)
+ if (attr->path_mtu > IB_MTU_2048)
goto inval;
if (attr_mask & IB_QP_PATH_MIG_STATE)
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 1915771fd038..46744ea2babd 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -125,8 +125,10 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
if (len > pmtu) {
len = pmtu;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
- } else
+ } else {
qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
+ e->sent = 1;
+ }
ohdr->u.aeth = ipath_compute_aeth(qp);
hwords++;
qp->s_ack_rdma_psn = e->psn;
@@ -143,6 +145,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
cpu_to_be32(e->atomic_data);
hwords += sizeof(ohdr->u.at) / sizeof(u32);
bth2 = e->psn;
+ e->sent = 1;
}
bth0 = qp->s_ack_state << 24;
break;
@@ -158,6 +161,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
ohdr->u.aeth = ipath_compute_aeth(qp);
hwords++;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+ qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
}
bth0 = qp->s_ack_state << 24;
bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
@@ -188,7 +192,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
}
qp->s_hdrwords = hwords;
qp->s_cur_size = len;
- *bth0p = bth0;
+ *bth0p = bth0 | (1 << 22); /* Set M bit */
*bth2p = bth2;
return 1;
@@ -240,7 +244,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
- bth0 = 0;
+ bth0 = 1 << 22; /* Set M bit */
/* Send a request. */
wqe = get_swqe_ptr(qp, qp->s_cur);
@@ -604,7 +608,7 @@ static void send_rc_ack(struct ipath_qp *qp)
}
/* read pkey_index w/o lock (its atomic) */
bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
- OP(ACKNOWLEDGE) << 24;
+ (OP(ACKNOWLEDGE) << 24) | (1 << 22);
if (qp->r_nak_state)
ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
(qp->r_nak_state <<
@@ -806,13 +810,15 @@ static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
* Called at interrupt level with the QP s_lock held and interrupts disabled.
* Returns 1 if OK, 0 if current operation should be aborted (NAK).
*/
-static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
+static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
+ u64 val)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_wc wc;
struct ipath_swqe *wqe;
int ret = 0;
u32 ack_psn;
+ int diff;
/*
* Remove the QP from the timeout queue (or RNR timeout queue).
@@ -840,7 +846,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
* The MSN might be for a later WQE than the PSN indicates so
* only complete WQEs that the PSN finishes.
*/
- while (ipath_cmp24(ack_psn, wqe->lpsn) >= 0) {
+ while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
+ /*
+ * RDMA_READ_RESPONSE_ONLY is a special case since
+ * we want to generate completion events for everything
+ * before the RDMA read, copy the data, then generate
+ * the completion for the read.
+ */
+ if (wqe->wr.opcode == IB_WR_RDMA_READ &&
+ opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
+ diff == 0) {
+ ret = 1;
+ goto bail;
+ }
/*
* If this request is a RDMA read or atomic, and the ACK is
* for a later operation, this ACK NAKs the RDMA read or
@@ -851,12 +869,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
* is sent but before the response is received.
*/
if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
- (opcode != OP(RDMA_READ_RESPONSE_LAST) ||
- ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
+ (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
- (opcode != OP(ATOMIC_ACKNOWLEDGE) ||
- ipath_cmp24(wqe->psn, psn) != 0))) {
+ (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
/*
* The last valid PSN seen is the previous
* request's.
@@ -870,6 +886,9 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
*/
goto bail;
}
+ if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ *(u64 *) wqe->sg_list[0].vaddr = val;
if (qp->s_num_rd_atomic &&
(wqe->wr.opcode == IB_WR_RDMA_READ ||
wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
@@ -1079,6 +1098,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
int diff;
u32 pad;
u32 aeth;
+ u64 val;
spin_lock_irqsave(&qp->s_lock, flags);
@@ -1118,8 +1138,6 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
data += sizeof(__be32);
}
if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
- u64 val;
-
if (!header_in_data) {
__be32 *p = ohdr->u.at.atomic_ack_eth;
@@ -1127,12 +1145,13 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
be32_to_cpu(p[1]);
} else
val = be64_to_cpu(((__be64 *) data)[0]);
- *(u64 *) wqe->sg_list[0].vaddr = val;
- }
- if (!do_rc_ack(qp, aeth, psn, opcode) ||
+ } else
+ val = 0;
+ if (!do_rc_ack(qp, aeth, psn, opcode, val) ||
opcode != OP(RDMA_READ_RESPONSE_FIRST))
goto ack_done;
hdrsize += 4;
+ wqe = get_swqe_ptr(qp, qp->s_last);
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
goto ack_op_err;
/*
@@ -1176,13 +1195,12 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
goto bail;
case OP(RDMA_READ_RESPONSE_ONLY):
- if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
- dev->n_rdma_seq++;
- ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+ if (!header_in_data)
+ aeth = be32_to_cpu(ohdr->u.aeth);
+ else
+ aeth = be32_to_cpu(((__be32 *) data)[0]);
+ if (!do_rc_ack(qp, aeth, psn, opcode, 0))
goto ack_done;
- }
- if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
- goto ack_op_err;
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/*
@@ -1197,6 +1215,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
* have to be careful to copy the data to the right
* location.
*/
+ wqe = get_swqe_ptr(qp, qp->s_last);
qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
wqe, psn, pmtu);
goto read_last;
@@ -1230,7 +1249,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
data += sizeof(__be32);
}
ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
- (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST));
+ (void) do_rc_ack(qp, aeth, psn,
+ OP(RDMA_READ_RESPONSE_LAST), 0);
goto ack_done;
}
@@ -1344,8 +1364,11 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
e = NULL;
break;
}
- if (ipath_cmp24(psn, e->psn) >= 0)
+ if (ipath_cmp24(psn, e->psn) >= 0) {
+ if (prev == qp->s_tail_ack_queue)
+ old_req = 0;
break;
+ }
}
switch (opcode) {
case OP(RDMA_READ_REQUEST): {
@@ -1460,6 +1483,22 @@ static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
spin_unlock_irqrestore(&qp->s_lock, flags);
}
+static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
+{
+ unsigned long flags;
+ unsigned next;
+
+ next = n + 1;
+ if (next > IPATH_MAX_RDMA_ATOMIC)
+ next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (n == qp->s_tail_ack_queue) {
+ qp->s_tail_ack_queue = next;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
/**
* ipath_rc_rcv - process an incoming RC packet
* @dev: the device this packet came in on
@@ -1672,6 +1711,9 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_WRITE)))
+ goto nack_inv;
/* consume RWQE */
/* RETH comes after BTH */
if (!header_in_data)
@@ -1701,9 +1743,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
qp->r_sge.sge.length = 0;
qp->r_sge.sge.sge_length = 0;
}
- if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_WRITE)))
- goto nack_acc;
if (opcode == OP(RDMA_WRITE_FIRST))
goto send_middle;
else if (opcode == OP(RDMA_WRITE_ONLY))
@@ -1717,13 +1756,17 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
u32 len;
u8 next;
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
- goto nack_acc;
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_READ)))
+ goto nack_inv;
next = qp->r_head_ack_queue + 1;
if (next > IPATH_MAX_RDMA_ATOMIC)
next = 0;
- if (unlikely(next == qp->s_tail_ack_queue))
- goto nack_inv;
+ if (unlikely(next == qp->s_tail_ack_queue)) {
+ if (!qp->s_ack_queue[next].sent)
+ goto nack_inv;
+ ipath_update_ack_queue(qp, next);
+ }
e = &qp->s_ack_queue[qp->r_head_ack_queue];
/* RETH comes after BTH */
if (!header_in_data)
@@ -1758,6 +1801,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
e->rdma_sge.sge.sge_length = 0;
}
e->opcode = opcode;
+ e->sent = 0;
e->psn = psn;
/*
* We need to increment the MSN here instead of when we
@@ -1789,12 +1833,15 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
if (unlikely(!(qp->qp_access_flags &
IB_ACCESS_REMOTE_ATOMIC)))
- goto nack_acc;
+ goto nack_inv;
next = qp->r_head_ack_queue + 1;
if (next > IPATH_MAX_RDMA_ATOMIC)
next = 0;
- if (unlikely(next == qp->s_tail_ack_queue))
- goto nack_inv;
+ if (unlikely(next == qp->s_tail_ack_queue)) {
+ if (!qp->s_ack_queue[next].sent)
+ goto nack_inv;
+ ipath_update_ack_queue(qp, next);
+ }
if (!header_in_data)
ateth = &ohdr->u.atomic_eth;
else
@@ -1819,6 +1866,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
be64_to_cpu(ateth->compare_data),
sdata);
e->opcode = opcode;
+ e->sent = 0;
e->psn = psn & IPATH_PSN_MASK;
qp->r_msn++;
qp->r_psn++;
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index c182bcd62098..708eba3165d7 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index d9c2a9b15d86..c69c25239443 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -194,6 +194,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
ret = 0;
goto bail;
}
+ /* Make sure entry is read after head index is read. */
+ smp_rmb();
wqe = get_rwqe_ptr(rq, tail);
if (++tail >= rq->size)
tail = 0;
@@ -267,7 +269,7 @@ again:
spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
- qp->s_rnr_timeout) {
+ sqp->s_rnr_timeout) {
spin_unlock_irqrestore(&sqp->s_lock, flags);
goto done;
}
@@ -319,12 +321,22 @@ again:
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_WRITE))) {
+ wc.status = IB_WC_REM_INV_REQ_ERR;
+ goto err;
+ }
wc.wc_flags = IB_WC_WITH_IMM;
wc.imm_data = wqe->wr.imm_data;
if (!ipath_get_rwqe(qp, 1))
goto rnr_nak;
/* FALLTHROUGH */
case IB_WR_RDMA_WRITE:
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_WRITE))) {
+ wc.status = IB_WC_REM_INV_REQ_ERR;
+ goto err;
+ }
if (wqe->length == 0)
break;
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
@@ -354,8 +366,10 @@ again:
case IB_WR_RDMA_READ:
if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_READ)))
- goto acc_err;
+ IB_ACCESS_REMOTE_READ))) {
+ wc.status = IB_WC_REM_INV_REQ_ERR;
+ goto err;
+ }
if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
@@ -369,8 +383,10 @@ again:
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
if (unlikely(!(qp->qp_access_flags &
- IB_ACCESS_REMOTE_ATOMIC)))
- goto acc_err;
+ IB_ACCESS_REMOTE_ATOMIC))) {
+ wc.status = IB_WC_REM_INV_REQ_ERR;
+ goto err;
+ }
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
wqe->wr.wr.atomic.remote_addr,
wqe->wr.wr.atomic.rkey,
@@ -396,6 +412,8 @@ again:
if (len > sge->length)
len = sge->length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
BUG_ON(len == 0);
ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
sge->vaddr += len;
@@ -489,7 +507,7 @@ static int want_buffer(struct ipath_devdata *dd)
*
* Called when we run out of PIO buffers.
*/
-void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
+static void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
{
unsigned long flags;
@@ -503,11 +521,9 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
* could be called. If we are still in the tasklet function,
* tasklet_hi_schedule() will not call us until the next time
* tasklet_hi_schedule() is called.
- * We clear the tasklet flag now since we are committing to return
- * from the tasklet function.
+ * We leave the busy flag set so that another post send doesn't
+ * try to put the same QP on the piowait list again.
*/
- clear_bit(IPATH_S_BUSY, &qp->s_busy);
- tasklet_unlock(&qp->s_task);
want_buffer(dev->dd);
dev->n_piowait++;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index 03acae66ba81..40c36ec19016 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -80,6 +80,8 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
wqe->num_sge = wr->num_sge;
for (i = 0; i < wr->num_sge; i++)
wqe->sg_list[i] = wr->sg_list[i];
+ /* Make sure queue entry is written before the head index. */
+ smp_wmb();
wq->head = next;
spin_unlock_irqrestore(&srq->rq.lock, flags);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index d8b5e4cefe25..bae4f56f7271 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -55,6 +55,7 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
u64 val64;
unsigned long t0, t1;
u64 ret;
+ unsigned long flags;
t0 = jiffies;
/* If fast increment counters are only 32 bits, snapshot them,
@@ -91,12 +92,18 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
if (creg == dd->ipath_cregs->cr_wordsendcnt) {
if (val != dd->ipath_lastsword) {
dd->ipath_sword += val - dd->ipath_lastsword;
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ dd->ipath_traffic_wds += val - dd->ipath_lastsword;
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
dd->ipath_lastsword = val;
}
val64 = dd->ipath_sword;
} else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
if (val != dd->ipath_lastrword) {
dd->ipath_rword += val - dd->ipath_lastrword;
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ dd->ipath_traffic_wds += val - dd->ipath_lastrword;
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
dd->ipath_lastrword = val;
}
val64 = dd->ipath_rword;
@@ -189,6 +196,45 @@ static void ipath_qcheck(struct ipath_devdata *dd)
}
}
+static void ipath_chk_errormask(struct ipath_devdata *dd)
+{
+ static u32 fixed;
+ u32 ctrl;
+ unsigned long errormask;
+ unsigned long hwerrs;
+
+ if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
+ return;
+
+ errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
+
+ if (errormask == dd->ipath_errormask)
+ return;
+ fixed++;
+
+ hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
+ ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+ dd->ipath_errormask);
+
+ if ((hwerrs & dd->ipath_hwerrmask) ||
+ (ctrl & INFINIPATH_C_FREEZEMODE)) {
+ /* force re-interrupt of pending events, just in case */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
+ dev_info(&dd->pcidev->dev,
+ "errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n",
+ fixed, errormask, (unsigned long)dd->ipath_errormask,
+ ctrl, hwerrs);
+ } else
+ ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n",
+ fixed, errormask,
+ (unsigned long)dd->ipath_errormask);
+}
+
+
/**
* ipath_get_faststats - get word counters from chip before they overflow
* @opaque - contains a pointer to the infinipath device ipath_devdata
@@ -200,6 +246,7 @@ void ipath_get_faststats(unsigned long opaque)
struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
u32 val;
static unsigned cnt;
+ unsigned long flags;
/*
* don't access the chip while running diags, or memory diags can
@@ -210,9 +257,20 @@ void ipath_get_faststats(unsigned long opaque)
/* but re-arm the timer, for diags case; won't hurt other */
goto done;
+ /*
+ * We now try to maintain a "active timer", based on traffic
+ * exceeding a threshold, so we need to check the word-counts
+ * even if they are 64-bit.
+ */
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ if (dd->ipath_traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
+ atomic_add(5, &dd->ipath_active_time); /* S/B #define */
+ dd->ipath_traffic_wds = 0;
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+
if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
- ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
}
@@ -232,14 +290,13 @@ void ipath_get_faststats(unsigned long opaque)
dd->ipath_lasterror = 0;
if (dd->ipath_lasthwerror)
dd->ipath_lasthwerror = 0;
- if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
+ if (dd->ipath_maskederrs
&& time_after(jiffies, dd->ipath_unmasktime)) {
char ebuf[256];
int iserr;
iserr = ipath_decode_err(ebuf, sizeof ebuf,
- (dd->ipath_maskederrs & ~dd->
- ipath_ignorederrs));
- if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
+ dd->ipath_maskederrs);
+ if (dd->ipath_maskederrs &
~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
INFINIPATH_E_PKTERRS ))
ipath_dev_err(dd, "Re-enabling masked errors "
@@ -259,9 +316,12 @@ void ipath_get_faststats(unsigned long opaque)
ipath_cdbg(ERRPKT, "Re-enabling packet"
" problem interrupt (%s)\n", ebuf);
}
- dd->ipath_maskederrs = dd->ipath_ignorederrs;
+
+ /* re-enable masked errors */
+ dd->ipath_errormask |= dd->ipath_maskederrs;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
- ~dd->ipath_maskederrs);
+ dd->ipath_errormask);
+ dd->ipath_maskederrs = 0;
}
/* limit qfull messages to ~one per minute per port */
@@ -275,6 +335,7 @@ void ipath_get_faststats(unsigned long opaque)
}
}
+ ipath_chk_errormask(dd);
done:
mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index 4dc398d5e011..16238cd3a036 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -596,6 +596,43 @@ bail:
return ret;
}
+static ssize_t store_led_override(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret > 0)
+ ipath_set_led_override(dd, val);
+ else
+ ipath_dev_err(dd, "attempt to set invalid LED override\n");
+ return ret;
+}
+
+static ssize_t show_logged_errs(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int idx, count;
+
+ /* force consistency with actual EEPROM */
+ if (ipath_update_eeprom_log(dd) != 0)
+ return -ENXIO;
+
+ count = 0;
+ for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+ count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
+ dd->ipath_eep_st_errs[idx],
+ idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' ');
+ }
+
+ return count;
+}
static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
@@ -625,6 +662,8 @@ static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
+static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
+static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
static struct attribute *dev_attributes[] = {
&dev_attr_guid.attr,
@@ -641,6 +680,8 @@ static struct attribute *dev_attributes[] = {
&dev_attr_unit.attr,
&dev_attr_enabled.attr,
&dev_attr_rx_pol_inv.attr,
+ &dev_attr_led_override.attr,
+ &dev_attr_logged_errors.attr,
NULL
};
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 1c2b03c2ef5e..8380fbc50d2c 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -58,7 +58,6 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
wc->port_num = 0;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
}
- wqe = get_swqe_ptr(qp, qp->s_last);
}
/**
@@ -87,7 +86,7 @@ int ipath_make_uc_req(struct ipath_qp *qp,
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
- bth0 = 0;
+ bth0 = 1 << 22; /* Set M bit */
/* Get the next send request. */
wqe = get_swqe_ptr(qp, qp->s_last);
@@ -97,8 +96,10 @@ int ipath_make_uc_req(struct ipath_qp *qp,
* Signal the completion of the last send
* (if there is one).
*/
- if (qp->s_last != qp->s_tail)
+ if (qp->s_last != qp->s_tail) {
complete_last_send(qp, wqe, &wc);
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ }
/* Check if send work queue is empty. */
if (qp->s_tail == qp->s_head)
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index a518f7c8fa83..f9a3338a5fb7 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -176,6 +176,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
dev->n_pkt_drops++;
goto bail_sge;
}
+ /* Make sure entry is read after head index is read. */
+ smp_rmb();
wqe = get_rwqe_ptr(rq, tail);
if (++tail >= rq->size)
tail = 0;
@@ -231,6 +233,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
if (len > length)
len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
BUG_ON(len == 0);
ipath_copy_sge(&rsge, sge->vaddr, len);
sge->vaddr += len;
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index 8536aeb96af8..0190edc8044e 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -171,32 +171,6 @@ int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
return ret;
}
-/**
- * ipath_get_user_pages_nocopy - lock a single page for I/O and mark shared
- * @start_page: the page to lock
- * @p: the output page structure
- *
- * This is similar to ipath_get_user_pages, but it's always one page, and we
- * mark the page as locked for I/O, and shared. This is used for the user
- * process page that contains the destination address for the rcvhdrq tail
- * update, so we need to have the vma. If we don't do this, the page can be
- * taken away from us on fork, even if the child never touches it, and then
- * the user process never sees the tail register updates.
- */
-int ipath_get_user_pages_nocopy(unsigned long page, struct page **p)
-{
- struct vm_area_struct *vma;
- int ret;
-
- down_write(&current->mm->mmap_sem);
-
- ret = __get_user_pages(page, 1, p, &vma);
-
- up_write(&current->mm->mmap_sem);
-
- return ret;
-}
-
void ipath_release_user_pages(struct page **p, size_t num_pages)
{
down_write(&current->mm->mmap_sem);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index bb70845279b8..16aa61fd8085 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -164,9 +164,11 @@ void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
while (length) {
u32 len = sge->length;
- BUG_ON(len == 0);
if (len > length)
len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
memcpy(sge->vaddr, data, len);
sge->vaddr += len;
sge->length -= len;
@@ -202,9 +204,11 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
while (length) {
u32 len = sge->length;
- BUG_ON(len == 0);
if (len > length)
len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
@@ -323,6 +327,8 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
wqe->num_sge = wr->num_sge;
for (i = 0; i < wr->num_sge; i++)
wqe->sg_list[i] = wr->sg_list[i];
+ /* Make sure queue entry is written before the head index. */
+ smp_wmb();
wq->head = next;
spin_unlock_irqrestore(&qp->r_rq.lock, flags);
}
@@ -482,7 +488,7 @@ bail:;
* This is called from ipath_do_rcv_timer() at interrupt level to check for
* QPs which need retransmits and to collect performance numbers.
*/
-void ipath_ib_timer(struct ipath_ibdev *dev)
+static void ipath_ib_timer(struct ipath_ibdev *dev)
{
struct ipath_qp *resend = NULL;
struct list_head *last;
@@ -948,6 +954,7 @@ int ipath_ib_piobufavail(struct ipath_ibdev *dev)
qp = list_entry(dev->piowait.next, struct ipath_qp,
piowait);
list_del_init(&qp->piowait);
+ clear_bit(IPATH_S_BUSY, &qp->s_busy);
tasklet_hi_schedule(&qp->s_task);
}
spin_unlock_irqrestore(&dev->pending_lock, flags);
@@ -981,6 +988,8 @@ static int ipath_query_device(struct ib_device *ibdev,
props->max_ah = ib_ipath_max_ahs;
props->max_cqe = ib_ipath_max_cqes;
props->max_mr = dev->lk_table.max;
+ props->max_fmr = dev->lk_table.max;
+ props->max_map_per_fmr = 32767;
props->max_pd = ib_ipath_max_pds;
props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
props->max_qp_init_rd_atom = 255;
@@ -1051,7 +1060,12 @@ static int ipath_query_port(struct ib_device *ibdev,
props->max_vl_num = 1; /* VLCap = VL0 */
props->init_type_reply = 0;
- props->max_mtu = IB_MTU_4096;
+ /*
+ * Note: the chips support a maximum MTU of 4096, but the driver
+ * hasn't implemented this feature yet, so set the maximum value
+ * to 2048.
+ */
+ props->max_mtu = IB_MTU_2048;
switch (dev->dd->ipath_ibmtu) {
case 4096:
mtu = IB_MTU_4096;
@@ -1361,13 +1375,6 @@ static void __verbs_timer(unsigned long arg)
{
struct ipath_devdata *dd = (struct ipath_devdata *) arg;
- /*
- * If port 0 receive packet interrupts are not available, or
- * can be missed, poll the receive queue
- */
- if (dd->ipath_flags & IPATH_POLL_RX_INTR)
- ipath_kreceive(dd);
-
/* Handle verbs layer timeouts. */
ipath_ib_timer(dd->verbs_dev);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 088b837ebea8..1a24c6a4a814 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -42,8 +42,6 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
-#include "ipath_layer.h"
-
#define IPATH_MAX_RDMA_ATOMIC 4
#define QPN_MAX (1 << 24)
@@ -321,6 +319,7 @@ struct ipath_sge_state {
*/
struct ipath_ack_entry {
u8 opcode;
+ u8 sent;
u32 psn;
union {
struct ipath_sge_state rdma_sge;
@@ -781,8 +780,6 @@ void ipath_update_mmap_info(struct ipath_ibdev *dev,
int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
-
void ipath_insert_rnr_queue(struct ipath_qp *qp);
int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
@@ -806,8 +803,6 @@ void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
int ipath_ib_piobufavail(struct ipath_ibdev *);
-void ipath_ib_timer(struct ipath_ibdev *);
-
unsigned ipath_get_npkeys(struct ipath_devdata *);
u32 ipath_get_cr_errpkey(struct ipath_devdata *);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index dd691cfa5079..9e5abf9c309d 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
index 0095bb70f34e..1d7bd82a1fb1 100644
--- a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
+++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
index 04696e62da87..3428acb0868c 100644
--- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
+++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -63,12 +63,29 @@ int ipath_enable_wc(struct ipath_devdata *dd)
* of 2 address matching the length (which has to be a power of 2).
* For rev1, that means the base address, for rev2, it will be just
* the PIO buffers themselves.
+ * For chips with two sets of buffers, the calculations are
+ * somewhat more complicated; we need to sum, and the piobufbase
+ * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
+ * The buffers are still packed, so a single range covers both.
*/
- pioaddr = addr + dd->ipath_piobufbase;
- piolen = (dd->ipath_piobcnt2k +
- dd->ipath_piobcnt4k) *
- ALIGN(dd->ipath_piobcnt2k +
- dd->ipath_piobcnt4k, dd->ipath_palign);
+ if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */
+ unsigned long pio2kbase, pio4kbase;
+ pio2kbase = dd->ipath_piobufbase & 0xffffffffUL;
+ pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL;
+ if (pio2kbase < pio4kbase) { /* all, for now */
+ pioaddr = addr + pio2kbase;
+ piolen = pio4kbase - pio2kbase +
+ dd->ipath_piobcnt4k * dd->ipath_4kalign;
+ } else {
+ pioaddr = addr + pio4kbase;
+ piolen = pio2kbase - pio4kbase +
+ dd->ipath_piobcnt2k * dd->ipath_palign;
+ }
+ } else { /* single buffer size (2K, currently) */
+ pioaddr = addr + dd->ipath_piobufbase;
+ piolen = dd->ipath_piobcnt2k * dd->ipath_palign +
+ dd->ipath_piobcnt4k * dd->ipath_4kalign;
+ }
for (bits = 0; !(piolen & (1ULL << bits)); bits++)
/* do nothing */ ;
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
index b8912cdb9663..4175a4bd0c78 100644
--- a/drivers/infiniband/hw/mlx4/Kconfig
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -1,6 +1,5 @@
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
- depends on INFINIBAND
select MLX4_CORE
---help---
This driver provides low-level InfiniBand support for
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 660b27aecae5..8bf44daf45ec 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -389,7 +389,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
wc->opcode = IB_WC_SEND;
break;
case MLX4_OPCODE_RDMA_READ:
- wc->opcode = IB_WC_SEND;
+ wc->opcode = IB_WC_RDMA_READ;
wc->byte_len = be32_to_cpu(cqe->byte_cnt);
break;
case MLX4_OPCODE_ATOMIC_CS:
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 333091787c5f..0ed02b7834da 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -109,7 +109,7 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
in_modifier, op_modifier,
MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
- if (!err);
+ if (!err)
memcpy(response_mad, outmailbox->buf, 256);
mlx4_free_cmd_mailbox(dev->dev, inmailbox);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index c591616dccde..dde8fe9af47e 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -169,7 +169,7 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
props->phys_state = out_mad->data[33] >> 4;
props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
- props->max_msg_sz = 0x80000000;
+ props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
@@ -523,11 +523,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
ibdev->ib_dev.query_device = mlx4_ib_query_device;
@@ -546,10 +548,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah;
ibdev->ib_dev.create_srq = mlx4_ib_create_srq;
ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq;
+ ibdev->ib_dev.query_srq = mlx4_ib_query_srq;
ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq;
ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv;
ibdev->ib_dev.create_qp = mlx4_ib_create_qp;
ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
+ ibdev->ib_dev.query_qp = mlx4_ib_query_qp;
ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
ibdev->ib_dev.post_send = mlx4_ib_post_send;
ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 24ccadd6e4f8..705ff2fa237e 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -35,6 +35,7 @@
#include <linux/compiler.h>
#include <linux/list.h>
+#include <linux/mutex.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
@@ -255,6 +256,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
struct ib_udata *udata);
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
int mlx4_ib_destroy_srq(struct ib_srq *srq);
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
@@ -266,6 +268,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
int mlx4_ib_destroy_qp(struct ib_qp *qp);
int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
+int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr);
int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 28a08bdd1800..85c51bdc36f1 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -415,9 +415,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
return 0;
err_wrid:
- if (pd->uobject && !init_attr->srq)
- mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
- else {
+ if (pd->uobject) {
+ if (!init_attr->srq)
+ mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context),
+ &qp->db);
+ } else {
kfree(qp->sq.wrid);
kfree(qp->rq.wrid);
}
@@ -742,7 +744,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
printk(KERN_ERR "path MTU (%u) is invalid\n",
attr->path_mtu);
- return -EINVAL;
+ goto out;
}
context->mtu_msgmax = (attr->path_mtu << 5) | 31;
}
@@ -781,10 +783,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (attr_mask & IB_QP_AV) {
if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
- attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) {
- err = -EINVAL;
+ attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
goto out;
- }
optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
MLX4_QP_OPTPAR_SCHED_QUEUE);
@@ -798,15 +798,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (attr_mask & IB_QP_ALT_PATH) {
if (attr->alt_port_num == 0 ||
attr->alt_port_num > dev->dev->caps.num_ports)
- return -EINVAL;
+ goto out;
if (attr->alt_pkey_index >=
dev->dev->caps.pkey_table_len[attr->alt_port_num])
- return -EINVAL;
+ goto out;
if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
attr->alt_port_num))
- return -EINVAL;
+ goto out;
context->alt_path.pkey_index = attr->alt_pkey_index;
context->alt_path.ackto = attr->alt_timeout << 3;
@@ -1183,12 +1183,79 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
return cur + nreq >= wq->max_post;
}
+static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
+ u64 remote_addr, u32 rkey)
+{
+ rseg->raddr = cpu_to_be64(remote_addr);
+ rseg->rkey = cpu_to_be32(rkey);
+ rseg->reserved = 0;
+}
+
+static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
+{
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
+ aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
+ } else {
+ aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+ aseg->compare = 0;
+ }
+
+}
+
+static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
+ struct ib_send_wr *wr)
+{
+ memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
+ dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+}
+
+static void set_mlx_icrc_seg(void *dseg)
+{
+ u32 *t = dseg;
+ struct mlx4_wqe_inline_seg *iseg = dseg;
+
+ t[1] = 0;
+
+ /*
+ * Need a barrier here before writing the byte_count field to
+ * make sure that all the data is visible before the
+ * byte_count field is set. Otherwise, if the segment begins
+ * a new cacheline, the HCA prefetcher could grab the 64-byte
+ * chunk and get a valid (!= * 0xffffffff) byte count but
+ * stale data, and end up sending the wrong data.
+ */
+ wmb();
+
+ iseg->byte_count = cpu_to_be32((1 << 31) | 4);
+}
+
+static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
+{
+ dseg->lkey = cpu_to_be32(sg->lkey);
+ dseg->addr = cpu_to_be64(sg->addr);
+
+ /*
+ * Need a barrier here before writing the byte_count field to
+ * make sure that all the data is visible before the
+ * byte_count field is set. Otherwise, if the segment begins
+ * a new cacheline, the HCA prefetcher could grab the 64-byte
+ * chunk and get a valid (!= * 0xffffffff) byte count but
+ * stale data, and end up sending the wrong data.
+ */
+ wmb();
+
+ dseg->byte_count = cpu_to_be32(sg->length);
+}
+
int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
struct mlx4_ib_qp *qp = to_mqp(ibqp);
void *wqe;
struct mlx4_wqe_ctrl_seg *ctrl;
+ struct mlx4_wqe_data_seg *dseg;
unsigned long flags;
int nreq;
int err = 0;
@@ -1238,26 +1305,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- ((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
- cpu_to_be64(wr->wr.atomic.remote_addr);
- ((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
- cpu_to_be32(wr->wr.atomic.rkey);
- ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
-
+ set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+ wr->wr.atomic.rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg);
- if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
- ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
- cpu_to_be64(wr->wr.atomic.swap);
- ((struct mlx4_wqe_atomic_seg *) wqe)->compare =
- cpu_to_be64(wr->wr.atomic.compare_add);
- } else {
- ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
- cpu_to_be64(wr->wr.atomic.compare_add);
- ((struct mlx4_wqe_atomic_seg *) wqe)->compare = 0;
- }
-
+ set_atomic_seg(wqe, wr);
wqe += sizeof (struct mlx4_wqe_atomic_seg);
+
size += (sizeof (struct mlx4_wqe_raddr_seg) +
sizeof (struct mlx4_wqe_atomic_seg)) / 16;
@@ -1266,15 +1320,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- ((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
- cpu_to_be64(wr->wr.rdma.remote_addr);
- ((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
- cpu_to_be32(wr->wr.rdma.rkey);
- ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
-
+ set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg);
size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
-
break;
default:
@@ -1284,13 +1333,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case IB_QPT_UD:
- memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av,
- &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
- ((struct mlx4_wqe_datagram_seg *) wqe)->dqpn =
- cpu_to_be32(wr->wr.ud.remote_qpn);
- ((struct mlx4_wqe_datagram_seg *) wqe)->qkey =
- cpu_to_be32(wr->wr.ud.remote_qkey);
-
+ set_datagram_seg(wqe, wr);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
break;
@@ -1312,27 +1355,27 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
}
- for (i = 0; i < wr->num_sge; ++i) {
- ((struct mlx4_wqe_data_seg *) wqe)->byte_count =
- cpu_to_be32(wr->sg_list[i].length);
- ((struct mlx4_wqe_data_seg *) wqe)->lkey =
- cpu_to_be32(wr->sg_list[i].lkey);
- ((struct mlx4_wqe_data_seg *) wqe)->addr =
- cpu_to_be64(wr->sg_list[i].addr);
-
- wqe += sizeof (struct mlx4_wqe_data_seg);
- size += sizeof (struct mlx4_wqe_data_seg) / 16;
- }
+ /*
+ * Write data segments in reverse order, so as to
+ * overwrite cacheline stamp last within each
+ * cacheline. This avoids issues with WQE
+ * prefetching.
+ */
+
+ dseg = wqe;
+ dseg += wr->num_sge - 1;
+ size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
/* Add one more inline data segment for ICRC for MLX sends */
- if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) {
- ((struct mlx4_wqe_inline_seg *) wqe)->byte_count =
- cpu_to_be32((1 << 31) | 4);
- ((u32 *) wqe)[1] = 0;
- wqe += sizeof (struct mlx4_wqe_data_seg);
+ if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI ||
+ qp->ibqp.qp_type == IB_QPT_GSI)) {
+ set_mlx_icrc_seg(dseg + 1);
size += sizeof (struct mlx4_wqe_data_seg) / 16;
}
+ for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
+ set_data_seg(dseg, wr->sg_list + i);
+
ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
MLX4_WQE_CTRL_FENCE : 0) | size;
@@ -1455,3 +1498,151 @@ out:
return err;
}
+
+static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)
+{
+ switch (mlx4_state) {
+ case MLX4_QP_STATE_RST: return IB_QPS_RESET;
+ case MLX4_QP_STATE_INIT: return IB_QPS_INIT;
+ case MLX4_QP_STATE_RTR: return IB_QPS_RTR;
+ case MLX4_QP_STATE_RTS: return IB_QPS_RTS;
+ case MLX4_QP_STATE_SQ_DRAINING:
+ case MLX4_QP_STATE_SQD: return IB_QPS_SQD;
+ case MLX4_QP_STATE_SQER: return IB_QPS_SQE;
+ case MLX4_QP_STATE_ERR: return IB_QPS_ERR;
+ default: return -1;
+ }
+}
+
+static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state)
+{
+ switch (mlx4_mig_state) {
+ case MLX4_QP_PM_ARMED: return IB_MIG_ARMED;
+ case MLX4_QP_PM_REARM: return IB_MIG_REARM;
+ case MLX4_QP_PM_MIGRATED: return IB_MIG_MIGRATED;
+ default: return -1;
+ }
+}
+
+static int to_ib_qp_access_flags(int mlx4_flags)
+{
+ int ib_flags = 0;
+
+ if (mlx4_flags & MLX4_QP_BIT_RRE)
+ ib_flags |= IB_ACCESS_REMOTE_READ;
+ if (mlx4_flags & MLX4_QP_BIT_RWE)
+ ib_flags |= IB_ACCESS_REMOTE_WRITE;
+ if (mlx4_flags & MLX4_QP_BIT_RAE)
+ ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
+
+ return ib_flags;
+}
+
+static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,
+ struct mlx4_qp_path *path)
+{
+ memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
+ ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1;
+
+ if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
+ return;
+
+ ib_ah_attr->dlid = be16_to_cpu(path->rlid);
+ ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;
+ ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
+ ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
+ ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
+ if (ib_ah_attr->ah_flags) {
+ ib_ah_attr->grh.sgid_index = path->mgid_index;
+ ib_ah_attr->grh.hop_limit = path->hop_limit;
+ ib_ah_attr->grh.traffic_class =
+ (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
+ ib_ah_attr->grh.flow_label =
+ be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
+ memcpy(ib_ah_attr->grh.dgid.raw,
+ path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
+ }
+}
+
+int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ struct mlx4_qp_context context;
+ int mlx4_state;
+ int err;
+
+ if (qp->state == IB_QPS_RESET) {
+ qp_attr->qp_state = IB_QPS_RESET;
+ goto done;
+ }
+
+ err = mlx4_qp_query(dev->dev, &qp->mqp, &context);
+ if (err)
+ return -EINVAL;
+
+ mlx4_state = be32_to_cpu(context.flags) >> 28;
+
+ qp_attr->qp_state = to_ib_qp_state(mlx4_state);
+ qp_attr->path_mtu = context.mtu_msgmax >> 5;
+ qp_attr->path_mig_state =
+ to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);
+ qp_attr->qkey = be32_to_cpu(context.qkey);
+ qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;
+ qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff;
+ qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff;
+ qp_attr->qp_access_flags =
+ to_ib_qp_access_flags(be32_to_cpu(context.params2));
+
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
+ to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);
+ to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path);
+ qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
+ qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
+ }
+
+ qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
+ if (qp_attr->qp_state == IB_QPS_INIT)
+ qp_attr->port_num = qp->port;
+ else
+ qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
+
+ /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
+ qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
+
+ qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7);
+
+ qp_attr->max_dest_rd_atomic =
+ 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
+ qp_attr->min_rnr_timer =
+ (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
+ qp_attr->timeout = context.pri_path.ackto >> 3;
+ qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7;
+ qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7;
+ qp_attr->alt_timeout = context.alt_path.ackto >> 3;
+
+done:
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
+ qp_attr->cap.max_recv_sge = qp->rq.max_gs;
+
+ if (!ibqp->uobject) {
+ qp_attr->cap.max_send_wr = qp->sq.wqe_cnt;
+ qp_attr->cap.max_send_sge = qp->sq.max_gs;
+ } else {
+ qp_attr->cap.max_send_wr = 0;
+ qp_attr->cap.max_send_sge = 0;
+ }
+
+ /*
+ * We don't support inline sends for kernel QPs (yet), and we
+ * don't know what userspace's value should be.
+ */
+ qp_attr->cap.max_inline_data = 0;
+
+ qp_init_attr->cap = qp_attr->cap;
+
+ return 0;
+}
+
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 12fac1c8989d..408748fb5285 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -240,6 +240,24 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
return 0;
}
+int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
+ struct mlx4_ib_srq *srq = to_msrq(ibsrq);
+ int ret;
+ int limit_watermark;
+
+ ret = mlx4_srq_query(dev->dev, &srq->msrq, &limit_watermark);
+ if (ret)
+ return ret;
+
+ srq_attr->srq_limit = be16_to_cpu(limit_watermark);
+ srq_attr->max_wr = srq->msrq.max - 1;
+ srq_attr->max_sge = srq->msrq.max_gs;
+
+ return 0;
+}
+
int mlx4_ib_destroy_srq(struct ib_srq *srq)
{
struct mlx4_ib_dev *dev = to_mdev(srq->device);
diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig
index 9aa5a4468a75..03efc074967e 100644
--- a/drivers/infiniband/hw/mthca/Kconfig
+++ b/drivers/infiniband/hw/mthca/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_MTHCA
tristate "Mellanox HCA support"
- depends on PCI && INFINIBAND
+ depends on PCI
---help---
This is a low-level driver for Mellanox InfiniHost host
channel adapters (HCAs), including the MT23108 PCI-X HCA
diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c
index f930e55b58fc..a76306709618 100644
--- a/drivers/infiniband/hw/mthca/mthca_allocator.c
+++ b/drivers/infiniband/hw/mthca/mthca_allocator.c
@@ -255,7 +255,7 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
dma_list[i] = t;
pci_unmap_addr_set(&buf->page_list[i], mapping, t);
- memset(buf->page_list[i].buf, 0, PAGE_SIZE);
+ clear_page(buf->page_list[i].buf);
}
}
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index f40558d76475..acc95892713a 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -357,8 +357,6 @@ void mthca_cmd_event(struct mthca_dev *dev,
context->status = status;
context->out_param = out_param;
- context->token += dev->cmd.token_mask + 1;
-
complete(&context->done);
}
@@ -380,6 +378,7 @@ static int mthca_cmd_wait(struct mthca_dev *dev,
spin_lock(&dev->cmd.context_lock);
BUG_ON(dev->cmd.free_head < 0);
context = &dev->cmd.context[dev->cmd.free_head];
+ context->token += dev->cmd.token_mask + 1;
dev->cmd.free_head = context->next;
spin_unlock(&dev->cmd.context_lock);
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 8ec9fa1ff9ea..8592b26dc4e1 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -522,7 +522,7 @@ static int mthca_create_eq(struct mthca_dev *dev,
dma_list[i] = t;
pci_unmap_addr_set(&eq->page_list[i], mapping, t);
- memset(eq->page_list[i].buf, 0, PAGE_SIZE);
+ clear_page(eq->page_list[i].buf);
}
for (i = 0; i < eq->nent; ++i)
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index aa563e61de65..76fed7545c53 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -67,7 +67,7 @@ MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
static int msi = 0;
module_param(msi, int, 0444);
-MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero");
+MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero (deprecated, use MSI-X instead)");
#else /* CONFIG_PCI_MSI */
@@ -1117,9 +1117,21 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
if (msi_x && !mthca_enable_msi_x(mdev))
mdev->mthca_flags |= MTHCA_FLAG_MSI_X;
- if (msi && !(mdev->mthca_flags & MTHCA_FLAG_MSI_X) &&
- !pci_enable_msi(pdev))
- mdev->mthca_flags |= MTHCA_FLAG_MSI;
+ else if (msi) {
+ static int warned;
+
+ if (!warned) {
+ printk(KERN_WARNING PFX "WARNING: MSI support will be "
+ "removed from the ib_mthca driver in January 2008.\n");
+ printk(KERN_WARNING " If you are using MSI and cannot "
+ "switch to MSI-X, please tell "
+ "<general@lists.openfabrics.org>.\n");
+ ++warned;
+ }
+
+ if (!pci_enable_msi(pdev))
+ mdev->mthca_flags |= MTHCA_FLAG_MSI;
+ }
if (mthca_cmd_init(mdev)) {
mthca_err(mdev, "Failed to init command interface, aborting.\n");
@@ -1135,7 +1147,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
goto err_cmd;
if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) {
- mthca_warn(mdev, "HCA FW version %d.%d.%3d is old (%d.%d.%3d is current).\n",
+ mthca_warn(mdev, "HCA FW version %d.%d.%03d is old (%d.%d.%03d is current).\n",
(int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
(int) (mdev->fw_ver & 0xffff),
(int) (mthca_hca_table[hca_type].latest_fw >> 32),
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index eef415b12b2e..df01b2026a64 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1578,6 +1578,45 @@ static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
return cur + nreq >= wq->max;
}
+static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
+ u64 remote_addr, u32 rkey)
+{
+ rseg->raddr = cpu_to_be64(remote_addr);
+ rseg->rkey = cpu_to_be32(rkey);
+ rseg->reserved = 0;
+}
+
+static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
+ struct ib_send_wr *wr)
+{
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
+ aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
+ } else {
+ aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+ aseg->compare = 0;
+ }
+
+}
+
+static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
+ struct ib_send_wr *wr)
+{
+ useg->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
+ useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
+ useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+
+}
+
+static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
+ struct ib_send_wr *wr)
+{
+ memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
+ useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+}
+
int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
@@ -1590,8 +1629,15 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int nreq;
int i;
int size;
- int size0 = 0;
- u32 f0;
+ /*
+ * f0 and size0 are only used if nreq != 0, and they will
+ * always be initialized the first time through the main loop
+ * before nreq is incremented. So nreq cannot become non-zero
+ * without initializing f0 and size0, and they are in fact
+ * never used uninitialized.
+ */
+ int uninitialized_var(size0);
+ u32 uninitialized_var(f0);
int ind;
u8 op0 = 0;
@@ -1636,25 +1682,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- ((struct mthca_raddr_seg *) wqe)->raddr =
- cpu_to_be64(wr->wr.atomic.remote_addr);
- ((struct mthca_raddr_seg *) wqe)->rkey =
- cpu_to_be32(wr->wr.atomic.rkey);
- ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-
+ set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+ wr->wr.atomic.rkey);
wqe += sizeof (struct mthca_raddr_seg);
- if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
- ((struct mthca_atomic_seg *) wqe)->swap_add =
- cpu_to_be64(wr->wr.atomic.swap);
- ((struct mthca_atomic_seg *) wqe)->compare =
- cpu_to_be64(wr->wr.atomic.compare_add);
- } else {
- ((struct mthca_atomic_seg *) wqe)->swap_add =
- cpu_to_be64(wr->wr.atomic.compare_add);
- ((struct mthca_atomic_seg *) wqe)->compare = 0;
- }
-
+ set_atomic_seg(wqe, wr);
wqe += sizeof (struct mthca_atomic_seg);
size += (sizeof (struct mthca_raddr_seg) +
sizeof (struct mthca_atomic_seg)) / 16;
@@ -1663,12 +1695,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
case IB_WR_RDMA_READ:
- ((struct mthca_raddr_seg *) wqe)->raddr =
- cpu_to_be64(wr->wr.rdma.remote_addr);
- ((struct mthca_raddr_seg *) wqe)->rkey =
- cpu_to_be32(wr->wr.rdma.rkey);
- ((struct mthca_raddr_seg *) wqe)->reserved = 0;
- wqe += sizeof (struct mthca_raddr_seg);
+ set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ wqe += sizeof (struct mthca_raddr_seg);
size += sizeof (struct mthca_raddr_seg) / 16;
break;
@@ -1683,12 +1712,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- ((struct mthca_raddr_seg *) wqe)->raddr =
- cpu_to_be64(wr->wr.rdma.remote_addr);
- ((struct mthca_raddr_seg *) wqe)->rkey =
- cpu_to_be32(wr->wr.rdma.rkey);
- ((struct mthca_raddr_seg *) wqe)->reserved = 0;
- wqe += sizeof (struct mthca_raddr_seg);
+ set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ wqe += sizeof (struct mthca_raddr_seg);
size += sizeof (struct mthca_raddr_seg) / 16;
break;
@@ -1700,16 +1726,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case UD:
- ((struct mthca_tavor_ud_seg *) wqe)->lkey =
- cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
- ((struct mthca_tavor_ud_seg *) wqe)->av_addr =
- cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
- ((struct mthca_tavor_ud_seg *) wqe)->dqpn =
- cpu_to_be32(wr->wr.ud.remote_qpn);
- ((struct mthca_tavor_ud_seg *) wqe)->qkey =
- cpu_to_be32(wr->wr.ud.remote_qkey);
-
- wqe += sizeof (struct mthca_tavor_ud_seg);
+ set_tavor_ud_seg(wqe, wr);
+ wqe += sizeof (struct mthca_tavor_ud_seg);
size += sizeof (struct mthca_tavor_ud_seg) / 16;
break;
@@ -1734,13 +1752,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
for (i = 0; i < wr->num_sge; ++i) {
- ((struct mthca_data_seg *) wqe)->byte_count =
- cpu_to_be32(wr->sg_list[i].length);
- ((struct mthca_data_seg *) wqe)->lkey =
- cpu_to_be32(wr->sg_list[i].lkey);
- ((struct mthca_data_seg *) wqe)->addr =
- cpu_to_be64(wr->sg_list[i].addr);
- wqe += sizeof (struct mthca_data_seg);
+ mthca_set_data_seg(wqe, wr->sg_list + i);
+ wqe += sizeof (struct mthca_data_seg);
size += sizeof (struct mthca_data_seg) / 16;
}
@@ -1768,11 +1781,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
mthca_opcode[wr->opcode]);
wmb();
((struct mthca_next_seg *) prev_wqe)->ee_nds =
- cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size |
+ cpu_to_be32((nreq ? 0 : MTHCA_NEXT_DBD) | size |
((wr->send_flags & IB_SEND_FENCE) ?
MTHCA_NEXT_FENCE : 0));
- if (!size0) {
+ if (!nreq) {
size0 = size;
op0 = mthca_opcode[wr->opcode];
f0 = wr->send_flags & IB_SEND_FENCE ?
@@ -1822,7 +1835,14 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
int nreq;
int i;
int size;
- int size0 = 0;
+ /*
+ * size0 is only used if nreq != 0, and it will always be
+ * initialized the first time through the main loop before
+ * nreq is incremented. So nreq cannot become non-zero
+ * without initializing size0, and it is in fact never used
+ * uninitialized.
+ */
+ int uninitialized_var(size0);
int ind;
void *wqe;
void *prev_wqe;
@@ -1863,13 +1883,8 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
}
for (i = 0; i < wr->num_sge; ++i) {
- ((struct mthca_data_seg *) wqe)->byte_count =
- cpu_to_be32(wr->sg_list[i].length);
- ((struct mthca_data_seg *) wqe)->lkey =
- cpu_to_be32(wr->sg_list[i].lkey);
- ((struct mthca_data_seg *) wqe)->addr =
- cpu_to_be64(wr->sg_list[i].addr);
- wqe += sizeof (struct mthca_data_seg);
+ mthca_set_data_seg(wqe, wr->sg_list + i);
+ wqe += sizeof (struct mthca_data_seg);
size += sizeof (struct mthca_data_seg) / 16;
}
@@ -1881,7 +1896,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
((struct mthca_next_seg *) prev_wqe)->ee_nds =
cpu_to_be32(MTHCA_NEXT_DBD | size);
- if (!size0)
+ if (!nreq)
size0 = size;
++ind;
@@ -1903,7 +1918,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
qp->rq.next_ind = ind;
qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
- size0 = 0;
}
}
@@ -1945,8 +1959,15 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int nreq;
int i;
int size;
- int size0 = 0;
- u32 f0;
+ /*
+ * f0 and size0 are only used if nreq != 0, and they will
+ * always be initialized the first time through the main loop
+ * before nreq is incremented. So nreq cannot become non-zero
+ * without initializing f0 and size0, and they are in fact
+ * never used uninitialized.
+ */
+ int uninitialized_var(size0);
+ u32 uninitialized_var(f0);
int ind;
u8 op0 = 0;
@@ -1966,7 +1987,6 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
- size0 = 0;
/*
* Make sure that descriptors are written before
@@ -2017,26 +2037,12 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- ((struct mthca_raddr_seg *) wqe)->raddr =
- cpu_to_be64(wr->wr.atomic.remote_addr);
- ((struct mthca_raddr_seg *) wqe)->rkey =
- cpu_to_be32(wr->wr.atomic.rkey);
- ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-
+ set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+ wr->wr.atomic.rkey);
wqe += sizeof (struct mthca_raddr_seg);
- if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
- ((struct mthca_atomic_seg *) wqe)->swap_add =
- cpu_to_be64(wr->wr.atomic.swap);
- ((struct mthca_atomic_seg *) wqe)->compare =
- cpu_to_be64(wr->wr.atomic.compare_add);
- } else {
- ((struct mthca_atomic_seg *) wqe)->swap_add =
- cpu_to_be64(wr->wr.atomic.compare_add);
- ((struct mthca_atomic_seg *) wqe)->compare = 0;
- }
-
- wqe += sizeof (struct mthca_atomic_seg);
+ set_atomic_seg(wqe, wr);
+ wqe += sizeof (struct mthca_atomic_seg);
size += (sizeof (struct mthca_raddr_seg) +
sizeof (struct mthca_atomic_seg)) / 16;
break;
@@ -2044,12 +2050,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- ((struct mthca_raddr_seg *) wqe)->raddr =
- cpu_to_be64(wr->wr.rdma.remote_addr);
- ((struct mthca_raddr_seg *) wqe)->rkey =
- cpu_to_be32(wr->wr.rdma.rkey);
- ((struct mthca_raddr_seg *) wqe)->reserved = 0;
- wqe += sizeof (struct mthca_raddr_seg);
+ set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ wqe += sizeof (struct mthca_raddr_seg);
size += sizeof (struct mthca_raddr_seg) / 16;
break;
@@ -2064,12 +2067,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
- ((struct mthca_raddr_seg *) wqe)->raddr =
- cpu_to_be64(wr->wr.rdma.remote_addr);
- ((struct mthca_raddr_seg *) wqe)->rkey =
- cpu_to_be32(wr->wr.rdma.rkey);
- ((struct mthca_raddr_seg *) wqe)->reserved = 0;
- wqe += sizeof (struct mthca_raddr_seg);
+ set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ wqe += sizeof (struct mthca_raddr_seg);
size += sizeof (struct mthca_raddr_seg) / 16;
break;
@@ -2081,14 +2081,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case UD:
- memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
- to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
- ((struct mthca_arbel_ud_seg *) wqe)->dqpn =
- cpu_to_be32(wr->wr.ud.remote_qpn);
- ((struct mthca_arbel_ud_seg *) wqe)->qkey =
- cpu_to_be32(wr->wr.ud.remote_qkey);
-
- wqe += sizeof (struct mthca_arbel_ud_seg);
+ set_arbel_ud_seg(wqe, wr);
+ wqe += sizeof (struct mthca_arbel_ud_seg);
size += sizeof (struct mthca_arbel_ud_seg) / 16;
break;
@@ -2113,13 +2107,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
for (i = 0; i < wr->num_sge; ++i) {
- ((struct mthca_data_seg *) wqe)->byte_count =
- cpu_to_be32(wr->sg_list[i].length);
- ((struct mthca_data_seg *) wqe)->lkey =
- cpu_to_be32(wr->sg_list[i].lkey);
- ((struct mthca_data_seg *) wqe)->addr =
- cpu_to_be64(wr->sg_list[i].addr);
- wqe += sizeof (struct mthca_data_seg);
+ mthca_set_data_seg(wqe, wr->sg_list + i);
+ wqe += sizeof (struct mthca_data_seg);
size += sizeof (struct mthca_data_seg) / 16;
}
@@ -2151,7 +2140,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
((wr->send_flags & IB_SEND_FENCE) ?
MTHCA_NEXT_FENCE : 0));
- if (!size0) {
+ if (!nreq) {
size0 = size;
op0 = mthca_opcode[wr->opcode];
f0 = wr->send_flags & IB_SEND_FENCE ?
@@ -2241,20 +2230,12 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
}
for (i = 0; i < wr->num_sge; ++i) {
- ((struct mthca_data_seg *) wqe)->byte_count =
- cpu_to_be32(wr->sg_list[i].length);
- ((struct mthca_data_seg *) wqe)->lkey =
- cpu_to_be32(wr->sg_list[i].lkey);
- ((struct mthca_data_seg *) wqe)->addr =
- cpu_to_be64(wr->sg_list[i].addr);
+ mthca_set_data_seg(wqe, wr->sg_list + i);
wqe += sizeof (struct mthca_data_seg);
}
- if (i < qp->rq.max_gs) {
- ((struct mthca_data_seg *) wqe)->byte_count = 0;
- ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
- ((struct mthca_data_seg *) wqe)->addr = 0;
- }
+ if (i < qp->rq.max_gs)
+ mthca_set_data_seg_inval(wqe);
qp->wrid[ind] = wr->wr_id;
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index b8f05a526673..88d219e730ad 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -543,20 +543,12 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
}
for (i = 0; i < wr->num_sge; ++i) {
- ((struct mthca_data_seg *) wqe)->byte_count =
- cpu_to_be32(wr->sg_list[i].length);
- ((struct mthca_data_seg *) wqe)->lkey =
- cpu_to_be32(wr->sg_list[i].lkey);
- ((struct mthca_data_seg *) wqe)->addr =
- cpu_to_be64(wr->sg_list[i].addr);
+ mthca_set_data_seg(wqe, wr->sg_list + i);
wqe += sizeof (struct mthca_data_seg);
}
- if (i < srq->max_gs) {
- ((struct mthca_data_seg *) wqe)->byte_count = 0;
- ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
- ((struct mthca_data_seg *) wqe)->addr = 0;
- }
+ if (i < srq->max_gs)
+ mthca_set_data_seg_inval(wqe);
((struct mthca_next_seg *) prev_wqe)->nda_op =
cpu_to_be32((ind << srq->wqe_shift) | 1);
@@ -662,20 +654,12 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
}
for (i = 0; i < wr->num_sge; ++i) {
- ((struct mthca_data_seg *) wqe)->byte_count =
- cpu_to_be32(wr->sg_list[i].length);
- ((struct mthca_data_seg *) wqe)->lkey =
- cpu_to_be32(wr->sg_list[i].lkey);
- ((struct mthca_data_seg *) wqe)->addr =
- cpu_to_be64(wr->sg_list[i].addr);
+ mthca_set_data_seg(wqe, wr->sg_list + i);
wqe += sizeof (struct mthca_data_seg);
}
- if (i < srq->max_gs) {
- ((struct mthca_data_seg *) wqe)->byte_count = 0;
- ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
- ((struct mthca_data_seg *) wqe)->addr = 0;
- }
+ if (i < srq->max_gs)
+ mthca_set_data_seg_inval(wqe);
srq->wrid[ind] = wr->wr_id;
srq->first_free = next_ind;
diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h
index e7d2c1e86199..f6a66fe78e48 100644
--- a/drivers/infiniband/hw/mthca/mthca_wqe.h
+++ b/drivers/infiniband/hw/mthca/mthca_wqe.h
@@ -113,4 +113,19 @@ struct mthca_mlx_seg {
__be16 vcrc;
};
+static __always_inline void mthca_set_data_seg(struct mthca_data_seg *dseg,
+ struct ib_sge *sg)
+{
+ dseg->byte_count = cpu_to_be32(sg->length);
+ dseg->lkey = cpu_to_be32(sg->lkey);
+ dseg->addr = cpu_to_be64(sg->addr);
+}
+
+static __always_inline void mthca_set_data_seg_inval(struct mthca_data_seg *dseg)
+{
+ dseg->byte_count = 0;
+ dseg->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
+ dseg->addr = 0;
+}
+
#endif /* MTHCA_WQE_H */