Skip to content

Commit 050b787

Browse files
committed
efa: Fix work request index double use
When polling a CQE related to a destroyed QP, we do not update the current work queue reference, which results in "returning" the work request index of the CQE to the wrong pool. This can later result in two different in-flight work requests using the same index and mistakenly completing with the same work request ID. Fix by zeroing the work queue pointer on error and unifying the conditions with the unsolicited completion flow. Fixes: 40ee2e2 ("efa: Introduce create extended CQ support") Reviewed-by: Daniel Kranzdorf <[email protected]> Reviewed-by: Nitzan Lavy <[email protected]> Signed-off-by: Michael Margolin <[email protected]>
1 parent 378bd1b commit 050b787

File tree

1 file changed

+19
-10
lines changed

1 file changed

+19
-10
lines changed

providers/efa/verbs.c

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -647,7 +647,13 @@ static void efa_process_cqe(struct efa_cq *cq, struct ibv_wc *wc,
647647
struct efa_io_rx_cdesc_ex *rcqe =
648648
container_of(cqe, struct efa_io_rx_cdesc_ex, base.common);
649649

650-
cq->cur_wq = &qp->rq.wq;
650+
if (EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_UNSOLICITED)) {
651+
cq->cur_wq = NULL;
652+
wc->wr_id = 0;
653+
} else {
654+
cq->cur_wq = &qp->rq.wq;
655+
wc->wr_id = cq->cur_wq->wrid[wrid_idx];
656+
}
651657

652658
wc->byte_len = rcqe->base.length;
653659

@@ -667,9 +673,6 @@ static void efa_process_cqe(struct efa_cq *cq, struct ibv_wc *wc,
667673
wc->wc_flags |= IBV_WC_WITH_IMM;
668674
}
669675

670-
wc->wr_id = !EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_UNSOLICITED) ?
671-
cq->cur_wq->wrid[wrid_idx] : 0;
672-
673676
rdma_tracepoint(rdma_core_efa, process_completion, cq->dev->name, wc->wr_id,
674677
wc->status, wc->opcode, wc->src_qp, wc->qp_num, wc->slid,
675678
wc->byte_len);
@@ -693,9 +696,14 @@ static void efa_process_ex_cqe(struct efa_cq *cq, struct efa_qp *qp)
693696
ibvcqx->status, efa_wc_read_opcode(ibvcqx), cqe->qp_num,
694697
UINT32_MAX, UINT16_MAX, efa_wc_read_byte_len(ibvcqx));
695698
} else {
696-
cq->cur_wq = &qp->rq.wq;
697-
ibvcqx->wr_id = !EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_UNSOLICITED) ?
698-
cq->cur_wq->wrid[wrid_idx] : 0;
699+
if (EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_UNSOLICITED)) {
700+
cq->cur_wq = NULL;
701+
ibvcqx->wr_id = 0;
702+
} else {
703+
cq->cur_wq = &qp->rq.wq;
704+
ibvcqx->wr_id = cq->cur_wq->wrid[wrid_idx];
705+
}
706+
699707
ibvcqx->status = to_ibv_status(cqe->status);
700708

701709
rdma_tracepoint(rdma_core_efa, process_completion, cq->dev->name, ibvcqx->wr_id,
@@ -727,6 +735,7 @@ static inline int efa_poll_sub_cq(struct efa_cq *cq, struct efa_sub_cq *sub_cq,
727735
*/
728736
*cur_qp = ctx->qp_table[qpn & ctx->qp_table_sz_m1];
729737
if (!*cur_qp) {
738+
cq->cur_wq = NULL;
730739
verbs_err(&ctx->ibvctx,
731740
"QP[%u] does not exist in QP table\n",
732741
qpn);
@@ -738,7 +747,7 @@ static inline int efa_poll_sub_cq(struct efa_cq *cq, struct efa_sub_cq *sub_cq,
738747
efa_process_ex_cqe(cq, *cur_qp);
739748
} else {
740749
efa_process_cqe(cq, wc, *cur_qp);
741-
if (!EFA_GET(&cq->cur_cqe->flags, EFA_IO_CDESC_COMMON_UNSOLICITED))
750+
if (cq->cur_wq)
742751
efa_wq_put_wrid_idx_unlocked(cq->cur_wq, cq->cur_cqe->req_id);
743752
}
744753

@@ -823,7 +832,7 @@ static int efa_next_poll(struct ibv_cq_ex *ibvcqx)
823832
struct efa_cq *cq = to_efa_cq_ex(ibvcqx);
824833
int ret;
825834

826-
if (!EFA_GET(&cq->cur_cqe->flags, EFA_IO_CDESC_COMMON_UNSOLICITED))
835+
if (cq->cur_wq)
827836
efa_wq_put_wrid_idx_unlocked(cq->cur_wq, cq->cur_cqe->req_id);
828837
ret = efa_poll_sub_cqs(cq, NULL, true);
829838

@@ -835,7 +844,7 @@ static void efa_end_poll(struct ibv_cq_ex *ibvcqx)
835844
struct efa_cq *cq = to_efa_cq_ex(ibvcqx);
836845

837846
if (cq->cur_cqe) {
838-
if (!EFA_GET(&cq->cur_cqe->flags, EFA_IO_CDESC_COMMON_UNSOLICITED))
847+
if (cq->cur_wq)
839848
efa_wq_put_wrid_idx_unlocked(cq->cur_wq, cq->cur_cqe->req_id);
840849
if (cq->db)
841850
efa_update_cq_doorbell(cq, false);

0 commit comments

Comments
 (0)