Skip to content

Commit ee1b331

Browse files
committed
efa: Add support to bind QPs and CQs to thread domains
Add an option to pass a parent domain as the PD for QP and CQ creation. If the parent domains of the QP and the CQ are the same, and a thread domain is attached to that parent domain, we can remove the lock from the WQ and improve poll-CQ and post-WR performance. Signed-off-by: Yonatan Nachum <[email protected]>
1 parent a5e9c31 commit ee1b331

File tree

2 files changed

+95
-14
lines changed

2 files changed

+95
-14
lines changed

providers/efa/efa.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ struct efa_cq {
9999
struct efa_wq *cur_wq;
100100
struct efa_io_cdesc_common *cur_cqe;
101101
struct ibv_device *dev;
102+
struct efa_parent_domain *parent_domain;
102103
struct efa_sub_cq sub_cq_arr[];
103104
};
104105

@@ -120,6 +121,7 @@ struct efa_wq {
120121
int max_sge;
121122
int phase;
122123
pthread_spinlock_t wqlock;
124+
bool need_lock;
123125

124126
uint32_t *db;
125127
uint16_t sub_cq_idx;
@@ -158,6 +160,7 @@ struct efa_qp {
158160
int sq_sig_all;
159161
int wr_session_err;
160162
struct ibv_device *dev;
163+
struct efa_parent_domain *parent_domain;
161164
};
162165

163166
struct efa_mr {

providers/efa/verbs.c

Lines changed: 92 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ struct efa_wq_init_attr {
5656
int cmd_fd;
5757
int pgsz;
5858
uint16_t sub_cq_idx;
59+
bool need_lock;
5960
};
6061

6162
int efa_query_port(struct ibv_context *ibvctx, uint8_t port,
@@ -449,11 +450,15 @@ static uint32_t efa_wq_get_next_wrid_idx_locked(struct efa_wq *wq,
449450

450451
static void efa_wq_put_wrid_idx_unlocked(struct efa_wq *wq, uint32_t wrid_idx)
451452
{
452-
pthread_spin_lock(&wq->wqlock);
453+
if (wq->need_lock)
454+
pthread_spin_lock(&wq->wqlock);
455+
453456
wq->wrid_idx_pool_next--;
454457
wq->wrid_idx_pool[wq->wrid_idx_pool_next] = wrid_idx;
455458
wq->wqe_completed++;
456-
pthread_spin_unlock(&wq->wqlock);
459+
460+
if (wq->need_lock)
461+
pthread_spin_unlock(&wq->wqlock);
457462
}
458463

459464
static uint32_t efa_sub_cq_get_current_index(struct efa_sub_cq *sub_cq)
@@ -978,19 +983,21 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
978983
{
979984
struct efa_context *ctx = to_efa_context(ibvctx);
980985
struct verbs_create_cq_prov_attr prov_attr = {};
986+
struct efa_parent_domain *parent_domain = NULL;
981987
uint16_t cqe_size = ctx->ex_cqe_size;
982988
struct efa_create_cq_resp resp = {};
983989
struct efa_create_cq cmd = {};
984990
uint32_t cmd_flags = 0;
985991
uint16_t num_sub_cqs;
986992
struct efa_cq *cq;
993+
struct efa_pd *pd;
987994
int sub_buf_size;
988995
int sub_cq_size;
989996
uint8_t *buf;
990997
int err;
991998
int i;
992999

993-
if (!check_comp_mask(attr->comp_mask, 0) ||
1000+
if (!check_comp_mask(attr->comp_mask, IBV_CQ_INIT_ATTR_MASK_PD) ||
9941001
!check_comp_mask(attr->wc_flags, IBV_WC_STANDARD_FLAGS)) {
9951002
verbs_err(verbs_get_ctx(ibvctx),
9961003
"Invalid comp_mask or wc_flags\n");
@@ -1004,6 +1011,17 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
10041011
return NULL;
10051012
}
10061013

1014+
if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) {
1015+
pd = to_efa_pd(attr->parent_domain);
1016+
if (!pd->orig_pd) {
1017+
verbs_err(verbs_get_ctx(ibvctx), "Parent domain set but not provided\n");
1018+
errno = EINVAL;
1019+
return NULL;
1020+
}
1021+
1022+
parent_domain = to_efa_parent_domain(attr->parent_domain);
1023+
}
1024+
10071025
cq = calloc(1, sizeof(*cq) +
10081026
sizeof(*cq->sub_cq_arr) * ctx->sub_cqs_per_cq);
10091027
if (!cq)
@@ -1040,6 +1058,7 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
10401058
cq->num_sub_cqs = num_sub_cqs;
10411059
cq->cqe_size = cqe_size;
10421060
cq->dev = ibvctx->device;
1061+
cq->parent_domain = parent_domain;
10431062

10441063
if (efa_attr->flags & EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF) {
10451064
cq->buf_size = efa_attr->ext_mem_dmabuf.length;
@@ -1075,6 +1094,8 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
10751094

10761095
efa_cq_fill_pfns(cq, attr, efa_attr);
10771096
pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
1097+
if (cq->parent_domain)
1098+
atomic_fetch_add(&cq->parent_domain->refcount, 1);
10781099

10791100
return &cq->verbs_cq.cq_ex;
10801101

@@ -1198,6 +1219,8 @@ int efa_destroy_cq(struct ibv_cq *ibvcq)
11981219
munmap(cq->buf, cq->buf_size);
11991220

12001221
pthread_spin_destroy(&cq->lock);
1222+
if (cq->parent_domain)
1223+
atomic_fetch_sub(&cq->parent_domain->refcount, 1);
12011224

12021225
free(cq);
12031226

@@ -1208,7 +1231,8 @@ static void efa_wq_terminate(struct efa_wq *wq, int pgsz)
12081231
{
12091232
void *db_aligned;
12101233

1211-
pthread_spin_destroy(&wq->wqlock);
1234+
if (wq->need_lock)
1235+
pthread_spin_destroy(&wq->wqlock);
12121236

12131237
db_aligned = (void *)((uintptr_t)wq->db & ~(pgsz - 1));
12141238
munmap(db_aligned, pgsz);
@@ -1246,7 +1270,9 @@ static int efa_wq_initialize(struct efa_wq *wq, struct efa_wq_init_attr *attr)
12461270
for (i = 0; i < wq->wqe_cnt; i++)
12471271
wq->wrid_idx_pool[i] = i;
12481272

1249-
pthread_spin_init(&wq->wqlock, PTHREAD_PROCESS_PRIVATE);
1273+
wq->need_lock = attr->need_lock;
1274+
if (wq->need_lock)
1275+
pthread_spin_init(&wq->wqlock, PTHREAD_PROCESS_PRIVATE);
12501276

12511277
wq->sub_cq_idx = attr->sub_cq_idx;
12521278

@@ -1259,6 +1285,24 @@ static int efa_wq_initialize(struct efa_wq *wq, struct efa_wq_init_attr *attr)
12591285
return err;
12601286
}
12611287

1288+
static bool efa_check_cq_on_same_pd_td(struct ibv_pd *ibvpd, struct ibv_cq *ibvcq)
1289+
{
1290+
struct efa_parent_domain *parent_domain;
1291+
struct efa_pd *pd;
1292+
struct efa_cq *cq;
1293+
1294+
pd = to_efa_pd(ibvpd);
1295+
cq = to_efa_cq(ibvcq);
1296+
1297+
if (pd->orig_pd) {
1298+
parent_domain = to_efa_parent_domain(ibvpd);
1299+
if (parent_domain == cq->parent_domain && parent_domain->td)
1300+
return true;
1301+
}
1302+
1303+
return false;
1304+
}
1305+
12621306
static void efa_sq_terminate(struct efa_qp *qp)
12631307
{
12641308
struct efa_sq *sq = &qp->sq;
@@ -1280,17 +1324,21 @@ static int efa_sq_initialize(struct efa_qp *qp,
12801324
struct efa_wq_init_attr wq_attr;
12811325
struct efa_sq *sq = &qp->sq;
12821326
size_t desc_ring_size;
1327+
bool need_lock;
12831328
int err;
12841329

12851330
if (!sq->wq.wqe_cnt)
12861331
return 0;
12871332

1333+
need_lock = !efa_check_cq_on_same_pd_td(attr->pd, attr->send_cq);
1334+
12881335
wq_attr = (struct efa_wq_init_attr) {
12891336
.db_mmap_key = resp->sq_db_mmap_key,
12901337
.db_off = resp->sq_db_offset,
12911338
.cmd_fd = qp->verbs_qp.qp.context->cmd_fd,
12921339
.pgsz = qp->page_size,
12931340
.sub_cq_idx = resp->send_sub_cq_idx,
1341+
.need_lock = need_lock,
12941342
};
12951343

12961344
err = efa_wq_initialize(&qp->sq.wq, &wq_attr);
@@ -1356,21 +1404,27 @@ static void efa_rq_terminate(struct efa_qp *qp)
13561404
efa_wq_terminate(&rq->wq, qp->page_size);
13571405
}
13581406

1359-
static int efa_rq_initialize(struct efa_qp *qp, struct efa_create_qp_resp *resp)
1407+
static int efa_rq_initialize(struct efa_qp *qp,
1408+
const struct ibv_qp_init_attr_ex *attr,
1409+
struct efa_create_qp_resp *resp)
13601410
{
13611411
struct efa_wq_init_attr wq_attr;
13621412
struct efa_rq *rq = &qp->rq;
1413+
bool need_lock;
13631414
int err;
13641415

13651416
if (!rq->wq.wqe_cnt)
13661417
return 0;
13671418

1419+
need_lock = !efa_check_cq_on_same_pd_td(attr->pd, attr->recv_cq);
1420+
13681421
wq_attr = (struct efa_wq_init_attr) {
13691422
.db_mmap_key = resp->rq_db_mmap_key,
13701423
.db_off = resp->rq_db_offset,
13711424
.cmd_fd = qp->verbs_qp.qp.context->cmd_fd,
13721425
.pgsz = qp->page_size,
13731426
.sub_cq_idx = resp->recv_sub_cq_idx,
1427+
.need_lock = need_lock,
13741428
};
13751429

13761430
err = efa_wq_initialize(&qp->rq.wq, &wq_attr);
@@ -1584,10 +1638,12 @@ static struct ibv_qp *create_qp(struct ibv_context *ibvctx,
15841638
{
15851639
struct efa_context *ctx = to_efa_context(ibvctx);
15861640
struct efa_dev *dev = to_efa_dev(ibvctx->device);
1641+
struct efa_parent_domain *parent_domain;
15871642
struct efa_create_qp_resp resp = {};
15881643
struct efa_create_qp req = {};
15891644
struct ibv_qp *ibvqp;
15901645
struct efa_qp *qp;
1646+
struct efa_pd *pd;
15911647
int err;
15921648

15931649
err = efa_check_qp_attr(ctx, attr, efa_attr);
@@ -1631,7 +1687,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibvctx,
16311687
qp->sq_sig_all = attr->sq_sig_all;
16321688
qp->dev = ibvctx->device;
16331689

1634-
err = efa_rq_initialize(qp, &resp);
1690+
err = efa_rq_initialize(qp, attr, &resp);
16351691
if (err)
16361692
goto err_destroy_qp;
16371693

@@ -1648,6 +1704,13 @@ static struct ibv_qp *create_qp(struct ibv_context *ibvctx,
16481704
qp->verbs_qp.comp_mask |= VERBS_QP_EX;
16491705
}
16501706

1707+
pd = to_efa_pd(attr->pd);
1708+
if (pd->orig_pd) {
1709+
parent_domain = to_efa_parent_domain(attr->pd);
1710+
qp->parent_domain = parent_domain;
1711+
atomic_fetch_add(&parent_domain->refcount, 1);
1712+
}
1713+
16511714
return ibvqp;
16521715

16531716
err_terminate_rq:
@@ -1852,6 +1915,9 @@ int efa_destroy_qp(struct ibv_qp *ibvqp)
18521915
return err;
18531916
}
18541917

1918+
if (qp->parent_domain)
1919+
atomic_fetch_sub(&qp->parent_domain->refcount, 1);
1920+
18551921
pthread_spin_lock(&ctx->qp_table_lock);
18561922
efa_lock_cqs(ibvqp);
18571923

@@ -2085,7 +2151,9 @@ int efa_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
20852151
struct efa_ah *ah;
20862152
int err = 0;
20872153

2088-
mmio_wc_spinlock(&wq->wqlock);
2154+
if (wq->need_lock)
2155+
mmio_wc_spinlock(&wq->wqlock);
2156+
20892157
while (wr) {
20902158
err = efa_post_send_validate_wr(qp, wr);
20912159
if (err) {
@@ -2151,7 +2219,9 @@ int efa_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
21512219
* Not using mmio_wc_spinunlock as the doorbell write should be done
21522220
* inside the lock.
21532221
*/
2154-
pthread_spin_unlock(&wq->wqlock);
2222+
if (wq->need_lock)
2223+
pthread_spin_unlock(&wq->wqlock);
2224+
21552225
return err;
21562226
}
21572227

@@ -2427,7 +2497,9 @@ static void efa_send_wr_start(struct ibv_qp_ex *ibvqpx)
24272497
struct efa_qp *qp = to_efa_qp_ex(ibvqpx);
24282498
struct efa_sq *sq = &qp->sq;
24292499

2430-
mmio_wc_spinlock(&qp->sq.wq.wqlock);
2500+
if (qp->sq.wq.need_lock)
2501+
mmio_wc_spinlock(&qp->sq.wq.wqlock);
2502+
24312503
qp->wr_session_err = 0;
24322504
sq->num_wqe_pending = 0;
24332505
sq->phase_rb = qp->sq.wq.phase;
@@ -2505,7 +2577,8 @@ static int efa_send_wr_complete(struct ibv_qp_ex *ibvqpx)
25052577
* Not using mmio_wc_spinunlock as the doorbell write should be done
25062578
* inside the lock.
25072579
*/
2508-
pthread_spin_unlock(&sq->wq.wqlock);
2580+
if (sq->wq.need_lock)
2581+
pthread_spin_unlock(&sq->wq.wqlock);
25092582

25102583
return qp->wr_session_err;
25112584
}
@@ -2515,7 +2588,8 @@ static void efa_send_wr_abort(struct ibv_qp_ex *ibvqpx)
25152588
struct efa_sq *sq = &to_efa_qp_ex(ibvqpx)->sq;
25162589

25172590
efa_sq_roll_back(sq);
2518-
pthread_spin_unlock(&sq->wq.wqlock);
2591+
if (sq->wq.need_lock)
2592+
pthread_spin_unlock(&sq->wq.wqlock);
25192593
}
25202594

25212595
static void efa_qp_fill_wr_pfns(struct ibv_qp_ex *ibvqpx,
@@ -2588,7 +2662,9 @@ int efa_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
25882662
int err = 0;
25892663
size_t i;
25902664

2591-
pthread_spin_lock(&wq->wqlock);
2665+
if (wq->need_lock)
2666+
pthread_spin_lock(&wq->wqlock);
2667+
25922668
while (wr) {
25932669
err = efa_post_recv_validate(qp, wr);
25942670
if (err) {
@@ -2641,7 +2717,9 @@ int efa_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
26412717
ring_db:
26422718
efa_rq_ring_doorbell(&qp->rq, wq->pc);
26432719

2644-
pthread_spin_unlock(&wq->wqlock);
2720+
if (wq->need_lock)
2721+
pthread_spin_unlock(&wq->wqlock);
2722+
26452723
return err;
26462724
}
26472725

0 commit comments

Comments (0)