@@ -56,6 +56,7 @@ struct efa_wq_init_attr {
 	int cmd_fd;
 	int pgsz;
 	uint16_t sub_cq_idx;
+	bool need_lock;
 };
 
 int efa_query_port(struct ibv_context *ibvctx, uint8_t port,
@@ -449,11 +450,15 @@ static uint32_t efa_wq_get_next_wrid_idx_locked(struct efa_wq *wq,
 
 static void efa_wq_put_wrid_idx_unlocked(struct efa_wq *wq, uint32_t wrid_idx)
 {
-	pthread_spin_lock(&wq->wqlock);
+	if (wq->need_lock)
+		pthread_spin_lock(&wq->wqlock);
+
 	wq->wrid_idx_pool_next--;
 	wq->wrid_idx_pool[wq->wrid_idx_pool_next] = wrid_idx;
 	wq->wqe_completed++;
-	pthread_spin_unlock(&wq->wqlock);
+
+	if (wq->need_lock)
+		pthread_spin_unlock(&wq->wqlock);
 }
 
 static uint32_t efa_sub_cq_get_current_index(struct efa_sub_cq *sub_cq)
@@ -978,19 +983,21 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
 {
 	struct efa_context *ctx = to_efa_context(ibvctx);
 	struct verbs_create_cq_prov_attr prov_attr = {};
+	struct efa_parent_domain *parent_domain = NULL;
 	uint16_t cqe_size = ctx->ex_cqe_size;
 	struct efa_create_cq_resp resp = {};
 	struct efa_create_cq cmd = {};
 	uint32_t cmd_flags = 0;
 	uint16_t num_sub_cqs;
 	struct efa_cq *cq;
+	struct efa_pd *pd;
 	int sub_buf_size;
 	int sub_cq_size;
 	uint8_t *buf;
 	int err;
 	int i;
 
-	if (!check_comp_mask(attr->comp_mask, 0) ||
+	if (!check_comp_mask(attr->comp_mask, IBV_CQ_INIT_ATTR_MASK_PD) ||
 	    !check_comp_mask(attr->wc_flags, IBV_WC_STANDARD_FLAGS)) {
 		verbs_err(verbs_get_ctx(ibvctx),
 			  "Invalid comp_mask or wc_flags\n");
@@ -1004,6 +1011,17 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
 		return NULL;
 	}
 
+	if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) {
+		pd = to_efa_pd(attr->parent_domain);
+		if (!pd->orig_pd) {
+			verbs_err(verbs_get_ctx(ibvctx), "Parent domain set but not provided\n");
+			errno = EINVAL;
+			return NULL;
+		}
+
+		parent_domain = to_efa_parent_domain(attr->parent_domain);
+	}
+
 	cq = calloc(1, sizeof(*cq) +
 		    sizeof(*cq->sub_cq_arr) * ctx->sub_cqs_per_cq);
 	if (!cq)
@@ -1040,6 +1058,7 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
 	cq->num_sub_cqs = num_sub_cqs;
 	cq->cqe_size = cqe_size;
 	cq->dev = ibvctx->device;
+	cq->parent_domain = parent_domain;
 
 	if (efa_attr->flags & EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF) {
 		cq->buf_size = efa_attr->ext_mem_dmabuf.length;
@@ -1075,6 +1094,8 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
 
 	efa_cq_fill_pfns(cq, attr, efa_attr);
 	pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
+	if (cq->parent_domain)
+		atomic_fetch_add(&cq->parent_domain->refcount, 1);
 
 	return &cq->verbs_cq.cq_ex;
 
@@ -1198,6 +1219,8 @@ int efa_destroy_cq(struct ibv_cq *ibvcq)
 	munmap(cq->buf, cq->buf_size);
 
 	pthread_spin_destroy(&cq->lock);
+	if (cq->parent_domain)
+		atomic_fetch_sub(&cq->parent_domain->refcount, 1);
 
 	free(cq);
 
@@ -1208,7 +1231,8 @@ static void efa_wq_terminate(struct efa_wq *wq, int pgsz)
 {
 	void *db_aligned;
 
-	pthread_spin_destroy(&wq->wqlock);
+	if (wq->need_lock)
+		pthread_spin_destroy(&wq->wqlock);
 
 	db_aligned = (void *)((uintptr_t)wq->db & ~(pgsz - 1));
 	munmap(db_aligned, pgsz);
@@ -1246,7 +1270,9 @@ static int efa_wq_initialize(struct efa_wq *wq, struct efa_wq_init_attr *attr)
 	for (i = 0; i < wq->wqe_cnt; i++)
 		wq->wrid_idx_pool[i] = i;
 
-	pthread_spin_init(&wq->wqlock, PTHREAD_PROCESS_PRIVATE);
+	wq->need_lock = attr->need_lock;
+	if (wq->need_lock)
+		pthread_spin_init(&wq->wqlock, PTHREAD_PROCESS_PRIVATE);
 
 	wq->sub_cq_idx = attr->sub_cq_idx;
 
@@ -1259,6 +1285,24 @@ static int efa_wq_initialize(struct efa_wq *wq, struct efa_wq_init_attr *attr)
 	return err;
 }
 
+static bool efa_check_cq_on_same_pd_td(struct ibv_pd *ibvpd, struct ibv_cq *ibvcq)
+{
+	struct efa_parent_domain *parent_domain;
+	struct efa_pd *pd;
+	struct efa_cq *cq;
+
+	pd = to_efa_pd(ibvpd);
+	cq = to_efa_cq(ibvcq);
+
+	if (pd->orig_pd) {
+		parent_domain = to_efa_parent_domain(ibvpd);
+		if (parent_domain == cq->parent_domain && parent_domain->td)
+			return true;
+	}
+
+	return false;
+}
+
 static void efa_sq_terminate(struct efa_qp *qp)
 {
 	struct efa_sq *sq = &qp->sq;
@@ -1280,17 +1324,21 @@ static int efa_sq_initialize(struct efa_qp *qp,
 	struct efa_wq_init_attr wq_attr;
 	struct efa_sq *sq = &qp->sq;
 	size_t desc_ring_size;
+	bool need_lock;
 	int err;
 
 	if (!sq->wq.wqe_cnt)
 		return 0;
 
+	need_lock = !efa_check_cq_on_same_pd_td(attr->pd, attr->send_cq);
+
 	wq_attr = (struct efa_wq_init_attr) {
 		.db_mmap_key = resp->sq_db_mmap_key,
 		.db_off = resp->sq_db_offset,
 		.cmd_fd = qp->verbs_qp.qp.context->cmd_fd,
 		.pgsz = qp->page_size,
 		.sub_cq_idx = resp->send_sub_cq_idx,
+		.need_lock = need_lock,
 	};
 
 	err = efa_wq_initialize(&qp->sq.wq, &wq_attr);
@@ -1356,21 +1404,27 @@ static void efa_rq_terminate(struct efa_qp *qp)
 	efa_wq_terminate(&rq->wq, qp->page_size);
 }
 
-static int efa_rq_initialize(struct efa_qp *qp, struct efa_create_qp_resp *resp)
+static int efa_rq_initialize(struct efa_qp *qp,
+			     const struct ibv_qp_init_attr_ex *attr,
+			     struct efa_create_qp_resp *resp)
 {
 	struct efa_wq_init_attr wq_attr;
 	struct efa_rq *rq = &qp->rq;
+	bool need_lock;
 	int err;
 
 	if (!rq->wq.wqe_cnt)
 		return 0;
 
+	need_lock = !efa_check_cq_on_same_pd_td(attr->pd, attr->recv_cq);
+
 	wq_attr = (struct efa_wq_init_attr) {
 		.db_mmap_key = resp->rq_db_mmap_key,
 		.db_off = resp->rq_db_offset,
 		.cmd_fd = qp->verbs_qp.qp.context->cmd_fd,
 		.pgsz = qp->page_size,
 		.sub_cq_idx = resp->recv_sub_cq_idx,
+		.need_lock = need_lock,
 	};
 
 	err = efa_wq_initialize(&qp->rq.wq, &wq_attr);
@@ -1584,10 +1638,12 @@ static struct ibv_qp *create_qp(struct ibv_context *ibvctx,
 {
 	struct efa_context *ctx = to_efa_context(ibvctx);
 	struct efa_dev *dev = to_efa_dev(ibvctx->device);
+	struct efa_parent_domain *parent_domain;
 	struct efa_create_qp_resp resp = {};
 	struct efa_create_qp req = {};
 	struct ibv_qp *ibvqp;
 	struct efa_qp *qp;
+	struct efa_pd *pd;
 	int err;
 
 	err = efa_check_qp_attr(ctx, attr, efa_attr);
@@ -1631,7 +1687,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibvctx,
 	qp->sq_sig_all = attr->sq_sig_all;
 	qp->dev = ibvctx->device;
 
-	err = efa_rq_initialize(qp, &resp);
+	err = efa_rq_initialize(qp, attr, &resp);
 	if (err)
 		goto err_destroy_qp;
 
@@ -1648,6 +1704,13 @@ static struct ibv_qp *create_qp(struct ibv_context *ibvctx,
 		qp->verbs_qp.comp_mask |= VERBS_QP_EX;
 	}
 
+	pd = to_efa_pd(attr->pd);
+	if (pd->orig_pd) {
+		parent_domain = to_efa_parent_domain(attr->pd);
+		qp->parent_domain = parent_domain;
+		atomic_fetch_add(&parent_domain->refcount, 1);
+	}
+
 	return ibvqp;
 
 err_terminate_rq:
@@ -1852,6 +1915,9 @@ int efa_destroy_qp(struct ibv_qp *ibvqp)
 		return err;
 	}
 
+	if (qp->parent_domain)
+		atomic_fetch_sub(&qp->parent_domain->refcount, 1);
+
 	pthread_spin_lock(&ctx->qp_table_lock);
 	efa_lock_cqs(ibvqp);
 
@@ -2085,7 +2151,9 @@ int efa_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 	struct efa_ah *ah;
 	int err = 0;
 
-	mmio_wc_spinlock(&wq->wqlock);
+	if (wq->need_lock)
+		mmio_wc_spinlock(&wq->wqlock);
+
 	while (wr) {
 		err = efa_post_send_validate_wr(qp, wr);
 		if (err) {
@@ -2151,7 +2219,9 @@ int efa_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
 	 * Not using mmio_wc_spinunlock as the doorbell write should be done
 	 * inside the lock.
 	 */
-	pthread_spin_unlock(&wq->wqlock);
+	if (wq->need_lock)
+		pthread_spin_unlock(&wq->wqlock);
+
 	return err;
 }
 
@@ -2427,7 +2497,9 @@ static void efa_send_wr_start(struct ibv_qp_ex *ibvqpx)
 	struct efa_qp *qp = to_efa_qp_ex(ibvqpx);
 	struct efa_sq *sq = &qp->sq;
 
-	mmio_wc_spinlock(&qp->sq.wq.wqlock);
+	if (qp->sq.wq.need_lock)
+		mmio_wc_spinlock(&qp->sq.wq.wqlock);
+
 	qp->wr_session_err = 0;
 	sq->num_wqe_pending = 0;
 	sq->phase_rb = qp->sq.wq.phase;
@@ -2505,7 +2577,8 @@ static int efa_send_wr_complete(struct ibv_qp_ex *ibvqpx)
 	 * Not using mmio_wc_spinunlock as the doorbell write should be done
 	 * inside the lock.
 	 */
-	pthread_spin_unlock(&sq->wq.wqlock);
+	if (sq->wq.need_lock)
+		pthread_spin_unlock(&sq->wq.wqlock);
 
 	return qp->wr_session_err;
 }
@@ -2515,7 +2588,8 @@ static void efa_send_wr_abort(struct ibv_qp_ex *ibvqpx)
 	struct efa_sq *sq = &to_efa_qp_ex(ibvqpx)->sq;
 
 	efa_sq_roll_back(sq);
-	pthread_spin_unlock(&sq->wq.wqlock);
+	if (sq->wq.need_lock)
+		pthread_spin_unlock(&sq->wq.wqlock);
 }
 
 static void efa_qp_fill_wr_pfns(struct ibv_qp_ex *ibvqpx,
@@ -2588,7 +2662,9 @@ int efa_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
 	int err = 0;
 	size_t i;
 
-	pthread_spin_lock(&wq->wqlock);
+	if (wq->need_lock)
+		pthread_spin_lock(&wq->wqlock);
+
 	while (wr) {
 		err = efa_post_recv_validate(qp, wr);
 		if (err) {
@@ -2641,7 +2717,9 @@ int efa_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
 ring_db:
 	efa_rq_ring_doorbell(&qp->rq, wq->pc);
 
-	pthread_spin_unlock(&wq->wqlock);
+	if (wq->need_lock)
+		pthread_spin_unlock(&wq->wqlock);
+
 	return err;
 }
 
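For reference, a minimal consumer-side sketch of how this lockless path is reached (not part of the diff; error handling is omitted, and the queue depths and the UD QP type are illustrative assumptions): allocate a thread domain, wrap the PD in a parent domain that carries it, create the CQ with IBV_CQ_INIT_ATTR_MASK_PD on that parent domain, then create the QP on the same parent domain so efa_check_cq_on_same_pd_td() returns true and the WQ spinlocks are skipped.

/*
 * Usage sketch only: assumes the application serializes its own posts to
 * this QP, which is the contract implied by the thread domain. Capability
 * values and the UD QP type are placeholders.
 */
#include <infiniband/verbs.h>

static struct ibv_qp *create_lockless_ud_qp(struct ibv_context *ctx)
{
	struct ibv_parent_domain_init_attr pd_attr = {};
	struct ibv_qp_init_attr_ex qp_attr = {};
	struct ibv_cq_init_attr_ex cq_attr = {};
	struct ibv_td_init_attr td_attr = {};
	struct ibv_pd *pd, *parent_pd;
	struct ibv_cq_ex *cq;
	struct ibv_td *td;

	pd = ibv_alloc_pd(ctx);
	td = ibv_alloc_td(ctx, &td_attr);

	pd_attr.pd = pd;
	pd_attr.td = td;	/* the td is what lets the provider drop the WQ locks */
	parent_pd = ibv_alloc_parent_domain(ctx, &pd_attr);

	cq_attr.cqe = 256;
	cq_attr.comp_mask = IBV_CQ_INIT_ATTR_MASK_PD;
	cq_attr.parent_domain = parent_pd;	/* handled by the new create_cq() branch */
	cq = ibv_create_cq_ex(ctx, &cq_attr);

	qp_attr.send_cq = ibv_cq_ex_to_cq(cq);
	qp_attr.recv_cq = ibv_cq_ex_to_cq(cq);
	qp_attr.cap.max_send_wr = 256;
	qp_attr.cap.max_recv_wr = 256;
	qp_attr.cap.max_send_sge = 1;
	qp_attr.cap.max_recv_sge = 1;
	qp_attr.qp_type = IBV_QPT_UD;
	qp_attr.comp_mask = IBV_QP_INIT_ATTR_PD;
	qp_attr.pd = parent_pd;	/* same parent domain as the CQ -> need_lock is false */

	return ibv_create_qp_ex(ctx, &qp_attr);
}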