Skip to content

Commit 22c4e37

Browse files
mrgolin authored and YonatanNachum committed
efa: Add option to create CQ with external memory
Extend the EFA direct verbs interface to enable creation of CQs on top of pre-allocated memory buffers. The memory can be passed by supplying a dmabuf fd and offset.

Reviewed-by: Daniel Kranzdorf <[email protected]>
Reviewed-by: Yonatan Nachum <[email protected]>
Signed-off-by: Michael Margolin <[email protected]>
1 parent 8d0ce82 commit 22c4e37

File tree

5 files changed

+97
-21
lines changed

5 files changed

+97
-21
lines changed

providers/efa/efa.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ struct efa_cq {
6969
size_t cqe_size;
7070
uint8_t *buf;
7171
size_t buf_size;
72+
bool buf_mmaped;
7273
uint32_t *db;
7374
uint8_t *db_mmap_addr;
7475
uint16_t cc; /* Consumer Counter */

providers/efa/efadv.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ enum {
4848
EFADV_DEVICE_ATTR_CAPS_CQ_WITH_SGID = 1 << 2,
4949
EFADV_DEVICE_ATTR_CAPS_RDMA_WRITE = 1 << 3,
5050
EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV = 1 << 4,
51+
EFADV_DEVICE_ATTR_CAPS_CQ_WITH_EXT_MEM_DMABUF = 1 << 5,
5152
};
5253

5354
struct efadv_device_attr {
@@ -86,9 +87,21 @@ enum {
8687
EFADV_WC_EX_WITH_IS_UNSOLICITED = 1 << 1,
8788
};
8889

90+
/* Bit values for the `flags` field of struct efadv_cq_init_attr. */
enum {
91+
EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF = 1 << 0, /* create the CQ on external memory provided via dmabuf */
92+
};
93+
8994
/*
 * EFA-specific CQ creation attributes passed to efadv_create_cq().
 * ext_mem_dmabuf is consulted only when EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF
 * is set in flags.
 */
struct efadv_cq_init_attr {
9095
uint64_t comp_mask; /* compatibility mask (used together with inlen) */
9196
uint64_t wc_flags; /* bitwise OR of EFADV_WC_EX_WITH_* values */
97+
uint64_t flags; /* bitwise OR of EFADV_CQ_INIT_FLAGS_* values */
98+
struct {
99+
uint8_t *buffer; /* optional: the external memory as mapped in the process's address space */
100+
uint64_t length; /* length of the memory region to use */
101+
uint64_t offset; /* offset within the dmabuf */
102+
int32_t fd; /* file descriptor of the dmabuf */
103+
uint8_t reserved[4];
104+
} ext_mem_dmabuf;
92105
};
93106

94107
struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx,

providers/efa/man/efadv_create_cq.3.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,14 @@ Compatibility is handled using the comp_mask and inlen fields.
4747
struct efadv_cq_init_attr {
4848
uint64_t comp_mask;
4949
uint64_t wc_flags;
50+
uint64_t flags;
51+
struct {
52+
uint8_t *buffer;
53+
uint64_t length;
54+
uint64_t offset;
55+
int32_t fd;
56+
uint8_t reserved[4];
57+
} ext_mem_dmabuf;
5058
};
5159
```
5260

@@ -65,6 +73,28 @@ struct efadv_cq_init_attr {
6573
EFADV_WC_EX_WITH_IS_UNSOLICITED:
6674
request for an option to check whether a receive WC is unsolicited.
6775

76+
*flags*
77+
: A bitwise OR of the various values described below.
78+
79+
EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF:
80+
create CQ with external memory provided via dmabuf.
81+
82+
*ext_mem_dmabuf*
83+
: Structure containing information about external memory when using
84+
EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF flag.
85+
86+
buffer:
87+
Pointer to the memory mapped in the process's virtual address space. The field is
88+
optional, but if it is not provided the library cannot read the CQ entries, so the CQ poll interfaces must not be used.
89+
90+
length:
91+
Length of the memory region to use.
92+
93+
fd:
94+
File descriptor of the dmabuf.
95+
96+
offset:
97+
Offset within the dmabuf.
6898

6999
# Completion iterator functions
70100

providers/efa/man/efadv_query_device.3.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ struct efadv_device_attr {
8585
requests in order to receive RDMA write with immediate and a WC generated for such
8686
receive will be marked as unsolicited.
8787

88+
EFADV_DEVICE_ATTR_CAPS_CQ_WITH_EXT_MEM_DMABUF:
89+
Indicates that creating CQs with external memory buffers by passing a dmabuf is
90+
supported.
91+
8892
*max_rdma_size*
8993
: Maximum RDMA transfer size in bytes.
9094

providers/efa/verbs.c

Lines changed: 49 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,9 @@ int efadv_query_device(struct ibv_context *ibvctx,
175175

176176
if (EFA_DEV_CAP(ctx, UNSOLICITED_WRITE_RECV))
177177
attr->device_caps |= EFADV_DEVICE_ATTR_CAPS_UNSOLICITED_WRITE_RECV;
178+
179+
if (EFA_DEV_CAP(ctx, CQ_WITH_EXT_MEM))
180+
attr->device_caps |= EFADV_DEVICE_ATTR_CAPS_CQ_WITH_EXT_MEM_DMABUF;
178181
}
179182

180183
if (vext_field_avail(typeof(*attr), max_rdma_size, inlen)) {
@@ -879,9 +882,9 @@ static void efa_cq_fill_pfns(struct efa_cq *cq,
879882
if (attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
880883
ibvcqx->read_dlid_path_bits = efa_wc_read_dlid_path_bits;
881884

882-
if (efa_attr && (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID))
885+
if (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID)
883886
cq->dv_cq.wc_read_sgid = efa_wc_read_sgid;
884-
if (efa_attr && (efa_attr->wc_flags & EFADV_WC_EX_WITH_IS_UNSOLICITED))
887+
if (efa_attr->wc_flags & EFADV_WC_EX_WITH_IS_UNSOLICITED)
885888
cq->dv_cq.wc_is_unsolicited = efa_wc_is_unsolicited;
886889
}
887890

@@ -900,9 +903,11 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
900903
struct efadv_cq_init_attr *efa_attr)
901904
{
902905
struct efa_context *ctx = to_efa_context(ibvctx);
906+
struct verbs_create_cq_prov_attr prov_attr = {};
903907
uint16_t cqe_size = ctx->ex_cqe_size;
904908
struct efa_create_cq_resp resp = {};
905909
struct efa_create_cq cmd = {};
910+
uint32_t cmd_flags = 0;
906911
uint16_t num_sub_cqs;
907912
struct efa_cq *cq;
908913
int sub_buf_size;
@@ -930,42 +935,58 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
930935
if (!cq)
931936
return NULL;
932937

933-
if (efa_attr && (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID))
938+
if (efa_attr->wc_flags & EFADV_WC_EX_WITH_SGID)
934939
cmd.flags |= EFA_CREATE_CQ_WITH_SGID;
935940

936941
num_sub_cqs = ctx->sub_cqs_per_cq;
937942
cmd.num_sub_cqs = num_sub_cqs;
938943
cmd.cq_entry_size = cqe_size;
944+
945+
if (efa_attr->flags & EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF) {
946+
prov_attr.buffer.length = efa_attr->ext_mem_dmabuf.length;
947+
prov_attr.buffer.dmabuf.offset = efa_attr->ext_mem_dmabuf.offset;
948+
prov_attr.buffer.dmabuf.fd = efa_attr->ext_mem_dmabuf.fd;
949+
cmd_flags = CREATE_CQ_CMD_FLAGS_WITH_MEM_DMABUF;
950+
}
951+
939952
if (attr->channel)
940953
cmd.flags |= EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL;
941954

942955
attr->cqe = roundup_pow_of_two(attr->cqe);
943-
err = ibv_cmd_create_cq_ex(ibvctx, attr, NULL, &cq->verbs_cq,
956+
err = ibv_cmd_create_cq_ex(ibvctx, attr, &prov_attr, &cq->verbs_cq,
944957
&cmd.ibv_cmd, sizeof(cmd),
945-
&resp.ibv_resp, sizeof(resp), 0);
958+
&resp.ibv_resp, sizeof(resp), cmd_flags);
946959
if (err) {
947960
errno = err;
948961
goto err_free_cq;
949962
}
950963

951964
sub_cq_size = cq->verbs_cq.cq.cqe;
952965
cq->cqn = resp.cq_idx;
953-
cq->buf_size = resp.q_mmap_size;
954966
cq->num_sub_cqs = num_sub_cqs;
955967
cq->cqe_size = cqe_size;
956968
cq->dev = ibvctx->device;
957969

958-
cq->buf = mmap(NULL, cq->buf_size, PROT_READ, MAP_SHARED,
959-
ibvctx->cmd_fd, resp.q_mmap_key);
960-
if (cq->buf == MAP_FAILED)
961-
goto err_destroy_cq;
970+
if (efa_attr->flags & EFADV_CQ_INIT_FLAGS_EXT_MEM_DMABUF) {
971+
cq->buf_size = efa_attr->ext_mem_dmabuf.length;
972+
cq->buf = efa_attr->ext_mem_dmabuf.buffer;
973+
} else {
974+
cq->buf_size = resp.q_mmap_size;
975+
cq->buf = mmap(NULL, cq->buf_size, PROT_READ, MAP_SHARED, ibvctx->cmd_fd,
976+
resp.q_mmap_key);
977+
if (cq->buf == MAP_FAILED)
978+
goto err_destroy_cq;
979+
980+
cq->buf_mmaped = true;
981+
}
962982

963-
buf = cq->buf;
964-
sub_buf_size = cq->cqe_size * sub_cq_size;
965-
for (i = 0; i < num_sub_cqs; i++) {
966-
efa_sub_cq_initialize(&cq->sub_cq_arr[i], buf, sub_cq_size,
967-
cq->cqe_size);
968-
buf += sub_buf_size;
983+
if (cq->buf) {
984+
buf = cq->buf;
985+
sub_buf_size = cq->cqe_size * sub_cq_size;
986+
for (i = 0; i < num_sub_cqs; i++) {
987+
efa_sub_cq_initialize(&cq->sub_cq_arr[i], buf, sub_cq_size, cq->cqe_size);
988+
buf += sub_buf_size;
989+
}
969990
}
970991

971992
if (resp.comp_mask & EFA_CREATE_CQ_RESP_DB_OFF) {
@@ -984,7 +1005,8 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
9841005
return &cq->verbs_cq.cq_ex;
9851006

9861007
err_unmap_cq:
987-
munmap(cq->buf, cq->buf_size);
1008+
if (cq->buf_mmaped)
1009+
munmap(cq->buf, cq->buf_size);
9881010
err_destroy_cq:
9891011
ibv_cmd_destroy_cq(&cq->verbs_cq.cq);
9901012
err_free_cq:
@@ -996,29 +1018,33 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *ibvctx,
9961018
/*
 * Create a CQ from the basic (ncqe, channel, vec) arguments.
 *
 * Packs the arguments into an ibv_cq_init_attr_ex and calls create_cq()
 * with zero-initialized EFA-specific attributes, i.e. no EFA wc_flags and
 * no external memory. Returns the ibv_cq view of the created CQ, or NULL
 * if create_cq() returned NULL.
 */
struct ibv_cq *efa_create_cq(struct ibv_context *ibvctx, int ncqe,
9971019
struct ibv_comp_channel *channel, int vec)
9981020
{
1021+
struct efadv_cq_init_attr efa_attr = {}; /* zeroed stand-in: create_cq() now reads efa_attr without a NULL check */
9991022
struct ibv_cq_init_attr_ex attr_ex = {
10001023
.cqe = ncqe,
10011024
.channel = channel,
10021025
.comp_vector = vec
10031026
};
10041027
struct ibv_cq_ex *ibvcqx;
10051028

1006-
ibvcqx = create_cq(ibvctx, &attr_ex, NULL);
1029+
ibvcqx = create_cq(ibvctx, &attr_ex, &efa_attr);
10071030

10081031
/* Convert the extended CQ handle to a plain ibv_cq only on success. */
return ibvcqx ? ibv_cq_ex_to_cq(ibvcqx) : NULL;
10091032
}
10101033

10111034
/*
 * Create an extended CQ from caller-supplied ibv_cq_init_attr_ex.
 *
 * Forwards to create_cq() with zero-initialized EFA-specific attributes
 * and returns its result unchanged.
 */
struct ibv_cq_ex *efa_create_cq_ex(struct ibv_context *ibvctx,
10121035
struct ibv_cq_init_attr_ex *attr_ex)
10131036
{
1014-
return create_cq(ibvctx, attr_ex, NULL);
1037+
struct efadv_cq_init_attr efa_attr = {}; /* zeroed stand-in: create_cq() now reads efa_attr without a NULL check */
1038+

1039+
return create_cq(ibvctx, attr_ex, &efa_attr);
10151040
}
10161041

10171042
struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx,
10181043
struct ibv_cq_init_attr_ex *attr_ex,
10191044
struct efadv_cq_init_attr *efa_attr,
10201045
uint32_t inlen)
10211046
{
1047+
struct efadv_cq_init_attr local_efa_attr = {};
10221048
uint64_t supp_wc_flags = 0;
10231049
struct efa_context *ctx;
10241050

@@ -1048,7 +1074,8 @@ struct ibv_cq_ex *efadv_create_cq(struct ibv_context *ibvctx,
10481074
return NULL;
10491075
}
10501076

1051-
return create_cq(ibvctx, attr_ex, efa_attr);
1077+
memcpy(&local_efa_attr, efa_attr, min_t(uint32_t, inlen, sizeof(local_efa_attr)));
1078+
return create_cq(ibvctx, attr_ex, &local_efa_attr);
10521079
}
10531080

10541081
struct efadv_cq *efadv_cq_from_ibv_cq_ex(struct ibv_cq_ex *ibvcqx)
@@ -1071,7 +1098,8 @@ int efa_destroy_cq(struct ibv_cq *ibvcq)
10711098
}
10721099

10731100
munmap(cq->db_mmap_addr, to_efa_dev(cq->dev)->pg_sz);
1074-
munmap(cq->buf, cq->buf_size);
1101+
if (cq->buf_mmaped)
1102+
munmap(cq->buf, cq->buf_size);
10751103

10761104
pthread_spin_destroy(&cq->lock);
10771105

0 commit comments

Comments
 (0)