Author | Tokens | Token Proportion | Commits | Commit Proportion |
---|---|---|---|---|
Alex Vesker | 5060 | 72.34% | 6 | 13.95% |
Yevgeny Kliteynik | 1593 | 22.77% | 15 | 34.88% |
Leon Romanovsky | 155 | 2.22% | 3 | 6.98% |
Erez Shitrit | 82 | 1.17% | 3 | 6.98% |
Rongwei Liu | 52 | 0.74% | 4 | 9.30% |
Itamar Gozlan | 23 | 0.33% | 1 | 2.33% |
Maor Gottlieb | 8 | 0.11% | 1 | 2.33% |
Parav Pandit | 6 | 0.09% | 1 | 2.33% |
Jianbo Liu | 4 | 0.06% | 1 | 2.33% |
Hamdan Igbaria | 3 | 0.04% | 1 | 2.33% |
Roi Dayan | 3 | 0.04% | 1 | 2.33% |
Maher Sanalla | 2 | 0.03% | 2 | 4.65% |
Tal Gilboa | 1 | 0.01% | 1 | 2.33% |
Denis Efremov | 1 | 0.01% | 1 | 2.33% |
zhang songyi | 1 | 0.01% | 1 | 2.33% |
Aharon Landau | 1 | 0.01% | 1 | 2.33% |
Total | 6995 | 100.00% | 43 | 100.00% |
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2019 Mellanox Technologies. */ #include <linux/smp.h> #include "dr_types.h" #define QUEUE_SIZE 128 #define SIGNAL_PER_DIV_QUEUE 16 #define TH_NUMS_TO_DRAIN 2 #define DR_SEND_INFO_POOL_SIZE 1000 enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 }; struct dr_data_seg { u64 addr; u32 length; u32 lkey; unsigned int send_flags; }; enum send_info_type { WRITE_ICM = 0, GTA_ARG = 1, }; struct postsend_info { enum send_info_type type; struct dr_data_seg write; struct dr_data_seg read; u64 remote_addr; u32 rkey; }; struct dr_qp_rtr_attr { struct mlx5dr_cmd_gid_attr dgid_attr; enum ib_mtu mtu; u32 qp_num; u16 port_num; u8 min_rnr_timer; u8 sgid_index; u16 udp_src_port; u8 fl:1; }; struct dr_qp_rts_attr { u8 timeout; u8 retry_cnt; u8 rnr_retry; }; struct dr_qp_init_attr { u32 cqn; u32 pdn; u32 max_send_wr; struct mlx5_uars_page *uar; u8 isolate_vl_tc:1; }; struct mlx5dr_send_info_pool_obj { struct mlx5dr_ste_send_info ste_send_info; struct mlx5dr_send_info_pool *pool; struct list_head list_node; }; struct mlx5dr_send_info_pool { struct list_head free_list; }; static int dr_send_info_pool_fill(struct mlx5dr_send_info_pool *pool) { struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj; int i; for (i = 0; i < DR_SEND_INFO_POOL_SIZE; i++) { pool_obj = kzalloc(sizeof(*pool_obj), GFP_KERNEL); if (!pool_obj) goto clean_pool; pool_obj->pool = pool; list_add_tail(&pool_obj->list_node, &pool->free_list); } return 0; clean_pool: list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) { list_del(&pool_obj->list_node); kfree(pool_obj); } return -ENOMEM; } static void dr_send_info_pool_destroy(struct mlx5dr_send_info_pool *pool) { struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj; list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) { list_del(&pool_obj->list_node); kfree(pool_obj); } kfree(pool); } void mlx5dr_send_info_pool_destroy(struct mlx5dr_domain *dmn) { dr_send_info_pool_destroy(dmn->send_info_pool_tx); dr_send_info_pool_destroy(dmn->send_info_pool_rx); } static struct mlx5dr_send_info_pool *dr_send_info_pool_create(void) { struct mlx5dr_send_info_pool *pool; int ret; pool = kzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) return NULL; INIT_LIST_HEAD(&pool->free_list); ret = dr_send_info_pool_fill(pool); if (ret) { kfree(pool); return NULL; } return pool; } int mlx5dr_send_info_pool_create(struct mlx5dr_domain *dmn) { dmn->send_info_pool_rx = dr_send_info_pool_create(); if (!dmn->send_info_pool_rx) return -ENOMEM; dmn->send_info_pool_tx = dr_send_info_pool_create(); if (!dmn->send_info_pool_tx) { dr_send_info_pool_destroy(dmn->send_info_pool_rx); return -ENOMEM; } return 0; } struct mlx5dr_ste_send_info *mlx5dr_send_info_alloc(struct mlx5dr_domain *dmn, enum mlx5dr_domain_nic_type nic_type) { struct mlx5dr_send_info_pool_obj *pool_obj; struct mlx5dr_send_info_pool *pool; int ret; pool = nic_type == DR_DOMAIN_NIC_TYPE_RX ? 
dmn->send_info_pool_rx : dmn->send_info_pool_tx; if (unlikely(list_empty(&pool->free_list))) { ret = dr_send_info_pool_fill(pool); if (ret) return NULL; } pool_obj = list_first_entry_or_null(&pool->free_list, struct mlx5dr_send_info_pool_obj, list_node); if (likely(pool_obj)) { list_del_init(&pool_obj->list_node); } else { WARN_ONCE(!pool_obj, "Failed getting ste send info obj from pool"); return NULL; } return &pool_obj->ste_send_info; } void mlx5dr_send_info_free(struct mlx5dr_ste_send_info *ste_send_info) { struct mlx5dr_send_info_pool_obj *pool_obj; pool_obj = container_of(ste_send_info, struct mlx5dr_send_info_pool_obj, ste_send_info); list_add(&pool_obj->list_node, &pool_obj->pool->free_list); } static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64) { unsigned int idx; u8 opcode; opcode = get_cqe_opcode(cqe64); if (opcode == MLX5_CQE_REQ_ERR) { idx = be16_to_cpu(cqe64->wqe_counter) & (dr_cq->qp->sq.wqe_cnt - 1); dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1; } else if (opcode == MLX5_CQE_RESP_ERR) { ++dr_cq->qp->sq.cc; } else { idx = be16_to_cpu(cqe64->wqe_counter) & (dr_cq->qp->sq.wqe_cnt - 1); dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1; return CQ_OK; } return CQ_POLL_ERR; } static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq) { struct mlx5_cqe64 *cqe64; int err; cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq); if (!cqe64) { if (unlikely(dr_cq->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) { mlx5_core_dbg_once(dr_cq->mdev, "Polling CQ while device is shutting down\n"); return CQ_POLL_ERR; } return CQ_EMPTY; } mlx5_cqwq_pop(&dr_cq->wq); err = dr_parse_cqe(dr_cq, cqe64); mlx5_cqwq_update_db_record(&dr_cq->wq); return err; } static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne) { int npolled; int err = 0; for (npolled = 0; npolled < ne; ++npolled) { err = dr_cq_poll_one(dr_cq); if (err != CQ_OK) break; } return err == CQ_POLL_ERR ? 
err : npolled; } static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, struct dr_qp_init_attr *attr) { u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {}; struct mlx5_wq_param wqp; struct mlx5dr_qp *dr_qp; int inlen; void *qpc; void *in; int err; dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL); if (!dr_qp) return NULL; wqp.buf_numa_node = mdev->priv.numa_node; wqp.db_numa_node = mdev->priv.numa_node; dr_qp->rq.pc = 0; dr_qp->rq.cc = 0; dr_qp->rq.wqe_cnt = 256; dr_qp->sq.pc = 0; dr_qp->sq.cc = 0; dr_qp->sq.head = 0; dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr); MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt)); MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt)); err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq, &dr_qp->wq_ctrl); if (err) { mlx5_core_warn(mdev, "Can't create QP WQ\n"); goto err_wq; } dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt, sizeof(dr_qp->sq.wqe_head[0]), GFP_KERNEL); if (!dr_qp->sq.wqe_head) { mlx5_core_warn(mdev, "Can't allocate wqe head\n"); goto err_wqe_head; } inlen = MLX5_ST_SZ_BYTES(create_qp_in) + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * dr_qp->wq_ctrl.buf.npages; in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_in; } qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); MLX5_SET(qpc, qpc, isolate_vl_tc, attr->isolate_vl_tc); MLX5_SET(qpc, qpc, pd, attr->pdn); MLX5_SET(qpc, qpc, uar_page, attr->uar->index); MLX5_SET(qpc, qpc, log_page_size, dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET(qpc, qpc, fre, 1); MLX5_SET(qpc, qpc, rlky, 1); MLX5_SET(qpc, qpc, cqn_snd, attr->cqn); MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn); MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4); MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt)); MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt)); MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev)); MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma); if (MLX5_CAP_GEN(mdev, cqe_version) == 1) MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf, (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas)); MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn); kvfree(in); if (err) goto err_in; dr_qp->uar = attr->uar; return dr_qp; err_in: kfree(dr_qp->sq.wqe_head); err_wqe_head: mlx5_wq_destroy(&dr_qp->wq_ctrl); err_wq: kfree(dr_qp); return NULL; } static void dr_destroy_qp(struct mlx5_core_dev *mdev, struct mlx5dr_qp *dr_qp) { u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn); mlx5_cmd_exec_in(mdev, destroy_qp, in); kfree(dr_qp->sq.wqe_head); mlx5_wq_destroy(&dr_qp->wq_ctrl); kfree(dr_qp); } static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl) { dma_wmb(); *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff); /* After wmb() the hw aware of new work */ wmb(); mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET); } static void dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl, u32 remote_addr, struct dr_data_seg *data_seg, int *size) { struct mlx5_wqe_header_modify_argument_update_seg *wq_arg_seg; struct 
mlx5_wqe_flow_update_ctrl_seg *wq_flow_seg; wq_ctrl->general_id = cpu_to_be32(remote_addr); wq_flow_seg = (void *)(wq_ctrl + 1); /* mlx5_wqe_flow_update_ctrl_seg - all reserved */ memset(wq_flow_seg, 0, sizeof(*wq_flow_seg)); wq_arg_seg = (void *)(wq_flow_seg + 1); memcpy(wq_arg_seg->argument_list, (void *)(uintptr_t)data_seg->addr, data_seg->length); *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */ sizeof(*wq_flow_seg) + /* WQE flow update ctrl seg - reserved */ sizeof(*wq_arg_seg)) / /* WQE hdr modify arg seg - data */ MLX5_SEND_WQE_DS; } static void dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl, u64 remote_addr, u32 rkey, struct dr_data_seg *data_seg, unsigned int *size) { struct mlx5_wqe_raddr_seg *wq_raddr; struct mlx5_wqe_data_seg *wq_dseg; wq_raddr = (void *)(wq_ctrl + 1); wq_raddr->raddr = cpu_to_be64(remote_addr); wq_raddr->rkey = cpu_to_be32(rkey); wq_raddr->reserved = 0; wq_dseg = (void *)(wq_raddr + 1); wq_dseg->byte_count = cpu_to_be32(data_seg->length); wq_dseg->lkey = cpu_to_be32(data_seg->lkey); wq_dseg->addr = cpu_to_be64(data_seg->addr); *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */ sizeof(*wq_dseg) + /* WQE data segment */ sizeof(*wq_raddr)) / /* WQE remote addr segment */ MLX5_SEND_WQE_DS; } static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl, struct dr_data_seg *data_seg) { wq_ctrl->signature = 0; wq_ctrl->rsvd[0] = 0; wq_ctrl->rsvd[1] = 0; wq_ctrl->fm_ce_se = data_seg->send_flags & IB_SEND_SIGNALED ? MLX5_WQE_CTRL_CQ_UPDATE : 0; wq_ctrl->imm = 0; } static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, u32 rkey, struct dr_data_seg *data_seg, u32 opcode, bool notify_hw) { struct mlx5_wqe_ctrl_seg *wq_ctrl; int opcode_mod = 0; unsigned int size; unsigned int idx; idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1); wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx); dr_set_ctrl_seg(wq_ctrl, data_seg); switch (opcode) { case MLX5_OPCODE_RDMA_READ: case MLX5_OPCODE_RDMA_WRITE: dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr, rkey, data_seg, &size); break; case MLX5_OPCODE_FLOW_TBL_ACCESS: opcode_mod = MLX5_CMD_OP_MOD_UPDATE_HEADER_MODIFY_ARGUMENT; dr_rdma_handle_flow_access_arg_segments(wq_ctrl, remote_addr, data_seg, &size); break; default: WARN(true, "illegal opcode %d", opcode); return; } /* -------------------------------------------------------- * |opcode_mod (8 bit)|wqe_index (16 bits)| opcod (8 bits)| * -------------------------------------------------------- */ wq_ctrl->opmod_idx_opcode = cpu_to_be32((opcode_mod << 24) | ((dr_qp->sq.pc & 0xffff) << 8) | opcode); wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8); dr_qp->sq.pc += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); dr_qp->sq.wqe_head[idx] = dr_qp->sq.head++; if (notify_hw) dr_cmd_notify_hw(dr_qp, wq_ctrl); } static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info) { if (send_info->type == WRITE_ICM) { dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, &send_info->write, MLX5_OPCODE_RDMA_WRITE, false); dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, &send_info->read, MLX5_OPCODE_RDMA_READ, true); } else { /* GTA_ARG */ dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, &send_info->write, MLX5_OPCODE_FLOW_TBL_ACCESS, true); } } /** * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent * with send_list parameters: * * @ste: The data that attached to this specific ste * @size: of data to write * @offset: of the data from start of the hw_ste entry * @data: data * @ste_info: 
ste to be sent with send_list * @send_list: to append into it * @copy_data: if true indicates that the data should be kept because * it's not backuped any where (like in re-hash). * if false, it lets the data to be updated after * it was added to the list. */ void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size, u16 offset, u8 *data, struct mlx5dr_ste_send_info *ste_info, struct list_head *send_list, bool copy_data) { ste_info->size = size; ste_info->ste = ste; ste_info->offset = offset; if (copy_data) { memcpy(ste_info->data_cont, data, size); ste_info->data = ste_info->data_cont; } else { ste_info->data = data; } list_add_tail(&ste_info->send_list, send_list); } /* The function tries to consume one wc each time, unless the queue is full, in * that case, which means that the hw is behind the sw in a full queue len * the function will drain the cq till it empty. */ static int dr_handle_pending_wc(struct mlx5dr_domain *dmn, struct mlx5dr_send_ring *send_ring) { bool is_drain = false; int ne; if (send_ring->pending_wqe < send_ring->signal_th) return 0; /* Queue is full start drain it */ if (send_ring->pending_wqe >= dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN) is_drain = true; do { ne = dr_poll_cq(send_ring->cq, 1); if (unlikely(ne < 0)) { mlx5_core_warn_once(dmn->mdev, "SMFS QPN 0x%x is disabled/limited", send_ring->qp->qpn); send_ring->err_state = true; return ne; } else if (ne == 1) { send_ring->pending_wqe -= send_ring->signal_th; } } while (ne == 1 || (is_drain && send_ring->pending_wqe >= send_ring->signal_th)); return 0; } static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring, struct postsend_info *send_info) { send_ring->pending_wqe++; if (send_ring->pending_wqe % send_ring->signal_th == 0) send_info->write.send_flags |= IB_SEND_SIGNALED; else send_info->write.send_flags = 0; } static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn, struct mlx5dr_send_ring *send_ring, struct postsend_info *send_info) { u32 buff_offset; if (send_info->write.length > dmn->info.max_inline_size) { buff_offset = (send_ring->tx_head & (dmn->send_ring->signal_th - 1)) * send_ring->max_post_send_size; /* Copy to ring mr */ memcpy(send_ring->buf + buff_offset, (void *)(uintptr_t)send_info->write.addr, send_info->write.length); send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset; send_info->write.lkey = send_ring->mr->mkey; send_ring->tx_head++; } send_ring->pending_wqe++; if (send_ring->pending_wqe % send_ring->signal_th == 0) send_info->write.send_flags |= IB_SEND_SIGNALED; send_ring->pending_wqe++; send_info->read.length = send_info->write.length; /* Read into dedicated sync buffer */ send_info->read.addr = (uintptr_t)send_ring->sync_mr->dma_addr; send_info->read.lkey = send_ring->sync_mr->mkey; if (send_ring->pending_wqe % send_ring->signal_th == 0) send_info->read.send_flags = IB_SEND_SIGNALED; else send_info->read.send_flags = 0; } static void dr_fill_data_segs(struct mlx5dr_domain *dmn, struct mlx5dr_send_ring *send_ring, struct postsend_info *send_info) { if (send_info->type == WRITE_ICM) dr_fill_write_icm_segs(dmn, send_ring, send_info); else /* args */ dr_fill_write_args_segs(send_ring, send_info); } static int dr_postsend_icm_data(struct mlx5dr_domain *dmn, struct postsend_info *send_info) { struct mlx5dr_send_ring *send_ring = dmn->send_ring; int ret; if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || send_ring->err_state)) { mlx5_core_dbg_once(dmn->mdev, "Skipping post send: QP err state: %d, device state: 
%d\n", send_ring->err_state, dmn->mdev->state); return 0; } spin_lock(&send_ring->lock); ret = dr_handle_pending_wc(dmn, send_ring); if (ret) goto out_unlock; dr_fill_data_segs(dmn, send_ring, send_info); dr_post_send(send_ring->qp, send_info); out_unlock: spin_unlock(&send_ring->lock); return ret; } static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn, struct mlx5dr_ste_htbl *htbl, u8 **data, u32 *byte_size, int *iterations, int *num_stes) { u32 chunk_byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk); int alloc_size; if (chunk_byte_size > dmn->send_ring->max_post_send_size) { *iterations = chunk_byte_size / dmn->send_ring->max_post_send_size; *byte_size = dmn->send_ring->max_post_send_size; alloc_size = *byte_size; *num_stes = *byte_size / DR_STE_SIZE; } else { *iterations = 1; *num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk); alloc_size = *num_stes * DR_STE_SIZE; } *data = kvzalloc(alloc_size, GFP_KERNEL); if (!*data) return -ENOMEM; return 0; } /** * mlx5dr_send_postsend_ste: write size bytes into offset from the hw cm. * * @dmn: Domain * @ste: The ste struct that contains the data (at * least part of it) * @data: The real data to send size data * @size: for writing. * @offset: The offset from the icm mapped data to * start write to this for write only part of the * buffer. * * Return: 0 on success. */ int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste, u8 *data, u16 size, u16 offset) { struct postsend_info send_info = {}; mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data, size); send_info.write.addr = (uintptr_t)data; send_info.write.length = size; send_info.write.lkey = 0; send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset; send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk); return dr_postsend_icm_data(dmn, &send_info); } int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, struct mlx5dr_ste_htbl *htbl, u8 *formatted_ste, u8 *mask) { u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk); int num_stes_per_iter; int iterations; u8 *data; int ret; int i; int j; ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size, &iterations, &num_stes_per_iter); if (ret) return ret; mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, formatted_ste, DR_STE_SIZE); /* Send the data iteration times */ for (i = 0; i < iterations; i++) { u32 ste_index = i * (byte_size / DR_STE_SIZE); struct postsend_info send_info = {}; /* Copy all ste's on the data buffer * need to add the bit_mask */ for (j = 0; j < num_stes_per_iter; j++) { struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[ste_index + j]; u32 ste_off = j * DR_STE_SIZE; if (mlx5dr_ste_is_not_used(ste)) { memcpy(data + ste_off, formatted_ste, DR_STE_SIZE); } else { /* Copy data */ memcpy(data + ste_off, htbl->chunk->hw_ste_arr + DR_STE_SIZE_REDUCED * (ste_index + j), DR_STE_SIZE_REDUCED); /* Copy bit_mask */ memcpy(data + ste_off + DR_STE_SIZE_REDUCED, mask, DR_STE_SIZE_MASK); /* Only when we have mask we need to re-arrange the STE */ mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data + (j * DR_STE_SIZE), DR_STE_SIZE); } } send_info.write.addr = (uintptr_t)data; send_info.write.length = byte_size; send_info.write.lkey = 0; send_info.remote_addr = mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index); send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk); ret = dr_postsend_icm_data(dmn, &send_info); if (ret) goto out_free; } out_free: kvfree(data); return ret; } /* Initialize htble with default STEs */ int mlx5dr_send_postsend_formatted_htbl(struct 
mlx5dr_domain *dmn, struct mlx5dr_ste_htbl *htbl, u8 *ste_init_data, bool update_hw_ste) { u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk); int iterations; int num_stes; u8 *copy_dst; u8 *data; int ret; int i; ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size, &iterations, &num_stes); if (ret) return ret; if (update_hw_ste) { /* Copy the reduced STE to hash table ste_arr */ for (i = 0; i < num_stes; i++) { copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED; memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED); } } mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, ste_init_data, DR_STE_SIZE); /* Copy the same STE on the data buffer */ for (i = 0; i < num_stes; i++) { copy_dst = data + i * DR_STE_SIZE; memcpy(copy_dst, ste_init_data, DR_STE_SIZE); } /* Send the data iteration times */ for (i = 0; i < iterations; i++) { u8 ste_index = i * (byte_size / DR_STE_SIZE); struct postsend_info send_info = {}; send_info.write.addr = (uintptr_t)data; send_info.write.length = byte_size; send_info.write.lkey = 0; send_info.remote_addr = mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index); send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk); ret = dr_postsend_icm_data(dmn, &send_info); if (ret) goto out_free; } out_free: kvfree(data); return ret; } int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, struct mlx5dr_action *action) { struct postsend_info send_info = {}; send_info.write.addr = (uintptr_t)action->rewrite->data; send_info.write.length = action->rewrite->num_of_actions * DR_MODIFY_ACTION_SIZE; send_info.write.lkey = 0; send_info.remote_addr = mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk); send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk); return dr_postsend_icm_data(dmn, &send_info); } int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn, struct mlx5dr_icm_chunk *chunk, u16 num_of_actions, u8 *data) { struct postsend_info send_info = {}; int ret; send_info.write.addr = (uintptr_t)data; send_info.write.length = num_of_actions * DR_MODIFY_ACTION_SIZE; send_info.remote_addr = mlx5dr_icm_pool_get_chunk_mr_addr(chunk); send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(chunk); ret = dr_postsend_icm_data(dmn, &send_info); if (ret) return ret; return 0; } int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id, u16 num_of_actions, u8 *actions_data) { int data_len, iter = 0, cur_sent; u64 addr; int ret; addr = (uintptr_t)actions_data; data_len = num_of_actions * DR_MODIFY_ACTION_SIZE; do { struct postsend_info send_info = {}; send_info.type = GTA_ARG; send_info.write.addr = addr; cur_sent = min_t(u32, data_len, DR_ACTION_CACHE_LINE_SIZE); send_info.write.length = cur_sent; send_info.write.lkey = 0; send_info.remote_addr = arg_id + iter; ret = dr_postsend_icm_data(dmn, &send_info); if (ret) goto out; iter++; addr += cur_sent; data_len -= cur_sent; } while (data_len > 0); out: return ret; } static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev, struct mlx5dr_qp *dr_qp, int port) { u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {}; void *qpc; qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc); MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port); MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED); MLX5_SET(qpc, qpc, rre, 1); MLX5_SET(qpc, qpc, rwe, 1); MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP); MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn); return mlx5_cmd_exec_in(mdev, rst2init_qp, in); } static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev, struct mlx5dr_qp *dr_qp, 
struct dr_qp_rts_attr *attr) { u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {}; void *qpc; qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc); MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn); MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt); MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry); MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */ MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP); MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn); return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in); } static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev, struct mlx5dr_qp *dr_qp, struct dr_qp_rtr_attr *attr) { u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {}; void *qpc; qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc); MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn); MLX5_SET(qpc, qpc, mtu, attr->mtu); MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1); MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num); memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac)); memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip), attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid)); MLX5_SET(qpc, qpc, primary_address_path.src_addr_index, attr->sgid_index); if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2) MLX5_SET(qpc, qpc, primary_address_path.udp_sport, attr->udp_src_port); MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num); MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl); MLX5_SET(qpc, qpc, min_rnr_nak, 1); MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP); MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn); return mlx5_cmd_exec_in(mdev, init2rtr_qp, in); } static bool dr_send_allow_fl(struct mlx5dr_cmd_caps *caps) { /* Check whether RC RoCE QP creation with force loopback is allowed. * There are two separate capability bits for this: * - force loopback when RoCE is enabled * - force loopback when RoCE is disabled */ return ((caps->roce_caps.roce_en && caps->roce_caps.fl_rc_qp_when_roce_enabled) || (!caps->roce_caps.roce_en && caps->roce_caps.fl_rc_qp_when_roce_disabled)); } static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) { struct mlx5dr_qp *dr_qp = dmn->send_ring->qp; struct dr_qp_rts_attr rts_attr = {}; struct dr_qp_rtr_attr rtr_attr = {}; enum ib_mtu mtu = IB_MTU_1024; u16 gid_index = 0; int port = 1; int ret; /* Init */ ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port); if (ret) { mlx5dr_err(dmn, "Failed modify QP rst2init\n"); return ret; } /* RTR */ rtr_attr.mtu = mtu; rtr_attr.qp_num = dr_qp->qpn; rtr_attr.min_rnr_timer = 12; rtr_attr.port_num = port; rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp; /* If QP creation with force loopback is allowed, then there * is no need for GID index when creating the QP. * Otherwise we query GID attributes and use GID index. 
*/ rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps); if (!rtr_attr.fl) { ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr); if (ret) return ret; rtr_attr.sgid_index = gid_index; } ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr); if (ret) { mlx5dr_err(dmn, "Failed modify QP init2rtr\n"); return ret; } /* RTS */ rts_attr.timeout = 14; rts_attr.retry_cnt = 7; rts_attr.rnr_retry = 7; ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr); if (ret) { mlx5dr_err(dmn, "Failed modify QP rtr2rts\n"); return ret; } return 0; } static void dr_cq_complete(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) { pr_err("CQ completion CQ: #%u\n", mcq->cqn); } static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, struct mlx5_uars_page *uar, size_t ncqe) { u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {}; u32 out[MLX5_ST_SZ_DW(create_cq_out)]; struct mlx5_wq_param wqp; struct mlx5_cqe64 *cqe; struct mlx5dr_cq *cq; int inlen, err, eqn; void *cqc, *in; __be64 *pas; int vector; u32 i; cq = kzalloc(sizeof(*cq), GFP_KERNEL); if (!cq) return NULL; ncqe = roundup_pow_of_two(ncqe); MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe)); wqp.buf_numa_node = mdev->priv.numa_node; wqp.db_numa_node = mdev->priv.numa_node; err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq, &cq->wq_ctrl); if (err) goto out; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { cqe = mlx5_cqwq_get_wqe(&cq->wq, i); cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; } inlen = MLX5_ST_SZ_BYTES(create_cq_in) + sizeof(u64) * cq->wq_ctrl.buf.npages; in = kvzalloc(inlen, GFP_KERNEL); if (!in) goto err_cqwq; vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev); err = mlx5_comp_eqn_get(mdev, vector, &eqn); if (err) { kvfree(in); goto err_cqwq; } cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe)); MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET(cqc, cqc, uar_page, uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas); cq->mcq.comp = dr_cq_complete; err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out)); kvfree(in); if (err) goto err_cqwq; cq->mcq.cqe_sz = 64; cq->mcq.set_ci_db = cq->wq_ctrl.db.db; cq->mcq.arm_db = cq->wq_ctrl.db.db + 1; *cq->mcq.set_ci_db = 0; /* set no-zero value, in order to avoid the HW to run db-recovery on * CQ that used in polling mode. 
*/ *cq->mcq.arm_db = cpu_to_be32(2 << 28); cq->mcq.vector = 0; cq->mcq.uar = uar; cq->mdev = mdev; return cq; err_cqwq: mlx5_wq_destroy(&cq->wq_ctrl); out: kfree(cq); return NULL; } static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq) { mlx5_core_destroy_cq(mdev, &cq->mcq); mlx5_wq_destroy(&cq->wq_ctrl); kfree(cq); } static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey) { u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {}; void *mkc; mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); MLX5_SET(mkc, mkc, a, 1); MLX5_SET(mkc, mkc, rw, 1); MLX5_SET(mkc, mkc, rr, 1); MLX5_SET(mkc, mkc, lw, 1); MLX5_SET(mkc, mkc, lr, 1); MLX5_SET(mkc, mkc, pd, pdn); MLX5_SET(mkc, mkc, length64, 1); MLX5_SET(mkc, mkc, qpn, 0xffffff); return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in)); } static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev, u32 pdn, void *buf, size_t size) { struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL); struct device *dma_device; dma_addr_t dma_addr; int err; if (!mr) return NULL; dma_device = mlx5_core_dma_dev(mdev); dma_addr = dma_map_single(dma_device, buf, size, DMA_BIDIRECTIONAL); err = dma_mapping_error(dma_device, dma_addr); if (err) { mlx5_core_warn(mdev, "Can't dma buf\n"); kfree(mr); return NULL; } err = dr_create_mkey(mdev, pdn, &mr->mkey); if (err) { mlx5_core_warn(mdev, "Can't create mkey\n"); dma_unmap_single(dma_device, dma_addr, size, DMA_BIDIRECTIONAL); kfree(mr); return NULL; } mr->dma_addr = dma_addr; mr->size = size; mr->addr = buf; return mr; } static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr) { mlx5_core_destroy_mkey(mdev, mr->mkey); dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size, DMA_BIDIRECTIONAL); kfree(mr); } int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) { struct dr_qp_init_attr init_attr = {}; int cq_size; int size; int ret; dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL); if (!dmn->send_ring) return -ENOMEM; cq_size = QUEUE_SIZE + 1; dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size); if (!dmn->send_ring->cq) { mlx5dr_err(dmn, "Failed creating CQ\n"); ret = -ENOMEM; goto free_send_ring; } init_attr.cqn = dmn->send_ring->cq->mcq.cqn; init_attr.pdn = dmn->pdn; init_attr.uar = dmn->uar; init_attr.max_send_wr = QUEUE_SIZE; /* Isolated VL is applicable only if force loopback is supported */ if (dr_send_allow_fl(&dmn->info.caps)) init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc; spin_lock_init(&dmn->send_ring->lock); dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr); if (!dmn->send_ring->qp) { mlx5dr_err(dmn, "Failed creating QP\n"); ret = -ENOMEM; goto clean_cq; } dmn->send_ring->cq->qp = dmn->send_ring->qp; dmn->info.max_send_wr = QUEUE_SIZE; dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data, DR_STE_SIZE); dmn->send_ring->signal_th = dmn->info.max_send_wr / SIGNAL_PER_DIV_QUEUE; /* Prepare qp to be used */ ret = dr_prepare_qp_to_rts(dmn); if (ret) goto clean_qp; dmn->send_ring->max_post_send_size = mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K, DR_ICM_TYPE_STE); /* Allocating the max size as a buffer for writing */ size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size; dmn->send_ring->buf = kzalloc(size, GFP_KERNEL); if (!dmn->send_ring->buf) { ret = -ENOMEM; goto clean_qp; } dmn->send_ring->buf_size = size; dmn->send_ring->mr = dr_reg_mr(dmn->mdev, dmn->pdn, dmn->send_ring->buf, size); if (!dmn->send_ring->mr) 
{ ret = -ENOMEM; goto free_mem; } dmn->send_ring->sync_buff = kzalloc(dmn->send_ring->max_post_send_size, GFP_KERNEL); if (!dmn->send_ring->sync_buff) { ret = -ENOMEM; goto clean_mr; } dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev, dmn->pdn, dmn->send_ring->sync_buff, dmn->send_ring->max_post_send_size); if (!dmn->send_ring->sync_mr) { ret = -ENOMEM; goto free_sync_mem; } return 0; free_sync_mem: kfree(dmn->send_ring->sync_buff); clean_mr: dr_dereg_mr(dmn->mdev, dmn->send_ring->mr); free_mem: kfree(dmn->send_ring->buf); clean_qp: dr_destroy_qp(dmn->mdev, dmn->send_ring->qp); clean_cq: dr_destroy_cq(dmn->mdev, dmn->send_ring->cq); free_send_ring: kfree(dmn->send_ring); return ret; } void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, struct mlx5dr_send_ring *send_ring) { dr_destroy_qp(dmn->mdev, send_ring->qp); dr_destroy_cq(dmn->mdev, send_ring->cq); dr_dereg_mr(dmn->mdev, send_ring->sync_mr); dr_dereg_mr(dmn->mdev, send_ring->mr); kfree(send_ring->buf); kfree(send_ring->sync_buff); kfree(send_ring); } int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn) { struct mlx5dr_send_ring *send_ring = dmn->send_ring; struct postsend_info send_info = {}; u8 data[DR_STE_SIZE]; int num_of_sends_req; int ret; int i; /* Sending this amount of requests makes sure we will get drain */ num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2; /* Send fake requests forcing the last to be signaled */ send_info.write.addr = (uintptr_t)data; send_info.write.length = DR_STE_SIZE; send_info.write.lkey = 0; /* Using the sync_mr in order to write/read */ send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr; send_info.rkey = send_ring->sync_mr->mkey; for (i = 0; i < num_of_sends_req; i++) { ret = dr_postsend_icm_data(dmn, &send_info); if (ret) return ret; } spin_lock(&send_ring->lock); ret = dr_handle_pending_wc(dmn, send_ring); spin_unlock(&send_ring->lock); return ret; }
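The send-info pool above (mlx5dr_send_info_pool_create / mlx5dr_send_info_alloc / mlx5dr_send_info_free) keeps pre-allocated ste_send_info objects on a per-domain free list so that rule insertion does not pay a kzalloc/kfree per STE write, refilling the list in batches of DR_SEND_INFO_POOL_SIZE when it runs dry. Below is a minimal userspace sketch of that free-list pattern, not the driver code itself: the names example_pool, example_obj and EXAMPLE_POOL_BATCH are invented for illustration, and a plain singly-linked list stands in for the kernel's list_head.

#include <stdio.h>
#include <stdlib.h>

#define EXAMPLE_POOL_BATCH 1000		/* mirrors DR_SEND_INFO_POOL_SIZE */

struct example_obj {
	int payload;			/* stands in for mlx5dr_ste_send_info */
	struct example_obj *next;	/* free-list linkage */
};

struct example_pool {
	struct example_obj *free_list;
};

/* Pre-allocate a batch of objects onto the pool's free list. */
static int example_pool_fill(struct example_pool *pool)
{
	for (int i = 0; i < EXAMPLE_POOL_BATCH; i++) {
		struct example_obj *obj = calloc(1, sizeof(*obj));

		if (!obj)
			return -1;
		obj->next = pool->free_list;
		pool->free_list = obj;
	}
	return 0;
}

/* Pop one object, refilling the pool first if it ran empty. */
static struct example_obj *example_pool_alloc(struct example_pool *pool)
{
	struct example_obj *obj;

	if (!pool->free_list)
		(void)example_pool_fill(pool);

	obj = pool->free_list;
	if (!obj)
		return NULL;
	pool->free_list = obj->next;
	obj->next = NULL;
	return obj;
}

/* Return an object to the pool; O(1) and no free() on the hot path. */
static void example_pool_free(struct example_pool *pool, struct example_obj *obj)
{
	obj->next = pool->free_list;
	pool->free_list = obj;
}

/* Release everything, as dr_send_info_pool_destroy() does on teardown. */
static void example_pool_destroy(struct example_pool *pool)
{
	while (pool->free_list) {
		struct example_obj *obj = pool->free_list;

		pool->free_list = obj->next;
		free(obj);
	}
}

int main(void)
{
	struct example_pool pool = { .free_list = NULL };
	struct example_obj *obj;

	if (example_pool_fill(&pool))
		return 1;

	obj = example_pool_alloc(&pool);
	if (!obj)
		return 1;
	obj->payload = 42;
	printf("allocated object with payload %d\n", obj->payload);
	example_pool_free(&pool, obj);
	example_pool_destroy(&pool);
	return 0;
}

In the driver the RX and TX directions each get their own pool (send_info_pool_rx and send_info_pool_tx), which keeps allocation for the two NIC domains independent.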
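A second detail worth calling out is the batched completion accounting: with QUEUE_SIZE and SIGNAL_PER_DIV_QUEUE as defined above, signal_th works out to 8, only every signal_th-th WQE is posted with IB_SEND_SIGNALED, so a single polled CQE in dr_handle_pending_wc() retires signal_th outstanding WQEs, and once the backlog reaches signal_th * TH_NUMS_TO_DRAIN the ring is drained until it drops below signal_th again. The sketch below simulates only that bookkeeping; there is no real CQ, every simulated poll "succeeds", and the sim_* names are invented for the example.

#include <stdbool.h>
#include <stdio.h>

#define SIM_QUEUE_SIZE		128	/* mirrors QUEUE_SIZE */
#define SIM_SIGNAL_PER_DIV	16	/* mirrors SIGNAL_PER_DIV_QUEUE */
#define SIM_TH_NUMS_TO_DRAIN	2	/* mirrors TH_NUMS_TO_DRAIN */

struct sim_ring {
	int pending_wqe;
	int signal_th;
};

/* Post one WQE; returns true when this WQE must carry the signaled flag. */
static bool sim_post_wqe(struct sim_ring *ring)
{
	ring->pending_wqe++;
	return ring->pending_wqe % ring->signal_th == 0;
}

/* Retire completions; each CQE accounts for signal_th posted WQEs. */
static void sim_handle_pending(struct sim_ring *ring)
{
	bool drain;

	if (ring->pending_wqe < ring->signal_th)
		return;

	drain = ring->pending_wqe >= ring->signal_th * SIM_TH_NUMS_TO_DRAIN;

	do {
		/* The driver calls dr_poll_cq() here; this sketch assumes the
		 * poll always finds a completion.
		 */
		ring->pending_wqe -= ring->signal_th;
	} while (drain && ring->pending_wqe >= ring->signal_th);
}

int main(void)
{
	struct sim_ring ring = {
		.pending_wqe = 0,
		.signal_th = SIM_QUEUE_SIZE / SIM_SIGNAL_PER_DIV,
	};

	for (int i = 0; i < 100; i++) {
		if (sim_post_wqe(&ring))
			printf("WQE %3d posted signaled, pending_wqe=%d\n",
			       i, ring.pending_wqe);
		sim_handle_pending(&ring);
	}
	printf("final pending_wqe=%d\n", ring.pending_wqe);
	return 0;
}

The effect is one completion per signal_th posted WQEs, with the TH_NUMS_TO_DRAIN threshold forcing a full drain whenever the software gets too far ahead of the hardware.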