Contributors: 4
Author Tokens Token Proportion Commits Commit Proportion
Leon Romanovsky 3430 96.11% 2 40.00%
Jason Gunthorpe 133 3.73% 1 20.00%
Mark Bloch 4 0.11% 1 20.00%
Liu Shixin 2 0.06% 1 20.00%
Total 3569 5


// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
 */

#include "mlx5_ib.h"
#include <linux/mlx5/eswitch.h>
#include "counters.h"
#include "ib_rep.h"
#include "qp.h"

struct mlx5_ib_counter {
	const char *name;
	size_t offset;
};

#define INIT_Q_COUNTER(_name)		\
	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}

static const struct mlx5_ib_counter basic_q_cnts[] = {
	INIT_Q_COUNTER(rx_write_requests),
	INIT_Q_COUNTER(rx_read_requests),
	INIT_Q_COUNTER(rx_atomic_requests),
	INIT_Q_COUNTER(out_of_buffer),
};

static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
	INIT_Q_COUNTER(out_of_sequence),
};

static const struct mlx5_ib_counter retrans_q_cnts[] = {
	INIT_Q_COUNTER(duplicate_request),
	INIT_Q_COUNTER(rnr_nak_retry_err),
	INIT_Q_COUNTER(packet_seq_err),
	INIT_Q_COUNTER(implied_nak_seq_err),
	INIT_Q_COUNTER(local_ack_timeout_err),
};

#define INIT_CONG_COUNTER(_name)		\
	{ .name = #_name, .offset =	\
		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}

static const struct mlx5_ib_counter cong_cnts[] = {
	INIT_CONG_COUNTER(rp_cnp_ignored),
	INIT_CONG_COUNTER(rp_cnp_handled),
	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
	INIT_CONG_COUNTER(np_cnp_sent),
};

static const struct mlx5_ib_counter extended_err_cnts[] = {
	INIT_Q_COUNTER(resp_local_length_error),
	INIT_Q_COUNTER(resp_cqe_error),
	INIT_Q_COUNTER(req_cqe_error),
	INIT_Q_COUNTER(req_remote_invalid_request),
	INIT_Q_COUNTER(req_remote_access_errors),
	INIT_Q_COUNTER(resp_remote_access_errors),
	INIT_Q_COUNTER(resp_cqe_flush_error),
	INIT_Q_COUNTER(req_cqe_flush_error),
};

static const struct mlx5_ib_counter roce_accl_cnts[] = {
	INIT_Q_COUNTER(roce_adp_retrans),
	INIT_Q_COUNTER(roce_adp_retrans_to),
	INIT_Q_COUNTER(roce_slow_restart),
	INIT_Q_COUNTER(roce_slow_restart_cnps),
	INIT_Q_COUNTER(roce_slow_restart_trans),
};

#define INIT_EXT_PPCNT_COUNTER(_name)		\
	{ .name = #_name, .offset =	\
	MLX5_BYTE_OFF(ppcnt_reg, \
		      counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}

static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
};

static int mlx5_ib_read_counters(struct ib_counters *counters,
				 struct ib_counters_read_attr *read_attr,
				 struct uverbs_attr_bundle *attrs)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
	struct mlx5_read_counters_attr mread_attr = {};
	struct mlx5_ib_flow_counters_desc *desc;
	int ret, i;

	mutex_lock(&mcounters->mcntrs_mutex);
	if (mcounters->cntrs_max_index > read_attr->ncounters) {
		ret = -EINVAL;
		goto err_bound;
	}

	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
				 GFP_KERNEL);
	if (!mread_attr.out) {
		ret = -ENOMEM;
		goto err_bound;
	}

	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
	mread_attr.flags = read_attr->flags;
	ret = mcounters->read_counters(counters->device, &mread_attr);
	if (ret)
		goto err_read;

	/* do the pass over the counters data array to assign according to the
	 * descriptions and indexing pairs
	 */
	desc = mcounters->counters_data;
	for (i = 0; i < mcounters->ncounters; i++)
		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];

err_read:
	kfree(mread_attr.out);
err_bound:
	mutex_unlock(&mcounters->mcntrs_mutex);
	return ret;
}

static int mlx5_ib_destroy_counters(struct ib_counters *counters)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);

	mlx5_ib_counters_clear_description(counters);
	if (mcounters->hw_cntrs_hndl)
		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
				mcounters->hw_cntrs_hndl);
	return 0;
}

static int mlx5_ib_create_counters(struct ib_counters *counters,
				   struct uverbs_attr_bundle *attrs)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);

	mutex_init(&mcounters->mcntrs_mutex);
	return 0;
}


static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
						   u32 port_num)
{
	return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
						   &dev->port[port_num].cnts;
}

/**
 * mlx5_ib_get_counters_id - Returns counters id to use for device+port
 * @dev:	Pointer to mlx5 IB device
 * @port_num:	Zero based port number
 *
 * mlx5_ib_get_counters_id() Returns counters set id to use for given
 * device port combination in switchdev and non switchdev mode of the
 * parent device.
 */
u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
{
	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);

	return cnts->set_id;
}

static struct rdma_hw_stats *
mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;

	return rdma_alloc_hw_stats_struct(cnts->names,
					  cnts->num_q_counters +
						  cnts->num_cong_counters +
						  cnts->num_ext_ppcnt_counters,
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

static struct rdma_hw_stats *
mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts;

	return rdma_alloc_hw_stats_struct(cnts->names,
					  cnts->num_q_counters +
						  cnts->num_cong_counters +
						  cnts->num_ext_ppcnt_counters,
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
				    const struct mlx5_ib_counters *cnts,
				    struct rdma_hw_stats *stats,
				    u16 set_id)
{
	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
	__be32 val;
	int ret, i;

	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
	MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
	if (ret)
		return ret;

	for (i = 0; i < cnts->num_q_counters; i++) {
		val = *(__be32 *)((void *)out + cnts->offsets[i]);
		stats->value[i] = (u64)be32_to_cpu(val);
	}

	return 0;
}

static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
					    const struct mlx5_ib_counters *cnts,
					    struct rdma_hw_stats *stats)
{
	int offset = cnts->num_q_counters + cnts->num_cong_counters;
	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
	int ret, i;
	void *out;

	out = kvzalloc(sz, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	MLX5_SET(ppcnt_reg, in, local_port, 1);
	MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
	ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
				   0, 0);
	if (ret)
		goto free;

	for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
		stats->value[i + offset] =
			be64_to_cpup((__be64 *)(out +
				    cnts->offsets[i + offset]));
free:
	kvfree(out);
	return ret;
}

static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
				struct rdma_hw_stats *stats,
				u32 port_num, int index)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
	struct mlx5_core_dev *mdev;
	int ret, num_counters;
	u32 mdev_port_num;

	if (!stats)
		return -EINVAL;

	num_counters = cnts->num_q_counters +
		       cnts->num_cong_counters +
		       cnts->num_ext_ppcnt_counters;

	/* q_counters are per IB device, query the master mdev */
	ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
	if (ret)
		return ret;

	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
		ret =  mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
		if (ret)
			return ret;
	}

	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
		mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
						    &mdev_port_num);
		if (!mdev) {
			/* If port is not affiliated yet, its in down state
			 * which doesn't have any counters yet, so it would be
			 * zero. So no need to read from the HCA.
			 */
			goto done;
		}
		ret = mlx5_lag_query_cong_counters(dev->mdev,
						   stats->value +
						   cnts->num_q_counters,
						   cnts->num_cong_counters,
						   cnts->offsets +
						   cnts->num_q_counters);

		mlx5_ib_put_native_port_mdev(dev, port_num);
		if (ret)
			return ret;
	}

done:
	return num_counters;
}

static struct rdma_hw_stats *
mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
{
	struct mlx5_ib_dev *dev = to_mdev(counter->device);
	const struct mlx5_ib_counters *cnts =
		get_counters(dev, counter->port - 1);

	return rdma_alloc_hw_stats_struct(cnts->names,
					  cnts->num_q_counters +
					  cnts->num_cong_counters +
					  cnts->num_ext_ppcnt_counters,
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
{
	struct mlx5_ib_dev *dev = to_mdev(counter->device);
	const struct mlx5_ib_counters *cnts =
		get_counters(dev, counter->port - 1);

	return mlx5_ib_query_q_counters(dev->mdev, cnts,
					counter->stats, counter->id);
}

static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
{
	struct mlx5_ib_dev *dev = to_mdev(counter->device);
	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};

	if (!counter->id)
		return 0;

	MLX5_SET(dealloc_q_counter_in, in, opcode,
		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
	return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
}

static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
				   struct ib_qp *qp)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	int err;

	if (!counter->id) {
		u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
		u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};

		MLX5_SET(alloc_q_counter_in, in, opcode,
			 MLX5_CMD_OP_ALLOC_Q_COUNTER);
		MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
		if (err)
			return err;
		counter->id =
			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
	}

	err = mlx5_ib_qp_set_counter(qp, counter);
	if (err)
		goto fail_set_counter;

	return 0;

fail_set_counter:
	mlx5_ib_counter_dealloc(counter);
	counter->id = 0;

	return err;
}

static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
{
	return mlx5_ib_qp_set_counter(qp, NULL);
}


static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
				  const char **names,
				  size_t *offsets)
{
	int i;
	int j = 0;

	for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
		names[j] = basic_q_cnts[i].name;
		offsets[j] = basic_q_cnts[i].offset;
	}

	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
		for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
			names[j] = out_of_seq_q_cnts[i].name;
			offsets[j] = out_of_seq_q_cnts[i].offset;
		}
	}

	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
		for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
			names[j] = retrans_q_cnts[i].name;
			offsets[j] = retrans_q_cnts[i].offset;
		}
	}

	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
		for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
			names[j] = extended_err_cnts[i].name;
			offsets[j] = extended_err_cnts[i].offset;
		}
	}

	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
		for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
			names[j] = roce_accl_cnts[i].name;
			offsets[j] = roce_accl_cnts[i].offset;
		}
	}

	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
			names[j] = cong_cnts[i].name;
			offsets[j] = cong_cnts[i].offset;
		}
	}

	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
			names[j] = ext_ppcnt_cnts[i].name;
			offsets[j] = ext_ppcnt_cnts[i].offset;
		}
	}
}


static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
				    struct mlx5_ib_counters *cnts)
{
	u32 num_counters;

	num_counters = ARRAY_SIZE(basic_q_cnts);

	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
		num_counters += ARRAY_SIZE(out_of_seq_q_cnts);

	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
		num_counters += ARRAY_SIZE(retrans_q_cnts);

	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
		num_counters += ARRAY_SIZE(extended_err_cnts);

	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
		num_counters += ARRAY_SIZE(roce_accl_cnts);

	cnts->num_q_counters = num_counters;

	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
		num_counters += ARRAY_SIZE(cong_cnts);
	}
	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
	}
	cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL);
	if (!cnts->names)
		return -ENOMEM;

	cnts->offsets = kcalloc(num_counters,
				sizeof(*cnts->offsets), GFP_KERNEL);
	if (!cnts->offsets)
		goto err_names;

	return 0;

err_names:
	kfree(cnts->names);
	cnts->names = NULL;
	return -ENOMEM;
}

static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
	int num_cnt_ports;
	int i;

	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;

	MLX5_SET(dealloc_q_counter_in, in, opcode,
		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);

	for (i = 0; i < num_cnt_ports; i++) {
		if (dev->port[i].cnts.set_id) {
			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
				 dev->port[i].cnts.set_id);
			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
		}
		kfree(dev->port[i].cnts.names);
		kfree(dev->port[i].cnts.offsets);
	}
}

static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
	int num_cnt_ports;
	int err = 0;
	int i;
	bool is_shared;

	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;

	for (i = 0; i < num_cnt_ports; i++) {
		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
		if (err)
			goto err_alloc;

		mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
				      dev->port[i].cnts.offsets);

		MLX5_SET(alloc_q_counter_in, in, uid,
			 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);

		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
		if (err) {
			mlx5_ib_warn(dev,
				     "couldn't allocate queue counter for port %d, err %d\n",
				     i + 1, err);
			goto err_alloc;
		}

		dev->port[i].cnts.set_id =
			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
	}
	return 0;

err_alloc:
	mlx5_ib_dealloc_counters(dev);
	return err;
}

static int read_flow_counters(struct ib_device *ibdev,
			      struct mlx5_read_counters_attr *read_attr)
{
	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);

	return mlx5_fc_query(dev->mdev, fc,
			     &read_attr->out[IB_COUNTER_PACKETS],
			     &read_attr->out[IB_COUNTER_BYTES]);
}

/* flow counters currently expose two counters packets and bytes */
#define FLOW_COUNTERS_NUM 2
static int counters_set_description(
	struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
	struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
	u32 cntrs_max_index = 0;
	int i;

	if (counters_type != MLX5_IB_COUNTERS_FLOW)
		return -EINVAL;

	/* init the fields for the object */
	mcounters->type = counters_type;
	mcounters->read_counters = read_flow_counters;
	mcounters->counters_num = FLOW_COUNTERS_NUM;
	mcounters->ncounters = ncounters;
	/* each counter entry have both description and index pair */
	for (i = 0; i < ncounters; i++) {
		if (desc_data[i].description > IB_COUNTER_BYTES)
			return -EINVAL;

		if (cntrs_max_index <= desc_data[i].index)
			cntrs_max_index = desc_data[i].index + 1;
	}

	mutex_lock(&mcounters->mcntrs_mutex);
	mcounters->counters_data = desc_data;
	mcounters->cntrs_max_index = cntrs_max_index;
	mutex_unlock(&mcounters->mcntrs_mutex);

	return 0;
}

#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
				   struct mlx5_ib_create_flow *ucmd)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
	bool hw_hndl = false;
	int ret = 0;

	if (ucmd && ucmd->ncounters_data != 0) {
		cntrs_data = ucmd->data;
		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
			return -EINVAL;

		desc_data = kcalloc(cntrs_data->ncounters,
				    sizeof(*desc_data),
				    GFP_KERNEL);
		if (!desc_data)
			return  -ENOMEM;

		if (copy_from_user(desc_data,
				   u64_to_user_ptr(cntrs_data->counters_data),
				   sizeof(*desc_data) * cntrs_data->ncounters)) {
			ret = -EFAULT;
			goto free;
		}
	}

	if (!mcounters->hw_cntrs_hndl) {
		mcounters->hw_cntrs_hndl = mlx5_fc_create(
			to_mdev(ibcounters->device)->mdev, false);
		if (IS_ERR(mcounters->hw_cntrs_hndl)) {
			ret = PTR_ERR(mcounters->hw_cntrs_hndl);
			goto free;
		}
		hw_hndl = true;
	}

	if (desc_data) {
		/* counters already bound to at least one flow */
		if (mcounters->cntrs_max_index) {
			ret = -EINVAL;
			goto free_hndl;
		}

		ret = counters_set_description(ibcounters,
					       MLX5_IB_COUNTERS_FLOW,
					       desc_data,
					       cntrs_data->ncounters);
		if (ret)
			goto free_hndl;

	} else if (!mcounters->cntrs_max_index) {
		/* counters not bound yet, must have udata passed */
		ret = -EINVAL;
		goto free_hndl;
	}

	return 0;

free_hndl:
	if (hw_hndl) {
		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
				mcounters->hw_cntrs_hndl);
		mcounters->hw_cntrs_hndl = NULL;
	}
free:
	kfree(desc_data);
	return ret;
}

void mlx5_ib_counters_clear_description(struct ib_counters *counters)
{
	struct mlx5_ib_mcounters *mcounters;

	if (!counters || atomic_read(&counters->usecnt) != 1)
		return;

	mcounters = to_mcounters(counters);

	mutex_lock(&mcounters->mcntrs_mutex);
	kfree(mcounters->counters_data);
	mcounters->counters_data = NULL;
	mcounters->cntrs_max_index = 0;
	mutex_unlock(&mcounters->mcntrs_mutex);
}

static const struct ib_device_ops hw_stats_ops = {
	.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
	.get_hw_stats = mlx5_ib_get_hw_stats,
	.counter_bind_qp = mlx5_ib_counter_bind_qp,
	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
	.counter_dealloc = mlx5_ib_counter_dealloc,
	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
	.counter_update_stats = mlx5_ib_counter_update_stats,
};

static const struct ib_device_ops hw_switchdev_stats_ops = {
	.alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
	.get_hw_stats = mlx5_ib_get_hw_stats,
	.counter_bind_qp = mlx5_ib_counter_bind_qp,
	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
	.counter_dealloc = mlx5_ib_counter_dealloc,
	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
	.counter_update_stats = mlx5_ib_counter_update_stats,
};

static const struct ib_device_ops counters_ops = {
	.create_counters = mlx5_ib_create_counters,
	.destroy_counters = mlx5_ib_destroy_counters,
	.read_counters = mlx5_ib_read_counters,

	INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
};

int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
{
	ib_set_device_ops(&dev->ib_dev, &counters_ops);

	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
		return 0;

	if (is_mdev_switchdev_mode(dev->mdev))
		ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
	else
		ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
	return mlx5_ib_alloc_counters(dev);
}

void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
{
	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
		return;

	mlx5_ib_dealloc_counters(dev);
}