Contributors: 7
Author Tokens Token Proportion Commits Commit Proportion
Sudeep Dutt 3468 53.58% 3 30.00%
Nikhil P Rao 2106 32.54% 1 10.00%
Ashutosh Dixit 864 13.35% 2 20.00%
Dan Carpenter 15 0.23% 1 10.00%
Linus Torvalds 14 0.22% 1 10.00%
Al Viro 5 0.08% 1 10.00%
Gustavo A. R. Silva 1 0.02% 1 10.00%
Total 6473 10


/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Intel SCIF driver.
 *
 */
#include <linux/scif.h>
#include "scif_main.h"
#include "scif_map.h"

/*
 * Printable endpoint state names. The debug traces in this file index the
 * table directly with ep->state, so the order of the entries matters.
 */
static const char * const scif_ep_states[] = {
	"Unbound",
	"Bound",
	"Listening",
	"Connected",
	"Connecting",
	"Mapping",
	"Closing",
	"Close Listening",
	"Disconnected",
	"Zombie",
};

/* Progress of a non-blocking connect, kept in ep->conn_async_state. */
enum conn_async_state {
	/* ep setup for async connect */
	ASYNC_CONN_IDLE = 1,
	/* async connect in progress */
	ASYNC_CONN_INPROGRESS = 2,
	/* async work flush in progress */
	ASYNC_CONN_FLUSH_WORK = 3,
};

/*
 * File operations for anonymous inode file associated with a SCIF endpoint,
 * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
 * poll API in the kernel and these take in a struct file *. Since a struct
 * file is not available to kernel mode SCIF, it uses an anonymous file for
 * this purpose.
 *
 * Only the owning module is recorded; no file operation callbacks are set.
 */
const struct file_operations scif_anon_fops = {
	.owner = THIS_MODULE,
};

/*
 * scif_open - allocate and initialise a new SCIF endpoint
 *
 * Allocates the endpoint together with its queue pair descriptor, obtains the
 * endpoint's anonymous file through scif_anon_inode_getfile(), initialises
 * the locks and RMA state and leaves the endpoint in SCIFEP_UNBOUND.
 * May sleep.
 *
 * Return: the new endpoint descriptor, or NULL if any of the setup steps
 * failed (everything allocated so far is released again).
 */
scif_epd_t scif_open(void)
{
	struct scif_endpt *new_ep;

	might_sleep();

	new_ep = kzalloc(sizeof(*new_ep), GFP_KERNEL);
	if (!new_ep)
		return NULL;

	new_ep->qp_info.qp = kzalloc(sizeof(*new_ep->qp_info.qp), GFP_KERNEL);
	if (!new_ep->qp_info.qp)
		goto free_ep;

	if (scif_anon_inode_getfile(new_ep))
		goto free_qp;

	spin_lock_init(&new_ep->lock);
	mutex_init(&new_ep->sendlock);
	mutex_init(&new_ep->recvlock);

	scif_rma_ep_init(new_ep);
	new_ep->state = SCIFEP_UNBOUND;
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI open: ep %p success\n", new_ep);
	return new_ep;

free_qp:
	kfree(new_ep->qp_info.qp);
free_ep:
	kfree(new_ep);
	return NULL;
}
EXPORT_SYMBOL_GPL(scif_open);

/*
 * scif_disconnect_ep - Disconnects the endpoint if found
 * @ep: The endpoint to disconnect
 *
 * Wakes up threads blocked in send/recv, takes @ep off the connected list and
 * sends a SCIF_DISCNCT message to the peer. When that message was sent
 * successfully the caller then blocks on ep->discon for at most
 * SCIF_NODE_ALIVE_TIMEOUT.
 *
 * Return: @ep if it was found on the connected list; NULL if it was not, in
 * which case it has been removed from the disconnected list if it was there.
 */
static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
{
	struct scifmsg msg;
	struct scif_endpt *fep = NULL;
	struct scif_endpt *tmpep;
	struct list_head *pos, *tmpq;
	int err;

	/*
	 * Wake up any threads blocked in send()/recv() before closing
	 * out the connection. Grabbing and releasing the send/recv lock
	 * will ensure that any blocked senders/receivers have exited for
	 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
	 * close. Ring 3 endpoints are not affected since close will not
	 * be called while there are IOCTLs executing.
	 */
	wake_up_interruptible(&ep->sendwq);
	wake_up_interruptible(&ep->recvwq);
	mutex_lock(&ep->sendlock);
	mutex_unlock(&ep->sendlock);
	mutex_lock(&ep->recvlock);
	mutex_unlock(&ep->recvlock);

	/* Remove from the connected list */
	mutex_lock(&scif_info.connlock);
	list_for_each_safe(pos, tmpq, &scif_info.connected) {
		tmpep = list_entry(pos, struct scif_endpt, list);
		if (tmpep == ep) {
			list_del(pos);
			fep = tmpep;
			/*
			 * The ep lock is taken only on this "found" path and
			 * stays held until the SCIF_DISCNCT message below has
			 * been handed to scif_nodeqp_send().
			 */
			spin_lock(&ep->lock);
			break;
		}
	}

	if (!fep) {
		/*
		 * The other side has completed the disconnect before
		 * the end point can be removed from the list. Therefore
		 * the ep lock is not locked, traverse the disconnected
		 * list to find the endpoint and release the conn lock.
		 */
		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep) {
				list_del(pos);
				break;
			}
		}
		mutex_unlock(&scif_info.connlock);
		return NULL;
	}

	/* Build the disconnect request addressed to the peer endpoint */
	init_completion(&ep->discon);
	msg.uop = SCIF_DISCNCT;
	msg.src = ep->port;
	msg.dst = ep->peer;
	msg.payload[0] = (u64)ep;
	msg.payload[1] = ep->remote_ep;

	err = scif_nodeqp_send(ep->remote_dev, &msg);
	spin_unlock(&ep->lock);
	mutex_unlock(&scif_info.connlock);

	/* Only wait for the peer if the request actually went out */
	if (!err)
		/* Wait for the remote node to respond with SCIF_DISCNT_ACK */
		wait_for_completion_timeout(&ep->discon,
					    SCIF_NODE_ALIVE_TIMEOUT);
	return ep;
}

/*
 * scif_close - close an endpoint
 * @epd: The end point returned from scif_open()
 *
 * Flushes the connection work item if a non-blocking connect is in progress,
 * moves the endpoint to SCIFEP_CLOSING and then performs the cleanup that
 * matches the state the endpoint was in before. Afterwards scif_put_port()
 * is called on the endpoint's port, the anonymous file is released, the
 * endpoint is torn down and it is added to the zombie list.
 *
 * May sleep.
 *
 * Return: always 0.
 */
int scif_close(scif_epd_t epd)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scif_endpt *tmpep;
	struct list_head *pos, *tmpq;
	enum scif_epd_state oldstate;
	bool flush_conn;

	dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
		ep, scif_ep_states[ep->state]);
	might_sleep();
	/* Sample the async connect state under the lock, flush outside it */
	spin_lock(&ep->lock);
	flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
	spin_unlock(&ep->lock);

	if (flush_conn)
		flush_work(&scif_info.conn_work);

	spin_lock(&ep->lock);
	oldstate = ep->state;

	ep->state = SCIFEP_CLOSING;

	/* Every case below is entered with ep->lock held and must drop it */
	switch (oldstate) {
	case SCIFEP_ZOMBIE:
		dev_err(scif_info.mdev.this_device,
			"SCIFAPI close: zombie state unexpected\n");
		/* fall through */
	case SCIFEP_DISCONNECTED:
		spin_unlock(&ep->lock);
		scif_unregister_all_windows(epd);
		/* Remove from the disconnected list */
		mutex_lock(&scif_info.connlock);
		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep) {
				list_del(pos);
				break;
			}
		}
		mutex_unlock(&scif_info.connlock);
		break;
	case SCIFEP_UNBOUND:
	case SCIFEP_BOUND:
	case SCIFEP_CONNECTING:
		/* Nothing state specific to undo here */
		spin_unlock(&ep->lock);
		break;
	case SCIFEP_MAPPING:
	case SCIFEP_CONNECTED:
	case SCIFEP_CLOSING:
	{
		spin_unlock(&ep->lock);
		scif_unregister_all_windows(epd);
		scif_disconnect_ep(ep);
		break;
	}
	case SCIFEP_LISTENING:
	case SCIFEP_CLLISTEN:
	{
		struct scif_conreq *conreq;
		struct scifmsg msg;
		struct scif_endpt *aep;

		spin_unlock(&ep->lock);
		mutex_lock(&scif_info.eplock);

		/* remove from listen list */
		list_for_each_safe(pos, tmpq, &scif_info.listen) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep)
				list_del(pos);
		}
		/* Remove any dangling accepts */
		while (ep->acceptcnt) {
			aep = list_first_entry(&ep->li_accept,
					       struct scif_endpt, liacceptlist);
			list_del(&aep->liacceptlist);
			scif_put_port(aep->port.port);
			list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
				tmpep = list_entry(pos, struct scif_endpt,
						   miacceptlist);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			/*
			 * eplock is dropped while connlock is held, so the
			 * two mutexes are never held at the same time here.
			 */
			mutex_unlock(&scif_info.eplock);
			mutex_lock(&scif_info.connlock);
			list_for_each_safe(pos, tmpq, &scif_info.connected) {
				tmpep = list_entry(pos,
						   struct scif_endpt, list);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
				tmpep = list_entry(pos,
						   struct scif_endpt, list);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			mutex_unlock(&scif_info.connlock);
			scif_teardown_ep(aep);
			mutex_lock(&scif_info.eplock);
			scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
			ep->acceptcnt--;
		}

		/* Take the ep lock before letting go of eplock */
		spin_lock(&ep->lock);
		mutex_unlock(&scif_info.eplock);

		/* Remove and reject any pending connection requests. */
		while (ep->conreqcnt) {
			conreq = list_first_entry(&ep->conlist,
						  struct scif_conreq, list);
			list_del(&conreq->list);

			msg.uop = SCIF_CNCT_REJ;
			msg.dst.node = conreq->msg.src.node;
			msg.dst.port = conreq->msg.src.port;
			msg.payload[0] = conreq->msg.payload[0];
			msg.payload[1] = conreq->msg.payload[1];
			/*
			 * No Error Handling on purpose for scif_nodeqp_send().
			 * If the remote node is lost we still want free the
			 * connection requests on the self node.
			 */
			scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
					 &msg);
			ep->conreqcnt--;
			kfree(conreq);
		}

		spin_unlock(&ep->lock);
		/* If a kSCIF accept is waiting wake it up */
		wake_up_interruptible(&ep->conwq);
		break;
	}
	}
	/* Common teardown, performed for every previous state */
	scif_put_port(ep->port.port);
	scif_anon_inode_fput(ep);
	scif_teardown_ep(ep);
	scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
	return 0;
}
EXPORT_SYMBOL_GPL(scif_close);

/**
 * __scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
 *			accept new connections.
 * @epd: The end point returned from scif_open()
 *
 * Only a listening endpoint is affected: it is moved to SCIFEP_CLLISTEN and
 * waiters on its connection wait queue are woken. Any other state is left
 * untouched.
 *
 * Return: always 0.
 */
int __scif_flush(scif_epd_t epd)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;

	if (ep->state == SCIFEP_LISTENING) {
		ep->state = SCIFEP_CLLISTEN;

		/* If an accept is waiting wake it up */
		wake_up_interruptible(&ep->conwq);
	}

	return 0;
}

/*
 * scif_bind - bind an unbound endpoint to a local port
 * @epd: The end point returned from scif_open()
 * @pn: Requested port number, or 0 to have a port picked by
 *	scif_get_new_port()
 *
 * Return: the bound port number on success. -EACCES if a port below
 * SCIF_ADMIN_PORT_END is requested without CAP_SYS_ADMIN, -EINVAL if the
 * endpoint is already bound or the requested port could not be reserved,
 * -EISCONN if the endpoint is in any state other than unbound/bound, or the
 * negative value returned by scif_get_new_port().
 */
int scif_bind(scif_epd_t epd, u16 pn)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int rc;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI bind: ep %p %s requested port number %d\n",
		ep, scif_ep_states[ep->state], pn);

	/*
	 * Similar to IETF RFC 1700, SCIF ports below
	 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
	 * processes or by processes executed by privileged users.
	 */
	if (pn && pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN))
		return -EACCES;

	spin_lock(&ep->lock);

	if (ep->state == SCIFEP_BOUND) {
		rc = -EINVAL;
		goto unlock;
	}
	if (ep->state != SCIFEP_UNBOUND) {
		rc = -EISCONN;
		goto unlock;
	}

	if (!pn) {
		/* No specific port requested: take a dynamic one */
		rc = scif_get_new_port();
		if (rc < 0)
			goto unlock;
		pn = rc;
	} else {
		int reserved = scif_rsrv_port(pn);

		if (reserved != pn) {
			rc = -EINVAL;
			goto unlock;
		}
	}

	ep->state = SCIFEP_BOUND;
	ep->port.node = scif_info.nodeid;
	ep->port.port = pn;
	ep->conn_async_state = ASYNC_CONN_IDLE;
	rc = pn;
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI bind: bound to port number %d\n", pn);

unlock:
	spin_unlock(&ep->lock);
	return rc;
}
EXPORT_SYMBOL_GPL(scif_bind);

/*
 * scif_listen - turn a bound endpoint into a listening endpoint
 * @epd: The end point returned from scif_open()
 * @backlog: Value recorded in ep->backlog
 *
 * Return: 0 on success, -EISCONN if the endpoint is already listening or is
 * connecting/connected, -EINVAL for the remaining non-bound states.
 */
int scif_listen(scif_epd_t epd, int backlog)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int rc = 0;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);

	spin_lock(&ep->lock);

	/* Only a bound endpoint may start listening */
	switch (ep->state) {
	case SCIFEP_BOUND:
		break;
	case SCIFEP_LISTENING:
	case SCIFEP_CONNECTED:
	case SCIFEP_CONNECTING:
	case SCIFEP_MAPPING:
		rc = -EISCONN;
		break;
	case SCIFEP_ZOMBIE:
	case SCIFEP_CLOSING:
	case SCIFEP_CLLISTEN:
	case SCIFEP_UNBOUND:
	case SCIFEP_DISCONNECTED:
		rc = -EINVAL;
		break;
	}
	if (rc) {
		spin_unlock(&ep->lock);
		return rc;
	}

	ep->state = SCIFEP_LISTENING;
	ep->backlog = backlog;

	/* Start with empty connection request and accept bookkeeping */
	ep->conreqcnt = 0;
	ep->acceptcnt = 0;
	INIT_LIST_HEAD(&ep->conlist);
	init_waitqueue_head(&ep->conwq);
	INIT_LIST_HEAD(&ep->li_accept);

	spin_unlock(&ep->lock);

	/*
	 * Listen status is complete so delete the qp information not needed
	 * on a listen before placing on the list of listening ep's
	 */
	scif_teardown_ep(ep);
	ep->qp_info.qp = NULL;

	mutex_lock(&scif_info.eplock);
	list_add_tail(&ep->list, &scif_info.listen);
	mutex_unlock(&scif_info.eplock);

	return 0;
}
EXPORT_SYMBOL_GPL(scif_listen);

/*
 ************************************************************************
 * SCIF connection flow:
 *
 * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
 *	connections via a SCIF_CNCT_REQ message
 * 2) A SCIF endpoint can initiate a SCIF connection by calling
 *	scif_connect(..) which calls scif_setup_qp_connect(..) which
 *	allocates the local qp for the endpoint ring buffer and then sends
 *	a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
 *	a SCIF_CNCT_REJ message
 * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
 *	wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
 *	message otherwise
 * 4) A thread blocked waiting for incoming connections allocates its local
 *	endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
 *	and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
 *	the node sends a SCIF_CNCT_REJ message
 * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
 *	connecting endpoint is woken up as part of handling
 *	scif_cnctgnt_resp(..) following which it maps the remote endpoints'
 *	QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
 *	success or a SCIF_CNCT_GNTNACK message on failure and completes
 *	the scif_connect(..) API
 * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
 *	in step 4 is woken up and completes the scif_accept(..) API
 * 7) The SCIF connection is now established between the two SCIF endpoints.
 */
/*
 * scif_conn_func - run the connecting side of the SCIF handshake
 * @ep: Endpoint in SCIFEP_CONNECTING state with remote_dev/conn_port set
 *
 * Sets up the local QP, sends SCIF_CNCT_REQ and waits up to
 * SCIF_NODE_ALIVE_TIMEOUT for the endpoint to leave SCIFEP_CONNECTING. On a
 * grant (SCIFEP_MAPPING) the remote QP is mapped and SCIF_CNCT_GNTACK is sent,
 * after which the endpoint is SCIFEP_CONNECTED and on the connected list.
 *
 * Return: 0 once connected, -ECONNREFUSED if the endpoint ended up in
 * SCIFEP_BOUND (reject or timeout), otherwise the error of the failing step.
 *
 * NOTE(review): if the wait ends with the endpoint in a state other than
 * SCIFEP_MAPPING or SCIFEP_BOUND, the positive wait_event_timeout() result
 * appears to be returned unchanged - confirm whether that can happen.
 */
static int scif_conn_func(struct scif_endpt *ep)
{
	int err = 0;
	struct scifmsg msg;
	struct device *spdev;

	err = scif_reserve_dma_chan(ep);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		ep->state = SCIFEP_BOUND;
		goto connect_error_simple;
	}
	/* Initiate the first part of the endpoint QP setup */
	err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
				    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s err %d qp_offset 0x%llx\n",
			__func__, err, ep->qp_info.qp_offset);
		ep->state = SCIFEP_BOUND;
		goto connect_error_simple;
	}

	/*
	 * NOTE(review): the two failure exits below leave ep->state at
	 * SCIFEP_CONNECTING, unlike the earlier ones - confirm intended.
	 */
	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto cleanup_qp;
	}
	/* Format connect message and send it */
	msg.src = ep->port;
	msg.dst = ep->conn_port;
	msg.uop = SCIF_CNCT_REQ;
	msg.payload[0] = (u64)ep;
	msg.payload[1] = ep->qp_info.qp_offset;
	err = _scif_nodeqp_send(ep->remote_dev, &msg);
	if (err)
		goto connect_error_dec;
	/* The peer device reference is not held across the wait */
	scif_put_peer_dev(spdev);
	/*
	 * Wait for the remote node to respond with SCIF_CNCT_GNT or
	 * SCIF_CNCT_REJ message.
	 */
	err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
				 SCIF_NODE_ALIVE_TIMEOUT);
	if (!err) {
		/* Timed out: handled like a refusal further down */
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d timeout\n", __func__, __LINE__);
		ep->state = SCIFEP_BOUND;
	}
	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto cleanup_qp;
	}
	if (ep->state == SCIFEP_MAPPING) {
		err = scif_setup_qp_connect_response(ep->remote_dev,
						     ep->qp_info.qp,
						     ep->qp_info.gnt_pld);
		/*
		 * If the resource to map the queue are not available then
		 * we need to tell the other side to terminate the accept
		 */
		if (err) {
			dev_err(&ep->remote_dev->sdev->dev,
				"%s %d err %d\n", __func__, __LINE__, err);
			msg.uop = SCIF_CNCT_GNTNACK;
			msg.payload[0] = ep->remote_ep;
			_scif_nodeqp_send(ep->remote_dev, &msg);
			ep->state = SCIFEP_BOUND;
			goto connect_error_dec;
		}

		msg.uop = SCIF_CNCT_GNTACK;
		msg.payload[0] = ep->remote_ep;
		err = _scif_nodeqp_send(ep->remote_dev, &msg);
		if (err) {
			ep->state = SCIFEP_BOUND;
			goto connect_error_dec;
		}
		ep->state = SCIFEP_CONNECTED;
		mutex_lock(&scif_info.connlock);
		list_add_tail(&ep->list, &scif_info.connected);
		mutex_unlock(&scif_info.connlock);
		dev_dbg(&ep->remote_dev->sdev->dev,
			"SCIFAPI connect: ep %p connected\n", ep);
	} else if (ep->state == SCIFEP_BOUND) {
		dev_dbg(&ep->remote_dev->sdev->dev,
			"SCIFAPI connect: ep %p connection refused\n", ep);
		err = -ECONNREFUSED;
		goto connect_error_dec;
	}
	scif_put_peer_dev(spdev);
	return err;
	/* Error exits, ordered so each label undoes one more step */
connect_error_dec:
	scif_put_peer_dev(spdev);
cleanup_qp:
	scif_cleanup_ep_qp(ep);
connect_error_simple:
	return err;
}

/*
 * scif_conn_handler:
 *
 * Workqueue handler for servicing non-blocking SCIF connect
 *
 * Drains scif_info.nb_connect_list: each queued endpoint is unlinked under
 * nb_connect_lock, connected via scif_conn_func() with the lock released,
 * and its result is stored in ep->conn_err before waiters on conn_pend_wq
 * are woken.
 */
void scif_conn_handler(struct work_struct *work)
{
	for (;;) {
		struct scif_endpt *pending = NULL;

		spin_lock(&scif_info.nb_connect_lock);
		if (!list_empty(&scif_info.nb_connect_list)) {
			pending = list_first_entry(&scif_info.nb_connect_list,
						   struct scif_endpt,
						   conn_list);
			list_del(&pending->conn_list);
		}
		spin_unlock(&scif_info.nb_connect_lock);

		/* Queue is empty: nothing left to service */
		if (!pending)
			break;

		pending->conn_err = scif_conn_func(pending);
		wake_up_interruptible(&pending->conn_pend_wq);
	}
}

/*
 * __scif_connect - connect an endpoint to a remote SCIF port
 * @epd: The end point returned from scif_open()
 * @dst: Node/port pair to connect to
 * @non_block: true to queue the connect on scif_info.conn_work and return
 *	-EINPROGRESS, false to run the handshake in the calling context
 *
 * An unbound endpoint is first given a port from scif_get_new_port(). If a
 * non-blocking connect was started earlier, calling this function again
 * flushes the connection work and returns the stored result (ep->conn_err).
 *
 * May sleep.
 *
 * Return: 0 on success; -ENODEV for an invalid destination node;
 * -EINPROGRESS when a non-blocking connect was queued or is still pending;
 * -EINVAL, -EOPNOTSUPP or -EISCONN when the endpoint state does not allow a
 * connect; otherwise the error of the failing step.
 */
int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int err = 0;
	struct scif_dev *remote_dev;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
		scif_ep_states[ep->state]);

	if (!scif_dev || dst->node > scif_info.maxid)
		return -ENODEV;

	might_sleep();

	remote_dev = &scif_dev[dst->node];
	spdev = scif_get_peer_dev(remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		return err;
	}

	spin_lock(&ep->lock);
	switch (ep->state) {
	case SCIFEP_ZOMBIE:
	case SCIFEP_CLOSING:
		err = -EINVAL;
		break;
	case SCIFEP_DISCONNECTED:
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
		else
			err = -EINVAL;
		break;
	case SCIFEP_LISTENING:
	case SCIFEP_CLLISTEN:
		err = -EOPNOTSUPP;
		break;
	case SCIFEP_CONNECTING:
	case SCIFEP_MAPPING:
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
			err = -EINPROGRESS;
		else
			err = -EISCONN;
		break;
	case SCIFEP_CONNECTED:
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
		else
			err = -EISCONN;
		break;
	case SCIFEP_UNBOUND:
		err = scif_get_new_port();
		if (err < 0)
			break;
		ep->port.port = err;
		ep->port.node = scif_info.nodeid;
		ep->conn_async_state = ASYNC_CONN_IDLE;
		/*
		 * On success err holds the allocated port number. Reset it so
		 * the "if (err ...)" check after the switch does not mistake
		 * the port for a failure, skip the connect and hand the port
		 * number back to the caller as the return value.
		 */
		err = 0;
		/* Fall through */
	case SCIFEP_BOUND:
		/*
		 * If a non-blocking connect has been already initiated
		 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
		 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
		 * SCIF_BOUND due an error in the connection process
		 * (e.g., connection refused) If conn_async_state is
		 * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
		 * so that the error status can be collected. If the state is
		 * already ASYNC_CONN_FLUSH_WORK - then set the error to
		 * EINPROGRESS since some other thread is waiting to collect
		 * error status.
		 */
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
		} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
			err = -EINPROGRESS;
		} else {
			ep->conn_port = *dst;
			init_waitqueue_head(&ep->sendwq);
			init_waitqueue_head(&ep->recvwq);
			init_waitqueue_head(&ep->conwq);
			ep->conn_async_state = 0;

			if (unlikely(non_block))
				ep->conn_async_state = ASYNC_CONN_INPROGRESS;
		}
		break;
	}

	/* Errors and result collection do not start a new connect */
	if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
		goto connect_simple_unlock1;

	ep->state = SCIFEP_CONNECTING;
	ep->remote_dev = &scif_dev[dst->node];
	ep->qp_info.qp->magic = SCIFEP_MAGIC;
	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
		/* Non-blocking: hand the endpoint to scif_conn_handler() */
		init_waitqueue_head(&ep->conn_pend_wq);
		spin_lock(&scif_info.nb_connect_lock);
		list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
		spin_unlock(&scif_info.nb_connect_lock);
		err = -EINPROGRESS;
		schedule_work(&scif_info.conn_work);
	}
connect_simple_unlock1:
	spin_unlock(&ep->lock);
	scif_put_peer_dev(spdev);
	if (err) {
		return err;
	} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
		/* Wait for the queued connect and report its outcome */
		flush_work(&scif_info.conn_work);
		err = ep->conn_err;
		spin_lock(&ep->lock);
		ep->conn_async_state = ASYNC_CONN_IDLE;
		spin_unlock(&ep->lock);
	} else {
		/* Blocking connect: run the handshake right here */
		err = scif_conn_func(ep);
	}
	return err;
}

/*
 * scif_connect - blocking connect to a remote SCIF port
 * @epd: The end point returned from scif_open()
 * @dst: Node/port pair to connect to
 *
 * Return: whatever __scif_connect() returns for a blocking connect.
 */
int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
{
	const bool non_block = false;

	return __scif_connect(epd, dst, non_block);
}
EXPORT_SYMBOL_GPL(scif_connect);

/**
 * scif_accept() - Accept a connection request from the remote node
 * @epd: The listening end point
 * @peer: Filled in with the node/port of the connecting peer
 * @newepd: Filled in with the newly created, connected end point
 * @flags: 0 or SCIF_ACCEPT_SYNC
 *
 * The function accepts a connection request from the remote node.  Successful
 * completion is indicated by a new end point being created and passed back
 * to the caller for future reference.
 *
 * Upon successful completion a zero will be returned and the peer information
 * will be filled in.
 *
 * If @flags contains anything but SCIF_ACCEPT_SYNC, if @peer or @newepd is
 * NULL, or if the end point is not in the listening state, -EINVAL will be
 * returned.
 *
 * If during the connection sequence resource allocation fails then -ENOMEM
 * will be returned.
 *
 * If the function is called without SCIF_ACCEPT_SYNC and no connection
 * requests are pending it will return -EAGAIN. -EAGAIN is also returned
 * without SCIF_ACCEPT_SYNC when the peer NAKs the grant.
 *
 * If the remote side is not sending any connection requests the caller may
 * terminate this function with a signal, in which case the error from
 * wait_event_interruptible() is returned.  If the end point stops listening
 * while waiting, -EINTR will be returned.
 *
 * If the peer does not answer the grant and is no longer alive, -ENODEV will
 * be returned.
 */
int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
		scif_epd_t *newepd, int flags)
{
	struct scif_endpt *lep = (struct scif_endpt *)epd;
	struct scif_endpt *cep;
	struct scif_conreq *conreq;
	struct scifmsg msg;
	int err;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);

	if (flags & ~SCIF_ACCEPT_SYNC)
		return -EINVAL;

	if (!peer || !newepd)
		return -EINVAL;

	might_sleep();
	spin_lock(&lep->lock);
	if (lep->state != SCIFEP_LISTENING) {
		spin_unlock(&lep->lock);
		return -EINVAL;
	}

	if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
		/* No connection request present and we do not want to wait */
		spin_unlock(&lep->lock);
		return -EAGAIN;
	}

	lep->files = current->files;
retry_connection:
	/* Entered with lep->lock held; it is dropped before sleeping */
	spin_unlock(&lep->lock);
	/* Wait for the remote node to send us a SCIF_CNCT_REQ */
	err = wait_event_interruptible(lep->conwq,
				       (lep->conreqcnt ||
				       (lep->state != SCIFEP_LISTENING)));
	if (err)
		return err;

	if (lep->state != SCIFEP_LISTENING)
		return -EINTR;

	spin_lock(&lep->lock);

	/* Somebody else took the request in the meantime: wait again */
	if (!lep->conreqcnt)
		goto retry_connection;

	/* Get the first connect request off the list */
	conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
	list_del(&conreq->list);
	lep->conreqcnt--;
	spin_unlock(&lep->lock);

	/* Fill in the peer information */
	peer->node = conreq->msg.src.node;
	peer->port = conreq->msg.src.port;

	cep = kzalloc(sizeof(*cep), GFP_KERNEL);
	if (!cep) {
		err = -ENOMEM;
		goto scif_accept_error_epalloc;
	}
	spin_lock_init(&cep->lock);
	mutex_init(&cep->sendlock);
	mutex_init(&cep->recvlock);
	cep->state = SCIFEP_CONNECTING;
	cep->remote_dev = &scif_dev[peer->node];
	cep->remote_ep = conreq->msg.payload[0];

	scif_rma_ep_init(cep);

	err = scif_reserve_dma_chan(cep);
	if (err) {
		dev_err(scif_info.mdev.this_device,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto scif_accept_error_qpalloc;
	}

	cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
	if (!cep->qp_info.qp) {
		err = -ENOMEM;
		goto scif_accept_error_qpalloc;
	}

	err = scif_anon_inode_getfile(cep);
	if (err)
		goto scif_accept_error_anon_inode;

	cep->qp_info.qp->magic = SCIFEP_MAGIC;
	spdev = scif_get_peer_dev(cep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto scif_accept_error_map;
	}
	err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
				   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
				   cep->remote_dev);
	if (err) {
		dev_dbg(&cep->remote_dev->sdev->dev,
			"SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
			lep, cep, err, cep->qp_info.qp_offset);
		scif_put_peer_dev(spdev);
		goto scif_accept_error_map;
	}

	/* The new end point shares the listening end point's local port */
	cep->port.node = lep->port.node;
	cep->port.port = lep->port.port;
	cep->peer.node = peer->node;
	cep->peer.port = peer->port;
	init_waitqueue_head(&cep->sendwq);
	init_waitqueue_head(&cep->recvwq);
	init_waitqueue_head(&cep->conwq);

	msg.uop = SCIF_CNCT_GNT;
	msg.src = cep->port;
	msg.payload[0] = cep->remote_ep;
	msg.payload[1] = cep->qp_info.qp_offset;
	msg.payload[2] = (u64)cep;

	err = _scif_nodeqp_send(cep->remote_dev, &msg);
	scif_put_peer_dev(spdev);
	if (err)
		goto scif_accept_error_map;
retry:
	/* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
	err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
				 SCIF_NODE_ACCEPT_TIMEOUT);
	/* Keep waiting for as long as the peer is still alive */
	if (!err && scifdev_alive(cep))
		goto retry;
	err = !err ? -ENODEV : 0;
	if (err)
		goto scif_accept_error_map;
	kfree(conreq);

	spin_lock(&cep->lock);

	if (cep->state == SCIFEP_CLOSING) {
		/*
		 * Remote failed to allocate resources and NAKed the grant.
		 * There is at this point nothing referencing the new end point.
		 */
		spin_unlock(&cep->lock);
		/*
		 * Drop the anonymous file obtained above before freeing the
		 * end point, exactly as the error path at the bottom of this
		 * function does; otherwise the file is never released.
		 */
		scif_anon_inode_fput(cep);
		scif_teardown_ep(cep);
		kfree(cep);

		/* If call with sync flag then go back and wait. */
		if (flags & SCIF_ACCEPT_SYNC) {
			spin_lock(&lep->lock);
			goto retry_connection;
		}
		return -EAGAIN;
	}

	scif_get_port(cep->port.port);
	*newepd = (scif_epd_t)cep;
	spin_unlock(&cep->lock);
	return 0;
	/* Error exits: unwind in reverse order, then reject the request */
scif_accept_error_map:
	scif_anon_inode_fput(cep);
scif_accept_error_anon_inode:
	scif_teardown_ep(cep);
scif_accept_error_qpalloc:
	kfree(cep);
scif_accept_error_epalloc:
	msg.uop = SCIF_CNCT_REJ;
	msg.dst.node = conreq->msg.src.node;
	msg.dst.port = conreq->msg.src.port;
	msg.payload[0] = conreq->msg.payload[0];
	msg.payload[1] = conreq->msg.payload[1];
	scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
	kfree(conreq);
	return err;
}
EXPORT_SYMBOL_GPL(scif_accept);

/*
 * scif_msg_param_check:
 * @epd: The end point returned from scif_open() (not examined here)
 * @len: Length of the transfer
 * @flags: 0 for non blocking, or a value with SCIF_RECV_BLOCK set
 *
 * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
 *
 * Return: 0 if the parameters are acceptable, -EINVAL if @len is negative
 * or @flags is non-zero without SCIF_RECV_BLOCK being set.
 */
static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
{
	if (len < 0)
		return -EINVAL;

	if (flags && !(flags & SCIF_RECV_BLOCK))
		return -EINVAL;

	return 0;
}

/*
 * _scif_send - copy a kernel buffer into the endpoint's outbound ring buffer
 * @epd: The end point returned from scif_open()
 * @msg: Kernel address of the data to send
 * @len: Number of bytes to send
 * @flags: SCIF_SEND_BLOCK to wait for ring buffer space, otherwise the
 *	function returns as soon as no space is left
 *
 * Data is written in as many pieces as the ring buffer has room for; after
 * each piece a SCIF_CLIENT_SENT notification is sent to the peer.
 *
 * Return: the number of bytes sent if any were sent. Otherwise the error of
 * the failing step, -ECONNRESET if the endpoint is disconnected, -ENOTCONN
 * if it is in any other non-connected state, or 0 if nothing could be sent.
 */
static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scifmsg notif_msg;
	int curr_xfer_len = 0, sent_len = 0, write_count;
	int ret = 0;
	struct scif_qp *qp = ep->qp_info.qp;

	if (flags & SCIF_SEND_BLOCK)
		might_sleep();

	spin_lock(&ep->lock);
	while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
		write_count = scif_rb_space(&qp->outbound_q);
		if (write_count) {
			/* Best effort to send as much data as possible */
			curr_xfer_len = min(len - sent_len, write_count);
			ret = scif_rb_write(&qp->outbound_q, msg,
					    curr_xfer_len);
			if (ret < 0)
				break;
			/* Success. Update write pointer */
			scif_rb_commit(&qp->outbound_q);
			/*
			 * Send a notification to the peer about the
			 * produced data message.
			 */
			notif_msg.src = ep->port;
			notif_msg.uop = SCIF_CLIENT_SENT;
			notif_msg.payload[0] = ep->remote_ep;
			ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
			if (ret)
				break;
			/* Only count the piece once the peer was notified */
			sent_len += curr_xfer_len;
			msg = msg + curr_xfer_len;
			continue;
		}
		/* Amount of free space to wait for, capped at the QP size */
		curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
		/* Not enough RB space. return for the Non Blocking case */
		if (!(flags & SCIF_SEND_BLOCK))
			break;

		/* The ep lock is not held while sleeping */
		spin_unlock(&ep->lock);
		/* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
		ret =
		wait_event_interruptible(ep->sendwq,
					 (SCIFEP_CONNECTED != ep->state) ||
					 (scif_rb_space(&qp->outbound_q) >=
					 curr_xfer_len));
		spin_lock(&ep->lock);
		if (ret)
			break;
	}
	/* Partial progress takes precedence over any error seen later */
	if (sent_len)
		ret = sent_len;
	else if (!ret && SCIFEP_CONNECTED != ep->state)
		ret = SCIFEP_DISCONNECTED == ep->state ?
			-ECONNRESET : -ENOTCONN;
	spin_unlock(&ep->lock);
	return ret;
}

/*
 * _scif_recv - copy data from the endpoint's inbound ring buffer
 * @epd: The end point returned from scif_open()
 * @msg: Kernel address to place the data at
 * @len: Number of bytes to receive
 * @flags: SCIF_RECV_BLOCK to wait for data, otherwise the function returns
 *	as soon as the ring buffer has nothing more to read
 *
 * Data that is already in the ring buffer can still be read after the
 * endpoint has become disconnected; in that state the read pointer is not
 * updated and the peer is not notified.
 *
 * Return: the number of bytes received if any were received. Otherwise the
 * error of the failing step, -ECONNRESET if the endpoint is disconnected,
 * -ENOTCONN if it is in any other non-connected state, or 0 if there was
 * nothing to read.
 */
static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
	/* Assigned from scif_rb_get_next() below but not read afterwards */
	int read_size;
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scifmsg notif_msg;
	int curr_recv_len = 0, remaining_len = len, read_count;
	int ret = 0;
	struct scif_qp *qp = ep->qp_info.qp;

	if (flags & SCIF_RECV_BLOCK)
		might_sleep();
	spin_lock(&ep->lock);
	while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
				 SCIFEP_DISCONNECTED == ep->state)) {
		read_count = scif_rb_count(&qp->inbound_q, remaining_len);
		if (read_count) {
			/*
			 * Best effort to recv as much data as there
			 * are bytes to read in the RB particularly
			 * important for the Non Blocking case.
			 */
			curr_recv_len = min(remaining_len, read_count);
			read_size = scif_rb_get_next(&qp->inbound_q,
						     msg, curr_recv_len);
			if (ep->state == SCIFEP_CONNECTED) {
				/*
				 * Update the read pointer only if the endpoint
				 * is still connected else the read pointer
				 * might no longer exist since the peer has
				 * freed resources!
				 */
				scif_rb_update_read_ptr(&qp->inbound_q);
				/*
				 * Send a notification to the peer about the
				 * consumed data message only if the EP is in
				 * SCIFEP_CONNECTED state.
				 */
				notif_msg.src = ep->port;
				notif_msg.uop = SCIF_CLIENT_RCVD;
				notif_msg.payload[0] = ep->remote_ep;
				ret = _scif_nodeqp_send(ep->remote_dev,
							&notif_msg);
				if (ret)
					break;
			}
			remaining_len -= curr_recv_len;
			msg = msg + curr_recv_len;
			continue;
		}
		/*
		 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
		 * we will keep looping forever.
		 */
		if (ep->state == SCIFEP_DISCONNECTED)
			break;
		/*
		 * Return in the Non Blocking case if there is no data
		 * to read in this iteration.
		 */
		if (!(flags & SCIF_RECV_BLOCK))
			break;
		/* Amount of data to wait for, capped at the QP size */
		curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
		/* The ep lock is not held while sleeping */
		spin_unlock(&ep->lock);
		/*
		 * Wait for a SCIF_CLIENT_SEND message in the blocking case
		 * or until other side disconnects.
		 */
		ret =
		wait_event_interruptible(ep->recvwq,
					 SCIFEP_CONNECTED != ep->state ||
					 scif_rb_count(&qp->inbound_q,
						       curr_recv_len)
					 >= curr_recv_len);
		spin_lock(&ep->lock);
		if (ret)
			break;
	}
	/* Partial progress takes precedence over any error seen later */
	if (len - remaining_len)
		ret = len - remaining_len;
	else if (!ret && ep->state != SCIFEP_CONNECTED)
		ret = ep->state == SCIFEP_DISCONNECTED ?
			-ECONNRESET : -ENOTCONN;
	spin_unlock(&ep->lock);
	return ret;
}

/**
 * scif_user_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: User space address of the data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_send().
 *
 * The user buffer is copied through a kernel bounce buffer of at most
 * 1 << (MAX_ORDER + PAGE_SHIFT - 1) bytes, one chunk at a time.
 *
 * Return: the number of bytes sent, or a negative errno if the last step
 * that ran failed.
 */
int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
	int sent_len = 0;
	int err;
	char *bounce;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	err = scif_msg_param_check(epd, len, flags);
	if (err)
		return err;

	bounce = kmalloc(chunk_len, GFP_KERNEL);
	if (!bounce)
		return -ENOMEM;

	/*
	 * Grabbing the lock before breaking up the transfer in
	 * multiple chunks is required to ensure that messages do
	 * not get fragmented and reordered.
	 */
	mutex_lock(&ep->sendlock);
	while (sent_len != len) {
		int this_len = min(chunk_len, len - sent_len);

		if (copy_from_user(bounce, msg, this_len)) {
			err = -EFAULT;
			break;
		}

		err = _scif_send(epd, bounce, this_len, flags);
		if (err < 0)
			break;

		sent_len += err;
		msg += err;

		/* A short send ends the transfer */
		if (err != this_len)
			break;
	}
	mutex_unlock(&ep->sendlock);
	kfree(bounce);

	return err < 0 ? err : sent_len;
}

/**
 * scif_user_recv() - Receive data from connection queue
 * @epd: The end point returned from scif_open()
 * @msg: User space address to place data
 * @len: Length to receive
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_recv().
 *
 * Data is received into a kernel bounce buffer of at most
 * 1 << (MAX_ORDER + PAGE_SHIFT - 1) bytes and copied out one chunk at a time.
 *
 * Return: the number of bytes received, or a negative errno if the last step
 * that ran failed.
 */
int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
	int recv_len = 0;
	int err;
	char *bounce;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	err = scif_msg_param_check(epd, len, flags);
	if (err)
		return err;

	bounce = kmalloc(chunk_len, GFP_KERNEL);
	if (!bounce)
		return -ENOMEM;

	/*
	 * Grabbing the lock before breaking up the transfer in
	 * multiple chunks is required to ensure that messages do
	 * not get fragmented and reordered.
	 */
	mutex_lock(&ep->recvlock);
	while (recv_len != len) {
		int this_len = min(chunk_len, len - recv_len);

		err = _scif_recv(epd, bounce, this_len, flags);
		if (err < 0)
			break;

		/* Hand over only the bytes that were actually received */
		if (copy_to_user(msg, bounce, err)) {
			err = -EFAULT;
			break;
		}

		recv_len += err;
		msg += err;

		/* A short receive ends the transfer */
		if (err != this_len)
			break;
	}
	mutex_unlock(&ep->recvlock);
	kfree(bounce);

	return err < 0 ? err : recv_len;
}

/**
 * scif_send() - Send data from a kernel buffer on an endpoint
 * @epd: The end point returned from scif_open()
 * @msg: Kernel address of the data to send
 * @len: Number of bytes to send
 * @flags: blocking or non blocking
 *
 * This function is called from the kernel mode only and is
 * a wrapper for _scif_send().
 *
 * Return: 0 when @len is zero, the error from scif_msg_param_check() if
 * the parameters are rejected, -ENOTCONN if the endpoint has no remote
 * device, otherwise whatever _scif_send() returns.
 */
int scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	bool blocking;
	int rc;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	rc = scif_msg_param_check(epd, len, flags);
	if (rc)
		return rc;
	if (!ep->remote_dev)
		return -ENOTCONN;

	/*
	 * Only a blocking send takes the mutex, so that its message is
	 * not fragmented/reordered. The non blocking mode is protected
	 * using spin locks in _scif_send().
	 */
	blocking = !!(flags & SCIF_SEND_BLOCK);
	if (blocking)
		mutex_lock(&ep->sendlock);

	rc = _scif_send(epd, msg, len, flags);

	if (blocking)
		mutex_unlock(&ep->sendlock);
	return rc;
}
EXPORT_SYMBOL_GPL(scif_send);

/**
 * scif_recv() - Receive data from an endpoint into a kernel buffer
 * @epd: The end point returned from scif_open()
 * @msg: Kernel address to place the received data
 * @len: Number of bytes to receive
 * @flags: blocking or non blocking
 *
 * This function is called from the kernel mode only and is
 * a wrapper for _scif_recv().
 *
 * Return: 0 when @len is zero, the error from scif_msg_param_check() if
 * the parameters are rejected, otherwise whatever _scif_recv() returns.
 */
int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	bool blocking;
	int rc;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	rc = scif_msg_param_check(epd, len, flags);
	if (rc)
		return rc;

	/*
	 * Only a blocking receive takes the mutex, so that its message is
	 * not fragmented/reordered. The non blocking mode is protected
	 * using spin locks in _scif_recv().
	 */
	blocking = !!(flags & SCIF_RECV_BLOCK);
	if (blocking)
		mutex_lock(&ep->recvlock);

	rc = _scif_recv(epd, msg, len, flags);

	if (blocking)
		mutex_unlock(&ep->recvlock);

	return rc;
}
EXPORT_SYMBOL_GPL(scif_recv);

/*
 * _scif_poll_wait() - Register on a wait queue with ep->lock temporarily
 * dropped.
 * @f: file passed through to poll_wait()
 * @wq: wait queue head to register on
 * @p: poll table passed through to poll_wait()
 * @ep: endpoint whose spinlock is released around the poll_wait() call
 *
 * Must be called with ep->lock held; the lock is held again on return.
 */
static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
				   poll_table *p, struct scif_endpt *ep)
{
	/*
	 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
	 * and regrab it afterwards. Because the endpoint state might have
	 * changed while the lock was given up, the state must be checked
	 * again after re-acquiring the lock. The code in __scif_pollfd(..)
	 * does this.
	 */
	spin_unlock(&ep->lock);
	poll_wait(f, wq, p);
	spin_lock(&ep->lock);
}

/*
 * __scif_pollfd() - Compute the poll event mask for a SCIF endpoint
 * @f: file handed to poll_wait() when registering on a wait queue
 * @wait: poll table; also consulted for the requested events
 * @ep: endpoint to examine
 *
 * Takes ep->lock for the duration of the call. Each _scif_poll_wait()
 * call drops and retakes that lock, so the endpoint state is tested
 * again after every registration before any event bit is reported.
 *
 * Returns a mask built from EPOLLIN, EPOLLOUT and EPOLLHUP for the
 * async-connect, listening and connected/disconnected cases, or
 * EPOLLERR when the endpoint is in none of those states.
 */
__poll_t
__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
{
	__poll_t mask = 0;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);

	spin_lock(&ep->lock);

	/* Endpoint is waiting for a non-blocking connect to complete */
	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
		_scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
		/* Lock was dropped above: confirm the connect is still pending */
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
			/*
			 * Report writable once the connect has finished,
			 * whether it ended connected, disconnected or with
			 * conn_err set.
			 */
			if (ep->state == SCIFEP_CONNECTED ||
			    ep->state == SCIFEP_DISCONNECTED ||
			    ep->conn_err)
				mask |= EPOLLOUT;
			goto exit;
		}
	}

	/* Endpoint is listening for incoming connection requests */
	if (ep->state == SCIFEP_LISTENING) {
		_scif_poll_wait(f, &ep->conwq, wait, ep);
		/* Lock was dropped above: confirm we are still listening */
		if (ep->state == SCIFEP_LISTENING) {
			/* Readable when the connection request count is non-zero */
			if (ep->conreqcnt)
				mask |= EPOLLIN;
			goto exit;
		}
	}

	/* Endpoint is connected or disconnected */
	if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
		/* Register only on the wait queues for the requested events */
		if (poll_requested_events(wait) & EPOLLIN)
			_scif_poll_wait(f, &ep->recvwq, wait, ep);
		if (poll_requested_events(wait) & EPOLLOUT)
			_scif_poll_wait(f, &ep->sendwq, wait, ep);
		/* Lock may have been dropped above: re-check the state */
		if (ep->state == SCIFEP_CONNECTED ||
		    ep->state == SCIFEP_DISCONNECTED) {
			/* Data can be read without blocking */
			if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
				mask |= EPOLLIN;
			/* Data can be written without blocking */
			if (scif_rb_space(&ep->qp_info.qp->outbound_q))
				mask |= EPOLLOUT;
			/* Return EPOLLHUP if endpoint is disconnected */
			if (ep->state == SCIFEP_DISCONNECTED)
				mask |= EPOLLHUP;
			goto exit;
		}
	}

	/* Return EPOLLERR if the endpoint is in none of the above states */
	mask |= EPOLLERR;
exit:
	spin_unlock(&ep->lock);
	return mask;
}

/**
 * scif_poll() - Kernel mode SCIF poll
 * @ufds: Array of scif_pollepd structures containing the end points
 *	  and events to poll on
 * @nfds: Size of the ufds array
 * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout
 *
 * The code flow in this function is based on do_poll(..) in select.c
 *
 * EPOLLERR and EPOLLHUP are always reported in addition to the events
 * requested in each entry. The revents field of every entry is updated
 * on each pass over the array.
 *
 * The timeout bounds the total time spent in this call: when the task is
 * woken up but no endpoint is ready, it goes back to sleep only for the
 * time that is still left rather than for the full timeout again.
 *
 * Returns the number of endpoints which have pending events or 0 in
 * the event of a timeout. If a signal is used for wake up, -EINTR is
 * returned.
 */
int
scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
{
	struct poll_wqueues table;
	poll_table *pt;
	unsigned int i;
	int count = 0, timed_out = timeout_msecs == 0;
	__poll_t mask;
	/* Remaining sleep budget in jiffies, as schedule_timeout() expects */
	long timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
		: msecs_to_jiffies(timeout_msecs);

	poll_initwait(&table);
	pt = &table.pt;
	while (1) {
		for (i = 0; i < nfds; i++) {
			pt->_key = ufds[i].events | EPOLLERR | EPOLLHUP;
			mask = __scif_pollfd(ufds[i].epd->anon,
					     pt, ufds[i].epd);
			mask &= ufds[i].events | EPOLLERR | EPOLLHUP;
			if (mask) {
				count++;
				/*
				 * An event is already pending, so there is
				 * no need to register on further wait queues.
				 */
				pt->_qproc = NULL;
			}
			ufds[i].revents = mask;
		}
		/* Wait queue registration is only needed on the first pass */
		pt->_qproc = NULL;
		if (!count) {
			count = table.error;
			if (signal_pending(current))
				count = -EINTR;
		}
		if (count || timed_out)
			break;

		/*
		 * Carry the unexpired part of the timeout into the next
		 * iteration so that repeated wakeups cannot extend the
		 * overall wait beyond what the caller asked for.
		 */
		timeout = schedule_timeout_interruptible(timeout);
		if (!timeout)
			timed_out = 1;
	}
	poll_freewait(&table);
	return count;
}
EXPORT_SYMBOL_GPL(scif_poll);

/**
 * scif_get_node_ids() - Report the IDs of the SCIF nodes that are alive
 * @nodes: Array that receives the IDs of the alive nodes
 * @len: Number of entries available in @nodes
 * @self: Receives the ID of the local node
 *
 * On a node that is not the management node, scif_get_node_info() is
 * called first. At most min(@len, scif_info.total) entries of @nodes
 * are written; alive nodes beyond that are counted but not stored.
 *
 * Return: the number of nodes found alive, which may exceed the number
 * of IDs stored in @nodes.
 */
int scif_get_node_ids(u16 *nodes, int len, u16 *self)
{
	int alive = 0;
	int filled = 0;
	int id;

	if (!scif_is_mgmt_node())
		scif_get_node_info();

	*self = scif_info.nodeid;

	mutex_lock(&scif_info.conflock);
	len = min_t(int, len, scif_info.total);
	for (id = 0; id <= scif_info.maxid; id++) {
		if (!_scifdev_alive(&scif_dev[id]))
			continue;

		alive++;
		if (filled < len)
			nodes[filled++] = id;
	}
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
		scif_info.total, alive, filled);
	mutex_unlock(&scif_info.conflock);

	return alive;
}
EXPORT_SYMBOL_GPL(scif_get_node_ids);

/*
 * subsys_interface add_dev callback: forward a newly added peer device
 * to the client's probe hook, if the client provides one. Always
 * returns 0.
 */
static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
{
	struct scif_peer_dev *peer =
		container_of(dev, struct scif_peer_dev, dev);
	struct scif_client *drv =
		container_of(si, struct scif_client, si);

	if (!drv->probe)
		return 0;

	drv->probe(peer);
	return 0;
}

/*
 * subsys_interface remove_dev callback: forward the removal of a peer
 * device to the client's remove hook, if the client provides one.
 */
static void scif_remove_client_dev(struct device *dev,
				   struct subsys_interface *si)
{
	struct scif_peer_dev *peer =
		container_of(dev, struct scif_peer_dev, dev);
	struct scif_client *drv =
		container_of(si, struct scif_client, si);

	if (!drv->remove)
		return;

	drv->remove(peer);
}

/**
 * scif_client_unregister() - Unregister a SCIF client
 * @client: client whose embedded subsys interface is unregistered
 */
void scif_client_unregister(struct scif_client *client)
{
	struct subsys_interface *si = &client->si;

	subsys_interface_unregister(si);
}
EXPORT_SYMBOL_GPL(scif_client_unregister);

/**
 * scif_client_register() - Register a SCIF client on the SCIF peer bus
 * @client: client to register; its embedded subsys interface is filled
 *	    in with the client's name, the SCIF peer bus and the add/remove
 *	    device callbacks before registration
 *
 * Return: the result of subsys_interface_register().
 */
int scif_client_register(struct scif_client *client)
{
	client->si.name = client->name;
	client->si.subsys = &scif_peer_bus;
	client->si.add_dev = scif_add_client_dev;
	client->si.remove_dev = scif_remove_client_dev;

	return subsys_interface_register(&client->si);
}
EXPORT_SYMBOL_GPL(scif_client_register);