cregit-Linux how code gets into the kernel

Release 4.11 drivers/misc/mic/cosm/cosm_scif_server.c

/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Intel MIC Coprocessor State Management (COSM) Driver
 *
 */
#include <linux/kthread.h>
#include <linux/sched/signal.h>

#include "cosm_main.h"

/*
 * The COSM driver uses SCIF to communicate between the management node and the
 * MIC cards. SCIF is used to (a) Send a shutdown command to the card (b)
 * receive a shutdown status back from the card upon completion of shutdown and
 * (c) receive periodic heartbeat messages from the card used to deduce if the
 * card has crashed.
 *
 * A COSM server consisting of a SCIF listening endpoint waits for incoming
 * connections from the card. Upon acceptance of the connection, a separate
 * work-item is scheduled to handle SCIF message processing for that card. The
 * life-time of this work-item is therefore the time from which the connection
 * from a card is accepted to the time at which the connection is closed. A new
 * work-item starts each time the card boots and is alive till the card (a)
 * shuts down (b) is reset (c) crashes (d) cosm_client driver on the card is
 * unloaded.
 *
 * From the point of view of COSM, the interactions with SCIF during card
 * shutdown, reset and crash are as follows:
 *
 * Card shutdown
 * -------------
 * 1. COSM client on the card invokes orderly_poweroff() in response to SHUTDOWN
 *    message from the host.
 * 2. Card driver shutdown callback invokes scif_unregister_device(..) resulting
 *    in scif_remove(..) getting called on the card
 * 3. scif_remove -> scif_stop -> scif_handle_remove_node ->
 *    scif_peer_unregister_device -> device_unregister for the host peer device
 * 4. During device_unregister remove(..) method of cosm_client is invoked which
 *    closes the COSM SCIF endpoint on the card. This results in a SCIF_DISCNCT
 *    message being sent to host SCIF. SCIF_DISCNCT message processing on the
 *    host SCIF sets the host COSM SCIF endpoint state to DISCONNECTED and wakes
 *    up the host COSM thread blocked in scif_poll(..) resulting in
 *    scif_poll(..)  returning POLLHUP.
 * 5. On the card, scif_peer_release_dev is next called which results in an
 *    SCIF_EXIT message being sent to the host and after receiving the
 *    SCIF_EXIT_ACK from the host the peer device teardown on the card is
 *    complete.
 * 6. As part of the SCIF_EXIT message processing on the host, host sends a
 *    SCIF_REMOVE_NODE to itself corresponding to the card being removed. This
 *    starts a similar SCIF peer device teardown sequence on the host
 *    corresponding to the card being shut down.
 *
 * Card reset
 * ----------
 * The case of interest here is when the card has not been previously shut down
 * since most of the steps below are skipped in that case:
 *
 * 1. cosm_stop(..) invokes hw_ops->stop(..) method of the base PCIe driver
 *    which unregisters the SCIF HW device resulting in scif_remove(..) being
 *    called on the host.
 * 2. scif_remove(..) calls scif_disconnect_node(..) which results in a
 *    SCIF_EXIT message being sent to the card.
 * 3. The card executes scif_stop() as part of SCIF_EXIT message
 *    processing. This results in the COSM endpoint on the card being closed and
 *    the SCIF host peer device on the card getting unregistered similar to
 *    steps 3, 4 and 5 for the card shutdown case above. scif_poll(..) on the
 *    host returns POLLHUP as a result.
 * 4. On the host, card peer device unregister and SCIF HW remove(..) also
 *    subsequently complete.
 *
 * Card crash
 * ----------
 * If a reset is issued after the card has crashed, there is no SCIF_DISCNCT
 * message from the card which would result in scif_poll(..) returning
 * POLLHUP. In this case when the host SCIF driver sends a SCIF_REMOVE_NODE
 * message to itself resulting in the card SCIF peer device being unregistered,
 * this results in a scif_peer_release_dev -> scif_cleanup_scifdev->
 * scif_invalidate_ep call sequence which sets the endpoint state to
 * DISCONNECTED and results in scif_poll(..) returning POLLHUP.
 */


/* Backlog value passed to scif_listen() for the COSM listening endpoint */
#define COSM_SCIF_BACKLOG 16

/* Seconds added on top of COSM_HEARTBEAT_SEND_SEC to form the timeout */
#define COSM_HEARTBEAT_CHECK_DELTA_SEC 10

/*
 * Heartbeat timeout in seconds. COSM_HEARTBEAT_SEND_SEC is not defined in
 * this file (presumably the card's heartbeat send interval from a shared
 * header -- TODO confirm).
 */
#define COSM_HEARTBEAT_TIMEOUT_SEC \
		(COSM_HEARTBEAT_SEND_SEC + COSM_HEARTBEAT_CHECK_DELTA_SEC)

/* The same timeout in milliseconds; used as the scif_poll(..) timeout */
#define COSM_HEARTBEAT_TIMEOUT_MSEC (COSM_HEARTBEAT_TIMEOUT_SEC * MSEC_PER_SEC)


/* Server kthread started by cosm_scif_init() and stopped by cosm_scif_exit() */
static struct task_struct *server_thread;

/* Listening SCIF endpoint; reset to NULL whenever it is closed */
static scif_epd_t listen_epd;

/*
 * Publish the MIC card's internally stored shutdown status to the user space
 * MIC daemon, then clear the stored value so that it is published only once.
 */
static void cosm_update_mic_status(struct cosm_device *cdev)
{
	/* MIC_NOP means no shutdown status is pending */
	if (cdev->shutdown_status_int == MIC_NOP)
		return;

	cosm_set_shutdown_status(cdev, cdev->shutdown_status_int);
	cdev->shutdown_status_int = MIC_NOP;
}

Contributors

PersonTokensPropCommitsCommitProp
Ashutosh Dixit36100.00%1100.00%
Total36100.00%1100.00%

/* Store MIC card's shutdown status internally when it is received */
static void cosm_shutdown_status_int(struct cosm_device *cdev, enum mic_status shutdown_status) { switch (shutdown_status) { case MIC_HALTED: case MIC_POWER_OFF: case MIC_RESTART: case MIC_CRASHED: break; default: dev_err(&cdev->dev, "%s %d Unexpected shutdown_status %d\n", __func__, __LINE__, shutdown_status); return; }; cdev->shutdown_status_int = shutdown_status; cdev->heartbeat_watchdog_enable = false; if (cdev->state != MIC_SHUTTING_DOWN) cosm_set_state(cdev, MIC_SHUTTING_DOWN); }

Contributors

PersonTokensPropCommitsCommitProp
Ashutosh Dixit80100.00%1100.00%
Total80100.00%1100.00%

/* Non-blocking recv. Read and process all available messages */
static void cosm_scif_recv(struct cosm_device *cdev) { struct cosm_msg msg; int rc; while (1) { rc = scif_recv(cdev->epd, &msg, sizeof(msg), 0); if (!rc) { break; } else if (rc < 0) { dev_dbg(&cdev->dev, "%s: %d rc %d\n", __func__, __LINE__, rc); break; } dev_dbg(&cdev->dev, "%s: %d rc %d id 0x%llx\n", __func__, __LINE__, rc, msg.id); switch (msg.id) { case COSM_MSG_SHUTDOWN_STATUS: cosm_shutdown_status_int(cdev, msg.shutdown_status); break; case COSM_MSG_HEARTBEAT: /* Nothing to do, heartbeat only unblocks scif_poll */ break; default: dev_err(&cdev->dev, "%s: %d unknown msg.id %lld\n", __func__, __LINE__, msg.id); break; } } }

Contributors

PersonTokensPropCommitsCommitProp
Ashutosh Dixit143100.00%1100.00%
Total143100.00%1100.00%

/*
 * Mark this MIC card as crashed: log the timeout, record MIC_CRASHED as the
 * internal shutdown status and publish it right away.
 */
static void cosm_set_crashed(struct cosm_device *cdev)
{
	dev_err(&cdev->dev, "node alive timeout\n");

	/* Record the status, then publish it via cosm_update_mic_status() */
	cosm_shutdown_status_int(cdev, MIC_CRASHED);
	cosm_update_mic_status(cdev);
}

Contributors

PersonTokensPropCommitsCommitProp
Ashutosh Dixit33100.00%1100.00%
Total33100.00%1100.00%

/*
 * Send the host's current time to the MIC card in a COSM_MSG_SYNC_TIME
 * message so that the card can sync its system time with the host. A failed
 * send is logged but not reported to the caller.
 */
static void cosm_send_time(struct cosm_device *cdev)
{
	struct cosm_msg msg = { .id = COSM_MSG_SYNC_TIME };
	int rc;

	getnstimeofday64(&msg.timespec);

	rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
	if (rc >= 0)
		return;

	dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
		__func__, __LINE__, rc);
}

Contributors

PersonTokensPropCommitsCommitProp
Ashutosh Dixit74100.00%1100.00%
Total74100.00%1100.00%

/*
 * Close this cosm_device's endpoint after its peer endpoint on the card has
 * been closed. In all cases except MIC card crash, POLLHUP on the host is
 * triggered by the client's endpoint being closed.
 */
static void cosm_scif_close(struct cosm_device *cdev)
{
	/*
	 * The MIC card sends its SHUTDOWN_STATUS message from the reboot
	 * notifier, while shutdown is still in progress. The status is
	 * therefore published (notifying mpssd to reset the card) only now,
	 * when the SCIF endpoint is being closed.
	 */
	cosm_update_mic_status(cdev);

	scif_close(cdev->epd);
	cdev->epd = NULL;

	dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__);
}

Contributors

PersonTokensPropCommitsCommitProp
Ashutosh Dixit44100.00%1100.00%
Total44100.00%1100.00%

/* * Set card state to ONLINE when a new SCIF connection from a MIC card is * received. Normally the state is BOOTING when the connection comes in, but can * be ONLINE if cosm_client driver on the card was unloaded and then reloaded. */
static int cosm_set_online(struct cosm_device *cdev) { int rc = 0; if (MIC_BOOTING == cdev->state || MIC_ONLINE == cdev->state) { cdev->heartbeat_watchdog_enable = cdev->sysfs_heartbeat_enable; cdev->epd = cdev->newepd; if (cdev->state == MIC_BOOTING) cosm_set_state(cdev, MIC_ONLINE); cosm_send_time(cdev); dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__); } else { dev_warn(&cdev->dev, "%s %d not going online in state: %s\n", __func__, __LINE__, cosm_state_string[cdev->state]); rc = -EINVAL; } /* Drop reference acquired by bus_find_device in the server thread */ put_device(&cdev->dev); return rc; }

Contributors

PersonTokensPropCommitsCommitProp
Ashutosh Dixit123100.00%1100.00%
Total123100.00%1100.00%

/* * Work function for handling work for a SCIF connection from a particular MIC * card. It first sets the card state to ONLINE and then calls scif_poll to * block on activity such as incoming messages on the SCIF endpoint. When the * endpoint is closed, the work function exits, completing its life cycle, from * MIC card boot to card shutdown/reset/crash. */
void cosm_scif_work(struct work_struct *work) { struct cosm_device *cdev = container_of(work, struct cosm_device, scif_work); struct scif_pollepd pollepd; int rc; mutex_lock(&cdev->cosm_mutex); if (cosm_set_online(cdev)) goto exit; while (1) { pollepd.epd = cdev->epd; pollepd.events = POLLIN; /* Drop the mutex before blocking in scif_poll(..) */ mutex_unlock(&cdev->cosm_mutex); /* poll(..) with timeout on our endpoint */ rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_TIMEOUT_MSEC); mutex_lock(&cdev->cosm_mutex); if (rc < 0) { dev_err(&cdev->dev, "%s %d scif_poll rc %d\n", __func__, __LINE__, rc); continue; } /* There is a message from the card */ if (pollepd.revents & POLLIN) cosm_scif_recv(cdev); /* The peer endpoint is closed or this endpoint disconnected */ if (pollepd.revents & POLLHUP) { cosm_scif_close(cdev); break; } /* Did we timeout from poll? */ if (!rc && cdev->heartbeat_watchdog_enable) cosm_set_crashed(cdev); } exit: dev_dbg(&cdev->dev, "%s %d exiting\n", __func__, __LINE__); mutex_unlock(&cdev->cosm_mutex); }

Contributors

PersonTokensPropCommitsCommitProp
Ashutosh Dixit195100.00%1100.00%
Total195100.00%1100.00%

/* * COSM SCIF server thread function. Accepts incoming SCIF connections from MIC * cards, finds the correct cosm_device to associate that connection with and * schedules individual work items for each MIC card. */
static int cosm_scif_server(void *unused) { struct cosm_device *cdev; scif_epd_t newepd; struct scif_port_id port_id; int rc; allow_signal(SIGKILL); while (!kthread_should_stop()) { rc = scif_accept(listen_epd, &port_id, &newepd, SCIF_ACCEPT_SYNC); if (rc < 0) { if (-ERESTARTSYS != rc) pr_err("%s %d rc %d\n", __func__, __LINE__, rc); continue; } /* * Associate the incoming connection with a particular * cosm_device, COSM device ID == SCIF node ID - 1 */ cdev = cosm_find_cdev_by_id(port_id.node - 1); if (!cdev) continue; cdev->newepd = newepd; schedule_work(&cdev->scif_work); } pr_debug("%s %d Server thread stopped\n", __func__, __LINE__); return 0; }

Contributors

PersonTokensPropCommitsCommitProp
Ashutosh Dixit124100.00%1100.00%
Total124100.00%1100.00%


static int cosm_scif_listen(void) { int rc; listen_epd = scif_open(); if (!listen_epd) { pr_err("%s %d scif_open failed\n", __func__, __LINE__); return -ENOMEM; } rc = scif_bind(listen_epd, SCIF_COSM_LISTEN_PORT); if (rc < 0) { pr_err("%s %d scif_bind failed rc %d\n", __func__, __LINE__, rc); goto err; } rc = scif_listen(listen_epd, COSM_SCIF_BACKLOG); if (rc < 0) { pr_err("%s %d scif_listen rc %d\n", __func__, __LINE__, rc); goto err; } pr_debug("%s %d listen_epd set up\n", __func__, __LINE__); return 0; err: scif_close(listen_epd); listen_epd = NULL; return rc; }

Contributors

PersonTokensPropCommitsCommitProp
Ashutosh Dixit124100.00%1100.00%
Total124100.00%1100.00%


/* Close the listening SCIF endpoint if it is open and reset it to NULL */
static void cosm_scif_listen_exit(void)
{
	pr_debug("%s %d closing listen_epd\n", __func__, __LINE__);

	if (!listen_epd)
		return;

	scif_close(listen_epd);
	listen_epd = NULL;
}

Contributors

PersonTokensPropCommitsCommitProp
Ashutosh Dixit32100.00%1100.00%
Total32100.00%1100.00%

/*
 * Create a listening SCIF endpoint and a server kthread which accepts incoming
 * SCIF connections from MIC cards.
 *
 * Returns 0 on success or a negative error code. If the kthread cannot be
 * started, the listening endpoint is closed again before returning.
 */
int cosm_scif_init(void)
{
	int rc;

	rc = cosm_scif_listen();
	if (rc) {
		pr_err("%s %d cosm_scif_listen rc %d\n",
		       __func__, __LINE__, rc);
		return rc;
	}

	server_thread = kthread_run(cosm_scif_server, NULL, "cosm_server");
	if (!IS_ERR(server_thread))
		return 0;

	rc = PTR_ERR(server_thread);
	pr_err("%s %d kthread_run rc %d\n", __func__, __LINE__, rc);

	/* Thread creation failed: tear the listening endpoint down again */
	cosm_scif_listen_exit();
	return rc;
}

Contributors

PersonTokensPropCommitsCommitProp
Ashutosh Dixit87100.00%1100.00%
Total87100.00%1100.00%

/*
 * Stop the running server thread and close the listening SCIF endpoint.
 *
 * The thread is sent SIGKILL before kthread_stop(..) is called (presumably to
 * interrupt a blocking scif_accept(..) -- confirm).
 *
 * NOTE(review): if send_sig(..) fails this returns early, leaving the thread
 * running and the listening endpoint open -- confirm this is intended.
 */
void cosm_scif_exit(void)
{
	if (!IS_ERR_OR_NULL(server_thread)) {
		int rc = send_sig(SIGKILL, server_thread, 0);

		if (rc) {
			pr_err("%s %d send_sig rc %d\n",
			       __func__, __LINE__, rc);
			return;
		}
		kthread_stop(server_thread);
	}

	cosm_scif_listen_exit();
}

Contributors

PersonTokensPropCommitsCommitProp
Ashutosh Dixit57100.00%1100.00%
Total57100.00%1100.00%


Overall Contributors

PersonTokensPropCommitsCommitProp
Ashutosh Dixit119899.75%150.00%
Ingo Molnar30.25%150.00%
Total1201100.00%2100.00%
Information contained on this website is for historical information purposes only and does not indicate or represent copyright ownership.
Created with cregit.