summaryrefslogtreecommitdiffstats
path: root/drivers/ntb
diff options
context:
space:
mode:
authorLogan Gunthorpe <logang@deltatee.com>2017-12-04 10:57:21 -0700
committerJon Mason <jdmason@kudzu.us>2018-01-28 22:17:23 -0500
commitd04be142b8b61ffb3c9cc5c6d1abda8fc59a16c9 (patch)
treeac62252c3e7724979c0baf852743dd79f5f750be /drivers/ntb
parent270d32e63c70c808a91449da24324e0009827c5f (diff)
downloadlinux-d04be142b8b61ffb3c9cc5c6d1abda8fc59a16c9.tar.bz2
ntb_hw_switchtec: Force down the link before initializing
If one host crashes and soft reboots, the other host may not see a link down event. Then when the crashed host comes back up, the surviving host may not know the link was reset and the NTB clients may not work without being reset. To solve this, we send a LINK_FORCE_DOWN message to each peer every time we come up, before we register the NTB device. If a surviving host still thinks the link is up it will take it down immediately. In this way, once the crashed host comes up fully, it will send a regular link up event as per usual and the link will be properly restarted. While we are in the area, this also fixes the MSG_LINK_UP message that was in the link down function that was reported by Doug Meyers. Signed-off-by: Logan Gunthorpe <logang@deltatee.com> Reported-by: ThanhTuThai <cruisethai@gmail.com> Signed-off-by: Jon Mason <jdmason@kudzu.us>
Diffstat (limited to 'drivers/ntb')
-rw-r--r--drivers/ntb/hw/mscc/ntb_hw_switchtec.c57
1 files changed, 50 insertions, 7 deletions
diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
index 145b31209f20..bcd5b6fb3800 100644
--- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
+++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
@@ -122,6 +122,7 @@ struct switchtec_ntb {
bool link_is_up;
enum ntb_speed link_speed;
enum ntb_width link_width;
+ struct work_struct link_reinit_work;
};
static struct switchtec_ntb *ntb_sndev(struct ntb_dev *ntb)
@@ -494,18 +495,43 @@ static void crosslink_init_dbmsgs(struct switchtec_ntb *sndev)
&sndev->mmio_peer_dbmsg->odb_mask);
}
-enum {
+enum switchtec_msg {
LINK_MESSAGE = 0,
MSG_LINK_UP = 1,
MSG_LINK_DOWN = 2,
MSG_CHECK_LINK = 3,
+ MSG_LINK_FORCE_DOWN = 4,
};
-static void switchtec_ntb_check_link(struct switchtec_ntb *sndev)
+static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev);
+
+static void link_reinit_work(struct work_struct *work)
+{
+ struct switchtec_ntb *sndev;
+
+ sndev = container_of(work, struct switchtec_ntb, link_reinit_work);
+
+ switchtec_ntb_reinit_peer(sndev);
+}
+
+static void switchtec_ntb_check_link(struct switchtec_ntb *sndev,
+ enum switchtec_msg msg)
{
int link_sta;
int old = sndev->link_is_up;
+ if (msg == MSG_LINK_FORCE_DOWN) {
+ schedule_work(&sndev->link_reinit_work);
+
+ if (sndev->link_is_up) {
+ sndev->link_is_up = 0;
+ ntb_link_event(&sndev->ntb);
+ dev_info(&sndev->stdev->dev, "ntb link forced down\n");
+ }
+
+ return;
+ }
+
link_sta = sndev->self_shared->link_sta;
if (link_sta) {
u64 peer = ioread64(&sndev->peer_shared->magic);
@@ -534,7 +560,7 @@ static void switchtec_ntb_link_notification(struct switchtec_dev *stdev)
{
struct switchtec_ntb *sndev = stdev->sndev;
- switchtec_ntb_check_link(sndev);
+ switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
}
static u64 switchtec_ntb_link_is_up(struct ntb_dev *ntb,
@@ -562,7 +588,7 @@ static int switchtec_ntb_link_enable(struct ntb_dev *ntb,
sndev->self_shared->link_sta = 1;
switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_UP);
- switchtec_ntb_check_link(sndev);
+ switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
return 0;
}
@@ -574,9 +600,9 @@ static int switchtec_ntb_link_disable(struct ntb_dev *ntb)
dev_dbg(&sndev->stdev->dev, "disabling link\n");
sndev->self_shared->link_sta = 0;
- switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_UP);
+ switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_DOWN);
- switchtec_ntb_check_link(sndev);
+ switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
return 0;
}
@@ -822,6 +848,8 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev)
sndev->ntb.topo = NTB_TOPO_SWITCH;
sndev->ntb.ops = &switchtec_ntb_ops;
+ INIT_WORK(&sndev->link_reinit_work, link_reinit_work);
+
sndev->self_partition = sndev->stdev->partition;
sndev->mmio_ntb = sndev->stdev->mmio_ntb;
@@ -1368,7 +1396,7 @@ static irqreturn_t switchtec_ntb_message_isr(int irq, void *dev)
iowrite8(1, &sndev->mmio_self_dbmsg->imsg[i].status);
if (i == LINK_MESSAGE)
- switchtec_ntb_check_link(sndev);
+ switchtec_ntb_check_link(sndev, msg);
}
}
@@ -1429,6 +1457,14 @@ static void switchtec_ntb_deinit_db_msg_irq(struct switchtec_ntb *sndev)
free_irq(sndev->message_irq, sndev);
}
+static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev)
+{
+ dev_info(&sndev->stdev->dev, "peer reinitialized\n");
+ switchtec_ntb_deinit_shared_mw(sndev);
+ switchtec_ntb_init_mw(sndev);
+ return switchtec_ntb_init_shared_mw(sndev);
+}
+
static int switchtec_ntb_add(struct device *dev,
struct class_interface *class_intf)
{
@@ -1471,6 +1507,13 @@ static int switchtec_ntb_add(struct device *dev,
if (rc)
goto deinit_shared_and_exit;
+ /*
+ * If this host crashed, the other host may think the link is
+ * still up. Tell them to force it down (it will go back up
+ * once we register the ntb device).
+ */
+ switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_FORCE_DOWN);
+
rc = ntb_register_device(&sndev->ntb);
if (rc)
goto deinit_and_exit;