From 7807e086a2d1f69cc1a57958cac04fea79fc2112 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Sat, 11 Feb 2017 14:02:49 +0100 Subject: ARM: OMAP2+: gpmc-onenand: propagate error on initialization failure gpmc_probe_onenand_child returns success even on gpmc_onenand_init failure. Fix that. Signed-off-by: Ladislav Michl Acked-by: Roger Quadros Signed-off-by: Tony Lindgren --- include/linux/omap-gpmc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 35d0fd7a4948..e821a3132a3e 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -88,10 +88,11 @@ static inline int gpmc_nand_init(struct omap_nand_platform_data *d, #endif #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) -extern void gpmc_onenand_init(struct omap_onenand_platform_data *d); +extern int gpmc_onenand_init(struct omap_onenand_platform_data *d); #else #define board_onenand_data NULL -static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d) +static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d) { + return 0; } #endif -- cgit v1.2.3 From ac28e47ccc3ff8dabce1aec6b224760c3e524044 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Tue, 21 Feb 2017 10:44:45 +0100 Subject: ARM: OMAP2+: Remove legacy gpmc-nand.c This code is no longer used and can be removed as we are using device tree. Removing this code also removes a dependency between drivers/mtd and arch/arm/mach-omap2 making furhter driver changes easier. Signed-off-by: Ladislav Michl [tony@atomide.com: removed from header too, updated comments] Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Makefile | 3 - arch/arm/mach-omap2/gpmc-nand.c | 154 ---------------------------------------- include/linux/omap-gpmc.h | 11 --- 3 files changed, 168 deletions(-) delete mode 100644 arch/arm/mach-omap2/gpmc-nand.c (limited to 'include') diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 093458b62c8d..c89757abb0ae 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -241,6 +241,3 @@ obj-$(CONFIG_MACH_OMAP2_TUSB6010) += usb-tusb6010.o onenand-$(CONFIG_MTD_ONENAND_OMAP2) := gpmc-onenand.o obj-y += $(onenand-m) $(onenand-y) - -nand-$(CONFIG_MTD_NAND_OMAP2) := gpmc-nand.o -obj-y += $(nand-m) $(nand-y) diff --git a/arch/arm/mach-omap2/gpmc-nand.c b/arch/arm/mach-omap2/gpmc-nand.c deleted file mode 100644 index f6ac027f3c3b..000000000000 --- a/arch/arm/mach-omap2/gpmc-nand.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * gpmc-nand.c - * - * Copyright (C) 2009 Texas Instruments - * Vimal Singh - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include "soc.h" - -/* minimum size for IO mapping */ -#define NAND_IO_SIZE 4 - -static bool gpmc_hwecc_bch_capable(enum omap_ecc ecc_opt) -{ - /* platforms which support all ECC schemes */ - if (soc_is_am33xx() || soc_is_am43xx() || cpu_is_omap44xx() || - soc_is_omap54xx() || soc_is_dra7xx()) - return 1; - - if (ecc_opt == OMAP_ECC_BCH4_CODE_HW_DETECTION_SW || - ecc_opt == OMAP_ECC_BCH8_CODE_HW_DETECTION_SW) { - if (cpu_is_omap24xx()) - return 0; - else if (cpu_is_omap3630() && (GET_OMAP_REVISION() == 0)) - return 0; - else - return 1; - } - - /* OMAP3xxx do not have ELM engine, so cannot support ECC schemes - * which require H/W based ECC error detection */ - if ((cpu_is_omap34xx() || cpu_is_omap3630()) && - ((ecc_opt == OMAP_ECC_BCH4_CODE_HW) || - (ecc_opt == OMAP_ECC_BCH8_CODE_HW))) - return 0; - - /* legacy platforms support only HAM1 (1-bit Hamming) ECC scheme */ - if (ecc_opt == OMAP_ECC_HAM1_CODE_HW || - ecc_opt == OMAP_ECC_HAM1_CODE_SW) - return 1; - else - return 0; -} - -/* This function will go away once the device-tree convertion is complete */ -static void gpmc_set_legacy(struct omap_nand_platform_data *gpmc_nand_data, - struct gpmc_settings *s) -{ - /* Enable RD PIN Monitoring Reg */ - if (gpmc_nand_data->dev_ready) { - s->wait_on_read = true; - s->wait_on_write = true; - } - - if (gpmc_nand_data->devsize == NAND_BUSWIDTH_16) - s->device_width = GPMC_DEVWIDTH_16BIT; - else - s->device_width = GPMC_DEVWIDTH_8BIT; -} - -int gpmc_nand_init(struct omap_nand_platform_data *gpmc_nand_data, - struct gpmc_timings *gpmc_t) -{ - int err = 0; - struct gpmc_settings s; - struct platform_device *pdev; - struct resource gpmc_nand_res[] = { - { .flags = IORESOURCE_MEM, }, - { .flags = IORESOURCE_IRQ, }, - { .flags = IORESOURCE_IRQ, }, - }; - - BUG_ON(gpmc_nand_data->cs >= GPMC_CS_NUM); - - err = gpmc_cs_request(gpmc_nand_data->cs, NAND_IO_SIZE, - (unsigned long *)&gpmc_nand_res[0].start); - if (err < 0) { - pr_err("omap2-gpmc: Cannot request GPMC CS %d, error %d\n", - gpmc_nand_data->cs, err); - return err; - } - gpmc_nand_res[0].end = gpmc_nand_res[0].start + NAND_IO_SIZE - 1; - gpmc_nand_res[1].start = gpmc_get_client_irq(GPMC_IRQ_FIFOEVENTENABLE); - gpmc_nand_res[2].start = gpmc_get_client_irq(GPMC_IRQ_COUNT_EVENT); - - memset(&s, 0, sizeof(struct gpmc_settings)); - gpmc_set_legacy(gpmc_nand_data, &s); - - s.device_nand = true; - - if (gpmc_t) { - err = gpmc_cs_set_timings(gpmc_nand_data->cs, gpmc_t, &s); - if (err < 0) { - pr_err("omap2-gpmc: Unable to set gpmc timings: %d\n", - err); - return err; - } - } - - err = gpmc_cs_program_settings(gpmc_nand_data->cs, &s); - if (err < 0) - goto out_free_cs; - - err = gpmc_configure(GPMC_CONFIG_WP, 0); - if (err < 0) - goto out_free_cs; - - if (!gpmc_hwecc_bch_capable(gpmc_nand_data->ecc_opt)) { - pr_err("omap2-nand: Unsupported NAND ECC scheme selected\n"); - err = -EINVAL; - goto out_free_cs; - } - - - pdev = platform_device_alloc("omap2-nand", gpmc_nand_data->cs); - if (pdev) { - err = platform_device_add_resources(pdev, gpmc_nand_res, - ARRAY_SIZE(gpmc_nand_res)); - if (!err) - pdev->dev.platform_data = gpmc_nand_data; - } else { - err = -ENOMEM; - } - if (err) - goto out_free_pdev; - - err = platform_device_add(pdev); - if (err) { - dev_err(&pdev->dev, "Unable to register NAND device\n"); - goto out_free_pdev; - } - - return 0; - -out_free_pdev: - platform_device_put(pdev); -out_free_cs: - gpmc_cs_free(gpmc_nand_data->cs); - - return err; -} diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index e821a3132a3e..fd0de00c0d77 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -76,17 +76,6 @@ struct gpmc_timings; struct omap_nand_platform_data; struct omap_onenand_platform_data; -#if IS_ENABLED(CONFIG_MTD_NAND_OMAP2) -extern int gpmc_nand_init(struct omap_nand_platform_data *d, - struct gpmc_timings *gpmc_t); -#else -static inline int gpmc_nand_init(struct omap_nand_platform_data *d, - struct gpmc_timings *gpmc_t) -{ - return 0; -} -#endif - #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) extern int gpmc_onenand_init(struct omap_onenand_platform_data *d); #else -- cgit v1.2.3 From 6f8830f5bbab16e54f261de187f3df4644a5b977 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Mon, 27 Feb 2017 16:58:36 -0800 Subject: scsi: libiscsi: add lock around task lists to fix list corruption regression There's a rather long standing regression from the commit "libiscsi: Reduce locking contention in fast path" Depending on iSCSI target behavior, it's possible to hit the case in iscsi_complete_task where the task is still on a pending list (!list_empty(&task->running)). When that happens the task is removed from the list while holding the session back_lock, but other task list modification occur under the frwd_lock. That leads to linked list corruption and eventually a panicked system. Rather than back out the session lock split entirely, in order to try and keep some of the performance gains this patch adds another lock to maintain the task lists integrity. Major enterprise supported kernels have been backing out the lock split for while now, thanks to the efforts at IBM where a lab setup has the most reliable reproducer I've seen on this issue. This patch has been tested there successfully. Signed-off-by: Chris Leech Fixes: 659743b02c41 ("[SCSI] libiscsi: Reduce locking contention in fast path") Reported-by: Prashantha Subbarao Reviewed-by: Guilherme G. Piccoli Cc: # v3.15+ Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 26 +++++++++++++++++++++++++- include/scsi/libiscsi.h | 1 + 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 834d1212b6d5..3fca34a675af 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -560,8 +560,12 @@ static void iscsi_complete_task(struct iscsi_task *task, int state) WARN_ON_ONCE(task->state == ISCSI_TASK_FREE); task->state = state; - if (!list_empty(&task->running)) + spin_lock_bh(&conn->taskqueuelock); + if (!list_empty(&task->running)) { + pr_debug_once("%s while task on list", __func__); list_del_init(&task->running); + } + spin_unlock_bh(&conn->taskqueuelock); if (conn->task == task) conn->task = NULL; @@ -783,7 +787,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, if (session->tt->xmit_task(task)) goto free_task; } else { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&task->running, &conn->mgmtqueue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } @@ -1474,8 +1480,10 @@ void iscsi_requeue_task(struct iscsi_task *task) * this may be on the requeue list already if the xmit_task callout * is handling the r2ts while we are adding new ones */ + spin_lock_bh(&conn->taskqueuelock); if (list_empty(&task->running)) list_add_tail(&task->running, &conn->requeue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } EXPORT_SYMBOL_GPL(iscsi_requeue_task); @@ -1512,22 +1520,26 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) * only have one nop-out as a ping from us and targets should not * overflow us with nop-ins */ + spin_lock_bh(&conn->taskqueuelock); check_mgmt: while (!list_empty(&conn->mgmtqueue)) { conn->task = list_entry(conn->mgmtqueue.next, struct iscsi_task, running); list_del_init(&conn->task->running); + spin_unlock_bh(&conn->taskqueuelock); if (iscsi_prep_mgmt_task(conn, conn->task)) { /* regular RX path uses back_lock */ spin_lock_bh(&conn->session->back_lock); __iscsi_put_task(conn->task); spin_unlock_bh(&conn->session->back_lock); conn->task = NULL; + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_xmit_task(conn); if (rc) goto done; + spin_lock_bh(&conn->taskqueuelock); } /* process pending command queue */ @@ -1535,19 +1547,24 @@ check_mgmt: conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task, running); list_del_init(&conn->task->running); + spin_unlock_bh(&conn->taskqueuelock); if (conn->session->state == ISCSI_STATE_LOGGING_OUT) { fail_scsi_task(conn->task, DID_IMM_RETRY); + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_prep_scsi_cmd_pdu(conn->task); if (rc) { if (rc == -ENOMEM || rc == -EACCES) { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&conn->task->running, &conn->cmdqueue); conn->task = NULL; + spin_unlock_bh(&conn->taskqueuelock); goto done; } else fail_scsi_task(conn->task, DID_ABORT); + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_xmit_task(conn); @@ -1558,6 +1575,7 @@ check_mgmt: * we need to check the mgmt queue for nops that need to * be sent to aviod starvation */ + spin_lock_bh(&conn->taskqueuelock); if (!list_empty(&conn->mgmtqueue)) goto check_mgmt; } @@ -1577,12 +1595,15 @@ check_mgmt: conn->task = task; list_del_init(&conn->task->running); conn->task->state = ISCSI_TASK_RUNNING; + spin_unlock_bh(&conn->taskqueuelock); rc = iscsi_xmit_task(conn); if (rc) goto done; + spin_lock_bh(&conn->taskqueuelock); if (!list_empty(&conn->mgmtqueue)) goto check_mgmt; } + spin_unlock_bh(&conn->taskqueuelock); spin_unlock_bh(&conn->session->frwd_lock); return -ENODATA; @@ -1738,7 +1759,9 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) goto prepd_reject; } } else { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&task->running, &conn->cmdqueue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } @@ -2896,6 +2919,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, INIT_LIST_HEAD(&conn->mgmtqueue); INIT_LIST_HEAD(&conn->cmdqueue); INIT_LIST_HEAD(&conn->requeue); + spin_lock_init(&conn->taskqueuelock); INIT_WORK(&conn->xmitwork, iscsi_xmitworker); /* allocate login_task used for the login/text sequences */ diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index b0e275de6dec..583875ea136a 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -196,6 +196,7 @@ struct iscsi_conn { struct iscsi_task *task; /* xmit task in progress */ /* xmit */ + spinlock_t taskqueuelock; /* protects the next three lists */ struct list_head mgmtqueue; /* mgmt (control) xmit queue */ struct list_head cmdqueue; /* data-path cmd queue */ struct list_head requeue; /* tasks needing another run */ -- cgit v1.2.3 From 8893cf6cb1cf56334c05120e23092dbfc9423ebb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 1 Mar 2017 09:00:36 -0800 Subject: scsi: mpt3sas: Avoid sleeping in interrupt context Commit 669f044170d8 ("scsi: srp_transport: Move queuecommand() wait code to SCSI core") can make scsi_internal_device_block() sleep. However, the mpt3sas driver can call this function from an interrupt handler. Hence add a second argument to scsi_internal_device_block() that restores the old behavior of this function for the mpt3sas handler. The call chain that triggered an "IRQ handler enabled interrupts" complaint is as follows: _base_interrupt() -> _base_async_event() -> mpt3sas_scsih_event_callback() -> _scsih_check_topo_delete_events() -> _scsih_block_io_to_children_attached_directly() -> _scsih_block_io_device() -> _scsih_internal_device_block() -> scsi_internal_device_block() Reported-by: Omar Sandoval Signed-off-by: Bart Van Assche Cc: Omar Sandoval Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Sathya Prakash Cc: Chaitra P B Cc: Suganath Prabu Subramani Cc: Sreekanth Reddy Cc: # v4.10+ Tested-by: Omar Sandoval Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.h | 3 --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 ++-- drivers/scsi/scsi_lib.c | 14 ++++++++++---- drivers/scsi/scsi_priv.h | 3 --- include/scsi/scsi_device.h | 4 ++++ 5 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 7fe7e6ed595b..8981806fb13f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -1442,9 +1442,6 @@ void mpt3sas_transport_update_links(struct MPT3SAS_ADAPTER *ioc, u64 sas_address, u16 handle, u8 phy_number, u8 link_rate); extern struct sas_function_template mpt3sas_transport_functions; extern struct scsi_transport_template *mpt3sas_transport_template; -extern int scsi_internal_device_block(struct scsi_device *sdev); -extern int scsi_internal_device_unblock(struct scsi_device *sdev, - enum scsi_device_state new_state); /* trigger data externs */ void mpt3sas_send_trigger_data_event(struct MPT3SAS_ADAPTER *ioc, struct SL_WH_TRIGGERS_EVENT_DATA_T *event_data); diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 69c29c560575..919ba2bb15f1 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2859,7 +2859,7 @@ _scsih_internal_device_block(struct scsi_device *sdev, sas_device_priv_data->sas_target->handle); sas_device_priv_data->block = 1; - r = scsi_internal_device_block(sdev); + r = scsi_internal_device_block(sdev, false); if (r == -EINVAL) sdev_printk(KERN_WARNING, sdev, "device_block failed with return(%d) for handle(0x%04x)\n", @@ -2895,7 +2895,7 @@ _scsih_internal_device_unblock(struct scsi_device *sdev, "performing a block followed by an unblock\n", r, sas_device_priv_data->sas_target->handle); sas_device_priv_data->block = 1; - r = scsi_internal_device_block(sdev); + r = scsi_internal_device_block(sdev, false); if (r) sdev_printk(KERN_WARNING, sdev, "retried device_block " "failed with return(%d) for handle(0x%04x)\n", diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index f5e45a252485..f41e6b84a1bd 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2932,6 +2932,8 @@ EXPORT_SYMBOL(scsi_target_resume); /** * scsi_internal_device_block - internal function to put a device temporarily into the SDEV_BLOCK state * @sdev: device to block + * @wait: Whether or not to wait until ongoing .queuecommand() / + * .queue_rq() calls have finished. * * Block request made by scsi lld's to temporarily stop all * scsi commands on the specified device. May sleep. @@ -2949,7 +2951,7 @@ EXPORT_SYMBOL(scsi_target_resume); * remove the rport mutex lock and unlock calls from srp_queuecommand(). */ int -scsi_internal_device_block(struct scsi_device *sdev) +scsi_internal_device_block(struct scsi_device *sdev, bool wait) { struct request_queue *q = sdev->request_queue; unsigned long flags; @@ -2969,12 +2971,16 @@ scsi_internal_device_block(struct scsi_device *sdev) * request queue. */ if (q->mq_ops) { - blk_mq_quiesce_queue(q); + if (wait) + blk_mq_quiesce_queue(q); + else + blk_mq_stop_hw_queues(q); } else { spin_lock_irqsave(q->queue_lock, flags); blk_stop_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); - scsi_wait_for_queuecommand(sdev); + if (wait) + scsi_wait_for_queuecommand(sdev); } return 0; @@ -3036,7 +3042,7 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_unblock); static void device_block(struct scsi_device *sdev, void *data) { - scsi_internal_device_block(sdev); + scsi_internal_device_block(sdev, true); } static int diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 99bfc985e190..f11bd102d6d5 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -188,8 +188,5 @@ static inline void scsi_dh_remove_device(struct scsi_device *sdev) { } */ #define SCSI_DEVICE_BLOCK_MAX_TIMEOUT 600 /* units in seconds */ -extern int scsi_internal_device_block(struct scsi_device *sdev); -extern int scsi_internal_device_unblock(struct scsi_device *sdev, - enum scsi_device_state new_state); #endif /* _SCSI_PRIV_H */ diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 6f22b39f1b0c..080c7ce9bae8 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -472,6 +472,10 @@ static inline int scsi_device_created(struct scsi_device *sdev) sdev->sdev_state == SDEV_CREATED_BLOCK; } +int scsi_internal_device_block(struct scsi_device *sdev, bool wait); +int scsi_internal_device_unblock(struct scsi_device *sdev, + enum scsi_device_state new_state); + /* accessor functions for the SCSI parameters */ static inline int scsi_device_sync(struct scsi_device *sdev) { -- cgit v1.2.3 From 0878fff1f42c18e448ab5b8b4f6a3eb32365b5b6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 5 Mar 2017 12:34:49 -0800 Subject: net: phy: Do not perform software reset for Generic PHY The Generic PHY driver is a catch-all PHY driver and it should preserve whatever prior initialization has been done by boot loader or firmware agents. For specific PHY device configuration it is expected that a specialized PHY driver would take over that role. Resetting the generic PHY was a bad idea that has lead to several complaints and downstream workarounds e.g: in OpenWrt/LEDE so restore the behavior prior to 87aa9f9c61ad ("net: phy: consolidate PHY reset in phy_init_hw()"). Reported-by: Felix Fietkau Fixes: 87aa9f9c61ad ("net: phy: consolidate PHY reset in phy_init_hw()") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 2 +- include/linux/phy.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index daec6555f3b1..5198ccfa347f 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1864,7 +1864,7 @@ static struct phy_driver genphy_driver[] = { .phy_id = 0xffffffff, .phy_id_mask = 0xffffffff, .name = "Generic PHY", - .soft_reset = genphy_soft_reset, + .soft_reset = genphy_no_soft_reset, .config_init = genphy_config_init, .features = PHY_GBIT_FEATURES | SUPPORTED_MII | SUPPORTED_AUI | SUPPORTED_FIBRE | diff --git a/include/linux/phy.h b/include/linux/phy.h index 772476028a65..43a774873aa9 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -837,6 +837,10 @@ int genphy_read_status(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); int genphy_soft_reset(struct phy_device *phydev); +static inline int genphy_no_soft_reset(struct phy_device *phydev) +{ + return 0; +} void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_driver_register(struct phy_driver *new_driver, struct module *owner); -- cgit v1.2.3 From 68598d2ea886322f9b4b0058e5b288418622de95 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 1 Mar 2017 02:12:50 +0300 Subject: btrfs: remove btrfs_err_str function from uapi/linux/btrfs.h btrfs_err_str function is not called from anywhere and is replicated in the userspace headers for btrfs-progs. It's removal also fixes the following linux/btrfs.h userspace compilation error: /usr/include/linux/btrfs.h: In function 'btrfs_err_str': /usr/include/linux/btrfs.h:740:11: error: 'NULL' undeclared (first use in this function) return NULL; Suggested-by: Jeff Mahoney Signed-off-by: Dmitry V. Levin Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index db4c253f8011..dcfc3a5a9cb1 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -713,33 +713,6 @@ enum btrfs_err_code { BTRFS_ERROR_DEV_ONLY_WRITABLE, BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS }; -/* An error code to error string mapping for the kernel -* error codes -*/ -static inline char *btrfs_err_str(enum btrfs_err_code err_code) -{ - switch (err_code) { - case BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET: - return "unable to go below two devices on raid1"; - case BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET: - return "unable to go below four devices on raid10"; - case BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET: - return "unable to go below two devices on raid5"; - case BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET: - return "unable to go below three devices on raid6"; - case BTRFS_ERROR_DEV_TGT_REPLACE: - return "unable to remove the dev_replace target dev"; - case BTRFS_ERROR_DEV_MISSING_NOT_FOUND: - return "no missing devices found to remove"; - case BTRFS_ERROR_DEV_ONLY_WRITABLE: - return "unable to remove the only writeable device"; - case BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS: - return "add/delete/balance/replace/resize operation "\ - "in progress"; - default: - return NULL; - } -} #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) -- cgit v1.2.3 From 62f8f4d9066c1c6f2474845d1ca7e2891f2ae3fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 5 Mar 2017 10:52:16 -0800 Subject: dccp: fix use-after-free in dccp_feat_activate_values Dmitry reported crashes in DCCP stack [1] Problem here is that when I got rid of listener spinlock, I missed the fact that DCCP stores a complex state in struct dccp_request_sock, while TCP does not. Since multiple cpus could access it at the same time, we need to add protection. [1] BUG: KASAN: use-after-free in dccp_feat_activate_values+0x967/0xab0 net/dccp/feat.c:1541 at addr ffff88003713be68 Read of size 8 by task syz-executor2/8457 CPU: 2 PID: 8457 Comm: syz-executor2 Not tainted 4.10.0-rc7+ #127 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x292/0x398 lib/dump_stack.c:51 kasan_object_err+0x1c/0x70 mm/kasan/report.c:162 print_address_description mm/kasan/report.c:200 [inline] kasan_report_error mm/kasan/report.c:289 [inline] kasan_report.part.1+0x20e/0x4e0 mm/kasan/report.c:311 kasan_report mm/kasan/report.c:332 [inline] __asan_report_load8_noabort+0x29/0x30 mm/kasan/report.c:332 dccp_feat_activate_values+0x967/0xab0 net/dccp/feat.c:1541 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 do_softirq.part.17+0x1e8/0x230 kernel/softirq.c:328 do_softirq kernel/softirq.c:176 [inline] __local_bh_enable_ip+0x1f2/0x200 kernel/softirq.c:181 local_bh_enable include/linux/bottom_half.h:31 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:971 [inline] ip6_finish_output2+0xbb0/0x23d0 net/ipv6/ip6_output.c:123 ip6_finish_output+0x302/0x960 net/ipv6/ip6_output.c:148 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip6_output+0x1cb/0x8d0 net/ipv6/ip6_output.c:162 ip6_xmit+0xcdf/0x20d0 include/net/dst.h:501 inet6_csk_xmit+0x320/0x5f0 net/ipv6/inet6_connection_sock.c:179 dccp_transmit_skb+0xb09/0x1120 net/dccp/output.c:141 dccp_xmit_packet+0x215/0x760 net/dccp/output.c:280 dccp_write_xmit+0x168/0x1d0 net/dccp/output.c:362 dccp_sendmsg+0x79c/0xb10 net/dccp/proto.c:796 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 [inline] sock_sendmsg+0xca/0x110 net/socket.c:645 SYSC_sendto+0x660/0x810 net/socket.c:1687 SyS_sendto+0x40/0x50 net/socket.c:1655 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x4458b9 RSP: 002b:00007f8ceb77bb58 EFLAGS: 00000282 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000017 RCX: 00000000004458b9 RDX: 0000000000000023 RSI: 0000000020e60000 RDI: 0000000000000017 RBP: 00000000006e1b90 R08: 00000000200f9fe1 R09: 0000000000000020 R10: 0000000000008010 R11: 0000000000000282 R12: 00000000007080a8 R13: 0000000000000000 R14: 00007f8ceb77c9c0 R15: 00007f8ceb77c700 Object at ffff88003713be50, in cache kmalloc-64 size: 64 Allocated: PID = 8446 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605 kmem_cache_alloc_trace+0x82/0x270 mm/slub.c:2738 kmalloc include/linux/slab.h:490 [inline] dccp_feat_entry_new+0x214/0x410 net/dccp/feat.c:467 dccp_feat_push_change+0x38/0x220 net/dccp/feat.c:487 __feat_register_sp+0x223/0x2f0 net/dccp/feat.c:741 dccp_feat_propagate_ccid+0x22b/0x2b0 net/dccp/feat.c:949 dccp_feat_server_ccid_dependencies+0x1b3/0x250 net/dccp/feat.c:1012 dccp_make_response+0x1f1/0xc90 net/dccp/output.c:423 dccp_v6_send_response+0x4ec/0xc20 net/dccp/ipv6.c:217 dccp_v6_conn_request+0xaba/0x11b0 net/dccp/ipv6.c:377 dccp_rcv_state_process+0x51e/0x1650 net/dccp/input.c:606 dccp_v6_do_rcv+0x213/0x350 net/dccp/ipv6.c:632 sk_backlog_rcv include/net/sock.h:893 [inline] __sk_receive_skb+0x36f/0xcc0 net/core/sock.c:479 dccp_v6_rcv+0xba5/0x1d00 net/dccp/ipv6.c:742 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 Freed: PID = 15 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 [inline] kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578 slab_free_hook mm/slub.c:1355 [inline] slab_free_freelist_hook mm/slub.c:1377 [inline] slab_free mm/slub.c:2954 [inline] kfree+0xe8/0x2b0 mm/slub.c:3874 dccp_feat_entry_destructor.part.4+0x48/0x60 net/dccp/feat.c:418 dccp_feat_entry_destructor net/dccp/feat.c:416 [inline] dccp_feat_list_pop net/dccp/feat.c:541 [inline] dccp_feat_activate_values+0x57f/0xab0 net/dccp/feat.c:1543 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 Memory state around the buggy address: ffff88003713bd00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88003713bd80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88003713be00: fc fc fc fc fc fc fc fc fc fc fb fb fb fb fb fb ^ Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: David S. Miller --- include/linux/dccp.h | 1 + net/dccp/minisocks.c | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 61d042bbbf60..68449293c4b6 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -163,6 +163,7 @@ struct dccp_request_sock { __u64 dreq_isr; __u64 dreq_gsr; __be32 dreq_service; + spinlock_t dreq_lock; struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index e267e6f4c9a5..abd07a443219 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -142,6 +142,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, struct dccp_request_sock *dreq = dccp_rsk(req); bool own_req; + /* TCP/DCCP listeners became lockless. + * DCCP stores complex state in its request_sock, so we need + * a protection for them, now this code runs without being protected + * by the parent (listener) lock. + */ + spin_lock_bh(&dreq->dreq_lock); + /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { @@ -156,7 +163,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, inet_rtx_syn_ack(sk, req); } /* Network Duplicate, discard packet */ - return NULL; + goto out; } DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; @@ -182,20 +189,20 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, req, &own_req); - if (!child) - goto listen_overflow; - - return inet_csk_complete_hashdance(sk, child, req, own_req); + if (child) { + child = inet_csk_complete_hashdance(sk, child, req, own_req); + goto out; + } -listen_overflow: - dccp_pr_debug("listen_overflow!\n"); DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) req->rsk_ops->send_reset(sk, skb); inet_csk_reqsk_queue_drop(sk, req); - return NULL; +out: + spin_unlock_bh(&dreq->dreq_lock); + return child; } EXPORT_SYMBOL_GPL(dccp_check_req); @@ -246,6 +253,7 @@ int dccp_reqsk_init(struct request_sock *req, { struct dccp_request_sock *dreq = dccp_rsk(req); + spin_lock_init(&dreq->dreq_lock); inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport; inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport); inet_rsk(req)->acked = 0; -- cgit v1.2.3 From 568af6de058cb2b0c5b98d98ffcf37cdc6bc38a7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 4 Mar 2017 19:53:47 +0100 Subject: netfilter: nf_tables: set pktinfo->thoff at AH header if found Phil Sutter reports that IPv6 AH header matching is broken. From userspace, nft generates bytecode that expects to find the AH header at NFT_PAYLOAD_TRANSPORT_HEADER both for IPv4 and IPv6. However, pktinfo->thoff is set to the inner header after the AH header in IPv6, while in IPv4 pktinfo->thoff points to the AH header indeed. This behaviour is inconsistent. This patch fixes this problem by updating ipv6_find_hdr() to get the IP6_FH_F_AUTH flag so this function stops at the AH header, so both IPv4 and IPv6 pktinfo->thoff point to the AH header. This is also inconsistent when trying to match encapsulated headers: 1) A packet that looks like IPv4 + AH + TCP dport 22 will *not* match. 2) A packet that looks like IPv6 + AH + TCP dport 22 will match. Reported-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv6.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index d150b5066201..97983d1c05e4 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -9,12 +9,13 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, struct sk_buff *skb, const struct nf_hook_state *state) { + unsigned int flags = IP6_FH_F_AUTH; int protohdr, thoff = 0; unsigned short frag_off; nft_set_pktinfo(pkt, skb, state); - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0) { nft_set_pktinfo_proto_unspec(pkt, skb); return; @@ -32,6 +33,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, const struct nf_hook_state *state) { #if IS_ENABLED(CONFIG_IPV6) + unsigned int flags = IP6_FH_F_AUTH; struct ipv6hdr *ip6h, _ip6h; unsigned int thoff = 0; unsigned short frag_off; @@ -50,7 +52,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, if (pkt_len + sizeof(*ip6h) > skb->len) return -1; - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0) return -1; -- cgit v1.2.3 From 745cb7f8a5de0805cade3de3991b7a95317c7c73 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 7 Mar 2017 23:50:50 +0300 Subject: uapi: fix linux/packet_diag.h userspace compilation error Replace MAX_ADDR_LEN with its numeric value to fix the following linux/packet_diag.h userspace compilation error: /usr/include/linux/packet_diag.h:67:17: error: 'MAX_ADDR_LEN' undeclared here (not in a function) __u8 pdmc_addr[MAX_ADDR_LEN]; This is not the first case in the UAPI where the numeric value of MAX_ADDR_LEN is used instead of symbolic one, uapi/linux/if_link.h already does the same: $ grep MAX_ADDR_LEN include/uapi/linux/if_link.h __u8 mac[32]; /* MAX_ADDR_LEN */ There are no UAPI headers besides these two that use MAX_ADDR_LEN. Signed-off-by: Dmitry V. Levin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/uapi/linux/packet_diag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h index d08c63f3dd6f..0c5d5dd61b6a 100644 --- a/include/uapi/linux/packet_diag.h +++ b/include/uapi/linux/packet_diag.h @@ -64,7 +64,7 @@ struct packet_diag_mclist { __u32 pdmc_count; __u16 pdmc_type; __u16 pdmc_alen; - __u8 pdmc_addr[MAX_ADDR_LEN]; + __u8 pdmc_addr[32]; /* MAX_ADDR_LEN */ }; struct packet_diag_ring { -- cgit v1.2.3 From 4fe8435909fddc97b81472026aa954e06dd192a5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 7 Mar 2017 20:00:13 -0800 Subject: bpf: convert htab map to hlist_nulls when all map elements are pre-allocated one cpu can delete and reuse htab_elem while another cpu is still walking the hlist. In such case the lookup may miss the element. Convert hlist to hlist_nulls to avoid such scenario. When bucket lock is taken there is no need to take such precautions, so only convert map_lookup and map_get_next to nulls. The race window is extremely small and only reproducible with explicit udelay() inside lookup_nulls_elem_raw() Similar to hlist add hlist_nulls_for_each_entry_safe() and hlist_nulls_entry_safe() helpers. Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Reported-by: Jonathan Perry Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/list_nulls.h | 5 +++ include/linux/rculist_nulls.h | 14 +++++++ kernel/bpf/hashtab.c | 94 +++++++++++++++++++++++++++---------------- 3 files changed, 79 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index b01fe1009084..87ff4f58a2f0 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -29,6 +29,11 @@ struct hlist_nulls_node { ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls)) #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) + +#define hlist_nulls_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \ + }) /** * ptr_is_a_nulls - Test if a ptr is a nulls * @ptr: ptr to be tested diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 4ae95f7e8597..a23a33153180 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -156,5 +156,19 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) +/** + * hlist_nulls_for_each_entry_safe - + * iterate over list of given type safe against removal of list entry + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_nulls_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_nulls_node within the struct. + */ +#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \ + for (({barrier();}), \ + pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ + (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \ + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });) #endif #endif diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 63c86a7be2a1..afe5bab376c9 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -13,11 +13,12 @@ #include #include #include +#include #include "percpu_freelist.h" #include "bpf_lru_list.h" struct bucket { - struct hlist_head head; + struct hlist_nulls_head head; raw_spinlock_t lock; }; @@ -44,7 +45,7 @@ enum extra_elem_state { /* each htab element is struct htab_elem + key + value */ struct htab_elem { union { - struct hlist_node hash_node; + struct hlist_nulls_node hash_node; struct { void *padding; union { @@ -337,7 +338,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) goto free_htab; for (i = 0; i < htab->n_buckets; i++) { - INIT_HLIST_HEAD(&htab->buckets[i].head); + INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); raw_spin_lock_init(&htab->buckets[i].lock); } @@ -377,28 +378,52 @@ static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) return &htab->buckets[hash & (htab->n_buckets - 1)]; } -static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) +static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash) { return &__select_bucket(htab, hash)->head; } -static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash, +/* this lookup function can only be called with bucket lock taken */ +static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash, void *key, u32 key_size) { + struct hlist_nulls_node *n; struct htab_elem *l; - hlist_for_each_entry_rcu(l, head, hash_node) + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) if (l->hash == hash && !memcmp(&l->key, key, key_size)) return l; return NULL; } +/* can be called without bucket lock. it will repeat the loop in + * the unlikely event when elements moved from one bucket into another + * while link list is being walked + */ +static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head, + u32 hash, void *key, + u32 key_size, u32 n_buckets) +{ + struct hlist_nulls_node *n; + struct htab_elem *l; + +again: + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) + if (l->hash == hash && !memcmp(&l->key, key, key_size)) + return l; + + if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1)))) + goto again; + + return NULL; +} + /* Called from syscall or from eBPF program */ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct htab_elem *l; u32 hash, key_size; @@ -411,7 +436,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) head = select_bucket(htab, hash); - l = lookup_elem_raw(head, hash, key, key_size); + l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); return l; } @@ -444,8 +469,9 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) { struct bpf_htab *htab = (struct bpf_htab *)arg; - struct htab_elem *l, *tgt_l; - struct hlist_head *head; + struct htab_elem *l = NULL, *tgt_l; + struct hlist_nulls_head *head; + struct hlist_nulls_node *n; unsigned long flags; struct bucket *b; @@ -455,9 +481,9 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) raw_spin_lock_irqsave(&b->lock, flags); - hlist_for_each_entry_rcu(l, head, hash_node) + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) if (l == tgt_l) { - hlist_del_rcu(&l->hash_node); + hlist_nulls_del_rcu(&l->hash_node); break; } @@ -470,7 +496,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct htab_elem *l, *next_l; u32 hash, key_size; int i; @@ -484,7 +510,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) head = select_bucket(htab, hash); /* lookup the key */ - l = lookup_elem_raw(head, hash, key, key_size); + l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); if (!l) { i = 0; @@ -492,7 +518,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) } /* key was found, get next key in the same bucket */ - next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)), + next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)), struct htab_elem, hash_node); if (next_l) { @@ -511,7 +537,7 @@ find_first_elem: head = select_bucket(htab, i); /* pick first element in the bucket */ - next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), + next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)), struct htab_elem, hash_node); if (next_l) { /* if it's not empty, just return it */ @@ -676,7 +702,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; - struct hlist_head *head; + struct hlist_nulls_head *head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -715,9 +741,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, /* add new element to the head of the list, so that * concurrent search will find it before old elem */ - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); if (l_old) { - hlist_del_rcu(&l_old->hash_node); + hlist_nulls_del_rcu(&l_old->hash_node); free_htab_elem(htab, l_old); } ret = 0; @@ -731,7 +757,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new, *l_old = NULL; - struct hlist_head *head; + struct hlist_nulls_head *head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -772,10 +798,10 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, /* add new element to the head of the list, so that * concurrent search will find it before old elem */ - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); if (l_old) { bpf_lru_node_set_ref(&l_new->lru_node); - hlist_del_rcu(&l_old->hash_node); + hlist_nulls_del_rcu(&l_old->hash_node); } ret = 0; @@ -796,7 +822,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; - struct hlist_head *head; + struct hlist_nulls_head *head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -835,7 +861,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, ret = PTR_ERR(l_new); goto err; } - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); } ret = 0; err: @@ -849,7 +875,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; - struct hlist_head *head; + struct hlist_nulls_head *head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -897,7 +923,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, } else { pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size), value, onallcpus); - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); l_new = NULL; } ret = 0; @@ -925,7 +951,7 @@ static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, static int htab_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct bucket *b; struct htab_elem *l; unsigned long flags; @@ -945,7 +971,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) l = lookup_elem_raw(head, hash, key, key_size); if (l) { - hlist_del_rcu(&l->hash_node); + hlist_nulls_del_rcu(&l->hash_node); free_htab_elem(htab, l); ret = 0; } @@ -957,7 +983,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct bucket *b; struct htab_elem *l; unsigned long flags; @@ -977,7 +1003,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) l = lookup_elem_raw(head, hash, key, key_size); if (l) { - hlist_del_rcu(&l->hash_node); + hlist_nulls_del_rcu(&l->hash_node); ret = 0; } @@ -992,12 +1018,12 @@ static void delete_all_elements(struct bpf_htab *htab) int i; for (i = 0; i < htab->n_buckets; i++) { - struct hlist_head *head = select_bucket(htab, i); - struct hlist_node *n; + struct hlist_nulls_head *head = select_bucket(htab, i); + struct hlist_nulls_node *n; struct htab_elem *l; - hlist_for_each_entry_safe(l, n, head, hash_node) { - hlist_del_rcu(&l->hash_node); + hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { + hlist_nulls_del_rcu(&l->hash_node); if (l->state != HTAB_EXTRA_ELEM_USED) htab_elem_free(htab, l); } -- cgit v1.2.3 From cdfbabfb2f0ce983fdaa42f20e5f7842178fc01e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 Mar 2017 08:09:05 +0000 Subject: net: Work around lockdep limitation in sockets that use sockets Lockdep issues a circular dependency warning when AFS issues an operation through AF_RXRPC from a context in which the VFS/VM holds the mmap_sem. The theory lockdep comes up with is as follows: (1) If the pagefault handler decides it needs to read pages from AFS, it calls AFS with mmap_sem held and AFS begins an AF_RXRPC call, but creating a call requires the socket lock: mmap_sem must be taken before sk_lock-AF_RXRPC (2) afs_open_socket() opens an AF_RXRPC socket and binds it. rxrpc_bind() binds the underlying UDP socket whilst holding its socket lock. inet_bind() takes its own socket lock: sk_lock-AF_RXRPC must be taken before sk_lock-AF_INET (3) Reading from a TCP socket into a userspace buffer might cause a fault and thus cause the kernel to take the mmap_sem, but the TCP socket is locked whilst doing this: sk_lock-AF_INET must be taken before mmap_sem However, lockdep's theory is wrong in this instance because it deals only with lock classes and not individual locks. The AF_INET lock in (2) isn't really equivalent to the AF_INET lock in (3) as the former deals with a socket entirely internal to the kernel that never sees userspace. This is a limitation in the design of lockdep. Fix the general case by: (1) Double up all the locking keys used in sockets so that one set are used if the socket is created by userspace and the other set is used if the socket is created by the kernel. (2) Store the kern parameter passed to sk_alloc() in a variable in the sock struct (sk_kern_sock). This informs sock_lock_init(), sock_init_data() and sk_clone_lock() as to the lock keys to be used. Note that the child created by sk_clone_lock() inherits the parent's kern setting. (3) Add a 'kern' parameter to ->accept() that is analogous to the one passed in to ->create() that distinguishes whether kernel_accept() or sys_accept4() was the caller and can be passed to sk_alloc(). Note that a lot of accept functions merely dequeue an already allocated socket. I haven't touched these as the new socket already exists before we get the parameter. Note also that there are a couple of places where I've made the accepted socket unconditionally kernel-based: irda_accept() rds_rcp_accept_one() tcp_accept_from_sock() because they follow a sock_create_kern() and accept off of that. Whilst creating this, I noticed that lustre and ocfs don't create sockets through sock_create_kern() and thus they aren't marked as for-kernel, though they appear to be internal. I wonder if these should do that so that they use the new set of lock keys. Signed-off-by: David Howells Signed-off-by: David S. Miller --- crypto/af_alg.c | 9 ++- crypto/algif_hash.c | 9 ++- drivers/staging/lustre/lnet/lnet/lib-socket.c | 4 +- fs/dlm/lowcomms.c | 2 +- fs/ocfs2/cluster/tcp.c | 2 +- include/crypto/if_alg.h | 2 +- include/linux/net.h | 2 +- include/net/inet_common.h | 3 +- include/net/inet_connection_sock.h | 2 +- include/net/sctp/structs.h | 3 +- include/net/sock.h | 9 ++- net/atm/svc.c | 5 +- net/ax25/af_ax25.c | 3 +- net/bluetooth/l2cap_sock.c | 2 +- net/bluetooth/rfcomm/sock.c | 3 +- net/bluetooth/sco.c | 2 +- net/core/sock.c | 106 ++++++++++++++------------ net/decnet/af_decnet.c | 5 +- net/ipv4/af_inet.c | 5 +- net/ipv4/inet_connection_sock.c | 2 +- net/irda/af_irda.c | 5 +- net/iucv/af_iucv.c | 2 +- net/llc/af_llc.c | 4 +- net/netrom/af_netrom.c | 3 +- net/nfc/llcp_sock.c | 2 +- net/phonet/pep.c | 6 +- net/phonet/socket.c | 4 +- net/rds/tcp_listen.c | 2 +- net/rose/af_rose.c | 3 +- net/sctp/ipv6.c | 5 +- net/sctp/protocol.c | 5 +- net/sctp/socket.c | 4 +- net/smc/af_smc.c | 2 +- net/socket.c | 4 +- net/tipc/socket.c | 8 +- net/unix/af_unix.c | 5 +- net/vmw_vsock/af_vsock.c | 3 +- net/x25/af_x25.c | 3 +- 38 files changed, 142 insertions(+), 108 deletions(-) (limited to 'include') diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f5e18c2a4852..690deca17c35 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -266,7 +266,7 @@ unlock: return err; } -int af_alg_accept(struct sock *sk, struct socket *newsock) +int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern) { struct alg_sock *ask = alg_sk(sk); const struct af_alg_type *type; @@ -281,7 +281,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) if (!type) goto unlock; - sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, 0); + sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, kern); err = -ENOMEM; if (!sk2) goto unlock; @@ -323,9 +323,10 @@ unlock: } EXPORT_SYMBOL_GPL(af_alg_accept); -static int alg_accept(struct socket *sock, struct socket *newsock, int flags) +static int alg_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { - return af_alg_accept(sock->sk, newsock); + return af_alg_accept(sock->sk, newsock, kern); } static const struct proto_ops alg_proto_ops = { diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 54fc90e8339c..5e92bd275ef3 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -239,7 +239,8 @@ unlock: return err ?: len; } -static int hash_accept(struct socket *sock, struct socket *newsock, int flags) +static int hash_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); @@ -260,7 +261,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags) if (err) return err; - err = af_alg_accept(ask->parent, newsock); + err = af_alg_accept(ask->parent, newsock, kern); if (err) return err; @@ -378,7 +379,7 @@ static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg, } static int hash_accept_nokey(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { int err; @@ -386,7 +387,7 @@ static int hash_accept_nokey(struct socket *sock, struct socket *newsock, if (err) return err; - return hash_accept(sock, newsock, flags); + return hash_accept(sock, newsock, flags, kern); } static struct proto_ops algif_hash_ops_nokey = { diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c index b7b87ecefcdf..9fca8d225ee0 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-socket.c +++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c @@ -532,7 +532,7 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock) newsock->ops = sock->ops; - rc = sock->ops->accept(sock, newsock, O_NONBLOCK); + rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); if (rc == -EAGAIN) { /* Nothing ready, so wait for activity */ init_waitqueue_entry(&wait, current); @@ -540,7 +540,7 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock) set_current_state(TASK_INTERRUPTIBLE); schedule(); remove_wait_queue(sk_sleep(sock->sk), &wait); - rc = sock->ops->accept(sock, newsock, O_NONBLOCK); + rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); } if (rc) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 7d398d300e97..9382db998ec9 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -743,7 +743,7 @@ static int tcp_accept_from_sock(struct connection *con) newsock->type = con->sock->type; newsock->ops = con->sock->ops; - result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK); + result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK, true); if (result < 0) goto accept_err; diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 4348027384f5..d0ab7e56d0b4 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1863,7 +1863,7 @@ static int o2net_accept_one(struct socket *sock, int *more) new_sock->type = sock->type; new_sock->ops = sock->ops; - ret = sock->ops->accept(sock, new_sock, O_NONBLOCK); + ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false); if (ret < 0) goto out; diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index a2bfd7843f18..e2b9c6fe2714 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -73,7 +73,7 @@ int af_alg_unregister_type(const struct af_alg_type *type); int af_alg_release(struct socket *sock); void af_alg_release_parent(struct sock *sk); -int af_alg_accept(struct sock *sk, struct socket *newsock); +int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern); int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len); void af_alg_free_sg(struct af_alg_sgl *sgl); diff --git a/include/linux/net.h b/include/linux/net.h index cd0c8bd0a1de..0620f5e18c96 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -146,7 +146,7 @@ struct proto_ops { int (*socketpair)(struct socket *sock1, struct socket *sock2); int (*accept) (struct socket *sock, - struct socket *newsock, int flags); + struct socket *newsock, int flags, bool kern); int (*getname) (struct socket *sock, struct sockaddr *addr, int *sockaddr_len, int peer); diff --git a/include/net/inet_common.h b/include/net/inet_common.h index b7952d55b9c0..f39ae697347f 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -20,7 +20,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags, int is_sendmsg); int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); -int inet_accept(struct socket *sock, struct socket *newsock, int flags); +int inet_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern); int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 826f198374f8..c7a577976bec 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -258,7 +258,7 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk, return (unsigned long)min_t(u64, when, max_when); } -struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); +struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern); int inet_csk_get_port(struct sock *sk, unsigned short snum); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index a244db5e5ff7..07a0b128625a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -476,7 +476,8 @@ struct sctp_pf { int (*send_verify) (struct sctp_sock *, union sctp_addr *); int (*supported_addrs)(const struct sctp_sock *, __be16 *); struct sock *(*create_accept_sk) (struct sock *sk, - struct sctp_association *asoc); + struct sctp_association *asoc, + bool kern); int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr); void (*to_sk_saddr)(union sctp_addr *, struct sock *sk); void (*to_sk_daddr)(union sctp_addr *, struct sock *sk); diff --git a/include/net/sock.h b/include/net/sock.h index 5e5997654db6..03252d53975d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -236,6 +236,7 @@ struct sock_common { * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings * @sk_lock: synchronizer + * @sk_kern_sock: True if sock is using kernel lock classes * @sk_rcvbuf: size of receive buffer in bytes * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux @@ -430,7 +431,8 @@ struct sock { #endif kmemcheck_bitfield_begin(flags); - unsigned int sk_padding : 2, + unsigned int sk_padding : 1, + sk_kern_sock : 1, sk_no_check_tx : 1, sk_no_check_rx : 1, sk_userlocks : 4, @@ -1015,7 +1017,8 @@ struct proto { int addr_len); int (*disconnect)(struct sock *sk, int flags); - struct sock * (*accept)(struct sock *sk, int flags, int *err); + struct sock * (*accept)(struct sock *sk, int flags, int *err, + bool kern); int (*ioctl)(struct sock *sk, int cmd, unsigned long arg); @@ -1573,7 +1576,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, int sock_no_bind(struct socket *, struct sockaddr *, int); int sock_no_connect(struct socket *, struct sockaddr *, int, int); int sock_no_socketpair(struct socket *, struct socket *); -int sock_no_accept(struct socket *, struct socket *, int); +int sock_no_accept(struct socket *, struct socket *, int, bool); int sock_no_getname(struct socket *, struct sockaddr *, int *, int); unsigned int sock_no_poll(struct file *, struct socket *, struct poll_table_struct *); diff --git a/net/atm/svc.c b/net/atm/svc.c index db9794ec61d8..5589de7086af 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -318,7 +318,8 @@ out: return error; } -static int svc_accept(struct socket *sock, struct socket *newsock, int flags) +static int svc_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct sk_buff *skb; @@ -329,7 +330,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags) lock_sock(sk); - error = svc_create(sock_net(sk), newsock, 0, 0); + error = svc_create(sock_net(sk), newsock, 0, kern); if (error) goto out; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index a8e42cedf1db..b7c486752b3a 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1320,7 +1320,8 @@ out_release: return err; } -static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) +static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sk_buff *skb; struct sock *newsk; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index f307b145ea54..507b80d59dec 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -301,7 +301,7 @@ done: } static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index aa1a814ceddc..ac3c650cb234 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -471,7 +471,8 @@ done: return err; } -static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags) +static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index e4e9a2da1e7e..728e0c8dc8e7 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -627,7 +627,7 @@ done: } static int sco_sock_accept(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *ch; diff --git a/net/core/sock.c b/net/core/sock.c index f6fd79f33097..a96d5f7a5734 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -197,66 +197,55 @@ EXPORT_SYMBOL(sk_net_capable); /* * Each address family might have different locking rules, so we have - * one slock key per address family: + * one slock key per address family and separate keys for internal and + * userspace sockets. */ static struct lock_class_key af_family_keys[AF_MAX]; +static struct lock_class_key af_family_kern_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; +static struct lock_class_key af_family_kern_slock_keys[AF_MAX]; /* * Make lock validator output more readable. (we pre-construct these * strings build-time, so that runtime initialization of socket * locks is fast): */ + +#define _sock_locks(x) \ + x "AF_UNSPEC", x "AF_UNIX" , x "AF_INET" , \ + x "AF_AX25" , x "AF_IPX" , x "AF_APPLETALK", \ + x "AF_NETROM", x "AF_BRIDGE" , x "AF_ATMPVC" , \ + x "AF_X25" , x "AF_INET6" , x "AF_ROSE" , \ + x "AF_DECnet", x "AF_NETBEUI" , x "AF_SECURITY" , \ + x "AF_KEY" , x "AF_NETLINK" , x "AF_PACKET" , \ + x "AF_ASH" , x "AF_ECONET" , x "AF_ATMSVC" , \ + x "AF_RDS" , x "AF_SNA" , x "AF_IRDA" , \ + x "AF_PPPOX" , x "AF_WANPIPE" , x "AF_LLC" , \ + x "27" , x "28" , x "AF_CAN" , \ + x "AF_TIPC" , x "AF_BLUETOOTH", x "IUCV" , \ + x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \ + x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \ + x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \ + x "AF_QIPCRTR", x "AF_SMC" , x "AF_MAX" + static const char *const af_family_key_strings[AF_MAX+1] = { - "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" , - "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK", - "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" , - "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" , - "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" , - "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" , - "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , - "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , - "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , - "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , - "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , - "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , - "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , - "sk_lock-AF_QIPCRTR", "sk_lock-AF_SMC" , "sk_lock-AF_MAX" + _sock_locks("sk_lock-") }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { - "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , - "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK", - "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" , - "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" , - "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" , - "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" , - "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , - "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" , - "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , - "slock-27" , "slock-28" , "slock-AF_CAN" , - "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , - "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , - "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , - "slock-AF_QIPCRTR", "slock-AF_SMC" , "slock-AF_MAX" + _sock_locks("slock-") }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { - "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , - "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK", - "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" , - "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" , - "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" , - "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" , - "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , - "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" , - "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , - "clock-27" , "clock-28" , "clock-AF_CAN" , - "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , - "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , - "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , - "clock-AF_QIPCRTR", "clock-AF_SMC" , "clock-AF_MAX" + _sock_locks("clock-") +}; + +static const char *const af_family_kern_key_strings[AF_MAX+1] = { + _sock_locks("k-sk_lock-") +}; +static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = { + _sock_locks("k-slock-") +}; +static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = { + _sock_locks("k-clock-") }; /* @@ -264,6 +253,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { * so split the lock classes by using a per-AF key: */ static struct lock_class_key af_callback_keys[AF_MAX]; +static struct lock_class_key af_kern_callback_keys[AF_MAX]; /* Take into consideration the size of the struct sk_buff overhead in the * determination of these values, since that is non-constant across @@ -1293,7 +1283,16 @@ lenout: */ static inline void sock_lock_init(struct sock *sk) { - sock_lock_init_class_and_name(sk, + if (sk->sk_kern_sock) + sock_lock_init_class_and_name( + sk, + af_family_kern_slock_key_strings[sk->sk_family], + af_family_kern_slock_keys + sk->sk_family, + af_family_kern_key_strings[sk->sk_family], + af_family_kern_keys + sk->sk_family); + else + sock_lock_init_class_and_name( + sk, af_family_slock_key_strings[sk->sk_family], af_family_slock_keys + sk->sk_family, af_family_key_strings[sk->sk_family], @@ -1399,6 +1398,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, * why we need sk_prot_creator -acme */ sk->sk_prot = sk->sk_prot_creator = prot; + sk->sk_kern_sock = kern; sock_lock_init(sk); sk->sk_net_refcnt = kern ? 0 : 1; if (likely(sk->sk_net_refcnt)) @@ -2277,7 +2277,8 @@ int sock_no_socketpair(struct socket *sock1, struct socket *sock2) } EXPORT_SYMBOL(sock_no_socketpair); -int sock_no_accept(struct socket *sock, struct socket *newsock, int flags) +int sock_no_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { return -EOPNOTSUPP; } @@ -2481,7 +2482,14 @@ void sock_init_data(struct socket *sock, struct sock *sk) } rwlock_init(&sk->sk_callback_lock); - lockdep_set_class_and_name(&sk->sk_callback_lock, + if (sk->sk_kern_sock) + lockdep_set_class_and_name( + &sk->sk_callback_lock, + af_kern_callback_keys + sk->sk_family, + af_family_kern_clock_key_strings[sk->sk_family]); + else + lockdep_set_class_and_name( + &sk->sk_callback_lock, af_callback_keys + sk->sk_family, af_family_clock_key_strings[sk->sk_family]); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index e6e79eda9763..7de5b40a5d0d 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1070,7 +1070,8 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) return skb == NULL ? ERR_PTR(err) : skb; } -static int dn_accept(struct socket *sock, struct socket *newsock, int flags) +static int dn_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk, *newsk; struct sk_buff *skb = NULL; @@ -1099,7 +1100,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags) cb = DN_SKB_CB(skb); sk->sk_ack_backlog--; - newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0); + newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern); if (newsk == NULL) { release_sock(sk); kfree_skb(skb); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5091f46826fa..6b1fc6e4278e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -689,11 +689,12 @@ EXPORT_SYMBOL(inet_stream_connect); * Accept a pending connection. The TCP layer now gives BSD semantics. */ -int inet_accept(struct socket *sock, struct socket *newsock, int flags) +int inet_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk1 = sock->sk; int err = -EINVAL; - struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err); + struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern); if (!sk2) goto do_err; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b4d5980ade3b..5e313c1ac94f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -424,7 +424,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) /* * This will accept the next outstanding connection. */ -struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) +struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) { struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock_queue *queue = &icsk->icsk_accept_queue; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 81adc29a448d..8d77ad5cadaf 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -828,7 +828,8 @@ out: * Wait for incoming connection * */ -static int irda_accept(struct socket *sock, struct socket *newsock, int flags) +static int irda_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct irda_sock *new, *self = irda_sk(sk); @@ -836,7 +837,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) struct sk_buff *skb = NULL; int err; - err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0); + err = irda_create(sock_net(sk), newsock, sk->sk_protocol, kern); if (err) return err; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 89bbde1081ce..84de7b6326dc 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -938,7 +938,7 @@ done: /* Accept a pending connection */ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { DECLARE_WAITQUEUE(wait, current); struct sock *sk = sock->sk, *nsk; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 06186d608a27..cb4fff785cbf 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -641,11 +641,13 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) * @sock: Socket which connections arrive on. * @newsock: Socket to move incoming connection to. * @flags: User specified operational flags. + * @kern: If the socket is kernel internal * * Accept a new incoming connection. * Returns 0 upon success, negative otherwise. */ -static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags) +static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk, *newsk; struct llc_sock *llc, *newllc; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 4bbf4526b885..ebf16f7f9089 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -765,7 +765,8 @@ out_release: return err; } -static int nr_accept(struct socket *sock, struct socket *newsock, int flags) +static int nr_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sk_buff *skb; struct sock *newsk; diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 879885b31cce..2ffb18e73df6 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -441,7 +441,7 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, } static int llcp_sock_accept(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { DECLARE_WAITQUEUE(wait, current); struct sock *sk = sock->sk, *new_sk; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 222bedcd9575..e81537991ddf 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -772,7 +772,8 @@ static void pep_sock_close(struct sock *sk, long timeout) sock_put(sk); } -static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) +static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, + bool kern) { struct pep_sock *pn = pep_sk(sk), *newpn; struct sock *newsk = NULL; @@ -846,7 +847,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) } /* Create a new to-be-accepted sock */ - newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0); + newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, + kern); if (!newsk) { pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL); err = -ENOBUFS; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index a6c8da3ee893..64634e3ec2fc 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -305,7 +305,7 @@ out: } static int pn_socket_accept(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { struct sock *sk = sock->sk; struct sock *newsk; @@ -314,7 +314,7 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock, if (unlikely(sk->sk_state != TCP_LISTEN)) return -EINVAL; - newsk = sk->sk_prot->accept(sk, flags, &err); + newsk = sk->sk_prot->accept(sk, flags, &err, kern); if (!newsk) return err; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 2c69a508a693..507678853e6c 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -133,7 +133,7 @@ int rds_tcp_accept_one(struct socket *sock) new_sock->type = sock->type; new_sock->ops = sock->ops; - ret = sock->ops->accept(sock, new_sock, O_NONBLOCK); + ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true); if (ret < 0) goto out; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index b8a1df2c9785..4a9729257023 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -871,7 +871,8 @@ out_release: return err; } -static int rose_accept(struct socket *sock, struct socket *newsock, int flags) +static int rose_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sk_buff *skb; struct sock *newsk; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 063baac5b9fe..961ee59f696a 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -640,14 +640,15 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr) /* Create and initialize a new sk for the socket to be returned by accept(). */ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, - struct sctp_association *asoc) + struct sctp_association *asoc, + bool kern) { struct sock *newsk; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; struct ipv6_txoptions *opt; - newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0); + newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern); if (!newsk) goto out; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1b6d4574d2b0..989a900383b5 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -575,10 +575,11 @@ static int sctp_v4_is_ce(const struct sk_buff *skb) /* Create and initialize a new sk for the socket returned by accept(). */ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, - struct sctp_association *asoc) + struct sctp_association *asoc, + bool kern) { struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL, - sk->sk_prot, 0); + sk->sk_prot, kern); struct inet_sock *newinet; if (!newsk) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6f0a9be50f50..0f378ea2ae38 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4116,7 +4116,7 @@ static int sctp_disconnect(struct sock *sk, int flags) * descriptor will be returned from accept() to represent the newly * formed association. */ -static struct sock *sctp_accept(struct sock *sk, int flags, int *err) +static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) { struct sctp_sock *sp; struct sctp_endpoint *ep; @@ -4151,7 +4151,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err) */ asoc = list_entry(ep->asocs.next, struct sctp_association, asocs); - newsk = sp->pf->create_accept_sk(sk, asoc); + newsk = sp->pf->create_accept_sk(sk, asoc, kern); if (!newsk) { error = -ENOMEM; goto out; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 85837ab90e89..093803786eac 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -944,7 +944,7 @@ out: } static int smc_accept(struct socket *sock, struct socket *new_sock, - int flags) + int flags, bool kern) { struct sock *sk = sock->sk, *nsk; DECLARE_WAITQUEUE(wait, current); diff --git a/net/socket.c b/net/socket.c index e0757e648c0c..e034fe4164be 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1506,7 +1506,7 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, if (err) goto out_fd; - err = sock->ops->accept(sock, newsock, sock->file->f_flags); + err = sock->ops->accept(sock, newsock, sock->file->f_flags, false); if (err < 0) goto out_fd; @@ -3239,7 +3239,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) if (err < 0) goto done; - err = sock->ops->accept(sock, *newsock, flags); + err = sock->ops->accept(sock, *newsock, flags, true); if (err < 0) { sock_release(*newsock); *newsock = NULL; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 43e4045e72bc..7130e73bd42c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -115,7 +115,8 @@ static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); static void tipc_sock_destruct(struct sock *sk); static int tipc_release(struct socket *sock); -static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); +static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, + bool kern); static void tipc_sk_timeout(unsigned long data); static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); @@ -2029,7 +2030,8 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) * * Returns 0 on success, errno otherwise */ -static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) +static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, + bool kern) { struct sock *new_sk, *sk = sock->sk; struct sk_buff *buf; @@ -2051,7 +2053,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) buf = skb_peek(&sk->sk_receive_queue); - res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0); + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern); if (res) goto exit; security_sk_clone(sock->sk, new_sock->sk); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ee37b390260a..928691c43408 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -636,7 +636,7 @@ static int unix_bind(struct socket *, struct sockaddr *, int); static int unix_stream_connect(struct socket *, struct sockaddr *, int addr_len, int flags); static int unix_socketpair(struct socket *, struct socket *); -static int unix_accept(struct socket *, struct socket *, int); +static int unix_accept(struct socket *, struct socket *, int, bool); static int unix_getname(struct socket *, struct sockaddr *, int *, int); static unsigned int unix_poll(struct file *, struct socket *, poll_table *); static unsigned int unix_dgram_poll(struct file *, struct socket *, @@ -1402,7 +1402,8 @@ static void unix_sock_inherit_flags(const struct socket *old, set_bit(SOCK_PASSSEC, &new->flags); } -static int unix_accept(struct socket *sock, struct socket *newsock, int flags) +static int unix_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct sock *tsk; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9192ead66751..9f770f33c100 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1250,7 +1250,8 @@ out: return err; } -static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) +static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *listener; int err; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index fd28a49dbe8f..8b911c29860e 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -852,7 +852,8 @@ static int x25_wait_for_data(struct sock *sk, long timeout) return rc; } -static int x25_accept(struct socket *sock, struct socket *newsock, int flags) +static int x25_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct sock *newsk; -- cgit v1.2.3 From c962cff17dfa11f4a8227ac16de2b28aea3312e4 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:23 +0800 Subject: Revert "x86/acpi: Set persistent cpuid <-> nodeid mapping when booting" Revert: dc6db24d2476 ("x86/acpi: Set persistent cpuid <-> nodeid mapping when booting") The mapping of "cpuid <-> nodeid" is established at boot time via ACPI tables to keep associations of workqueues and other node related items consistent across cpu hotplug. But, ACPI tables are unreliable and failures with that boot time mapping have been reported on machines where the ACPI table and the physical information which is retrieved at actual hotplug is inconsistent. Revert the mapping implementation so it can be replaced with a less error prone approach. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-2-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 2 +- drivers/acpi/acpi_processor.c | 5 --- drivers/acpi/bus.c | 1 - drivers/acpi/processor_core.c | 73 ------------------------------------------- include/linux/acpi.h | 3 -- 5 files changed, 1 insertion(+), 83 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ae32838cac5f..f6b0e87d2388 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -710,7 +710,7 @@ static void __init acpi_set_irq_model_ioapic(void) #ifdef CONFIG_ACPI_HOTPLUG_CPU #include -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA int nid; diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 4467a8089ab8..5d208a99d0c9 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -182,11 +182,6 @@ int __weak arch_register_cpu(int cpu) void __weak arch_unregister_cpu(int cpu) {} -int __weak acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) -{ - return -ENODEV; -} - static int acpi_processor_hotadd_init(struct acpi_processor *pr) { unsigned long long sta; diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 80cb5eb75b63..34fbe027e73a 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1249,7 +1249,6 @@ static int __init acpi_init(void) acpi_wakeup_device_init(); acpi_debugger_init(); acpi_setup_sb_notify_handler(); - acpi_set_processor_mapping(); return 0; } diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 611a5585a902..a84386204659 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -278,79 +278,6 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) } EXPORT_SYMBOL_GPL(acpi_get_cpuid); -#ifdef CONFIG_ACPI_HOTPLUG_CPU -static bool __init -map_processor(acpi_handle handle, phys_cpuid_t *phys_id, int *cpuid) -{ - int type, id; - u32 acpi_id; - acpi_status status; - acpi_object_type acpi_type; - unsigned long long tmp; - union acpi_object object = { 0 }; - struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; - - status = acpi_get_type(handle, &acpi_type); - if (ACPI_FAILURE(status)) - return false; - - switch (acpi_type) { - case ACPI_TYPE_PROCESSOR: - status = acpi_evaluate_object(handle, NULL, NULL, &buffer); - if (ACPI_FAILURE(status)) - return false; - acpi_id = object.processor.proc_id; - - /* validate the acpi_id */ - if(acpi_processor_validate_proc_id(acpi_id)) - return false; - break; - case ACPI_TYPE_DEVICE: - status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp); - if (ACPI_FAILURE(status)) - return false; - acpi_id = tmp; - break; - default: - return false; - } - - type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0; - - *phys_id = __acpi_get_phys_id(handle, type, acpi_id, false); - id = acpi_map_cpuid(*phys_id, acpi_id); - - if (id < 0) - return false; - *cpuid = id; - return true; -} - -static acpi_status __init -set_processor_node_mapping(acpi_handle handle, u32 lvl, void *context, - void **rv) -{ - phys_cpuid_t phys_id; - int cpu_id; - - if (!map_processor(handle, &phys_id, &cpu_id)) - return AE_ERROR; - - acpi_map_cpu2node(handle, cpu_id, phys_id); - return AE_OK; -} - -void __init acpi_set_processor_mapping(void) -{ - /* Set persistent cpu <-> node mapping for all processors. */ - acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, set_processor_node_mapping, - NULL, NULL, NULL); -} -#else -void __init acpi_set_processor_mapping(void) {} -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC static int get_ioapic_id(struct acpi_subtable_header *entry, u32 gsi_base, u64 *phys_addr, int *ioapic_id) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 673acda012af..63a7519b00cc 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -294,11 +294,8 @@ bool acpi_processor_validate_proc_id(int proc_id); int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu); int acpi_unmap_cpu(int cpu); -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ -void acpi_set_processor_mapping(void); - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif -- cgit v1.2.3 From a77d6cd968497792e072b74dff45b891ba778ddb Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:27 +0800 Subject: acpi/processor: Check for duplicate processor ids at hotplug time The check for duplicate processor ids happens at boot time based on the ACPI table contents, but the final sanity checks for a processor happen at hotplug time. At hotplug time, where the physical information is available, which might differ from the ACPI table information, a check for duplicate processor ids is missing. Add it to the hotplug checks and rename the function so it better reflects its purpose. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-6-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- drivers/acpi/acpi_processor.c | 13 ++++++++++--- include/linux/acpi.h | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 9a98d7e00200..0143135b3abe 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -280,6 +280,13 @@ static int acpi_processor_get_info(struct acpi_device *device) pr->acpi_id = value; } + if (acpi_duplicate_processor_id(pr->acpi_id)) { + dev_err(&device->dev, + "Failed to get unique processor _UID (0x%x)\n", + pr->acpi_id); + return -ENODEV; + } + pr->phys_id = acpi_get_phys_id(pr->handle, device_declaration, pr->acpi_id); if (invalid_phys_cpuid(pr->phys_id)) @@ -580,7 +587,7 @@ static struct acpi_scan_handler processor_container_handler = { static int nr_unique_ids __initdata; /* The number of the duplicate processor IDs */ -static int nr_duplicate_ids __initdata; +static int nr_duplicate_ids; /* Used to store the unique processor IDs */ static int unique_processor_ids[] __initdata = { @@ -588,7 +595,7 @@ static int unique_processor_ids[] __initdata = { }; /* Used to store the duplicate processor IDs */ -static int duplicate_processor_ids[] __initdata = { +static int duplicate_processor_ids[] = { [0 ... NR_CPUS - 1] = -1, }; @@ -679,7 +686,7 @@ void __init acpi_processor_check_duplicates(void) NULL, NULL); } -bool __init acpi_processor_validate_proc_id(int proc_id) +bool acpi_duplicate_processor_id(int proc_id) { int i; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 63a7519b00cc..9b05886f9773 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -287,7 +287,7 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) } /* Validate the processor object's proc_id */ -bool acpi_processor_validate_proc_id(int proc_id); +bool acpi_duplicate_processor_id(int proc_id); #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ -- cgit v1.2.3 From 65869a47f3488253f5fd88cc4f14e0a4e2601a55 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 11 Mar 2017 16:55:49 +0100 Subject: bpf: improve read-only handling Improve bpf_{prog,jit_binary}_{un,}lock_ro() by throwing a one-time warning in case of an error when the image couldn't be set read-only, and also mark struct bpf_prog as locked when bpf_prog_lock_ro() was called. Reason for the latter is that bpf_prog_unlock_ro() is called from various places including error paths, and we shouldn't mess with page attributes when really not needed. For bpf_jit_binary_unlock_ro() this is not needed as jited flag implicitly indicates this, thus for archs with ARCH_HAS_SET_MEMORY we're guaranteed to have a previously locked image. Overall, this should also help us to identify any further potential issues with set_memory_*() helpers. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0c167fdee5f7..fbf7b39e8103 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -409,6 +409,7 @@ struct bpf_prog { u16 pages; /* Number of allocated pages */ kmemcheck_bitfield_begin(meta); u16 jited:1, /* Is our filter JIT'ed? */ + locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ dst_needed:1, /* Do we need dst entry? */ @@ -554,22 +555,29 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog) #ifdef CONFIG_ARCH_HAS_SET_MEMORY static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { - set_memory_ro((unsigned long)fp, fp->pages); + fp->locked = 1; + WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages)); } static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { - set_memory_rw((unsigned long)fp, fp->pages); + if (fp->locked) { + WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages)); + /* In case set_memory_rw() fails, we want to be the first + * to crash here instead of some random place later on. + */ + fp->locked = 0; + } } static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { - set_memory_ro((unsigned long)hdr, hdr->pages); + WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages)); } static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) { - set_memory_rw((unsigned long)hdr, hdr->pages); + WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages)); } #else static inline void bpf_prog_lock_ro(struct bpf_prog *fp) -- cgit v1.2.3 From 337ba7fbf0fbc12242359c4af114878618b90951 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sat, 25 Feb 2017 16:29:50 +0300 Subject: uapi: fix drm/omap_drm.h userspace compilation errors Consistently use types from linux/types.h like in other uapi drm/*_drm.h header files to fix the following drm/omap_drm.h userspace compilation errors: /usr/include/drm/omap_drm.h:36:2: error: unknown type name 'uint64_t' uint64_t param; /* in */ /usr/include/drm/omap_drm.h:37:2: error: unknown type name 'uint64_t' uint64_t value; /* in (set_param), out (get_param) */ /usr/include/drm/omap_drm.h:56:2: error: unknown type name 'uint32_t' uint32_t bytes; /* (for non-tiled formats) */ /usr/include/drm/omap_drm.h:58:3: error: unknown type name 'uint16_t' uint16_t width; /usr/include/drm/omap_drm.h:59:3: error: unknown type name 'uint16_t' uint16_t height; /usr/include/drm/omap_drm.h:65:2: error: unknown type name 'uint32_t' uint32_t flags; /* in */ /usr/include/drm/omap_drm.h:66:2: error: unknown type name 'uint32_t' uint32_t handle; /* out */ /usr/include/drm/omap_drm.h:67:2: error: unknown type name 'uint32_t' uint32_t __pad; /usr/include/drm/omap_drm.h:77:2: error: unknown type name 'uint32_t' uint32_t handle; /* buffer handle (in) */ /usr/include/drm/omap_drm.h:78:2: error: unknown type name 'uint32_t' uint32_t op; /* mask of omap_gem_op (in) */ /usr/include/drm/omap_drm.h:82:2: error: unknown type name 'uint32_t' uint32_t handle; /* buffer handle (in) */ /usr/include/drm/omap_drm.h:83:2: error: unknown type name 'uint32_t' uint32_t op; /* mask of omap_gem_op (in) */ /usr/include/drm/omap_drm.h:88:2: error: unknown type name 'uint32_t' uint32_t nregions; /usr/include/drm/omap_drm.h:89:2: error: unknown type name 'uint32_t' uint32_t __pad; /usr/include/drm/omap_drm.h:93:2: error: unknown type name 'uint32_t' uint32_t handle; /* buffer handle (in) */ /usr/include/drm/omap_drm.h:94:2: error: unknown type name 'uint32_t' uint32_t pad; /usr/include/drm/omap_drm.h:95:2: error: unknown type name 'uint64_t' uint64_t offset; /* mmap offset (out) */ /usr/include/drm/omap_drm.h:102:2: error: unknown type name 'uint32_t' uint32_t size; /* virtual size for mmap'ing (out) */ /usr/include/drm/omap_drm.h:103:2: error: unknown type name 'uint32_t' uint32_t __pad; Fixes: ef6503e89194 ("drm: Kbuild: add omap_drm.h to the installed headers") Signed-off-by: Dmitry V. Levin Signed-off-by: Tomi Valkeinen --- include/uapi/drm/omap_drm.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h index 407cb55df6ac..7fb97863c945 100644 --- a/include/uapi/drm/omap_drm.h +++ b/include/uapi/drm/omap_drm.h @@ -33,8 +33,8 @@ extern "C" { #define OMAP_PARAM_CHIPSET_ID 1 /* ie. 0x3430, 0x4430, etc */ struct drm_omap_param { - uint64_t param; /* in */ - uint64_t value; /* in (set_param), out (get_param) */ + __u64 param; /* in */ + __u64 value; /* in (set_param), out (get_param) */ }; #define OMAP_BO_SCANOUT 0x00000001 /* scanout capable (phys contiguous) */ @@ -53,18 +53,18 @@ struct drm_omap_param { #define OMAP_BO_TILED (OMAP_BO_TILED_8 | OMAP_BO_TILED_16 | OMAP_BO_TILED_32) union omap_gem_size { - uint32_t bytes; /* (for non-tiled formats) */ + __u32 bytes; /* (for non-tiled formats) */ struct { - uint16_t width; - uint16_t height; + __u16 width; + __u16 height; } tiled; /* (for tiled formats) */ }; struct drm_omap_gem_new { union omap_gem_size size; /* in */ - uint32_t flags; /* in */ - uint32_t handle; /* out */ - uint32_t __pad; + __u32 flags; /* in */ + __u32 handle; /* out */ + __u32 __pad; }; /* mask of operations: */ @@ -74,33 +74,33 @@ enum omap_gem_op { }; struct drm_omap_gem_cpu_prep { - uint32_t handle; /* buffer handle (in) */ - uint32_t op; /* mask of omap_gem_op (in) */ + __u32 handle; /* buffer handle (in) */ + __u32 op; /* mask of omap_gem_op (in) */ }; struct drm_omap_gem_cpu_fini { - uint32_t handle; /* buffer handle (in) */ - uint32_t op; /* mask of omap_gem_op (in) */ + __u32 handle; /* buffer handle (in) */ + __u32 op; /* mask of omap_gem_op (in) */ /* TODO maybe here we pass down info about what regions are touched * by sw so we can be clever about cache ops? For now a placeholder, * set to zero and we just do full buffer flush.. */ - uint32_t nregions; - uint32_t __pad; + __u32 nregions; + __u32 __pad; }; struct drm_omap_gem_info { - uint32_t handle; /* buffer handle (in) */ - uint32_t pad; - uint64_t offset; /* mmap offset (out) */ + __u32 handle; /* buffer handle (in) */ + __u32 pad; + __u64 offset; /* mmap offset (out) */ /* note: in case of tiled buffers, the user virtual size can be * different from the physical size (ie. how many pages are needed * to back the object) which is returned in DRM_IOCTL_GEM_OPEN.. * This size here is the one that should be used if you want to * mmap() the buffer: */ - uint32_t size; /* virtual size for mmap'ing (out) */ - uint32_t __pad; + __u32 size; /* virtual size for mmap'ing (out) */ + __u32 __pad; }; #define DRM_OMAP_GET_PARAM 0x00 -- cgit v1.2.3 From 10596608c4d62cb8c1c2b806debcbd32fe657e71 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 8 Mar 2017 22:54:18 +0800 Subject: netfilter: nf_tables: fix mismatch in big-endian system Currently, there are two different methods to store an u16 integer to the u32 data register. For example: u32 *dest = ®s->data[priv->dreg]; 1. *dest = 0; *(u16 *) dest = val_u16; 2. *dest = val_u16; For method 1, the u16 value will be stored like this, either in big-endian or little-endian system: 0 15 31 +-+-+-+-+-+-+-+-+-+-+-+-+ | Value | 0 | +-+-+-+-+-+-+-+-+-+-+-+-+ For method 2, in little-endian system, the u16 value will be the same as listed above. But in big-endian system, the u16 value will be stored like this: 0 15 31 +-+-+-+-+-+-+-+-+-+-+-+-+ | 0 | Value | +-+-+-+-+-+-+-+-+-+-+-+-+ So later we use "memcmp(®s->data[priv->sreg], data, 2);" to do compare in nft_cmp, nft_lookup expr ..., method 2 will get the wrong result in big-endian system, as 0~15 bits will always be zero. For the similar reason, when loading an u16 value from the u32 data register, we should use "*(u16 *) sreg;" instead of "(u16)*sreg;", the 2nd method will get the wrong value in the big-endian system. So introduce some wrapper functions to store/load an u8 or u16 integer to/from the u32 data register, and use them in the right place. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 29 +++++++++++++++++++++++++++ net/ipv4/netfilter/nft_masq_ipv4.c | 8 ++++---- net/ipv4/netfilter/nft_redir_ipv4.c | 8 ++++---- net/ipv6/netfilter/nft_masq_ipv6.c | 8 ++++---- net/ipv6/netfilter/nft_redir_ipv6.c | 8 ++++---- net/netfilter/nft_ct.c | 18 +++++++++-------- net/netfilter/nft_meta.c | 40 +++++++++++++++++++------------------ net/netfilter/nft_nat.c | 8 ++++---- 8 files changed, 80 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2aa8a9d80fbe..70c5ca0c60b1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -103,6 +103,35 @@ struct nft_regs { }; }; +/* Store/load an u16 or u8 integer to/from the u32 data register. + * + * Note, when using concatenations, register allocation happens at 32-bit + * level. So for store instruction, pad the rest part with zero to avoid + * garbage values. + */ + +static inline void nft_reg_store16(u32 *dreg, u16 val) +{ + *dreg = 0; + *(u16 *)dreg = val; +} + +static inline void nft_reg_store8(u32 *dreg, u8 val) +{ + *dreg = 0; + *(u8 *)dreg = val; +} + +static inline u16 nft_reg_load16(u32 *sreg) +{ + return *(u16 *)sreg; +} + +static inline u8 nft_reg_load8(u32 *sreg) +{ + return *(u8 *)sreg; +} + static inline void nft_data_copy(u32 *dst, const struct nft_data *src, unsigned int len) { diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index a0ea8aad1bf1..f18677277119 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -26,10 +26,10 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); } regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt), &range, nft_out(pkt)); diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index 1650ed23c15d..5120be1d3118 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -26,10 +26,10 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr, memset(&mr, 0, sizeof(mr)); if (priv->sreg_proto_min) { - mr.range[0].min.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - mr.range[0].max.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + mr.range[0].min.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + mr.range[0].max.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 6c5b5b1830a7..4146536e9c15 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -27,10 +27,10 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); } regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, nft_out(pkt)); diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index f5ac080fc084..a27e424f690d 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -26,10 +26,10 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min], - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max], + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index bf548a7a71ec..91585b5e5307 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -83,7 +83,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, switch (priv->key) { case NFT_CT_DIRECTION: - *dest = CTINFO2DIR(ctinfo); + nft_reg_store8(dest, CTINFO2DIR(ctinfo)); return; case NFT_CT_STATUS: *dest = ct->status; @@ -151,20 +151,22 @@ static void nft_ct_get_eval(const struct nft_expr *expr, return; } case NFT_CT_L3PROTOCOL: - *dest = nf_ct_l3num(ct); + nft_reg_store8(dest, nf_ct_l3num(ct)); return; case NFT_CT_PROTOCOL: - *dest = nf_ct_protonum(ct); + nft_reg_store8(dest, nf_ct_protonum(ct)); return; #ifdef CONFIG_NF_CONNTRACK_ZONES case NFT_CT_ZONE: { const struct nf_conntrack_zone *zone = nf_ct_zone(ct); + u16 zoneid; if (priv->dir < IP_CT_DIR_MAX) - *dest = nf_ct_zone_id(zone, priv->dir); + zoneid = nf_ct_zone_id(zone, priv->dir); else - *dest = zone->id; + zoneid = zone->id; + nft_reg_store16(dest, zoneid); return; } #endif @@ -183,10 +185,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr, nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); return; case NFT_CT_PROTO_SRC: - *dest = (__force __u16)tuple->src.u.all; + nft_reg_store16(dest, (__force u16)tuple->src.u.all); return; case NFT_CT_PROTO_DST: - *dest = (__force __u16)tuple->dst.u.all; + nft_reg_store16(dest, (__force u16)tuple->dst.u.all); return; default: break; @@ -205,7 +207,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, const struct nft_ct *priv = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; enum ip_conntrack_info ctinfo; - u16 value = regs->data[priv->sreg]; + u16 value = nft_reg_load16(®s->data[priv->sreg]); struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index e1f5ca9b423b..7b60e01f38ff 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -45,16 +45,15 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = skb->len; break; case NFT_META_PROTOCOL: - *dest = 0; - *(__be16 *)dest = skb->protocol; + nft_reg_store16(dest, (__force u16)skb->protocol); break; case NFT_META_NFPROTO: - *dest = nft_pf(pkt); + nft_reg_store8(dest, nft_pf(pkt)); break; case NFT_META_L4PROTO: if (!pkt->tprot_set) goto err; - *dest = pkt->tprot; + nft_reg_store8(dest, pkt->tprot); break; case NFT_META_PRIORITY: *dest = skb->priority; @@ -85,14 +84,12 @@ void nft_meta_get_eval(const struct nft_expr *expr, case NFT_META_IIFTYPE: if (in == NULL) goto err; - *dest = 0; - *(u16 *)dest = in->type; + nft_reg_store16(dest, in->type); break; case NFT_META_OIFTYPE: if (out == NULL) goto err; - *dest = 0; - *(u16 *)dest = out->type; + nft_reg_store16(dest, out->type); break; case NFT_META_SKUID: sk = skb_to_full_sk(skb); @@ -142,19 +139,19 @@ void nft_meta_get_eval(const struct nft_expr *expr, #endif case NFT_META_PKTTYPE: if (skb->pkt_type != PACKET_LOOPBACK) { - *dest = skb->pkt_type; + nft_reg_store8(dest, skb->pkt_type); break; } switch (nft_pf(pkt)) { case NFPROTO_IPV4: if (ipv4_is_multicast(ip_hdr(skb)->daddr)) - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); else - *dest = PACKET_BROADCAST; + nft_reg_store8(dest, PACKET_BROADCAST); break; case NFPROTO_IPV6: - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); break; case NFPROTO_NETDEV: switch (skb->protocol) { @@ -168,14 +165,14 @@ void nft_meta_get_eval(const struct nft_expr *expr, goto err; if (ipv4_is_multicast(iph->daddr)) - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); else - *dest = PACKET_BROADCAST; + nft_reg_store8(dest, PACKET_BROADCAST); break; } case htons(ETH_P_IPV6): - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); break; default: WARN_ON_ONCE(1); @@ -230,7 +227,9 @@ void nft_meta_set_eval(const struct nft_expr *expr, { const struct nft_meta *meta = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; - u32 value = regs->data[meta->sreg]; + u32 *sreg = ®s->data[meta->sreg]; + u32 value = *sreg; + u8 pkt_type; switch (meta->key) { case NFT_META_MARK: @@ -240,9 +239,12 @@ void nft_meta_set_eval(const struct nft_expr *expr, skb->priority = value; break; case NFT_META_PKTTYPE: - if (skb->pkt_type != value && - skb_pkt_type_ok(value) && skb_pkt_type_ok(skb->pkt_type)) - skb->pkt_type = value; + pkt_type = nft_reg_load8(sreg); + + if (skb->pkt_type != pkt_type && + skb_pkt_type_ok(pkt_type) && + skb_pkt_type_ok(skb->pkt_type)) + skb->pkt_type = pkt_type; break; case NFT_META_NFTRACE: skb->nf_trace = !!value; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 19a7bf3236f9..439e0bd152a0 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -65,10 +65,10 @@ static void nft_nat_eval(const struct nft_expr *expr, } if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } -- cgit v1.2.3 From 170a1fb9c01bc40b7e8fd57a32ac9a0e131ec5b6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sat, 11 Mar 2017 00:25:26 -0500 Subject: netfilter: Force fake conntrack entry to be at least 8 bytes aligned Since the nfct and nfctinfo have been combined, the nf_conn structure must be at least 8 bytes aligned, as the 3 LSB bits are used for the nfctinfo. But there's a fake nf_conn structure to denote untracked connections, which is created by a PER_CPU construct. This does not guarantee that it will be 8 bytes aligned and can break the logic in determining the correct nfctinfo. I triggered this on a 32bit machine with the following error: BUG: unable to handle kernel NULL pointer dereference at 00000af4 IP: nf_ct_deliver_cached_events+0x1b/0xfb *pdpt = 0000000031962001 *pde = 0000000000000000 Oops: 0000 [#1] SMP [Modules linked in: ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables ipv6 crc_ccitt ppdev r8169 parport_pc parport OK ] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.0-test+ #75 Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014 task: c126ec00 task.stack: c1258000 EIP: nf_ct_deliver_cached_events+0x1b/0xfb EFLAGS: 00010202 CPU: 0 EAX: 0021cd01 EBX: 00000000 ECX: 27b0c767 EDX: 32bcb17a ESI: f34135c0 EDI: f34135c0 EBP: f2debd60 ESP: f2debd3c DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 CR0: 80050033 CR2: 00000af4 CR3: 309a0440 CR4: 001406f0 Call Trace: ? ipv6_skip_exthdr+0xac/0xcb ipv6_confirm+0x10c/0x119 [nf_conntrack_ipv6] nf_hook_slow+0x22/0xc7 nf_hook+0x9a/0xad [ipv6] ? ip6t_do_table+0x356/0x379 [ip6_tables] ? ip6_fragment+0x9e9/0x9e9 [ipv6] ip6_output+0xee/0x107 [ipv6] ? ip6_fragment+0x9e9/0x9e9 [ipv6] dst_output+0x36/0x4d [ipv6] NF_HOOK.constprop.37+0xb2/0xba [ipv6] ? icmp6_dst_alloc+0x2c/0xfd [ipv6] ? local_bh_enable+0x14/0x14 [ipv6] mld_sendpack+0x1c5/0x281 [ipv6] ? mark_held_locks+0x40/0x5c mld_ifc_timer_expire+0x1f6/0x21e [ipv6] call_timer_fn+0x135/0x283 ? detach_if_pending+0x55/0x55 ? mld_dad_timer_expire+0x3e/0x3e [ipv6] __run_timers+0x111/0x14b ? mld_dad_timer_expire+0x3e/0x3e [ipv6] run_timer_softirq+0x1c/0x36 __do_softirq+0x185/0x37c ? test_ti_thread_flag.constprop.19+0xd/0xd do_softirq_own_stack+0x22/0x28 irq_exit+0x5a/0xa4 smp_apic_timer_interrupt+0x2a/0x34 apic_timer_interrupt+0x37/0x3c By using DEFINE/DECLARE_PER_CPU_ALIGNED we can enforce at least 8 byte alignment as all cache line sizes are at least 8 bytes or more. Fixes: a9e419dc7be6 ("netfilter: merge ctinfo into nfct pointer storage area") Signed-off-by: Steven Rostedt (VMware) Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 +- net/netfilter/nf_conntrack_core.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f540f9ad2af4..19605878da47 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -244,7 +244,7 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, u32 seq); /* Fake conntrack entry for untracked connections */ -DECLARE_PER_CPU(struct nf_conn, nf_conntrack_untracked); +DECLARE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked); static inline struct nf_conn *nf_ct_untracked_get(void) { return raw_cpu_ptr(&nf_conntrack_untracked); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 071b97fcbefb..ffb78e5f7b70 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -181,7 +181,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; seqcount_t nf_conntrack_generation __read_mostly; -DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); +/* nf_conn must be 8 bytes aligned, as the 3 LSB bits are used + * for the nfctinfo. We cheat by (ab)using the PER CPU cache line + * alignment to enforce this. + */ +DEFINE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); static unsigned int nf_conntrack_hash_rnd __read_mostly; -- cgit v1.2.3 From 04166f48d9593af4513ae06c0f966c0cee300a20 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 13 Mar 2017 13:24:03 +0100 Subject: Revert "netfilter: nf_tables: add flush field to struct nft_set_iter" This reverts commit 1f48ff6c5393aa7fe290faf5d633164f105b0aa7. This patch is not required anymore now that we keep a dummy list of set elements in the bitmap set implementation, so revert this before we forget this code has no clients. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 - net/netfilter/nf_tables_api.c | 4 ---- 2 files changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 70c5ca0c60b1..0136028652bd 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -232,7 +232,6 @@ struct nft_set_elem { struct nft_set; struct nft_set_iter { u8 genmask; - bool flush; unsigned int count; unsigned int skip; int err; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5e0ccfd5bb37..434c739dfeca 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3145,7 +3145,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, iter.count = 0; iter.err = 0; iter.fn = nf_tables_bind_check_setelem; - iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) @@ -3399,7 +3398,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) args.iter.count = 0; args.iter.err = 0; args.iter.fn = nf_tables_dump_setelem; - args.iter.flush = false; set->ops->walk(&ctx, set, &args.iter); nla_nest_end(skb, nest); @@ -3963,7 +3961,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct nft_set_iter iter = { .genmask = genmask, .fn = nft_flush_set, - .flush = true, }; set->ops->walk(&ctx, set, &iter); @@ -5114,7 +5111,6 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, iter.count = 0; iter.err = 0; iter.fn = nf_tables_loop_check_setelem; - iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) -- cgit v1.2.3 From c42f8218610aa09d7d3795e5810387673c1f84b6 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 9 Mar 2017 17:20:04 +0100 Subject: iio: sw-device: Fix config group initialization Use the IS_ENABLED() helper macro to ensure that the configfs group is initialized either when configfs is built-in or when configfs is built as a module. Otherwise software device creation will result in undefined behaviour when configfs is built as a module since the configfs group for the device not properly initialized. Similar to commit b2f0c09664b7 ("iio: sw-trigger: Fix config group initialization"). Fixes: 0f3a8c3f34f7 ("iio: Add support for creating IIO devices via configfs") Reported-by: Miguel Robles Signed-off-by: Lars-Peter Clausen Acked-by: Daniel Baluta Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/sw_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/sw_device.h b/include/linux/iio/sw_device.h index 23ca41515527..fa7931933067 100644 --- a/include/linux/iio/sw_device.h +++ b/include/linux/iio/sw_device.h @@ -62,7 +62,7 @@ void iio_swd_group_init_type_name(struct iio_sw_device *d, const char *name, struct config_item_type *type) { -#ifdef CONFIG_CONFIGFS_FS +#if IS_ENABLED(CONFIG_CONFIGFS_FS) config_group_init_type_name(&d->group, name, type); #endif } -- cgit v1.2.3 From 3243367b209faed5c320a4e5f9a565ee2a2ba958 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Mon, 13 Mar 2017 20:50:08 +0100 Subject: usb-core: Add LINEAR_FRAME_INTR_BINTERVAL USB quirk Some USB 2.0 devices erroneously report millisecond values in bInterval. The generic config code manages to catch most of them, but in some cases it's not completely enough. The case at stake here is a USB 2.0 braille device, which wants to announce 10ms and thus sets bInterval to 10, but with the USB 2.0 computation that yields to 64ms. It happens that one can type fast enough to reach this interval and get the device buffers overflown, leading to problematic latencies. The generic config code does not catch this case because the 64ms is considered a sane enough value. This change thus adds a USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL quirk to mark devices which actually report milliseconds in bInterval, and marks Vario Ultra devices as needing it. Signed-off-by: Samuel Thibault Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 10 ++++++++++ drivers/usb/core/quirks.c | 8 ++++++++ include/linux/usb/quirks.h | 6 ++++++ 3 files changed, 24 insertions(+) (limited to 'include') diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 25dbd8c7aec7..4be52c602e9b 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -280,6 +280,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, /* * Adjust bInterval for quirked devices. + */ + /* + * This quirk fixes bIntervals reported in ms. + */ + if (to_usb_device(ddev)->quirks & + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL) { + n = clamp(fls(d->bInterval) + 3, i, j); + i = j = n; + } + /* * This quirk fixes bIntervals reported in * linear microframes. */ diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 24f9f98968a5..96b21b0dac1e 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -170,6 +170,14 @@ static const struct usb_device_id usb_quirk_list[] = { /* M-Systems Flash Disk Pioneers */ { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Baum Vario Ultra */ + { USB_DEVICE(0x0904, 0x6101), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + { USB_DEVICE(0x0904, 0x6102), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + { USB_DEVICE(0x0904, 0x6103), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + /* Keytouch QWERTY Panel keyboard */ { USB_DEVICE(0x0926, 0x3333), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 1d0043dc34e4..de2a722fe3cf 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -50,4 +50,10 @@ /* device can't handle Link Power Management */ #define USB_QUIRK_NO_LPM BIT(10) +/* + * Device reports its bInterval as linear frames instead of the + * USB 2.0 calculation. + */ +#define USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL BIT(11) + #endif /* __LINUX_USB_QUIRKS_H */ -- cgit v1.2.3 From 0043c1dfbec7b6e2427409059b05347d6f51aa9f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 8 Feb 2017 09:24:25 +0000 Subject: serial: st-asc: Use new GPIOD API to obtain RTS pin The commits mentioned below adapt the GPIO API to allow more information to be passed directly through devm_get_gpiod_from_child() in the first instance. This facilitates the removal of subsequent calls, such as gpiod_direction_output(). This patch firstly moves to utilise the new API and secondly removes the now superfluous call do set the direction. Reported-by: Stephen Rothwell Suggested-by: Boris Brezillon Signed-off-by: Lee Jones [Also drop the header file dummies that only this driver was using] Acked-by: Greg Kroah-Hartman Signed-off-by: Linus Walleij --- drivers/tty/serial/st-asc.c | 11 ++++++----- include/linux/gpio/consumer.h | 16 ---------------- 2 files changed, 6 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c index bcf1d33e6ffe..c334bcc59c64 100644 --- a/drivers/tty/serial/st-asc.c +++ b/drivers/tty/serial/st-asc.c @@ -575,12 +575,13 @@ static void asc_set_termios(struct uart_port *port, struct ktermios *termios, pinctrl_select_state(ascport->pinctrl, ascport->states[NO_HW_FLOWCTRL]); - gpiod = devm_get_gpiod_from_child(port->dev, "rts", - &np->fwnode); - if (!IS_ERR(gpiod)) { - gpiod_direction_output(gpiod, 0); + gpiod = devm_fwnode_get_gpiod_from_child(port->dev, + "rts", + &np->fwnode, + GPIOD_OUT_LOW, + np->name); + if (!IS_ERR(gpiod)) ascport->rts = gpiod; - } } } diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 2484b2fcc6eb..933d93656605 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -143,15 +143,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, struct fwnode_handle *child, enum gpiod_flags flags, const char *label); -/* FIXME: delete this helper when users are switched over */ -static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, - const char *con_id, struct fwnode_handle *child) -{ - return devm_fwnode_get_index_gpiod_from_child(dev, con_id, - 0, child, - GPIOD_ASIS, - "?"); -} #else /* CONFIG_GPIOLIB */ @@ -444,13 +435,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, return ERR_PTR(-ENOSYS); } -/* FIXME: delete this when all users are switched over */ -static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, - const char *con_id, struct fwnode_handle *child) -{ - return ERR_PTR(-ENOSYS); -} - #endif /* CONFIG_GPIOLIB */ static inline -- cgit v1.2.3 From 94840e3c802daa1a62985957f36ac48faf8ceedd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 22 Feb 2017 13:25:14 -0800 Subject: fscrypt: eliminate ->prepare_context() operation The only use of the ->prepare_context() fscrypt operation was to allow ext4 to evict inline data from the inode before ->set_context(). However, there is no reason why this cannot be done as simply the first step in ->set_context(), and in fact it makes more sense to do it that way because then the policy modes and flags get validated before any real work is done. Therefore, merge ext4_prepare_context() into ext4_set_context(), and remove ->prepare_context(). Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/policy.c | 7 ------- fs/ext4/super.c | 10 ++++------ include/linux/fscrypt_common.h | 1 - 3 files changed, 4 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 14b76da71269..4908906d54d5 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -33,17 +33,10 @@ static int create_encryption_context_from_policy(struct inode *inode, const struct fscrypt_policy *policy) { struct fscrypt_context ctx; - int res; if (!inode->i_sb->s_cop->set_context) return -EOPNOTSUPP; - if (inode->i_sb->s_cop->prepare_context) { - res = inode->i_sb->s_cop->prepare_context(inode); - if (res) - return res; - } - ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1; memcpy(ctx.master_key_descriptor, policy->master_key_descriptor, FS_KEY_DESCRIPTOR_SIZE); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2e03a0a88d92..a9448db1cf7e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1120,17 +1120,16 @@ static int ext4_get_context(struct inode *inode, void *ctx, size_t len) EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len); } -static int ext4_prepare_context(struct inode *inode) -{ - return ext4_convert_inline_data(inode); -} - static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, void *fs_data) { handle_t *handle = fs_data; int res, res2, retries = 0; + res = ext4_convert_inline_data(inode); + if (res) + return res; + /* * If a journal handle was specified, then the encryption context is * being set on a new inode via inheritance and is part of a larger @@ -1196,7 +1195,6 @@ static unsigned ext4_max_namelen(struct inode *inode) static const struct fscrypt_operations ext4_cryptops = { .key_prefix = "ext4:", .get_context = ext4_get_context, - .prepare_context = ext4_prepare_context, .set_context = ext4_set_context, .dummy_context = ext4_dummy_context, .is_encrypted = ext4_encrypted_inode, diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h index 547f81592ba1..10c1abfbac6c 100644 --- a/include/linux/fscrypt_common.h +++ b/include/linux/fscrypt_common.h @@ -87,7 +87,6 @@ struct fscrypt_operations { unsigned int flags; const char *key_prefix; int (*get_context)(struct inode *, void *, size_t); - int (*prepare_context)(struct inode *); int (*set_context)(struct inode *, const void *, size_t, void *); int (*dummy_context)(struct inode *); bool (*is_encrypted)(struct inode *); -- cgit v1.2.3 From 8200f2085abe7f29a016381f3122000cc7b2a760 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 4 Mar 2017 18:13:57 -0700 Subject: vmbus: use rcu for per-cpu channel list The per-cpu channel list is now referred to in the interrupt routine. This is mostly safe since the host will not normally generate an interrupt when channel is being deleted but if it did then there would be a use after free problem. To solve, this use RCU protection on ther per-cpu list. Fixes: 631e63a9f346 ("vmbus: change to per channel tasklet") Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 7 ++++--- drivers/hv/vmbus_drv.c | 6 +++++- include/linux/hyperv.h | 7 +++++++ 3 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index f33465d78a02..d2cfa3eb71a2 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -350,7 +350,8 @@ static struct vmbus_channel *alloc_channel(void) static void free_channel(struct vmbus_channel *channel) { tasklet_kill(&channel->callback_event); - kfree(channel); + + kfree_rcu(channel, rcu); } static void percpu_channel_enq(void *arg) @@ -359,14 +360,14 @@ static void percpu_channel_enq(void *arg) struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context); - list_add_tail(&channel->percpu_list, &hv_cpu->chan_list); + list_add_tail_rcu(&channel->percpu_list, &hv_cpu->chan_list); } static void percpu_channel_deq(void *arg) { struct vmbus_channel *channel = arg; - list_del(&channel->percpu_list); + list_del_rcu(&channel->percpu_list); } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index da6b59ba5940..8370b9dc6037 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -939,8 +939,10 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu) if (relid == 0) continue; + rcu_read_lock(); + /* Find channel based on relid */ - list_for_each_entry(channel, &hv_cpu->chan_list, percpu_list) { + list_for_each_entry_rcu(channel, &hv_cpu->chan_list, percpu_list) { if (channel->offermsg.child_relid != relid) continue; @@ -956,6 +958,8 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu) tasklet_schedule(&channel->callback_event); } } + + rcu_read_unlock(); } } diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 62bbf3c1aa4a..c4c7ae91f9d1 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -845,6 +845,13 @@ struct vmbus_channel { * link up channels based on their CPU affinity. */ struct list_head percpu_list; + + /* + * Defer freeing channel until after all cpu's have + * gone through grace period. + */ + struct rcu_head rcu; + /* * For performance critical channels (storage, networking * etc,), Hyper-V has a mechanism to enhance the throughput -- cgit v1.2.3 From dad72a1d28442b03aac86836a42de2d00a1014ab Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sat, 4 Mar 2017 18:13:58 -0700 Subject: vmbus: remove hv_event_tasklet_disable/enable With the recent introduction of per-channel tasklet, we need to update the way we handle the 3 concurrency issues: 1. hv_process_channel_removal -> percpu_channel_deq vs. vmbus_chan_sched -> list_for_each_entry(..., percpu_list); 2. vmbus_process_offer -> percpu_channel_enq/deq vs. vmbus_chan_sched. 3. vmbus_close_internal vs. the per-channel tasklet vmbus_on_event; The first 2 issues can be handled by Stephen's recent patch "vmbus: use rcu for per-cpu channel list", and the third issue can be handled by calling tasklet_disable in vmbus_close_internal here. We don't need the original hv_event_tasklet_disable/enable since we now use per-channel tasklet instead of the previous per-CPU tasklet, and actually we must remove them due to the side effect now: vmbus_process_offer -> hv_event_tasklet_enable -> tasklet_schedule will start the per-channel callback prematurely, cauing NULL dereferencing (the channel may haven't been properly configured to run the callback yet). Fixes: 631e63a9f346 ("vmbus: change to per channel tasklet") Signed-off-by: Dexuan Cui Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Tested-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 12 ++++-------- drivers/hv/channel_mgmt.c | 19 ------------------- include/linux/hyperv.h | 3 --- 3 files changed, 4 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index bd0d1988feb2..57b2958205c7 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -530,15 +530,13 @@ static int vmbus_close_internal(struct vmbus_channel *channel) int ret; /* - * vmbus_on_event(), running in the tasklet, can race + * vmbus_on_event(), running in the per-channel tasklet, can race * with vmbus_close_internal() in the case of SMP guest, e.g., when * the former is accessing channel->inbound.ring_buffer, the latter - * could be freeing the ring_buffer pages. - * - * To resolve the race, we can serialize them by disabling the - * tasklet when the latter is running here. + * could be freeing the ring_buffer pages, so here we must stop it + * first. */ - hv_event_tasklet_disable(channel); + tasklet_disable(&channel->callback_event); /* * In case a device driver's probe() fails (e.g., @@ -605,8 +603,6 @@ static int vmbus_close_internal(struct vmbus_channel *channel) get_order(channel->ringbuffer_pagecount * PAGE_SIZE)); out: - hv_event_tasklet_enable(channel); - return ret; } diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index d2cfa3eb71a2..bf846d078d85 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -382,19 +382,6 @@ static void vmbus_release_relid(u32 relid) true); } -void hv_event_tasklet_disable(struct vmbus_channel *channel) -{ - tasklet_disable(&channel->callback_event); -} - -void hv_event_tasklet_enable(struct vmbus_channel *channel) -{ - tasklet_enable(&channel->callback_event); - - /* In case there is any pending event */ - tasklet_schedule(&channel->callback_event); -} - void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) { unsigned long flags; @@ -403,7 +390,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) BUG_ON(!channel->rescind); BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); - hv_event_tasklet_disable(channel); if (channel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(channel->target_cpu, @@ -412,7 +398,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) percpu_channel_deq(channel); put_cpu(); } - hv_event_tasklet_enable(channel); if (channel->primary_channel == NULL) { list_del(&channel->listentry); @@ -506,7 +491,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) init_vp_index(newchannel, dev_type); - hv_event_tasklet_disable(newchannel); if (newchannel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(newchannel->target_cpu, @@ -516,7 +500,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) percpu_channel_enq(newchannel); put_cpu(); } - hv_event_tasklet_enable(newchannel); /* * This state is used to indicate a successful open @@ -566,7 +549,6 @@ err_deq_chan: list_del(&newchannel->listentry); mutex_unlock(&vmbus_connection.channel_mutex); - hv_event_tasklet_disable(newchannel); if (newchannel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(newchannel->target_cpu, @@ -575,7 +557,6 @@ err_deq_chan: percpu_channel_deq(newchannel); put_cpu(); } - hv_event_tasklet_enable(newchannel); vmbus_release_relid(newchannel->offermsg.child_relid); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c4c7ae91f9d1..970771a5f739 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1437,9 +1437,6 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, const int *srv_version, int srv_vercnt, int *nego_fw_version, int *nego_srv_version); -void hv_event_tasklet_disable(struct vmbus_channel *channel); -void hv_event_tasklet_enable(struct vmbus_channel *channel); - void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); void vmbus_setevent(struct vmbus_channel *channel); -- cgit v1.2.3 From 7c468447f40645fbf2a033dfdaa92b1957130d50 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Fri, 10 Mar 2017 12:28:18 -0600 Subject: crypto: ccp - Assign DMA commands to the channel's CCP The CCP driver generally uses a round-robin approach when assigning operations to available CCPs. For the DMA engine, however, the DMA mappings of the SGs are associated with a specific CCP. When an IOMMU is enabled, the IOMMU is programmed based on this specific device. If the DMA operations are not performed by that specific CCP then addressing errors and I/O page faults will occur. Update the CCP driver to allow a specific CCP device to be requested for an operation and use this in the DMA engine support. Cc: # 4.9.x- Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-dev.c | 5 ++++- drivers/crypto/ccp/ccp-dmaengine.c | 1 + include/linux/ccp.h | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c index 511ab042b5e7..92d1c6959f08 100644 --- a/drivers/crypto/ccp/ccp-dev.c +++ b/drivers/crypto/ccp/ccp-dev.c @@ -283,11 +283,14 @@ EXPORT_SYMBOL_GPL(ccp_version); */ int ccp_enqueue_cmd(struct ccp_cmd *cmd) { - struct ccp_device *ccp = ccp_get_device(); + struct ccp_device *ccp; unsigned long flags; unsigned int i; int ret; + /* Some commands might need to be sent to a specific device */ + ccp = cmd->ccp ? cmd->ccp : ccp_get_device(); + if (!ccp) return -ENODEV; diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index e5d9278f4019..8d0eeb46d4a2 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -390,6 +390,7 @@ static struct ccp_dma_desc *ccp_create_desc(struct dma_chan *dma_chan, goto err; ccp_cmd = &cmd->ccp_cmd; + ccp_cmd->ccp = chan->ccp; ccp_pt = &ccp_cmd->u.passthru_nomap; ccp_cmd->flags = CCP_CMD_MAY_BACKLOG; ccp_cmd->flags |= CCP_CMD_PASSTHRU_NO_DMA_MAP; diff --git a/include/linux/ccp.h b/include/linux/ccp.h index c71dd8fa5764..c41b8d99dd0e 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -556,7 +556,7 @@ enum ccp_engine { * struct ccp_cmd - CCP operation request * @entry: list element (ccp driver use only) * @work: work element used for callbacks (ccp driver use only) - * @ccp: CCP device to be run on (ccp driver use only) + * @ccp: CCP device to be run on * @ret: operation return code (ccp driver use only) * @flags: cmd processing flags * @engine: CCP operation to perform -- cgit v1.2.3 From 5be9b730b09c45c358bbfe7f51d254e306cccc07 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 16 Mar 2017 16:40:21 -0700 Subject: kasan: add a prototype of task_struct to avoid warning Add a prototype of task_struct to fix below warning on arm64. In file included from arch/arm64/kernel/probes/kprobes.c:19:0: include/linux/kasan.h:81:132: error: 'struct task_struct' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] static inline void kasan_unpoison_task_stack(struct task_struct *task) {} As same as other types (kmem_cache, page, and vm_struct) this adds a prototype of task_struct data structure on top of kasan.h. [arnd] A related warning was fixed before, but now appears in a different line in the same file in v4.11-rc2. The patch from Masami Hiramatsu still seems appropriate, so let's take his version. Fixes: 71af2ed5eeea ("kasan, sched/headers: Remove from ") Link: https://patchwork.kernel.org/patch/9569839/ Link: http://lkml.kernel.org/r/20170313141517.3397802-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Signed-off-by: Masami Hiramatsu Acked-by: Alexander Potapenko Acked-by: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 1c823bef4c15..5734480c9590 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -6,6 +6,7 @@ struct kmem_cache; struct page; struct vm_struct; +struct task_struct; #ifdef CONFIG_KASAN -- cgit v1.2.3 From 15c9e10d9ad4d41d076148bbff1de7f659f68852 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 16 Mar 2017 16:40:33 -0700 Subject: drivers core: remove assert_held_device_hotplug() The last caller of assert_held_device_hotplug() is gone, so remove it again. Link: http://lkml.kernel.org/r/20170314125226.16779-3-heiko.carstens@de.ibm.com Signed-off-by: Heiko Carstens Acked-by: Dan Williams Cc: Michal Hocko Cc: "Rafael J. Wysocki" Cc: Vladimir Davydov Cc: Ben Hutchings Cc: Gerald Schaefer Cc: Martin Schwidefsky Cc: Sebastian Ott Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/core.c | 5 ----- include/linux/device.h | 1 - 2 files changed, 6 deletions(-) (limited to 'include') diff --git a/drivers/base/core.c b/drivers/base/core.c index 684bda4d14a1..6bb60fb6a30b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -639,11 +639,6 @@ int lock_device_hotplug_sysfs(void) return restart_syscall(); } -void assert_held_device_hotplug(void) -{ - lockdep_assert_held(&device_hotplug_lock); -} - #ifdef CONFIG_BLOCK static inline int device_is_not_partition(struct device *dev) { diff --git a/include/linux/device.h b/include/linux/device.h index 30c4570e928d..9ef518af5515 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1140,7 +1140,6 @@ static inline bool device_supports_offline(struct device *dev) extern void lock_device_hotplug(void); extern void unlock_device_hotplug(void); extern int lock_device_hotplug_sysfs(void); -void assert_held_device_hotplug(void); extern int device_offline(struct device *dev); extern int device_online(struct device *dev); extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); -- cgit v1.2.3 From 4cbe4dac82e423ecc9a0ba46af24a860853259f4 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 13 Mar 2017 19:29:08 +0200 Subject: net/mlx4_core: Avoid delays during VF driver device shutdown Some Hypervisors detach VFs from VMs by instantly causing an FLR event to be generated for a VF. In the mlx4 case, this will cause that VF's comm channel to be disabled before the VM has an opportunity to invoke the VF device's "shutdown" method. For such Hypervisors, there is a race condition between the VF's shutdown method and its internal-error detection/reset thread. The internal-error detection/reset thread (which runs every 5 seconds) also detects a disabled comm channel. If the internal-error detection/reset flow wins the race, we still get delays (while that flow tries repeatedly to detect comm-channel recovery). The cited commit fixed the command timeout problem when the internal-error detection/reset flow loses the race. This commit avoids the unneeded delays when the internal-error detection/reset flow wins. Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown") Signed-off-by: Jack Morgenstein Reported-by: Simon Xiao Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 +++++++++++ drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++ include/linux/mlx4/device.h | 1 + 3 files changed, 23 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index e8c105164931..0e0fa7030565 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2305,6 +2305,17 @@ static int sync_toggles(struct mlx4_dev *dev) rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { /* PCI might be offline */ + + /* If device removal has been requested, + * do not continue retrying. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_NOWAIT) { + mlx4_warn(dev, + "communication channel is offline\n"); + return -EIO; + } + msleep(100); wr_toggle = swab32(readl(&priv->mfunc.comm-> slave_write)); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 21377c315083..703205475524 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1940,6 +1940,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev) (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); if (!offline_bit) return 0; + + /* If device removal has been requested, + * do not continue retrying. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_NOWAIT) + break; + /* There are cases as part of AER/Reset flow that PF needs * around 100 msec to load. We therefore sleep for 100 msec * to allow other tasks to make use of that CPU during this @@ -3955,6 +3963,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct devlink *devlink = priv_to_devlink(priv); int active_vfs = 0; + if (mlx4_is_slave(dev)) + persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; + mutex_lock(&persist->interface_state_mutex); persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; mutex_unlock(&persist->interface_state_mutex); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7e66e4f62858..1beb1ec2fbdf 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -476,6 +476,7 @@ enum { enum { MLX4_INTERFACE_STATE_UP = 1 << 0, MLX4_INTERFACE_STATE_DELETION = 1 << 1, + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, }; #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ -- cgit v1.2.3 From 530c6891b1220cba780b6c18f4691d85a3435080 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:13:24 -0600 Subject: target: allow ALUA setup for some passthrough backends This patch allows passthrough backends to use the core/base LIO ALUA setup and state checks, but still handle the execution of commands. This will allow the target_core_user module to execute STPG and RTPG in userspace, and not have to duplicate the ALUA state checks, path information (needed so we can check if command is executable on specific paths) and setup (rtslib sets/updates the configfs ALUA interface like it does for iblock or file). For STPG, the target_core_user userspace daemon, tcmu-runner will still execute the STPG, and to update the core/base LIO state it will use the existing configfs interface. For RTPG, tcmu-runner will loop over configfs and/or cache the state. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 9 +++++---- drivers/target/target_core_pscsi.c | 3 ++- drivers/target/target_core_tpg.c | 3 ++- include/target/target_core_backend.h | 7 ++++++- 4 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index f5e330099bfc..a41bbb8087cf 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -691,7 +691,7 @@ target_alua_state_check(struct se_cmd *cmd) if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE) return 0; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA) return 0; /* @@ -1973,7 +1973,7 @@ ssize_t core_alua_store_tg_pt_gp_info( unsigned char buf[TG_PT_GROUP_NAME_BUF]; int move = 0; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH || + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA || (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) return -ENODEV; @@ -2230,7 +2230,7 @@ ssize_t core_alua_store_offline_bit( unsigned long tmp; int ret; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH || + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA || (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) return -ENODEV; @@ -2316,7 +2316,8 @@ ssize_t core_alua_store_secondary_write_metadata( int core_setup_alua(struct se_device *dev) { - if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && + if (!(dev->transport->transport_flags & + TRANSPORT_FLAG_PASSTHROUGH_ALUA) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) { struct t10_alua_lu_gp_member *lu_gp_mem; diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 44d92f23a3f0..94cda7991e80 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1080,7 +1080,8 @@ static void pscsi_req_done(struct request *req, int uptodate) static const struct target_backend_ops pscsi_ops = { .name = "pscsi", .owner = THIS_MODULE, - .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, + .transport_flags = TRANSPORT_FLAG_PASSTHROUGH | + TRANSPORT_FLAG_PASSTHROUGH_ALUA, .attach_hba = pscsi_attach_hba, .detach_hba = pscsi_detach_hba, .pmode_enable_hba = pscsi_pmode_enable_hba, diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index c0dbfa016575..6fb191914f45 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -602,7 +602,8 @@ int core_tpg_add_lun( if (ret) goto out_kill_ref; - if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && + if (!(dev->transport->transport_flags & + TRANSPORT_FLAG_PASSTHROUGH_ALUA) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp); diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index b54b98dc2d4a..1b0f447ce850 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -4,7 +4,12 @@ #include #include -#define TRANSPORT_FLAG_PASSTHROUGH 1 +#define TRANSPORT_FLAG_PASSTHROUGH 0x1 +/* + * ALUA commands, state checks and setup operations are handled by the + * backend module. + */ +#define TRANSPORT_FLAG_PASSTHROUGH_ALUA 0x2 struct request_queue; struct scatterlist; -- cgit v1.2.3 From d7175373f2745ed4abe5b388d5aabd06304f801e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 2 Mar 2017 04:59:48 -0600 Subject: target: fix ALUA transition timeout handling The implicit transition time tells initiators the min time to wait before timing out a transition. We currently schedule the transition to occur in tg_pt_gp_implicit_trans_secs seconds so there is no room for delays. If core_alua_do_transition_tg_pt_work->core_alua_update_tpg_primary_metadata needs to write out info to a remote file, then the initiator can easily time out the operation. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 23 ++++++++--------------- include/target/target_core_base.h | 2 +- 2 files changed, 9 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 58bf5e6350ac..594807cd92cb 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1013,7 +1013,7 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp) static void core_alua_do_transition_tg_pt_work(struct work_struct *work) { struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work, - struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work); + struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work); struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status == ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG); @@ -1076,13 +1076,12 @@ static int core_alua_do_transition_tg_pt( /* * Flush any pending transitions */ - if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs && - atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == + if (!explicit && atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == ALUA_ACCESS_STATE_TRANSITION) { /* Just in case */ tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; tg_pt_gp->tg_pt_gp_transition_complete = &wait; - flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work); + flush_work(&tg_pt_gp->tg_pt_gp_transition_work); wait_for_completion(&wait); tg_pt_gp->tg_pt_gp_transition_complete = NULL; return 0; @@ -1117,15 +1116,9 @@ static int core_alua_do_transition_tg_pt( atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt); spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) { - unsigned long transition_tmo; - - transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ; - schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, - transition_tmo); - } else { + schedule_work(&tg_pt_gp->tg_pt_gp_transition_work); + if (explicit) { tg_pt_gp->tg_pt_gp_transition_complete = &wait; - schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, 0); wait_for_completion(&wait); tg_pt_gp->tg_pt_gp_transition_complete = NULL; } @@ -1696,8 +1689,8 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev, mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex); spin_lock_init(&tg_pt_gp->tg_pt_gp_lock); atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0); - INIT_DELAYED_WORK(&tg_pt_gp->tg_pt_gp_transition_work, - core_alua_do_transition_tg_pt_work); + INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work, + core_alua_do_transition_tg_pt_work); tg_pt_gp->tg_pt_gp_dev = dev; atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED); @@ -1805,7 +1798,7 @@ void core_alua_free_tg_pt_gp( dev->t10_alua.alua_tg_pt_gps_counter--; spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work); + flush_work(&tg_pt_gp->tg_pt_gp_transition_work); /* * Allow a struct t10_alua_tg_pt_gp_member * referenced by diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 37c274e61acc..4b784b6e21c0 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -299,7 +299,7 @@ struct t10_alua_tg_pt_gp { struct list_head tg_pt_gp_lun_list; struct se_lun *tg_pt_gp_alua_lun; struct se_node_acl *tg_pt_gp_alua_nacl; - struct delayed_work tg_pt_gp_transition_work; + struct work_struct tg_pt_gp_transition_work; struct completion *tg_pt_gp_transition_complete; }; -- cgit v1.2.3 From fdfe4a393e9cd8c92f4489ca207d410f44d05043 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Mon, 13 Mar 2017 23:45:21 +0900 Subject: generic syscalls: Wire up statx syscall The new syscall statx is implemented as generic code, so enable it for architectures like openrisc which use the generic syscall table. Fixes: a528d35e8bfcc ("statx: Add a system call to make enhanced file info available") Cc: Thomas Gleixner Cc: Al Viro Cc: David Howells Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Stafford Horne Signed-off-by: Will Deacon --- include/uapi/asm-generic/unistd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 9b1462e38b82..a076cf1a3a23 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -730,9 +730,11 @@ __SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect) __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc) #define __NR_pkey_free 290 __SYSCALL(__NR_pkey_free, sys_pkey_free) +#define __NR_statx 291 +__SYSCALL(__NR_statx, sys_statx) #undef __NR_syscalls -#define __NR_syscalls 291 +#define __NR_syscalls 292 /* * All syscalls below here should go away really, -- cgit v1.2.3 From 73488331eb9460d14974a1e2c734f77ce8869183 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 14 Mar 2017 09:27:57 +0100 Subject: drm/exynos/decon5433: fix vblank event handling Current implementation of event handling assumes that vblank interrupt is always called at the right time. It is not true, it can be delayed due to various reasons. As a result different races can happen. The patch fixes the issue by using hardware frame counter present in DECON to serialize vblank and commit completion events. Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 78 ++++++++++++++++++++++++++- include/video/exynos5433_decon.h | 8 +++ 2 files changed, 85 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 2130ccf1e036..cfe6f8af849f 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -68,6 +68,8 @@ struct decon_context { unsigned long flags; unsigned long out_type; int first_win; + spinlock_t vblank_lock; + u32 frame_id; }; static const uint32_t decon_formats[] = { @@ -122,6 +124,48 @@ static void decon_disable_vblank(struct exynos_drm_crtc *crtc) writel(0, ctx->addr + DECON_VIDINTCON0); } +/* return number of starts/ends of frame transmissions since reset */ +static u32 decon_get_frame_count(struct decon_context *ctx, bool end) +{ + u32 frm, pfrm, status, cnt = 2; + + /* To get consistent result repeat read until frame id is stable. + * Usually the loop will be executed once, in rare cases when the loop + * is executed at frame change time 2nd pass will be needed. + */ + frm = readl(ctx->addr + DECON_CRFMID); + do { + status = readl(ctx->addr + DECON_VIDCON1); + pfrm = frm; + frm = readl(ctx->addr + DECON_CRFMID); + } while (frm != pfrm && --cnt); + + /* CRFMID is incremented on BPORCH in case of I80 and on VSYNC in case + * of RGB, it should be taken into account. + */ + if (!frm) + return 0; + + switch (status & (VIDCON1_VSTATUS_MASK | VIDCON1_I80_ACTIVE)) { + case VIDCON1_VSTATUS_VS: + if (!(ctx->out_type & IFTYPE_I80)) + --frm; + break; + case VIDCON1_VSTATUS_BP: + --frm; + break; + case VIDCON1_I80_ACTIVE: + case VIDCON1_VSTATUS_AC: + if (end) + --frm; + break; + default: + break; + } + + return frm; +} + static void decon_setup_trigger(struct decon_context *ctx) { if (!(ctx->out_type & (IFTYPE_I80 | I80_HW_TRG))) @@ -365,11 +409,14 @@ static void decon_disable_plane(struct exynos_drm_crtc *crtc, static void decon_atomic_flush(struct exynos_drm_crtc *crtc) { struct decon_context *ctx = crtc->ctx; + unsigned long flags; int i; if (test_bit(BIT_SUSPENDED, &ctx->flags)) return; + spin_lock_irqsave(&ctx->vblank_lock, flags); + for (i = ctx->first_win; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, false); @@ -378,12 +425,18 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) if (ctx->out_type & IFTYPE_I80) set_bit(BIT_WIN_UPDATED, &ctx->flags); + + ctx->frame_id = decon_get_frame_count(ctx, true); + exynos_crtc_handle_event(crtc); + + spin_unlock_irqrestore(&ctx->vblank_lock, flags); } static void decon_swreset(struct decon_context *ctx) { unsigned int tries; + unsigned long flags; writel(0, ctx->addr + DECON_VIDCON0); for (tries = 2000; tries; --tries) { @@ -401,6 +454,10 @@ static void decon_swreset(struct decon_context *ctx) WARN(tries == 0, "failed to software reset DECON\n"); + spin_lock_irqsave(&ctx->vblank_lock, flags); + ctx->frame_id = 0; + spin_unlock_irqrestore(&ctx->vblank_lock, flags); + if (!(ctx->out_type & IFTYPE_HDMI)) return; @@ -579,6 +636,24 @@ static const struct component_ops decon_component_ops = { .unbind = decon_unbind, }; +static void decon_handle_vblank(struct decon_context *ctx) +{ + u32 frm; + + spin_lock(&ctx->vblank_lock); + + frm = decon_get_frame_count(ctx, true); + + if (frm != ctx->frame_id) { + /* handle only if incremented, take care of wrap-around */ + if ((s32)(frm - ctx->frame_id) > 0) + drm_crtc_handle_vblank(&ctx->crtc->base); + ctx->frame_id = frm; + } + + spin_unlock(&ctx->vblank_lock); +} + static irqreturn_t decon_irq_handler(int irq, void *dev_id) { struct decon_context *ctx = dev_id; @@ -599,7 +674,7 @@ static irqreturn_t decon_irq_handler(int irq, void *dev_id) (VIDOUT_INTERLACE_EN_F | VIDOUT_INTERLACE_FIELD_F)) return IRQ_HANDLED; } - drm_crtc_handle_vblank(&ctx->crtc->base); + decon_handle_vblank(ctx); } out: @@ -672,6 +747,7 @@ static int exynos5433_decon_probe(struct platform_device *pdev) __set_bit(BIT_SUSPENDED, &ctx->flags); ctx->dev = dev; ctx->out_type = (unsigned long)of_device_get_match_data(dev); + spin_lock_init(&ctx->vblank_lock); if (ctx->out_type & IFTYPE_HDMI) { ctx->first_win = 1; diff --git a/include/video/exynos5433_decon.h b/include/video/exynos5433_decon.h index ef8e2a8ad0af..352fc0d9528f 100644 --- a/include/video/exynos5433_decon.h +++ b/include/video/exynos5433_decon.h @@ -46,6 +46,7 @@ #define DECON_FRAMEFIFO_STATUS 0x0524 #define DECON_CMU 0x1404 #define DECON_UPDATE 0x1410 +#define DECON_CRFMID 0x1414 #define DECON_UPDATE_SCHEME 0x1438 #define DECON_VIDCON1 0x2000 #define DECON_VIDCON2 0x2004 @@ -142,6 +143,13 @@ #define STANDALONE_UPDATE_F (1 << 0) /* DECON_VIDCON1 */ +#define VIDCON1_LINECNT_MASK (0x0fff << 16) +#define VIDCON1_I80_ACTIVE (1 << 15) +#define VIDCON1_VSTATUS_MASK (0x3 << 13) +#define VIDCON1_VSTATUS_VS (0 << 13) +#define VIDCON1_VSTATUS_BP (1 << 13) +#define VIDCON1_VSTATUS_AC (2 << 13) +#define VIDCON1_VSTATUS_FP (3 << 13) #define VIDCON1_VCLK_MASK (0x3 << 9) #define VIDCON1_VCLK_RUN_VDEN_DISABLE (0x3 << 9) #define VIDCON1_VCLK_HOLD (0x0 << 9) -- cgit v1.2.3 From f3cce673e1c11112c536fbc8e6912c5414d7141f Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 14 Mar 2017 09:27:58 +0100 Subject: drm/exynos/decon5433: signal frame done interrupt at front porch DECON in case of video mode generates interrupt by default at start of vertical back porch. As this interrupt is used to generate VBLANK events more optimal point is start of vertical front porch. Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 2 +- include/video/exynos5433_decon.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index cfe6f8af849f..a43d0fa0b051 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -105,7 +105,7 @@ static int decon_enable_vblank(struct exynos_drm_crtc *crtc) if (ctx->out_type & IFTYPE_I80) val |= VIDINTCON0_FRAMEDONE; else - val |= VIDINTCON0_INTFRMEN; + val |= VIDINTCON0_INTFRMEN | VIDINTCON0_FRAMESEL_FP; writel(val, ctx->addr + DECON_VIDINTCON0); } diff --git a/include/video/exynos5433_decon.h b/include/video/exynos5433_decon.h index 352fc0d9528f..6b083d327e98 100644 --- a/include/video/exynos5433_decon.h +++ b/include/video/exynos5433_decon.h @@ -127,6 +127,10 @@ /* VIDINTCON0 */ #define VIDINTCON0_FRAMEDONE (1 << 17) +#define VIDINTCON0_FRAMESEL_BP (0 << 15) +#define VIDINTCON0_FRAMESEL_VS (1 << 15) +#define VIDINTCON0_FRAMESEL_AC (2 << 15) +#define VIDINTCON0_FRAMESEL_FP (3 << 15) #define VIDINTCON0_INTFRMEN (1 << 12) #define VIDINTCON0_INTEN (1 << 0) -- cgit v1.2.3 From 0ca10b60ceeb5372da01798ca68c116ae45a6eb6 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 20 Mar 2017 11:25:16 +0100 Subject: reset: fix optional reset_control_get stubs to return NULL When RESET_CONTROLLER is not enabled, the optional reset_control_get stubs should now also return NULL. Since it is now valid for reset_control_assert/deassert/reset/status/put to be called unconditionally, with NULL as an argument for optional resets, the stubs are not allowed to warn anymore. Fixes: bb475230b8e5 ("reset: make optional functions really optional") Reported-by: Andrzej Hajda Tested-by: Andrzej Hajda Reviewed-by: Andrzej Hajda Cc: Ramiro Oliveira Signed-off-by: Philipp Zabel --- include/linux/reset.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/reset.h b/include/linux/reset.h index 86b4ed75359e..96fb139bdd08 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -31,31 +31,26 @@ static inline int device_reset_optional(struct device *dev) static inline int reset_control_reset(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline int reset_control_assert(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline int reset_control_deassert(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline int reset_control_status(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline void reset_control_put(struct reset_control *rstc) { - WARN_ON(1); } static inline int __must_check device_reset(struct device *dev) @@ -74,14 +69,14 @@ static inline struct reset_control *__of_reset_control_get( const char *id, int index, bool shared, bool optional) { - return ERR_PTR(-ENOTSUPP); + return optional ? NULL : ERR_PTR(-ENOTSUPP); } static inline struct reset_control *__devm_reset_control_get( struct device *dev, const char *id, int index, bool shared, bool optional) { - return ERR_PTR(-ENOTSUPP); + return optional ? NULL : ERR_PTR(-ENOTSUPP); } #endif /* CONFIG_RESET_CONTROLLER */ -- cgit v1.2.3 From 36d277bac8080202684e67162ebb157f16631581 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 15 Mar 2017 09:32:14 +0800 Subject: vsock: track pkt owner vsock So that we can cancel a queued pkt later if necessary. Signed-off-by: Peng Tao Signed-off-by: David S. Miller --- include/linux/virtio_vsock.h | 3 +++ net/vmw_vsock/virtio_transport_common.c | 7 +++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 9638bfeb0d1f..584f9a647ad4 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -48,6 +48,8 @@ struct virtio_vsock_pkt { struct virtio_vsock_hdr hdr; struct work_struct work; struct list_head list; + /* socket refcnt not held, only use for cancellation */ + struct vsock_sock *vsk; void *buf; u32 len; u32 off; @@ -56,6 +58,7 @@ struct virtio_vsock_pkt { struct virtio_vsock_pkt_info { u32 remote_cid, remote_port; + struct vsock_sock *vsk; struct msghdr *msg; u32 pkt_len; u16 type; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 8d592a45b597..af087b44ceea 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -58,6 +58,7 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, pkt->len = len; pkt->hdr.len = cpu_to_le32(len); pkt->reply = info->reply; + pkt->vsk = info->vsk; if (info->msg && len > 0) { pkt->buf = kmalloc(len, GFP_KERNEL); @@ -180,6 +181,7 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk, struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, .type = type, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -519,6 +521,7 @@ int virtio_transport_connect(struct vsock_sock *vsk) struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_REQUEST, .type = VIRTIO_VSOCK_TYPE_STREAM, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -534,6 +537,7 @@ int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | (mode & SEND_SHUTDOWN ? VIRTIO_VSOCK_SHUTDOWN_SEND : 0), + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -560,6 +564,7 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk, .type = VIRTIO_VSOCK_TYPE_STREAM, .msg = msg, .pkt_len = len, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -581,6 +586,7 @@ static int virtio_transport_reset(struct vsock_sock *vsk, .op = VIRTIO_VSOCK_OP_RST, .type = VIRTIO_VSOCK_TYPE_STREAM, .reply = !!pkt, + .vsk = vsk, }; /* Send RST only if the original pkt is not a RST pkt */ @@ -826,6 +832,7 @@ virtio_transport_send_response(struct vsock_sock *vsk, .remote_cid = le64_to_cpu(pkt->hdr.src_cid), .remote_port = le32_to_cpu(pkt->hdr.src_port), .reply = true, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); -- cgit v1.2.3 From 16320f363ae128d9b9c70e60f00f2a572f57c23d Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 15 Mar 2017 09:32:15 +0800 Subject: vhost-vsock: add pkt cancel capability To allow canceling all packets of a connection. Reviewed-by: Stefan Hajnoczi Reviewed-by: Jorgen Hansen Signed-off-by: Peng Tao Signed-off-by: David S. Miller --- drivers/vhost/vsock.c | 41 +++++++++++++++++++++++++++++++++++++++++ include/net/af_vsock.h | 3 +++ 2 files changed, 44 insertions(+) (limited to 'include') diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index ce5e63d2c66a..44eed8eb0725 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -223,6 +223,46 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) return len; } +static int +vhost_transport_cancel_pkt(struct vsock_sock *vsk) +{ + struct vhost_vsock *vsock; + struct virtio_vsock_pkt *pkt, *n; + int cnt = 0; + LIST_HEAD(freeme); + + /* Find the vhost_vsock according to guest context id */ + vsock = vhost_vsock_get(vsk->remote_addr.svm_cid); + if (!vsock) + return -ENODEV; + + spin_lock_bh(&vsock->send_pkt_list_lock); + list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) { + if (pkt->vsk != vsk) + continue; + list_move(&pkt->list, &freeme); + } + spin_unlock_bh(&vsock->send_pkt_list_lock); + + list_for_each_entry_safe(pkt, n, &freeme, list) { + if (pkt->reply) + cnt++; + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + + if (cnt) { + struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; + int new_cnt; + + new_cnt = atomic_sub_return(cnt, &vsock->queued_replies); + if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num) + vhost_poll_queue(&tx_vq->poll); + } + + return 0; +} + static struct virtio_vsock_pkt * vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, unsigned int out, unsigned int in) @@ -675,6 +715,7 @@ static struct virtio_transport vhost_transport = { .release = virtio_transport_release, .connect = virtio_transport_connect, .shutdown = virtio_transport_shutdown, + .cancel_pkt = vhost_transport_cancel_pkt, .dgram_enqueue = virtio_transport_dgram_enqueue, .dgram_dequeue = virtio_transport_dgram_dequeue, diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index f2758964ce6f..f32ed9ac181a 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -100,6 +100,9 @@ struct vsock_transport { void (*destruct)(struct vsock_sock *); void (*release)(struct vsock_sock *); + /* Cancel all pending packets sent on vsock. */ + int (*cancel_pkt)(struct vsock_sock *vsk); + /* Connections. */ int (*connect)(struct vsock_sock *); -- cgit v1.2.3 From 1f904495b79003cd3d881de8731377d48fcbc7e3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 18 Mar 2017 19:27:23 +0800 Subject: sctp: define dst_pending_confirm as a bit in sctp_transport As tp->dst_pending_confirm's value can only be set 0 or 1, this patch is to change to define it as a bit instead of __u32. Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 07a0b128625a..4f645198e9bd 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -753,6 +753,8 @@ struct sctp_transport { /* Is the Path MTU update pending on this tranport */ pmtu_pending:1, + dst_pending_confirm:1, /* need to confirm neighbour */ + /* Has this transport moved the ctsn since we last sacked */ sack_generation:1; u32 dst_cookie; @@ -806,8 +808,6 @@ struct sctp_transport { __u32 burst_limited; /* Holds old cwnd when max.burst is applied */ - __u32 dst_pending_confirm; /* need to confirm neighbour */ - /* Destination */ struct dst_entry *dst; /* Source address. */ -- cgit v1.2.3 From 4ef1b2869447411ad3ef91ad7d4891a83c1a509a Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Sat, 18 Mar 2017 17:03:00 -0400 Subject: tcp: mark skbs with SCM_TIMESTAMPING_OPT_STATS SOF_TIMESTAMPING_OPT_STATS can be enabled and disabled while packets are collected on the error queue. So, checking SOF_TIMESTAMPING_OPT_STATS in sk->sk_tsflags is not enough to safely assume that the skb contains OPT_STATS data. Add a bit in sock_exterr_skb to indicate whether the skb contains opt_stats data. Fixes: 1c885808e456 ("tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING") Reported-by: JongHwan Kim Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/errqueue.h | 2 ++ net/core/skbuff.c | 17 +++++++++++------ net/socket.c | 2 +- 3 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h index 9ca23fcfb5d7..6fdfc884fdeb 100644 --- a/include/linux/errqueue.h +++ b/include/linux/errqueue.h @@ -20,6 +20,8 @@ struct sock_exterr_skb { struct sock_extended_err ee; u16 addr_offset; __be16 port; + u8 opt_stats:1, + unused:7; }; #endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b1fbd1958eb6..9f781092fda9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3793,16 +3793,20 @@ EXPORT_SYMBOL(skb_clone_sk); static void __skb_complete_tx_timestamp(struct sk_buff *skb, struct sock *sk, - int tstype) + int tstype, + bool opt_stats) { struct sock_exterr_skb *serr; int err; + BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb)); + serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = tstype; + serr->opt_stats = opt_stats; if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; if (sk->sk_protocol == IPPROTO_TCP && @@ -3843,7 +3847,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, */ if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { *skb_hwtstamps(skb) = *hwtstamps; - __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false); sock_put(sk); } } @@ -3854,7 +3858,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, struct sock *sk, int tstype) { struct sk_buff *skb; - bool tsonly; + bool tsonly, opt_stats = false; if (!sk) return; @@ -3867,9 +3871,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, #ifdef CONFIG_INET if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && sk->sk_protocol == IPPROTO_TCP && - sk->sk_type == SOCK_STREAM) + sk->sk_type == SOCK_STREAM) { skb = tcp_get_timestamping_opt_stats(sk); - else + opt_stats = true; + } else #endif skb = alloc_skb(0, GFP_ATOMIC); } else { @@ -3888,7 +3893,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, else skb->tstamp = ktime_get_real(); - __skb_complete_tx_timestamp(skb, sk, tstype); + __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); } EXPORT_SYMBOL_GPL(__skb_tstamp_tx); diff --git a/net/socket.c b/net/socket.c index 692d6989d2c2..985ef06792d6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -706,7 +706,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, SCM_TIMESTAMPING, sizeof(tss), &tss); if (skb_is_err_queue(skb) && skb->len && - (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS)) + SKB_EXT_ERR(skb)->opt_stats) put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS, skb->len, skb->data); } -- cgit v1.2.3 From a5023a99393dab276069cd60dad3e61d57720fda Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Fri, 17 Mar 2017 00:28:56 +0100 Subject: hwmon: Add missing HWMON_T_ALARM Unfortunately the HWMON_T_ALARM define was missing, although the associated entry was present in hwmon_temp_attributes. This is needed to convert drivers to the new interface which use channel based alarms. Signed-off-by: Peter Huewe Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 78d59dba563e..88b673749121 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -88,6 +88,7 @@ enum hwmon_temp_attributes { #define HWMON_T_CRIT_HYST BIT(hwmon_temp_crit_hyst) #define HWMON_T_EMERGENCY BIT(hwmon_temp_emergency) #define HWMON_T_EMERGENCY_HYST BIT(hwmon_temp_emergency_hyst) +#define HWMON_T_ALARM BIT(hwmon_temp_alarm) #define HWMON_T_MIN_ALARM BIT(hwmon_temp_min_alarm) #define HWMON_T_MAX_ALARM BIT(hwmon_temp_max_alarm) #define HWMON_T_CRIT_ALARM BIT(hwmon_temp_crit_alarm) -- cgit v1.2.3 From 9d3a4de4cb8db8e71730e36736272ef041836f68 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 16 Mar 2017 17:00:16 +0000 Subject: iommu: Disambiguate MSI region types The introduction of reserved regions has left a couple of rough edges which we could do with sorting out sooner rather than later. Since we are not yet addressing the potential dynamic aspect of software-managed reservations and presenting them at arbitrary fixed addresses, it is incongruous that we end up displaying hardware vs. software-managed MSI regions to userspace differently, especially since ARM-based systems may actually require one or the other, or even potentially both at once, (which iommu-dma currently has no hope of dealing with at all). Let's resolve the former user-visible inconsistency ASAP before the ABI has been baked into a kernel release, in a way that also lays the groundwork for the latter shortcoming to be addressed by follow-up patches. For clarity, rename the software-managed type to IOMMU_RESV_SW_MSI, use IOMMU_RESV_MSI to describe the hardware type, and document everything a little bit. Since the x86 MSI remapping hardware falls squarely under this meaning of IOMMU_RESV_MSI, apply that type to their regions as well, so that we tell the same story to userspace across all platforms. Secondly, as the various region types require quite different handling, and it really makes little sense to ever try combining them, convert the bitfield-esque #defines to a plain enum in the process before anyone gets the wrong impression. Fixes: d30ddcaa7b02 ("iommu: Add a new type field in iommu_resv_region") Reviewed-by: Eric Auger CC: Alex Williamson CC: David Woodhouse CC: kvm@vger.kernel.org Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 +- drivers/iommu/arm-smmu-v3.c | 2 +- drivers/iommu/arm-smmu.c | 2 +- drivers/iommu/intel-iommu.c | 2 +- drivers/iommu/iommu.c | 5 +++-- drivers/vfio/vfio_iommu_type1.c | 7 +++---- include/linux/iommu.h | 18 +++++++++++++----- 7 files changed, 23 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 98940d1392cb..b17536d6e69b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3202,7 +3202,7 @@ static void amd_iommu_get_resv_regions(struct device *dev, region = iommu_alloc_resv_region(MSI_RANGE_START, MSI_RANGE_END - MSI_RANGE_START + 1, - 0, IOMMU_RESV_RESERVED); + 0, IOMMU_RESV_MSI); if (!region) return; list_add_tail(®ion->list, head); diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 5806a6acc94e..591bb96047c9 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1888,7 +1888,7 @@ static void arm_smmu_get_resv_regions(struct device *dev, int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, - prot, IOMMU_RESV_MSI); + prot, IOMMU_RESV_SW_MSI); if (!region) return; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index abf6496843a6..b493c99e17f7 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1608,7 +1608,7 @@ static void arm_smmu_get_resv_regions(struct device *dev, int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, - prot, IOMMU_RESV_MSI); + prot, IOMMU_RESV_SW_MSI); if (!region) return; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 91d60493b57c..d412a313a372 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5249,7 +5249,7 @@ static void intel_iommu_get_resv_regions(struct device *device, reg = iommu_alloc_resv_region(IOAPIC_RANGE_START, IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1, - 0, IOMMU_RESV_RESERVED); + 0, IOMMU_RESV_MSI); if (!reg) return; list_add_tail(®->list, head); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 8ea14f41a979..3b67144dead2 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -72,6 +72,7 @@ static const char * const iommu_group_resv_type_string[] = { [IOMMU_RESV_DIRECT] = "direct", [IOMMU_RESV_RESERVED] = "reserved", [IOMMU_RESV_MSI] = "msi", + [IOMMU_RESV_SW_MSI] = "msi", }; #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ @@ -1743,8 +1744,8 @@ void iommu_put_resv_regions(struct device *dev, struct list_head *list) } struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, - size_t length, - int prot, int type) + size_t length, int prot, + enum iommu_resv_type type) { struct iommu_resv_region *region; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index c26fa1f3ed86..32d2633092a3 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1182,8 +1182,7 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain, return NULL; } -static bool vfio_iommu_has_resv_msi(struct iommu_group *group, - phys_addr_t *base) +static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base) { struct list_head group_resv_regions; struct iommu_resv_region *region, *next; @@ -1192,7 +1191,7 @@ static bool vfio_iommu_has_resv_msi(struct iommu_group *group, INIT_LIST_HEAD(&group_resv_regions); iommu_get_group_resv_regions(group, &group_resv_regions); list_for_each_entry(region, &group_resv_regions, list) { - if (region->type & IOMMU_RESV_MSI) { + if (region->type == IOMMU_RESV_SW_MSI) { *base = region->start; ret = true; goto out; @@ -1283,7 +1282,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if (ret) goto out_domain; - resv_msi = vfio_iommu_has_resv_msi(iommu_group, &resv_msi_base); + resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base); INIT_LIST_HEAD(&domain->group_list); list_add(&group->next, &domain->group_list); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 6a6de187ddc0..2e4de0deee53 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -125,9 +125,16 @@ enum iommu_attr { }; /* These are the possible reserved region types */ -#define IOMMU_RESV_DIRECT (1 << 0) -#define IOMMU_RESV_RESERVED (1 << 1) -#define IOMMU_RESV_MSI (1 << 2) +enum iommu_resv_type { + /* Memory regions which must be mapped 1:1 at all times */ + IOMMU_RESV_DIRECT, + /* Arbitrary "never map this or give it to a device" address ranges */ + IOMMU_RESV_RESERVED, + /* Hardware MSI region (untranslated) */ + IOMMU_RESV_MSI, + /* Software-managed MSI translation window */ + IOMMU_RESV_SW_MSI, +}; /** * struct iommu_resv_region - descriptor for a reserved memory region @@ -142,7 +149,7 @@ struct iommu_resv_region { phys_addr_t start; size_t length; int prot; - int type; + enum iommu_resv_type type; }; #ifdef CONFIG_IOMMU_API @@ -288,7 +295,8 @@ extern void iommu_get_resv_regions(struct device *dev, struct list_head *list); extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); extern int iommu_request_dm_for_dev(struct device *dev); extern struct iommu_resv_region * -iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, int type); +iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, + enum iommu_resv_type type); extern int iommu_get_group_resv_regions(struct iommu_group *group, struct list_head *head); -- cgit v1.2.3 From 1511949c61ec63e4b646c34d602ac6990b38ce30 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 20 Mar 2017 17:46:27 +0800 Subject: sctp: declare struct sctp_stream before using it sctp_stream_free uses struct sctp_stream as a param, but struct sctp_stream is defined after it's declaration. This patch is to declare struct sctp_stream before sctp_stream_free. Fixes: a83863174a61 ("sctp: prepare asoc stream for stream reconf") Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 4f645198e9bd..592decebac75 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -83,6 +83,7 @@ struct sctp_bind_addr; struct sctp_ulpq; struct sctp_ep_common; struct crypto_shash; +struct sctp_stream; #include -- cgit v1.2.3 From 0957c29f78af7d890c4ac506eda8f76bfc5a137a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 7 Mar 2017 22:56:53 +0000 Subject: IB/core: Restore I/O MMU, s390 and powerpc support Avoid that the following error message is reported on the console while loading an RDMA driver with I/O MMU support enabled: DMAR: Allocating domain for mlx5_0 failed Ensure that DMA mapping operations that use to_pci_dev() to access to struct pci_dev see the correct PCI device. E.g. the s390 and powerpc DMA mapping operations use to_pci_dev() even with I/O MMU support disabled. This patch preserves the following changes of the DMA mapping updates patch series: - Introduction of dma_virt_ops. - Removal of ib_device.dma_ops. - Removal of struct ib_dma_mapping_ops. - Removal of an if-statement from each ib_dma_*() operation. - IB HW drivers no longer set dma_device directly. Reported-by: Sebastian Ott Reported-by: Parav Pandit Fixes: commit 99db9494035f ("IB/core: Remove ib_device.dma_device") Signed-off-by: Bart Van Assche Reviewed-by: parav@mellanox.com Tested-by: parav@mellanox.com Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 26 ++++++++++++++++++++------ include/rdma/ib_verbs.h | 30 +++++++++++++++++------------- 2 files changed, 37 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 593d2ce6ec7c..addf869045cc 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -336,12 +336,26 @@ int ib_register_device(struct ib_device *device, struct device *parent = device->dev.parent; WARN_ON_ONCE(!parent); - if (!device->dev.dma_ops) - device->dev.dma_ops = parent->dma_ops; - if (!device->dev.dma_mask) - device->dev.dma_mask = parent->dma_mask; - if (!device->dev.coherent_dma_mask) - device->dev.coherent_dma_mask = parent->coherent_dma_mask; + WARN_ON_ONCE(device->dma_device); + if (device->dev.dma_ops) { + /* + * The caller provided custom DMA operations. Copy the + * DMA-related fields that are used by e.g. dma_alloc_coherent() + * into device->dev. + */ + device->dma_device = &device->dev; + if (!device->dev.dma_mask) + device->dev.dma_mask = parent->dma_mask; + if (!device->dev.coherent_dma_mask) + device->dev.coherent_dma_mask = + parent->coherent_dma_mask; + } else { + /* + * The caller did not provide custom DMA operations. Use the + * DMA mapping operations of the parent device. + */ + device->dma_device = parent; + } mutex_lock(&device_mutex); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0f1813c13687..99e4423eb2b8 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1863,6 +1863,9 @@ struct ib_port_immutable { }; struct ib_device { + /* Do not access @dma_device directly from ULP nor from HW drivers. */ + struct device *dma_device; + char name[IB_DEVICE_NAME_MAX]; struct list_head event_handler_list; @@ -3007,7 +3010,7 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) */ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { - return dma_mapping_error(&dev->dev, dma_addr); + return dma_mapping_error(dev->dma_device, dma_addr); } /** @@ -3021,7 +3024,7 @@ static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { - return dma_map_single(&dev->dev, cpu_addr, size, direction); + return dma_map_single(dev->dma_device, cpu_addr, size, direction); } /** @@ -3035,7 +3038,7 @@ static inline void ib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_single(&dev->dev, addr, size, direction); + dma_unmap_single(dev->dma_device, addr, size, direction); } /** @@ -3052,7 +3055,7 @@ static inline u64 ib_dma_map_page(struct ib_device *dev, size_t size, enum dma_data_direction direction) { - return dma_map_page(&dev->dev, page, offset, size, direction); + return dma_map_page(dev->dma_device, page, offset, size, direction); } /** @@ -3066,7 +3069,7 @@ static inline void ib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_page(&dev->dev, addr, size, direction); + dma_unmap_page(dev->dma_device, addr, size, direction); } /** @@ -3080,7 +3083,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - return dma_map_sg(&dev->dev, sg, nents, direction); + return dma_map_sg(dev->dma_device, sg, nents, direction); } /** @@ -3094,7 +3097,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - dma_unmap_sg(&dev->dev, sg, nents, direction); + dma_unmap_sg(dev->dma_device, sg, nents, direction); } static inline int ib_dma_map_sg_attrs(struct ib_device *dev, @@ -3102,7 +3105,8 @@ static inline int ib_dma_map_sg_attrs(struct ib_device *dev, enum dma_data_direction direction, unsigned long dma_attrs) { - return dma_map_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs); + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); } static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, @@ -3110,7 +3114,7 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, enum dma_data_direction direction, unsigned long dma_attrs) { - dma_unmap_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs); + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); } /** * ib_sg_dma_address - Return the DMA address from a scatter/gather entry @@ -3152,7 +3156,7 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_cpu(&dev->dev, addr, size, dir); + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); } /** @@ -3167,7 +3171,7 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_device(&dev->dev, addr, size, dir); + dma_sync_single_for_device(dev->dma_device, addr, size, dir); } /** @@ -3182,7 +3186,7 @@ static inline void *ib_dma_alloc_coherent(struct ib_device *dev, dma_addr_t *dma_handle, gfp_t flag) { - return dma_alloc_coherent(&dev->dev, size, dma_handle, flag); + return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag); } /** @@ -3196,7 +3200,7 @@ static inline void ib_dma_free_coherent(struct ib_device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle) { - dma_free_coherent(&dev->dev, size, cpu_addr, dma_handle); + dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); } /** -- cgit v1.2.3 From 812755d69efcf7c12fded57983d456647a0bbadd Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Fri, 24 Feb 2017 03:28:13 +0300 Subject: uapi: fix rdma/mlx5-abi.h userspace compilation errors Consistently use types from linux/types.h to fix the following rdma/mlx5-abi.h userspace compilation errors: /usr/include/rdma/mlx5-abi.h:69:25: error: 'u64' undeclared here (not in a function) MLX5_LIB_CAP_4K_UAR = (u64)1 << 0, /usr/include/rdma/mlx5-abi.h:69:29: error: expected ',' or '}' before numeric constant MLX5_LIB_CAP_4K_UAR = (u64)1 << 0, Include to fix the following rdma/mlx5-abi.h userspace compilation error: /usr/include/rdma/mlx5-abi.h:286:12: error: 'ETH_ALEN' undeclared here (not in a function) __u8 dmac[ETH_ALEN]; Signed-off-by: Dmitry V. Levin Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx5-abi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index da7cd62bace7..0b3d30837a9f 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -34,6 +34,7 @@ #define MLX5_ABI_USER_H #include +#include /* For ETH_ALEN. */ enum { MLX5_QP_FLAG_SIGNATURE = 1 << 0, @@ -66,7 +67,7 @@ struct mlx5_ib_alloc_ucontext_req { }; enum mlx5_lib_caps { - MLX5_LIB_CAP_4K_UAR = (u64)1 << 0, + MLX5_LIB_CAP_4K_UAR = (__u64)1 << 0, }; struct mlx5_ib_alloc_ucontext_req_v2 { -- cgit v1.2.3