Diffstat (limited to 'include')
81 files changed, 2224 insertions, 1470 deletions
diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h index 880a292d792f..c13f46109e88 100644 --- a/include/asm-generic/dma-mapping.h +++ b/include/asm-generic/dma-mapping.h @@ -4,7 +4,7 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - return &dma_direct_ops; + return NULL; } #endif /* _ASM_GENERIC_DMA_MAPPING_H */ diff --git a/include/linux/alcor_pci.h b/include/linux/alcor_pci.h new file mode 100644 index 000000000000..da973e8a2da8 --- /dev/null +++ b/include/linux/alcor_pci.h @@ -0,0 +1,286 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2018 Oleksij Rempel <linux@rempel-privat.de> + * + * Driver for Alcor Micro AU6601 and AU6621 controllers + */ + +#ifndef __ALCOR_PCI_H +#define __ALCOR_PCI_H + +#define ALCOR_SD_CARD 0 +#define ALCOR_MS_CARD 1 + +#define DRV_NAME_ALCOR_PCI_SDMMC "alcor_sdmmc" +#define DRV_NAME_ALCOR_PCI_MS "alcor_ms" + +#define PCI_ID_ALCOR_MICRO 0x1AEA +#define PCI_ID_AU6601 0x6601 +#define PCI_ID_AU6621 0x6621 + +#define MHZ_TO_HZ(freq) ((freq) * 1000 * 1000) + +#define AU6601_BASE_CLOCK 31000000 +#define AU6601_MIN_CLOCK 150000 +#define AU6601_MAX_CLOCK 208000000 +#define AU6601_MAX_DMA_SEGMENTS 1 +#define AU6601_MAX_PIO_SEGMENTS 1 +#define AU6601_MAX_DMA_BLOCK_SIZE 0x1000 +#define AU6601_MAX_PIO_BLOCK_SIZE 0x200 +#define AU6601_MAX_DMA_BLOCKS 1 +#define AU6601_DMA_LOCAL_SEGMENTS 1 + +/* registers spotter by reverse engineering but still + * with unknown functionality: + * 0x10 - ADMA phy address. AU6621 only? + * 0x51 - LED ctrl? + * 0x52 - unknown + * 0x61 - LED related? Always toggled BIT0 + * 0x63 - Same as 0x61? + * 0x77 - unknown + */ + +/* SDMA phy address. Higher then 0x0800.0000? + * The au6601 and au6621 have different DMA engines with different issues. One + * For example au6621 engine is triggered by addr change. No other interaction + * is needed. This means, if we get two buffers with same address, then engine + * will stall. + */ +#define AU6601_REG_SDMA_ADDR 0x00 +#define AU6601_SDMA_MASK 0xffffffff + +#define AU6601_DMA_BOUNDARY 0x05 +#define AU6621_DMA_PAGE_CNT 0x05 +/* PIO */ +#define AU6601_REG_BUFFER 0x08 +/* ADMA ctrl? AU6621 only. */ +#define AU6621_DMA_CTRL 0x0c +#define AU6621_DMA_ENABLE BIT(0) +/* CMD index */ +#define AU6601_REG_CMD_OPCODE 0x23 +/* CMD parametr */ +#define AU6601_REG_CMD_ARG 0x24 +/* CMD response 4x4 Bytes */ +#define AU6601_REG_CMD_RSP0 0x30 +#define AU6601_REG_CMD_RSP1 0x34 +#define AU6601_REG_CMD_RSP2 0x38 +#define AU6601_REG_CMD_RSP3 0x3C +/* default timeout set to 125: 125 * 40ms = 5 sec + * how exactly it is calculated? 
+ */ +#define AU6601_TIME_OUT_CTRL 0x69 +/* Block size for SDMA or PIO */ +#define AU6601_REG_BLOCK_SIZE 0x6c +/* Some power related reg, used together with AU6601_OUTPUT_ENABLE */ +#define AU6601_POWER_CONTROL 0x70 + +/* PLL ctrl */ +#define AU6601_CLK_SELECT 0x72 +#define AU6601_CLK_OVER_CLK 0x80 +#define AU6601_CLK_384_MHZ 0x30 +#define AU6601_CLK_125_MHZ 0x20 +#define AU6601_CLK_48_MHZ 0x10 +#define AU6601_CLK_EXT_PLL 0x04 +#define AU6601_CLK_X2_MODE 0x02 +#define AU6601_CLK_ENABLE 0x01 +#define AU6601_CLK_31_25_MHZ 0x00 + +#define AU6601_CLK_DIVIDER 0x73 + +#define AU6601_INTERFACE_MODE_CTRL 0x74 +#define AU6601_DLINK_MODE 0x80 +#define AU6601_INTERRUPT_DELAY_TIME 0x40 +#define AU6601_SIGNAL_REQ_CTRL 0x30 +#define AU6601_MS_CARD_WP BIT(3) +#define AU6601_SD_CARD_WP BIT(0) + +/* same register values are used for: + * - AU6601_OUTPUT_ENABLE + * - AU6601_POWER_CONTROL + */ +#define AU6601_ACTIVE_CTRL 0x75 +#define AU6601_XD_CARD BIT(4) +/* AU6601_MS_CARD_ACTIVE - will cativate MS card section? */ +#define AU6601_MS_CARD BIT(3) +#define AU6601_SD_CARD BIT(0) + +/* card slot state. It should automatically detect type of + * the card + */ +#define AU6601_DETECT_STATUS 0x76 +#define AU6601_DETECT_EN BIT(7) +#define AU6601_MS_DETECTED BIT(3) +#define AU6601_SD_DETECTED BIT(0) +#define AU6601_DETECT_STATUS_M 0xf + +#define AU6601_REG_SW_RESET 0x79 +#define AU6601_BUF_CTRL_RESET BIT(7) +#define AU6601_RESET_DATA BIT(3) +#define AU6601_RESET_CMD BIT(0) + +#define AU6601_OUTPUT_ENABLE 0x7a + +#define AU6601_PAD_DRIVE0 0x7b +#define AU6601_PAD_DRIVE1 0x7c +#define AU6601_PAD_DRIVE2 0x7d +/* read EEPROM? */ +#define AU6601_FUNCTION 0x7f + +#define AU6601_CMD_XFER_CTRL 0x81 +#define AU6601_CMD_17_BYTE_CRC 0xc0 +#define AU6601_CMD_6_BYTE_WO_CRC 0x80 +#define AU6601_CMD_6_BYTE_CRC 0x40 +#define AU6601_CMD_START_XFER 0x20 +#define AU6601_CMD_STOP_WAIT_RDY 0x10 +#define AU6601_CMD_NO_RESP 0x00 + +#define AU6601_REG_BUS_CTRL 0x82 +#define AU6601_BUS_WIDTH_4BIT 0x20 +#define AU6601_BUS_WIDTH_8BIT 0x10 +#define AU6601_BUS_WIDTH_1BIT 0x00 + +#define AU6601_DATA_XFER_CTRL 0x83 +#define AU6601_DATA_WRITE BIT(7) +#define AU6601_DATA_DMA_MODE BIT(6) +#define AU6601_DATA_START_XFER BIT(0) + +#define AU6601_DATA_PIN_STATE 0x84 +#define AU6601_BUS_STAT_CMD BIT(15) +/* BIT(4) - BIT(7) are permanently 1. 
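/*
 * Illustrative sketch, not part of this header: how the AU6601 register
 * definitions above might be combined with the alcor_read8()/alcor_write8()
 * accessors and struct alcor_pci_priv declared further down in this file.
 * The function name and the exact register sequence are hypothetical.
 */
#include <linux/types.h>
#include <linux/alcor_pci.h>

static bool example_sd_card_present(struct alcor_pci_priv *priv)
{
	/* power up and enable the SD card section of the controller */
	alcor_write8(priv, AU6601_SD_CARD, AU6601_POWER_CONTROL);
	alcor_write8(priv, AU6601_SD_CARD, AU6601_OUTPUT_ENABLE);

	/* enable card detection, then check what the slot reports */
	alcor_write8(priv, AU6601_DETECT_EN, AU6601_DETECT_STATUS);

	return alcor_read8(priv, AU6601_DETECT_STATUS) & AU6601_SD_DETECTED;
}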
+ * May be reserved or not attached DAT4-DAT7 + */ +#define AU6601_BUS_STAT_DAT3 BIT(3) +#define AU6601_BUS_STAT_DAT2 BIT(2) +#define AU6601_BUS_STAT_DAT1 BIT(1) +#define AU6601_BUS_STAT_DAT0 BIT(0) +#define AU6601_BUS_STAT_DAT_MASK 0xf + +#define AU6601_OPT 0x85 +#define AU6601_OPT_CMD_LINE_LEVEL 0x80 +#define AU6601_OPT_NCRC_16_CLK BIT(4) +#define AU6601_OPT_CMD_NWT BIT(3) +#define AU6601_OPT_STOP_CLK BIT(2) +#define AU6601_OPT_DDR_MODE BIT(1) +#define AU6601_OPT_SD_18V BIT(0) + +#define AU6601_CLK_DELAY 0x86 +#define AU6601_CLK_DATA_POSITIVE_EDGE 0x80 +#define AU6601_CLK_CMD_POSITIVE_EDGE 0x40 +#define AU6601_CLK_POSITIVE_EDGE_ALL (AU6601_CLK_CMD_POSITIVE_EDGE \ + | AU6601_CLK_DATA_POSITIVE_EDGE) + + +#define AU6601_REG_INT_STATUS 0x90 +#define AU6601_REG_INT_ENABLE 0x94 +#define AU6601_INT_DATA_END_BIT_ERR BIT(22) +#define AU6601_INT_DATA_CRC_ERR BIT(21) +#define AU6601_INT_DATA_TIMEOUT_ERR BIT(20) +#define AU6601_INT_CMD_INDEX_ERR BIT(19) +#define AU6601_INT_CMD_END_BIT_ERR BIT(18) +#define AU6601_INT_CMD_CRC_ERR BIT(17) +#define AU6601_INT_CMD_TIMEOUT_ERR BIT(16) +#define AU6601_INT_ERROR BIT(15) +#define AU6601_INT_OVER_CURRENT_ERR BIT(8) +#define AU6601_INT_CARD_INSERT BIT(7) +#define AU6601_INT_CARD_REMOVE BIT(6) +#define AU6601_INT_READ_BUF_RDY BIT(5) +#define AU6601_INT_WRITE_BUF_RDY BIT(4) +#define AU6601_INT_DMA_END BIT(3) +#define AU6601_INT_DATA_END BIT(1) +#define AU6601_INT_CMD_END BIT(0) + +#define AU6601_INT_NORMAL_MASK 0x00007FFF +#define AU6601_INT_ERROR_MASK 0xFFFF8000 + +#define AU6601_INT_CMD_MASK (AU6601_INT_CMD_END | \ + AU6601_INT_CMD_TIMEOUT_ERR | AU6601_INT_CMD_CRC_ERR | \ + AU6601_INT_CMD_END_BIT_ERR | AU6601_INT_CMD_INDEX_ERR) +#define AU6601_INT_DATA_MASK (AU6601_INT_DATA_END | AU6601_INT_DMA_END | \ + AU6601_INT_READ_BUF_RDY | AU6601_INT_WRITE_BUF_RDY | \ + AU6601_INT_DATA_TIMEOUT_ERR | AU6601_INT_DATA_CRC_ERR | \ + AU6601_INT_DATA_END_BIT_ERR) +#define AU6601_INT_ALL_MASK ((u32)-1) + +/* MS_CARD mode registers */ + +#define AU6601_MS_STATUS 0xa0 + +#define AU6601_MS_BUS_MODE_CTRL 0xa1 +#define AU6601_MS_BUS_8BIT_MODE 0x03 +#define AU6601_MS_BUS_4BIT_MODE 0x01 +#define AU6601_MS_BUS_1BIT_MODE 0x00 + +#define AU6601_MS_TPC_CMD 0xa2 +#define AU6601_MS_TPC_READ_PAGE_DATA 0x02 +#define AU6601_MS_TPC_READ_REG 0x04 +#define AU6601_MS_TPC_GET_INT 0x07 +#define AU6601_MS_TPC_WRITE_PAGE_DATA 0x0D +#define AU6601_MS_TPC_WRITE_REG 0x0B +#define AU6601_MS_TPC_SET_RW_REG_ADRS 0x08 +#define AU6601_MS_TPC_SET_CMD 0x0E +#define AU6601_MS_TPC_EX_SET_CMD 0x09 +#define AU6601_MS_TPC_READ_SHORT_DATA 0x03 +#define AU6601_MS_TPC_WRITE_SHORT_DATA 0x0C + +#define AU6601_MS_TRANSFER_MODE 0xa3 +#define AU6601_MS_XFER_INT_TIMEOUT_CHK BIT(2) +#define AU6601_MS_XFER_DMA_ENABLE BIT(1) +#define AU6601_MS_XFER_START BIT(0) + +#define AU6601_MS_DATA_PIN_STATE 0xa4 + +#define AU6601_MS_INT_STATUS 0xb0 +#define AU6601_MS_INT_ENABLE 0xb4 +#define AU6601_MS_INT_OVER_CURRENT_ERROR BIT(23) +#define AU6601_MS_INT_DATA_CRC_ERROR BIT(21) +#define AU6601_MS_INT_INT_TIMEOUT BIT(20) +#define AU6601_MS_INT_INT_RESP_ERROR BIT(19) +#define AU6601_MS_INT_CED_ERROR BIT(18) +#define AU6601_MS_INT_TPC_TIMEOUT BIT(16) +#define AU6601_MS_INT_ERROR BIT(15) +#define AU6601_MS_INT_CARD_INSERT BIT(7) +#define AU6601_MS_INT_CARD_REMOVE BIT(6) +#define AU6601_MS_INT_BUF_READ_RDY BIT(5) +#define AU6601_MS_INT_BUF_WRITE_RDY BIT(4) +#define AU6601_MS_INT_DMA_END BIT(3) +#define AU6601_MS_INT_TPC_END BIT(1) + +#define AU6601_MS_INT_DATA_MASK 0x00000038 +#define AU6601_MS_INT_TPC_MASK 0x003d8002 +#define 
AU6601_MS_INT_TPC_ERROR 0x003d0000 + +#define ALCOR_PCIE_LINK_CTRL_OFFSET 0x10 +#define ALCOR_PCIE_LINK_CAP_OFFSET 0x0c +#define ALCOR_CAP_START_OFFSET 0x34 + +struct alcor_dev_cfg { + u8 dma; +}; + +struct alcor_pci_priv { + struct pci_dev *pdev; + struct pci_dev *parent_pdev; + struct device *dev; + void __iomem *iobase; + unsigned int irq; + + unsigned long id; /* idr id */ + + struct alcor_dev_cfg *cfg; + + /* PCI ASPM related vars */ + int pdev_cap_off; + u8 pdev_aspm_cap; + int parent_cap_off; + u8 parent_aspm_cap; + u8 ext_config_dev_aspm; +}; + +void alcor_write8(struct alcor_pci_priv *priv, u8 val, unsigned int addr); +void alcor_write16(struct alcor_pci_priv *priv, u16 val, unsigned int addr); +void alcor_write32(struct alcor_pci_priv *priv, u32 val, unsigned int addr); +void alcor_write32be(struct alcor_pci_priv *priv, u32 val, unsigned int addr); +u8 alcor_read8(struct alcor_pci_priv *priv, unsigned int addr); +u32 alcor_read32(struct alcor_pci_priv *priv, unsigned int addr); +u32 alcor_read32be(struct alcor_pci_priv *priv, unsigned int addr); +#endif diff --git a/include/linux/bio.h b/include/linux/bio.h index 056fb627edb3..7380b094dcca 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -491,35 +491,40 @@ do { \ bio_clear_flag(bio, BIO_THROTTLED);\ (bio)->bi_disk = (bdev)->bd_disk; \ (bio)->bi_partno = (bdev)->bd_partno; \ + bio_associate_blkg(bio); \ } while (0) #define bio_copy_dev(dst, src) \ do { \ (dst)->bi_disk = (src)->bi_disk; \ (dst)->bi_partno = (src)->bi_partno; \ + bio_clone_blkg_association(dst, src); \ } while (0) #define bio_dev(bio) \ disk_devt((bio)->bi_disk) #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) -int bio_associate_blkcg_from_page(struct bio *bio, struct page *page); +void bio_associate_blkg_from_page(struct bio *bio, struct page *page); #else -static inline int bio_associate_blkcg_from_page(struct bio *bio, - struct page *page) { return 0; } +static inline void bio_associate_blkg_from_page(struct bio *bio, + struct page *page) { } #endif #ifdef CONFIG_BLK_CGROUP -int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); -int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); -void bio_disassociate_task(struct bio *bio); -void bio_clone_blkcg_association(struct bio *dst, struct bio *src); +void bio_disassociate_blkg(struct bio *bio); +void bio_associate_blkg(struct bio *bio); +void bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css); +void bio_clone_blkg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ -static inline int bio_associate_blkcg(struct bio *bio, - struct cgroup_subsys_state *blkcg_css) { return 0; } -static inline void bio_disassociate_task(struct bio *bio) { } -static inline void bio_clone_blkcg_association(struct bio *dst, - struct bio *src) { } +static inline void bio_disassociate_blkg(struct bio *bio) { } +static inline void bio_associate_blkg(struct bio *bio) { } +static inline void bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css) +{ } +static inline void bio_clone_blkg_association(struct bio *dst, + struct bio *src) { } #endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_HIGHMEM diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 6d766a19f2bb..f025fd1e22e6 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -21,6 +21,7 @@ #include <linux/blkdev.h> #include <linux/atomic.h> #include <linux/kthread.h> +#include <linux/fs.h> /* percpu_counter batch for 
blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) @@ -122,11 +123,8 @@ struct blkcg_gq { /* all non-root blkcg_gq's are guaranteed to have access to parent */ struct blkcg_gq *parent; - /* request allocation list for this blkcg-q pair */ - struct request_list rl; - /* reference count */ - atomic_t refcnt; + struct percpu_ref refcnt; /* is this blkg online? protected by both blkcg and q locks */ bool online; @@ -184,6 +182,8 @@ extern struct cgroup_subsys_state * const blkcg_root_css; struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, struct request_queue *q, bool update_hint); +struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q); struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q); int blkcg_init_queue(struct request_queue *q); @@ -230,22 +230,62 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); +/** + * blkcg_css - find the current css + * + * Find the css associated with either the kthread or the current task. + * This may return a dying css, so it is up to the caller to use tryget logic + * to confirm it is alive and well. + */ +static inline struct cgroup_subsys_state *blkcg_css(void) +{ + struct cgroup_subsys_state *css; + + css = kthread_blkcg(); + if (css) + return css; + return task_css(current, io_cgrp_id); +} static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; } -static inline struct blkcg *bio_blkcg(struct bio *bio) +/** + * __bio_blkcg - internal, inconsistent version to get blkcg + * + * DO NOT USE. + * This function is inconsistent and consequently is dangerous to use. The + * first part of the function returns a blkcg where a reference is owned by the + * bio. This means it does not need to be rcu protected as it cannot go away + * with the bio owning a reference to it. However, the latter potentially gets + * it from task_css(). This can race against task migration and the cgroup + * dying. It is also semantically different as it must be called rcu protected + * and is susceptible to failure when trying to get a reference to it. + * Therefore, it is not ok to assume that *_get() will always succeed on the + * blkcg returned here. + */ +static inline struct blkcg *__bio_blkcg(struct bio *bio) { - struct cgroup_subsys_state *css; + if (bio && bio->bi_blkg) + return bio->bi_blkg->blkcg; + return css_to_blkcg(blkcg_css()); +} - if (bio && bio->bi_css) - return css_to_blkcg(bio->bi_css); - css = kthread_blkcg(); - if (css) - return css_to_blkcg(css); - return css_to_blkcg(task_css(current, io_cgrp_id)); +/** + * bio_blkcg - grab the blkcg associated with a bio + * @bio: target bio + * + * This returns the blkcg associated with a bio, %NULL if not associated. + * Callers are expected to either handle %NULL or know association has been + * done prior to calling this. + */ +static inline struct blkcg *bio_blkcg(struct bio *bio) +{ + if (bio && bio->bi_blkg) + return bio->bi_blkg->blkcg; + return NULL; } static inline bool blk_cgroup_congested(void) @@ -328,16 +368,12 @@ static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, * @q: request_queue of interest * * Lookup blkg for the @blkcg - @q pair. This function should be called - * under RCU read lock and is guaranteed to return %NULL if @q is bypassing - * - see blk_queue_bypass_start() for details. 
+ * under RCU read loc. */ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) { WARN_ON_ONCE(!rcu_read_lock_held()); - - if (unlikely(blk_queue_bypass(q))) - return NULL; return __blkg_lookup(blkcg, q, false); } @@ -451,26 +487,35 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) */ static inline void blkg_get(struct blkcg_gq *blkg) { - WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); - atomic_inc(&blkg->refcnt); + percpu_ref_get(&blkg->refcnt); } /** - * blkg_try_get - try and get a blkg reference + * blkg_tryget - try and get a blkg reference * @blkg: blkg to get * * This is for use when doing an RCU lookup of the blkg. We may be in the midst * of freeing this blkg, so we can only use it if the refcnt is not zero. */ -static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) +static inline bool blkg_tryget(struct blkcg_gq *blkg) { - if (atomic_inc_not_zero(&blkg->refcnt)) - return blkg; - return NULL; + return percpu_ref_tryget(&blkg->refcnt); } +/** + * blkg_tryget_closest - try and get a blkg ref on the closet blkg + * @blkg: blkg to get + * + * This walks up the blkg tree to find the closest non-dying blkg and returns + * the blkg that it did association with as it may not be the passed in blkg. + */ +static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) +{ + while (blkg && !percpu_ref_tryget(&blkg->refcnt)) + blkg = blkg->parent; -void __blkg_release_rcu(struct rcu_head *rcu); + return blkg; +} /** * blkg_put - put a blkg reference @@ -478,9 +523,7 @@ void __blkg_release_rcu(struct rcu_head *rcu); */ static inline void blkg_put(struct blkcg_gq *blkg) { - WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); - if (atomic_dec_and_test(&blkg->refcnt)) - call_rcu(&blkg->rcu_head, __blkg_release_rcu); + percpu_ref_put(&blkg->refcnt); } /** @@ -515,94 +558,6 @@ static inline void blkg_put(struct blkcg_gq *blkg) if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q, false))) -/** - * blk_get_rl - get request_list to use - * @q: request_queue of interest - * @bio: bio which will be attached to the allocated request (may be %NULL) - * - * The caller wants to allocate a request from @q to use for @bio. Find - * the request_list to use and obtain a reference on it. Should be called - * under queue_lock. This function is guaranteed to return non-%NULL - * request_list. - */ -static inline struct request_list *blk_get_rl(struct request_queue *q, - struct bio *bio) -{ - struct blkcg *blkcg; - struct blkcg_gq *blkg; - - rcu_read_lock(); - - blkcg = bio_blkcg(bio); - - /* bypass blkg lookup and use @q->root_rl directly for root */ - if (blkcg == &blkcg_root) - goto root_rl; - - /* - * Try to use blkg->rl. blkg lookup may fail under memory pressure - * or if either the blkcg or queue is going away. Fall back to - * root_rl in such cases. - */ - blkg = blkg_lookup(blkcg, q); - if (unlikely(!blkg)) - goto root_rl; - - blkg_get(blkg); - rcu_read_unlock(); - return &blkg->rl; -root_rl: - rcu_read_unlock(); - return &q->root_rl; -} - -/** - * blk_put_rl - put request_list - * @rl: request_list to put - * - * Put the reference acquired by blk_get_rl(). Should be called under - * queue_lock. 
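/*
 * Illustrative sketch, not part of this patch: with the semantics documented
 * above, bio_blkcg() may now return NULL when no association has been made,
 * so a caller either handles NULL or performs the association first.
 * bio_associate_blkg() (declared in linux/bio.h in this series) associates
 * the bio with the blkg of the current context. The helper name below is
 * hypothetical.
 */
#include <linux/bio.h>
#include <linux/blk-cgroup.h>

static struct blkcg *example_bio_to_blkcg(struct bio *bio)
{
	struct blkcg *blkcg = bio_blkcg(bio);

	if (!blkcg) {
		/* associate with the current context's blkg, then retry */
		bio_associate_blkg(bio);
		blkcg = bio_blkcg(bio);
	}
	return blkcg;
}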
- */ -static inline void blk_put_rl(struct request_list *rl) -{ - if (rl->blkg->blkcg != &blkcg_root) - blkg_put(rl->blkg); -} - -/** - * blk_rq_set_rl - associate a request with a request_list - * @rq: request of interest - * @rl: target request_list - * - * Associate @rq with @rl so that accounting and freeing can know the - * request_list @rq came from. - */ -static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) -{ - rq->rl = rl; -} - -/** - * blk_rq_rl - return the request_list a request came from - * @rq: request of interest - * - * Return the request_list @rq is allocated from. - */ -static inline struct request_list *blk_rq_rl(struct request *rq) -{ - return rq->rl; -} - -struct request_list *__blk_queue_next_rl(struct request_list *rl, - struct request_queue *q); -/** - * blk_queue_for_each_rl - iterate through all request_lists of a request_queue - * - * Should be used under queue_lock. - */ -#define blk_queue_for_each_rl(rl, q) \ - for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) - static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp) { int ret; @@ -797,32 +752,34 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg struct bio *bio) { return false; } #endif + +static inline void blkcg_bio_issue_init(struct bio *bio) +{ + bio_issue_init(&bio->bi_issue, bio_sectors(bio)); +} + static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { - struct blkcg *blkcg; struct blkcg_gq *blkg; bool throtl = false; rcu_read_lock(); - blkcg = bio_blkcg(bio); - - /* associate blkcg if bio hasn't attached one */ - bio_associate_blkcg(bio, &blkcg->css); - - blkg = blkg_lookup(blkcg, q); - if (unlikely(!blkg)) { - spin_lock_irq(q->queue_lock); - blkg = blkg_lookup_create(blkcg, q); - if (IS_ERR(blkg)) - blkg = NULL; - spin_unlock_irq(q->queue_lock); + + if (!bio->bi_blkg) { + char b[BDEVNAME_SIZE]; + + WARN_ONCE(1, + "no blkg associated for bio on block-device: %s\n", + bio_devname(bio, b)); + bio_associate_blkg(bio); } + blkg = bio->bi_blkg; + throtl = blk_throtl_bio(q, blkg, bio); if (!throtl) { - blkg = blkg ?: q->root_blkg; /* * If the bio is flagged with BIO_QUEUE_ENTERED it means this * is a split bio and we would have already accounted for the @@ -834,6 +791,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); } + blkcg_bio_issue_init(bio); + rcu_read_unlock(); return !throtl; } @@ -930,6 +889,7 @@ static inline int blkcg_activate_policy(struct request_queue *q, static inline void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol) { } +static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, @@ -939,12 +899,7 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } static inline void blkg_get(struct blkcg_gq *blkg) { } static inline void blkg_put(struct blkcg_gq *blkg) { } -static inline struct request_list *blk_get_rl(struct request_queue *q, - struct bio *bio) { return &q->root_rl; } -static inline void blk_put_rl(struct request_list *rl) { } -static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } -static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } - +static inline void blkcg_bio_issue_init(struct bio *bio) { } static inline bool 
blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { return true; } diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h index 9f4c17f0d2d8..0b1f45c62623 100644 --- a/include/linux/blk-mq-pci.h +++ b/include/linux/blk-mq-pci.h @@ -2,10 +2,10 @@ #ifndef _LINUX_BLK_MQ_PCI_H #define _LINUX_BLK_MQ_PCI_H -struct blk_mq_tag_set; +struct blk_mq_queue_map; struct pci_dev; -int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev, +int blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, int offset); #endif /* _LINUX_BLK_MQ_PCI_H */ diff --git a/include/linux/blk-mq-rdma.h b/include/linux/blk-mq-rdma.h index b4ade198007d..7b6ecf9ac4c3 100644 --- a/include/linux/blk-mq-rdma.h +++ b/include/linux/blk-mq-rdma.h @@ -4,7 +4,7 @@ struct blk_mq_tag_set; struct ib_device; -int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set, +int blk_mq_rdma_map_queues(struct blk_mq_queue_map *map, struct ib_device *dev, int first_vec); #endif /* _LINUX_BLK_MQ_RDMA_H */ diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h index 69b4da262c45..687ae287e1dc 100644 --- a/include/linux/blk-mq-virtio.h +++ b/include/linux/blk-mq-virtio.h @@ -2,10 +2,10 @@ #ifndef _LINUX_BLK_MQ_VIRTIO_H #define _LINUX_BLK_MQ_VIRTIO_H -struct blk_mq_tag_set; +struct blk_mq_queue_map; struct virtio_device; -int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, +int blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, struct virtio_device *vdev, int first_vec); #endif /* _LINUX_BLK_MQ_VIRTIO_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2286dc12c6bc..0e030f5f76b6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -37,7 +37,8 @@ struct blk_mq_hw_ctx { struct blk_mq_ctx *dispatch_from; unsigned int dispatch_busy; - unsigned int nr_ctx; + unsigned short type; + unsigned short nr_ctx; struct blk_mq_ctx **ctxs; spinlock_t dispatch_wait_lock; @@ -74,10 +75,31 @@ struct blk_mq_hw_ctx { struct srcu_struct srcu[0]; }; +struct blk_mq_queue_map { + unsigned int *mq_map; + unsigned int nr_queues; + unsigned int queue_offset; +}; + +enum hctx_type { + HCTX_TYPE_DEFAULT, /* all I/O not otherwise accounted for */ + HCTX_TYPE_READ, /* just for READ I/O */ + HCTX_TYPE_POLL, /* polled I/O of any kind */ + + HCTX_MAX_TYPES, +}; + struct blk_mq_tag_set { - unsigned int *mq_map; + /* + * map[] holds ctx -> hctx mappings, one map exists for each type + * that the driver wishes to support. There are no restrictions + * on maps being of the same size, and it's perfectly legal to + * share maps between types. 
+ */ + struct blk_mq_queue_map map[HCTX_MAX_TYPES]; + unsigned int nr_maps; /* nr entries in map[] */ const struct blk_mq_ops *ops; - unsigned int nr_hw_queues; + unsigned int nr_hw_queues; /* nr hw queues across maps */ unsigned int queue_depth; /* max hw supported */ unsigned int reserved_tags; unsigned int cmd_size; /* per-request extra data */ @@ -99,6 +121,7 @@ struct blk_mq_queue_data { typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); +typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *); typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *); typedef void (put_budget_fn)(struct blk_mq_hw_ctx *); typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); @@ -109,11 +132,13 @@ typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *, typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *, unsigned int); -typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, +typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); -typedef void (busy_tag_iter_fn)(struct request *, void *, bool); -typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); +typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); +typedef int (poll_fn)(struct blk_mq_hw_ctx *); typedef int (map_queues_fn)(struct blk_mq_tag_set *set); +typedef bool (busy_fn)(struct request_queue *); +typedef void (complete_fn)(struct request *); struct blk_mq_ops { @@ -123,6 +148,15 @@ struct blk_mq_ops { queue_rq_fn *queue_rq; /* + * If a driver uses bd->last to judge when to submit requests to + * hardware, it must define this function. In case of errors that + * make us stop issuing further requests, this hook serves the + * purpose of kicking the hardware (which the last request otherwise + * would have done). + */ + commit_rqs_fn *commit_rqs; + + /* * Reserve budget before queue request, once .queue_rq is * run, it is driver's responsibility to release the * reserved budget. 
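/*
 * Illustrative sketch, not part of this patch: how a driver's ->map_queues()
 * callback might fill the per-type maps introduced above, splitting the
 * hardware queues between the default and read maps and letting
 * blk_mq_map_queues() do the generic ctx -> hctx spreading.
 * "example_nr_read_queues" is a hypothetical driver parameter; the driver
 * would also have set set->nr_maps = 2 before allocating the tag set.
 */
#include <linux/blk-mq.h>

static unsigned int example_nr_read_queues = 2;

static int example_map_queues(struct blk_mq_tag_set *set)
{
	struct blk_mq_queue_map *def = &set->map[HCTX_TYPE_DEFAULT];
	struct blk_mq_queue_map *rd = &set->map[HCTX_TYPE_READ];
	int ret;

	/* first nr_hw_queues - N queues serve default I/O, the rest reads */
	def->nr_queues = set->nr_hw_queues - example_nr_read_queues;
	def->queue_offset = 0;
	rd->nr_queues = example_nr_read_queues;
	rd->queue_offset = def->nr_queues;

	ret = blk_mq_map_queues(def);
	if (ret)
		return ret;
	return blk_mq_map_queues(rd);
}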
Also we have to handle failure case @@ -141,7 +175,7 @@ struct blk_mq_ops { */ poll_fn *poll; - softirq_done_fn *complete; + complete_fn *complete; /* * Called when the block layer side of a hardware queue has been @@ -165,6 +199,11 @@ struct blk_mq_ops { /* Called from inside blk_get_request() */ void (*initialize_rq_fn)(struct request *rq); + /* + * If set, returns whether or not this queue currently is busy + */ + busy_fn *busy; + map_queues_fn *map_queues; #ifdef CONFIG_BLK_DEBUG_FS @@ -218,6 +257,8 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); +bool blk_mq_queue_inflight(struct request_queue *q); + enum { /* return when out of requests */ BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), @@ -264,7 +305,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); -void blk_mq_complete_request(struct request *rq); +bool blk_mq_complete_request(struct request *rq); bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, struct bio *bio); bool blk_mq_queue_stopped(struct request_queue *q); @@ -288,24 +329,12 @@ void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); -int blk_mq_map_queues(struct blk_mq_tag_set *set); +int blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); -/** - * blk_mq_mark_complete() - Set request state to complete - * @rq: request to set to complete state - * - * Returns true if request state was successfully set to complete. If - * successful, the caller is responsibile for seeing this request is ended, as - * blk_mq_complete_request will not work again. - */ -static inline bool blk_mq_mark_complete(struct request *rq) -{ - return cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) == - MQ_RQ_IN_FLIGHT; -} +unsigned int blk_mq_rq_cpu(struct request *rq); /* * Driver command data is immediately after the request. So subtract request @@ -328,4 +357,14 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) for ((i) = 0; (i) < (hctx)->nr_ctx && \ ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) +static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, + struct request *rq) +{ + if (rq->tag != -1) + return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT); + + return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) | + BLK_QC_T_INTERNAL; +} + #endif diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1dcf652ba0aa..5c7e7f859a24 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -174,11 +174,11 @@ struct bio { void *bi_private; #ifdef CONFIG_BLK_CGROUP /* - * Optional ioc and css associated with this bio. Put on bio - * release. Read comment on top of bio_associate_current(). + * Represents the association of the css and request_queue for the bio. + * If a bio goes direct to device, it will not have a blkg as it will + * not have a request_queue associated with it. The reference is put + * on release of the bio. 
*/ - struct io_context *bi_ioc; - struct cgroup_subsys_state *bi_css; struct blkcg_gq *bi_blkg; struct bio_issue bi_issue; #endif @@ -228,6 +228,7 @@ struct bio { #define BIO_TRACE_COMPLETION 10 /* bio_endio() should trace the final completion * of this bio. */ #define BIO_QUEUE_ENTERED 11 /* can use blk_queue_enter_live() */ +#define BIO_TRACKED 12 /* set if bio goes through the rq_qos path */ /* See BVEC_POOL_OFFSET below before adding new flags */ @@ -323,6 +324,8 @@ enum req_flag_bits { /* command specific flags for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ + __REQ_HIPRI, + /* for driver use */ __REQ_DRV, __REQ_SWAP, /* swapping request. */ @@ -343,8 +346,8 @@ enum req_flag_bits { #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) - #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) +#define REQ_HIPRI (1ULL << __REQ_HIPRI) #define REQ_DRV (1ULL << __REQ_DRV) #define REQ_SWAP (1ULL << __REQ_SWAP) @@ -422,17 +425,6 @@ static inline bool blk_qc_t_valid(blk_qc_t cookie) return cookie != BLK_QC_T_NONE; } -static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num, - bool internal) -{ - blk_qc_t ret = tag | (queue_num << BLK_QC_T_SHIFT); - - if (internal) - ret |= BLK_QC_T_INTERNAL; - - return ret; -} - static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) { return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4293dc1cd160..338604dff7d0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -58,25 +58,6 @@ struct blk_stat_callback; typedef void (rq_end_io_fn)(struct request *, blk_status_t); -#define BLK_RL_SYNCFULL (1U << 0) -#define BLK_RL_ASYNCFULL (1U << 1) - -struct request_list { - struct request_queue *q; /* the queue this rl belongs to */ -#ifdef CONFIG_BLK_CGROUP - struct blkcg_gq *blkg; /* blkg this request pool belongs to */ -#endif - /* - * count[], starved[], and wait[] are indexed by - * BLK_RW_SYNC/BLK_RW_ASYNC - */ - int count[2]; - int starved[2]; - mempool_t *rq_pool; - wait_queue_head_t wait[2]; - unsigned int flags; -}; - /* * request flags */ typedef __u32 __bitwise req_flags_t; @@ -85,8 +66,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_SORTED ((__force req_flags_t)(1 << 0)) /* drive already may have started this one */ #define RQF_STARTED ((__force req_flags_t)(1 << 1)) -/* uses tagged queueing */ -#define RQF_QUEUED ((__force req_flags_t)(1 << 2)) /* may not be passed by ioscheduler */ #define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3)) /* request for flush sequence */ @@ -150,8 +129,8 @@ enum mq_rq_state { struct request { struct request_queue *q; struct blk_mq_ctx *mq_ctx; + struct blk_mq_hw_ctx *mq_hctx; - int cpu; unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; @@ -245,11 +224,7 @@ struct request { refcount_t ref; unsigned int timeout; - - /* access through blk_rq_set_deadline, blk_rq_deadline */ - unsigned long __deadline; - - struct list_head timeout_list; + unsigned long deadline; union { struct __call_single_data csd; @@ -264,10 +239,6 @@ struct request { /* for bidi */ struct request *next_rq; - -#ifdef CONFIG_BLK_CGROUP - struct request_list *rl; /* rl this rq is alloced from */ -#endif }; static inline bool blk_op_is_scsi(unsigned int op) @@ -311,41 +282,21 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; -typedef void (request_fn_proc) (struct request_queue 
*q); typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); -typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t); -typedef int (prep_rq_fn) (struct request_queue *, struct request *); -typedef void (unprep_rq_fn) (struct request_queue *, struct request *); struct bio_vec; -typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); -typedef int (lld_busy_fn) (struct request_queue *q); -typedef int (bsg_job_fn) (struct bsg_job *); -typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t); -typedef void (exit_rq_fn)(struct request_queue *, struct request *); enum blk_eh_timer_return { BLK_EH_DONE, /* drivers has completed the command */ BLK_EH_RESET_TIMER, /* reset timer and try again */ }; -typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); - enum blk_queue_state { Queue_down, Queue_up, }; -struct blk_queue_tag { - struct request **tag_index; /* map of busy tags */ - unsigned long *tag_map; /* bit map of free/busy tags */ - int max_depth; /* what we will send to device */ - int real_max_depth; /* what the array can hold */ - atomic_t refcnt; /* map can be shared */ - int alloc_policy; /* tag allocation policy */ - int next_tag; /* next tag */ -}; #define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */ #define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */ @@ -389,7 +340,6 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; - unsigned char cluster; unsigned char raid_partial_stripes_expensive; enum blk_zoned_model zoned; }; @@ -444,40 +394,15 @@ struct request_queue { struct list_head queue_head; struct request *last_merge; struct elevator_queue *elevator; - int nr_rqs[2]; /* # allocated [a]sync rqs */ - int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ struct blk_queue_stats *stats; struct rq_qos *rq_qos; - /* - * If blkcg is not used, @q->root_rl serves all requests. If blkcg - * is used, root blkg allocates from @q->root_rl and all other - * blkgs from their own blkg->rl. Which one to use should be - * determined using bio_request_list(). - */ - struct request_list root_rl; - - request_fn_proc *request_fn; make_request_fn *make_request_fn; - poll_q_fn *poll_fn; - prep_rq_fn *prep_rq_fn; - unprep_rq_fn *unprep_rq_fn; - softirq_done_fn *softirq_done_fn; - rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; - lld_busy_fn *lld_busy_fn; - /* Called just after a request is allocated */ - init_rq_fn *init_rq_fn; - /* Called just before a request is freed */ - exit_rq_fn *exit_rq_fn; - /* Called from inside blk_get_request() */ - void (*initialize_rq_fn)(struct request *rq); const struct blk_mq_ops *mq_ops; - unsigned int *mq_map; - /* sw queues */ struct blk_mq_ctx __percpu *queue_ctx; unsigned int nr_queues; @@ -488,17 +413,6 @@ struct request_queue { struct blk_mq_hw_ctx **queue_hw_ctx; unsigned int nr_hw_queues; - /* - * Dispatch queue sorting - */ - sector_t end_sector; - struct request *boundary_rq; - - /* - * Delayed queue handling - */ - struct delayed_work delay_work; - struct backing_dev_info *backing_dev_info; /* @@ -529,13 +443,7 @@ struct request_queue { */ gfp_t bounce_gfp; - /* - * protects queue structures from reentrancy. ->__queue_lock should - * _never_ be used directly, it is queue private. always use - * ->queue_lock. 
- */ - spinlock_t __queue_lock; - spinlock_t *queue_lock; + spinlock_t queue_lock; /* * queue kobject @@ -545,7 +453,7 @@ struct request_queue { /* * mq queue kobject */ - struct kobject mq_kobj; + struct kobject *mq_kobj; #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity integrity; @@ -561,27 +469,12 @@ struct request_queue { * queue settings */ unsigned long nr_requests; /* Max # of requests */ - unsigned int nr_congestion_on; - unsigned int nr_congestion_off; - unsigned int nr_batching; unsigned int dma_drain_size; void *dma_drain_buffer; unsigned int dma_pad_mask; unsigned int dma_alignment; - struct blk_queue_tag *queue_tags; - - unsigned int nr_sorted; - unsigned int in_flight[2]; - - /* - * Number of active block driver functions for which blk_drain_queue() - * must wait. Must be incremented around functions that unlock the - * queue_lock internally, e.g. scsi_request_fn(). - */ - unsigned int request_fn_active; - unsigned int rq_timeout; int poll_nsec; @@ -590,7 +483,6 @@ struct request_queue { struct timer_list timeout; struct work_struct timeout_work; - struct list_head timeout_list; struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP @@ -645,11 +537,9 @@ struct request_queue { struct mutex sysfs_lock; - int bypass_depth; atomic_t mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) - bsg_job_fn *bsg_job_fn; struct bsg_class_device bsg_dev; #endif @@ -669,12 +559,12 @@ struct request_queue { #ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; struct dentry *sched_debugfs_dir; + struct dentry *rqos_debugfs_dir; #endif bool mq_sysfs_init_done; size_t cmd_size; - void *rq_alloc_data; struct work_struct release_work; @@ -682,10 +572,8 @@ struct request_queue { u64 write_hints[BLK_MAX_WRITE_HINTS]; }; -#define QUEUE_FLAG_QUEUED 0 /* uses generic tag queueing */ #define QUEUE_FLAG_STOPPED 1 /* queue is stopped */ #define QUEUE_FLAG_DYING 2 /* queue being torn down */ -#define QUEUE_FLAG_BYPASS 3 /* act as dumb FIFO queue */ #define QUEUE_FLAG_BIDI 4 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 5 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 6 /* complete on same CPU-group */ @@ -718,19 +606,15 @@ struct request_queue { (1 << QUEUE_FLAG_ADD_RANDOM)) #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ - (1 << QUEUE_FLAG_SAME_COMP) | \ - (1 << QUEUE_FLAG_POLL)) + (1 << QUEUE_FLAG_SAME_COMP)) void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); -bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); -#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) #define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) -#define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags) #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ @@ -757,37 +641,20 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); -static inline int queue_in_flight(struct request_queue *q) -{ - return q->in_flight[0] + 
q->in_flight[1]; -} - static inline bool blk_account_rq(struct request *rq) { return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); } -#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) -/* rq->queuelist of dequeued request must be list_empty() */ -#define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) -/* - * Driver can handle struct request, if it either has an old style - * request_fn defined, or is blk-mq based. - */ -static inline bool queue_is_rq_based(struct request_queue *q) -{ - return q->request_fn || q->mq_ops; -} - -static inline unsigned int blk_queue_cluster(struct request_queue *q) +static inline bool queue_is_mq(struct request_queue *q) { - return q->limits.cluster; + return q->mq_ops; } static inline enum blk_zoned_model @@ -845,27 +712,6 @@ static inline bool rq_is_sync(struct request *rq) return op_is_sync(rq->cmd_flags); } -static inline bool blk_rl_full(struct request_list *rl, bool sync) -{ - unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; - - return rl->flags & flag; -} - -static inline void blk_set_rl_full(struct request_list *rl, bool sync) -{ - unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; - - rl->flags |= flag; -} - -static inline void blk_clear_rl_full(struct request_list *rl, bool sync) -{ - unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; - - rl->flags &= ~flag; -} - static inline bool rq_mergeable(struct request *rq) { if (blk_rq_is_passthrough(rq)) @@ -902,16 +748,6 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) return q->nr_requests; } -/* - * q->prep_rq_fn return values - */ -enum { - BLKPREP_OK, /* serve it */ - BLKPREP_KILL, /* fatal error, kill, return -EIO */ - BLKPREP_DEFER, /* leave on queue */ - BLKPREP_INVALID, /* invalid command, kill, return -EREMOTEIO */ -}; - extern unsigned long blk_max_low_pfn, blk_max_pfn; /* @@ -983,10 +819,8 @@ extern blk_qc_t direct_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_init_request_from_bio(struct request *req, struct bio *bio); extern void blk_put_request(struct request *); -extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, unsigned int op, blk_mq_req_flags_t flags); -extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, @@ -996,7 +830,6 @@ extern void blk_rq_unprep_clone(struct request *rq); extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern int blk_rq_append_bio(struct request *rq, struct bio **bio); -extern void blk_delay_queue(struct request_queue *, unsigned long); extern void blk_queue_split(struct request_queue *, struct bio **); extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); @@ -1009,15 +842,7 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); extern void blk_queue_exit(struct request_queue *q); -extern void blk_start_queue(struct request_queue *q); -extern void 
blk_start_queue_async(struct request_queue *q); -extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); -extern void __blk_stop_queue(struct request_queue *q); -extern void __blk_run_queue(struct request_queue *q); -extern void __blk_run_queue_uncond(struct request_queue *q); -extern void blk_run_queue(struct request_queue *); -extern void blk_run_queue_async(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); @@ -1034,7 +859,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); -bool blk_poll(struct request_queue *q, blk_qc_t cookie); +int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { @@ -1172,13 +997,6 @@ static inline unsigned int blk_rq_count_bios(struct request *rq) return nr_bios; } -/* - * Request issue related functions. - */ -extern struct request *blk_peek_request(struct request_queue *q); -extern void blk_start_request(struct request *rq); -extern struct request *blk_fetch_request(struct request_queue *q); - void blk_steal_bios(struct bio_list *list, struct request *rq); /* @@ -1196,27 +1014,18 @@ void blk_steal_bios(struct bio_list *list, struct request *rq); */ extern bool blk_update_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); -extern void blk_finish_request(struct request *rq, blk_status_t error); -extern bool blk_end_request(struct request *rq, blk_status_t error, - unsigned int nr_bytes); extern void blk_end_request_all(struct request *rq, blk_status_t error); extern bool __blk_end_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); extern void __blk_end_request_all(struct request *rq, blk_status_t error); extern bool __blk_end_request_cur(struct request *rq, blk_status_t error); -extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); -extern void blk_unprep_request(struct request *); /* * Access functions for manipulating queue properties */ -extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, - spinlock_t *lock, int node_id); -extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); -extern int blk_init_allocated_queue(struct request_queue *); extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); @@ -1255,15 +1064,10 @@ extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size); -extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); -extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); -extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); -extern void blk_queue_softirq_done(struct request_queue 
*, softirq_done_fn *); -extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); @@ -1299,8 +1103,7 @@ extern long nr_blockdev_pages(void); bool __must_check blk_get_queue(struct request_queue *); struct request_queue *blk_alloc_queue(gfp_t); -struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, - spinlock_t *lock); +struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); @@ -1317,9 +1120,10 @@ extern void blk_set_queue_dying(struct request_queue *); * schedule() where blk_schedule_flush_plug() is called. */ struct blk_plug { - struct list_head list; /* requests */ struct list_head mq_list; /* blk-mq requests */ struct list_head cb_list; /* md requires an unplug callback */ + unsigned short rq_count; + bool multiple_queues; }; #define BLK_MAX_REQUEST_COUNT 16 #define BLK_PLUG_FLUSH_SIZE (128 * 1024) @@ -1358,31 +1162,10 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) struct blk_plug *plug = tsk->plug; return plug && - (!list_empty(&plug->list) || - !list_empty(&plug->mq_list) || + (!list_empty(&plug->mq_list) || !list_empty(&plug->cb_list)); } -/* - * tag stuff - */ -extern int blk_queue_start_tag(struct request_queue *, struct request *); -extern struct request *blk_queue_find_tag(struct request_queue *, int); -extern void blk_queue_end_tag(struct request_queue *, struct request *); -extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int); -extern void blk_queue_free_tags(struct request_queue *); -extern int blk_queue_resize_tags(struct request_queue *, int); -extern struct blk_queue_tag *blk_init_tags(int, int); -extern void blk_free_tags(struct blk_queue_tag *); - -static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, - int tag) -{ - if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) - return NULL; - return bqt->tag_index[tag]; -} - extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); @@ -1982,4 +1765,17 @@ static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, #endif /* CONFIG_BLOCK */ +static inline void blk_wake_io_task(struct task_struct *waiter) +{ + /* + * If we're polling, the task itself is doing the completions. For + * that case, we don't need to signal a wakeup, it's enough to just + * mark us as RUNNING. 
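/*
 * Illustrative sketch, not part of this patch, tying together the REQ_HIPRI
 * flag and the new three-argument blk_poll() from the hunks above: the
 * submitter marks the bio for polled completion, then spins on the queue
 * until its cookie completes. "done" is a hypothetical flag that the bio's
 * ->bi_end_io handler would set.
 */
#include <linux/bio.h>
#include <linux/blkdev.h>

static void example_submit_and_poll(struct block_device *bdev, struct bio *bio,
				    bool *done)
{
	blk_qc_t cookie;

	bio->bi_opf |= REQ_HIPRI;		/* ask for polled completion */
	cookie = submit_bio(bio);

	/* spin on the queue until our cookie has been completed */
	while (!READ_ONCE(*done))
		blk_poll(bdev_get_queue(bdev), cookie, true);
}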
+ */ + if (waiter == current) + __set_current_state(TASK_RUNNING); + else + wake_up_process(waiter); +} + #endif diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 6aeaf6472665..b356e0006731 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -31,6 +31,9 @@ struct device; struct scatterlist; struct request_queue; +typedef int (bsg_job_fn) (struct bsg_job *); +typedef enum blk_eh_timer_return (bsg_timeout_fn)(struct request *); + struct bsg_buffer { unsigned int payload_len; int sg_cnt; @@ -72,7 +75,8 @@ struct bsg_job { void bsg_job_done(struct bsg_job *job, int result, unsigned int reply_payload_rcv_len); struct request_queue *bsg_setup_queue(struct device *dev, const char *name, - bsg_job_fn *job_fn, int dd_job_size); + bsg_job_fn *job_fn, bsg_timeout_fn *timeout, int dd_job_size); +void bsg_remove_queue(struct request_queue *q); void bsg_job_put(struct bsg_job *job); int __must_check bsg_job_get(struct bsg_job *job); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9d12757a65b0..9968332cceed 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -93,6 +93,8 @@ extern struct css_set init_css_set; bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); +struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup, + struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, struct cgroup_subsys *ss); struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, diff --git a/include/linux/compat.h b/include/linux/compat.h index 88720b443cd6..056be0d03722 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -169,6 +169,10 @@ typedef struct { compat_sigset_word sig[_COMPAT_NSIG_WORDS]; } compat_sigset_t; +int set_compat_user_sigmask(const compat_sigset_t __user *usigmask, + sigset_t *set, sigset_t *oldset, + size_t sigsetsize); + struct compat_sigaction { #ifndef __ARCH_HAS_IRIX_SIGACTION compat_uptr_t sa_handler; @@ -558,6 +562,12 @@ asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id, struct io_event __user *events, struct old_timespec32 __user *timeout, const struct __compat_aio_sigset __user *usig); +asmlinkage long compat_sys_io_pgetevents_time64(compat_aio_context_t ctx_id, + compat_long_t min_nr, + compat_long_t nr, + struct io_event __user *events, + struct __kernel_timespec __user *timeout, + const struct __compat_aio_sigset __user *usig); /* fs/cookies.c */ asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t); @@ -643,11 +653,21 @@ asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, compat_ulong_t __user *exp, struct old_timespec32 __user *tsp, void __user *sig); +asmlinkage long compat_sys_pselect6_time64(int n, compat_ulong_t __user *inp, + compat_ulong_t __user *outp, + compat_ulong_t __user *exp, + struct __kernel_timespec __user *tsp, + void __user *sig); asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, struct old_timespec32 __user *tsp, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); +asmlinkage long compat_sys_ppoll_time64(struct pollfd __user *ufds, + unsigned int nfds, + struct __kernel_timespec __user *tsp, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize); /* fs/signalfd.c */ asmlinkage long compat_sys_signalfd4(int ufd, @@ -768,6 +788,9 @@ asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset, asmlinkage long 
compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese, struct compat_siginfo __user *uinfo, struct old_timespec32 __user *uts, compat_size_t sigsetsize); +asmlinkage long compat_sys_rt_sigtimedwait_time64(compat_sigset_t __user *uthese, + struct compat_siginfo __user *uinfo, + struct __kernel_timespec __user *uts, compat_size_t sigsetsize); asmlinkage long compat_sys_rt_sigqueueinfo(compat_pid_t pid, int sig, struct compat_siginfo __user *uinfo); /* No generic prototype for rt_sigreturn */ @@ -873,6 +896,9 @@ asmlinkage long compat_sys_move_pages(pid_t pid, compat_ulong_t nr_pages, asmlinkage long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, struct compat_siginfo __user *uinfo); +asmlinkage long compat_sys_recvmmsg_time64(int fd, struct compat_mmsghdr __user *mmsg, + unsigned vlen, unsigned int flags, + struct __kernel_timespec __user *timeout); asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags, struct old_timespec32 __user *timeout); diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 30213adbb6b9..2ad5c363d7d5 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -30,8 +30,6 @@ struct bus_type; extern void dma_debug_add_bus(struct bus_type *bus); -extern int dma_debug_resize_entries(u32 num_entries); - extern void debug_dma_map_single(struct device *dev, const void *addr, unsigned long len); @@ -72,17 +70,6 @@ extern void debug_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, int direction); -extern void debug_dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, - int direction); - -extern void debug_dma_sync_single_range_for_device(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, int direction); - extern void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, int direction); @@ -101,11 +88,6 @@ static inline void dma_debug_add_bus(struct bus_type *bus) { } -static inline int dma_debug_resize_entries(u32 num_entries) -{ - return 0; -} - static inline void debug_dma_map_single(struct device *dev, const void *addr, unsigned long len) { @@ -174,22 +156,6 @@ static inline void debug_dma_sync_single_for_device(struct device *dev, { } -static inline void debug_dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, - int direction) -{ -} - -static inline void debug_dma_sync_single_range_for_device(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, - int direction) -{ -} - static inline void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, int direction) diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 9e66bfe369aa..b7338702592a 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -5,8 +5,6 @@ #include <linux/dma-mapping.h> #include <linux/mem_encrypt.h> -#define DIRECT_MAPPING_ERROR (~(dma_addr_t)0) - #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include <asm/dma-direct.h> #else @@ -50,14 +48,6 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) return __sme_clr(__dma_to_phys(dev, daddr)); } -#ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN -void dma_mark_clean(void *addr, size_t size); -#else -static inline void dma_mark_clean(void *addr, size_t size) -{ -} -#endif /* CONFIG_ARCH_HAS_DMA_MARK_CLEAN */ - u64 
dma_direct_get_required_mask(struct device *dev); void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); @@ -67,11 +57,8 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); -dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs); -int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, unsigned long attrs); +struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); +void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page); int dma_direct_supported(struct device *dev, u64 mask); -int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index e8ca5e654277..e760dc5d1fa8 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -69,7 +69,6 @@ dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs); void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs); -int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); /* The DMA API isn't _quite_ the whole story, though... */ void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index d327bdd53716..ba521d5506c9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -128,13 +128,14 @@ struct dma_map_ops { enum dma_data_direction dir); void (*cache_sync)(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction); - int (*mapping_error)(struct device *dev, dma_addr_t dma_addr); int (*dma_supported)(struct device *dev, u64 mask); u64 (*get_required_mask)(struct device *dev); }; -extern const struct dma_map_ops dma_direct_ops; +#define DMA_MAPPING_ERROR (~(dma_addr_t)0) + extern const struct dma_map_ops dma_virt_ops; +extern const struct dma_map_ops dma_dummy_ops; #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) @@ -220,6 +221,69 @@ static inline const struct dma_map_ops *get_dma_ops(struct device *dev) } #endif +static inline bool dma_is_direct(const struct dma_map_ops *ops) +{ + return likely(!ops); +} + +/* + * All the dma_direct_* declarations are here just for the indirect call bypass, + * and must not be used directly drivers! 
+ */ +dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); +int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs); + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir); +void dma_direct_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir); +#else +static inline void dma_direct_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ +} +static inline void dma_direct_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ +} +#endif + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs); +void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs); +void dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir); +void dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir); +#else +static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ +} +static inline void dma_direct_unmap_sg(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline void dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ +} +static inline void dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ +} +#endif + static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, size_t size, enum dma_data_direction dir, @@ -230,9 +294,12 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, BUG_ON(!valid_dma_direction(dir)); debug_dma_map_single(dev, ptr, size); - addr = ops->map_page(dev, virt_to_page(ptr), - offset_in_page(ptr), size, - dir, attrs); + if (dma_is_direct(ops)) + addr = dma_direct_map_page(dev, virt_to_page(ptr), + offset_in_page(ptr), size, dir, attrs); + else + addr = ops->map_page(dev, virt_to_page(ptr), + offset_in_page(ptr), size, dir, attrs); debug_dma_map_page(dev, virt_to_page(ptr), offset_in_page(ptr), size, dir, addr, true); @@ -247,11 +314,19 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (ops->unmap_page) + if (dma_is_direct(ops)) + dma_direct_unmap_page(dev, addr, size, dir, attrs); + else if (ops->unmap_page) ops->unmap_page(dev, addr, size, dir, attrs); debug_dma_unmap_page(dev, addr, size, dir, true); } +static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + return dma_unmap_single_attrs(dev, addr, size, dir, attrs); +} + /* * dma_maps_sg_attrs returns 0 on error and > 0 on success. 
* It should never return a value < 0. @@ -264,7 +339,10 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int ents; BUG_ON(!valid_dma_direction(dir)); - ents = ops->map_sg(dev, sg, nents, dir, attrs); + if (dma_is_direct(ops)) + ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); + else + ents = ops->map_sg(dev, sg, nents, dir, attrs); BUG_ON(ents < 0); debug_dma_map_sg(dev, sg, nents, ents, dir); @@ -279,7 +357,9 @@ static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg BUG_ON(!valid_dma_direction(dir)); debug_dma_unmap_sg(dev, sg, nents, dir); - if (ops->unmap_sg) + if (dma_is_direct(ops)) + dma_direct_unmap_sg(dev, sg, nents, dir, attrs); + else if (ops->unmap_sg) ops->unmap_sg(dev, sg, nents, dir, attrs); } @@ -293,25 +373,15 @@ static inline dma_addr_t dma_map_page_attrs(struct device *dev, dma_addr_t addr; BUG_ON(!valid_dma_direction(dir)); - addr = ops->map_page(dev, page, offset, size, dir, attrs); + if (dma_is_direct(ops)) + addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); + else + addr = ops->map_page(dev, page, offset, size, dir, attrs); debug_dma_map_page(dev, page, offset, size, dir, addr, false); return addr; } -static inline void dma_unmap_page_attrs(struct device *dev, - dma_addr_t addr, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->unmap_page) - ops->unmap_page(dev, addr, size, dir, attrs); - debug_dma_unmap_page(dev, addr, size, dir, false); -} - static inline dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, @@ -327,7 +397,7 @@ static inline dma_addr_t dma_map_resource(struct device *dev, BUG_ON(pfn_valid(PHYS_PFN(phys_addr))); addr = phys_addr; - if (ops->map_resource) + if (ops && ops->map_resource) addr = ops->map_resource(dev, phys_addr, size, dir, attrs); debug_dma_map_resource(dev, phys_addr, size, dir, addr); @@ -342,7 +412,7 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (ops->unmap_resource) + if (ops && ops->unmap_resource) ops->unmap_resource(dev, addr, size, dir, attrs); debug_dma_unmap_resource(dev, addr, size, dir); } @@ -354,11 +424,20 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_for_cpu) + if (dma_is_direct(ops)) + dma_direct_sync_single_for_cpu(dev, addr, size, dir); + else if (ops->sync_single_for_cpu) ops->sync_single_for_cpu(dev, addr, size, dir); debug_dma_sync_single_for_cpu(dev, addr, size, dir); } +static inline void dma_sync_single_range_for_cpu(struct device *dev, + dma_addr_t addr, unsigned long offset, size_t size, + enum dma_data_direction dir) +{ + return dma_sync_single_for_cpu(dev, addr + offset, size, dir); +} + static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) @@ -366,37 +445,18 @@ static inline void dma_sync_single_for_device(struct device *dev, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_for_device) + if (dma_is_direct(ops)) + dma_direct_sync_single_for_device(dev, addr, size, dir); + else if (ops->sync_single_for_device) ops->sync_single_for_device(dev, addr, size, dir); 
debug_dma_sync_single_for_device(dev, addr, size, dir); } -static inline void dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t addr, - unsigned long offset, - size_t size, - enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_for_cpu) - ops->sync_single_for_cpu(dev, addr + offset, size, dir); - debug_dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); -} - static inline void dma_sync_single_range_for_device(struct device *dev, - dma_addr_t addr, - unsigned long offset, - size_t size, - enum dma_data_direction dir) + dma_addr_t addr, unsigned long offset, size_t size, + enum dma_data_direction dir) { - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_for_device) - ops->sync_single_for_device(dev, addr + offset, size, dir); - debug_dma_sync_single_range_for_device(dev, addr, offset, size, dir); + return dma_sync_single_for_device(dev, addr + offset, size, dir); } static inline void @@ -406,7 +466,9 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_sg_for_cpu) + if (dma_is_direct(ops)) + dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); + else if (ops->sync_sg_for_cpu) ops->sync_sg_for_cpu(dev, sg, nelems, dir); debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); } @@ -418,7 +480,9 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_sg_for_device) + if (dma_is_direct(ops)) + dma_direct_sync_sg_for_device(dev, sg, nelems, dir); + else if (ops->sync_sg_for_device) ops->sync_sg_for_device(dev, sg, nelems, dir); debug_dma_sync_sg_for_device(dev, sg, nelems, dir); @@ -431,16 +495,8 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, #define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s, r, 0) #define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0) -static inline void -dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->cache_sync) - ops->cache_sync(dev, vaddr, size, dir); -} +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction dir); extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, @@ -455,107 +511,29 @@ void *dma_common_pages_remap(struct page **pages, size_t size, const void *caller); void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags); -/** - * dma_mmap_attrs - map a coherent DMA allocation into user space - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices - * @vma: vm_area_struct describing requested user mapping - * @cpu_addr: kernel CPU-view address returned from dma_alloc_attrs - * @handle: device-view address returned from dma_alloc_attrs - * @size: size of memory originally requested in dma_alloc_attrs - * @attrs: attributes of mapping properties requested in dma_alloc_attrs - * - * Map a coherent DMA buffer previously allocated by dma_alloc_attrs - * into user space. The coherent DMA buffer must not be freed by the - * driver until the user space mapping has been released. 
- */ -static inline int -dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, - dma_addr_t dma_addr, size_t size, unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - BUG_ON(!ops); - if (ops->mmap) - return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); -} +int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot); +bool dma_in_atomic_pool(void *start, size_t size); +void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags); +bool dma_free_from_pool(void *start, size_t size); +int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0) int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); -static inline int -dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, - dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - BUG_ON(!ops); - if (ops->get_sgtable) - return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, - attrs); - return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, - attrs); -} - +int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0) -#ifndef arch_dma_alloc_attrs -#define arch_dma_alloc_attrs(dev) (true) -#endif - -static inline void *dma_alloc_attrs(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - void *cpu_addr; - - BUG_ON(!ops); - WARN_ON_ONCE(dev && !dev->coherent_dma_mask); - - if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) - return cpu_addr; - - /* let the implementation decide on the zone to allocate from: */ - flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); - - if (!arch_dma_alloc_attrs(&dev)) - return NULL; - if (!ops->alloc) - return NULL; - - cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); - debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); - return cpu_addr; -} - -static inline void dma_free_attrs(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle, - unsigned long attrs) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!ops); - - if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr)) - return; - /* - * On non-coherent platforms which implement DMA-coherent buffers via - * non-cacheable remaps, ops->free() may call vunmap(). Thus getting - * this far in IRQ context is a) at risk of a BUG_ON() or trying to - * sleep on some machines, and b) an indication that the driver is - * probably misusing the coherent API anyway. 
- */ - WARN_ON(irqs_disabled()); - - if (!ops->free || !cpu_addr) - return; - - debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); - ops->free(dev, size, cpu_addr, dma_handle, attrs); -} +void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t flag, unsigned long attrs); +void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, unsigned long attrs); static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) @@ -573,43 +551,16 @@ static inline void dma_free_coherent(struct device *dev, size_t size, static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - const struct dma_map_ops *ops = get_dma_ops(dev); - debug_dma_mapping_error(dev, dma_addr); - if (ops->mapping_error) - return ops->mapping_error(dev, dma_addr); - return 0; -} - -static inline void dma_check_mask(struct device *dev, u64 mask) -{ - if (sme_active() && (mask < (((u64)sme_get_me_mask() << 1) - 1))) - dev_warn(dev, "SME is active, device will require DMA bounce buffers\n"); -} - -static inline int dma_supported(struct device *dev, u64 mask) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (!ops) - return 0; - if (!ops->dma_supported) - return 1; - return ops->dma_supported(dev, mask); -} - -#ifndef HAVE_ARCH_DMA_SET_MASK -static inline int dma_set_mask(struct device *dev, u64 mask) -{ - if (!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; - dma_check_mask(dev, mask); - - *dev->dma_mask = mask; + if (dma_addr == DMA_MAPPING_ERROR) + return -ENOMEM; return 0; } -#endif + +int dma_supported(struct device *dev, u64 mask); +int dma_set_mask(struct device *dev, u64 mask); +int dma_set_coherent_mask(struct device *dev, u64 mask); static inline u64 dma_get_mask(struct device *dev) { @@ -618,21 +569,6 @@ static inline u64 dma_get_mask(struct device *dev) return DMA_BIT_MASK(32); } -#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK -int dma_set_coherent_mask(struct device *dev, u64 mask); -#else -static inline int dma_set_coherent_mask(struct device *dev, u64 mask) -{ - if (!dma_supported(dev, mask)) - return -EIO; - - dma_check_mask(dev, mask); - - dev->coherent_dma_mask = mask; - return 0; -} -#endif - /* * Set both the DMA mask and the coherent DMA mask to the same thing. * Note that we don't check the return value from dma_set_coherent_mask() @@ -676,8 +612,7 @@ static inline unsigned int dma_get_max_seg_size(struct device *dev) return SZ_64K; } -static inline unsigned int dma_set_max_seg_size(struct device *dev, - unsigned int size) +static inline int dma_set_max_seg_size(struct device *dev, unsigned int size) { if (dev->dma_parms) { dev->dma_parms->max_segment_size = size; @@ -709,12 +644,13 @@ static inline unsigned long dma_max_pfn(struct device *dev) } #endif +/* + * Please always use dma_alloc_coherent instead as it already zeroes the memory! 
+ */ static inline void *dma_zalloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { - void *ret = dma_alloc_coherent(dev, size, dma_handle, - flag | __GFP_ZERO); - return ret; + return dma_alloc_coherent(dev, size, dma_handle, flag); } static inline int dma_get_cache_alignment(void) diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index 9051b055beec..69b36ed31a99 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -38,7 +38,10 @@ pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction); #else -#define arch_dma_cache_sync NULL +static inline void arch_dma_cache_sync(struct device *dev, void *vaddr, + size_t size, enum dma_data_direction direction) +{ +} #endif /* CONFIG_DMA_NONCOHERENT_CACHE_SYNC */ #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE @@ -69,4 +72,6 @@ static inline void arch_sync_dma_for_cpu_all(struct device *dev) } #endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */ +void arch_dma_prep_coherent(struct page *page, size_t size); + #endif /* _LINUX_DMA_NONCOHERENT_H */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 015bb59c0331..2e9e2763bf47 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -23,74 +23,6 @@ enum elv_merge { ELEVATOR_DISCARD_MERGE = 3, }; -typedef enum elv_merge (elevator_merge_fn) (struct request_queue *, struct request **, - struct bio *); - -typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *); - -typedef void (elevator_merged_fn) (struct request_queue *, struct request *, enum elv_merge); - -typedef int (elevator_allow_bio_merge_fn) (struct request_queue *, - struct request *, struct bio *); - -typedef int (elevator_allow_rq_merge_fn) (struct request_queue *, - struct request *, struct request *); - -typedef void (elevator_bio_merged_fn) (struct request_queue *, - struct request *, struct bio *); - -typedef int (elevator_dispatch_fn) (struct request_queue *, int); - -typedef void (elevator_add_req_fn) (struct request_queue *, struct request *); -typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *); -typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); -typedef int (elevator_may_queue_fn) (struct request_queue *, unsigned int); - -typedef void (elevator_init_icq_fn) (struct io_cq *); -typedef void (elevator_exit_icq_fn) (struct io_cq *); -typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, - struct bio *, gfp_t); -typedef void (elevator_put_req_fn) (struct request *); -typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *); -typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *); - -typedef int (elevator_init_fn) (struct request_queue *, - struct elevator_type *e); -typedef void (elevator_exit_fn) (struct elevator_queue *); -typedef void (elevator_registered_fn) (struct request_queue *); - -struct elevator_ops -{ - elevator_merge_fn *elevator_merge_fn; - elevator_merged_fn *elevator_merged_fn; - elevator_merge_req_fn *elevator_merge_req_fn; - elevator_allow_bio_merge_fn *elevator_allow_bio_merge_fn; - elevator_allow_rq_merge_fn *elevator_allow_rq_merge_fn; - elevator_bio_merged_fn *elevator_bio_merged_fn; - - elevator_dispatch_fn *elevator_dispatch_fn; - elevator_add_req_fn *elevator_add_req_fn; - 
elevator_activate_req_fn *elevator_activate_req_fn; - elevator_deactivate_req_fn *elevator_deactivate_req_fn; - - elevator_completed_req_fn *elevator_completed_req_fn; - - elevator_request_list_fn *elevator_former_req_fn; - elevator_request_list_fn *elevator_latter_req_fn; - - elevator_init_icq_fn *elevator_init_icq_fn; /* see iocontext.h */ - elevator_exit_icq_fn *elevator_exit_icq_fn; /* ditto */ - - elevator_set_req_fn *elevator_set_req_fn; - elevator_put_req_fn *elevator_put_req_fn; - - elevator_may_queue_fn *elevator_may_queue_fn; - - elevator_init_fn *elevator_init_fn; - elevator_exit_fn *elevator_exit_fn; - elevator_registered_fn *elevator_registered_fn; -}; - struct blk_mq_alloc_data; struct blk_mq_hw_ctx; @@ -137,17 +69,14 @@ struct elevator_type struct kmem_cache *icq_cache; /* fields provided by elevator implementation */ - union { - struct elevator_ops sq; - struct elevator_mq_ops mq; - } ops; + struct elevator_mq_ops ops; + size_t icq_size; /* see iocontext.h */ size_t icq_align; /* ditto */ struct elv_fs_entry *elevator_attrs; char elevator_name[ELV_NAME_MAX]; const char *elevator_alias; struct module *elevator_owner; - bool uses_mq; #ifdef CONFIG_BLK_DEBUG_FS const struct blk_mq_debugfs_attr *queue_debugfs_attrs; const struct blk_mq_debugfs_attr *hctx_debugfs_attrs; @@ -175,40 +104,25 @@ struct elevator_queue struct kobject kobj; struct mutex sysfs_lock; unsigned int registered:1; - unsigned int uses_mq:1; DECLARE_HASHTABLE(hash, ELV_HASH_BITS); }; /* * block elevator interface */ -extern void elv_dispatch_sort(struct request_queue *, struct request *); -extern void elv_dispatch_add_tail(struct request_queue *, struct request *); -extern void elv_add_request(struct request_queue *, struct request *, int); -extern void __elv_add_request(struct request_queue *, struct request *, int); extern enum elv_merge elv_merge(struct request_queue *, struct request **, struct bio *); extern void elv_merge_requests(struct request_queue *, struct request *, struct request *); extern void elv_merged_request(struct request_queue *, struct request *, enum elv_merge); -extern void elv_bio_merged(struct request_queue *q, struct request *, - struct bio *); extern bool elv_attempt_insert_merge(struct request_queue *, struct request *); -extern void elv_requeue_request(struct request_queue *, struct request *); extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *); -extern int elv_may_queue(struct request_queue *, unsigned int); -extern void elv_completed_request(struct request_queue *, struct request *); -extern int elv_set_request(struct request_queue *q, struct request *rq, - struct bio *bio, gfp_t gfp_mask); -extern void elv_put_request(struct request_queue *, struct request *); -extern void elv_drain_elevator(struct request_queue *); /* * io scheduler registration */ -extern void __init load_default_elevator_module(void); extern int elv_register(struct elevator_type *); extern void elv_unregister(struct elevator_type *); @@ -260,9 +174,5 @@ enum { #define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) #define rq_fifo_clear(rq) list_del_init(&(rq)->queuelist) -#else /* CONFIG_BLOCK */ - -static inline void load_default_elevator_module(void) { } - #endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 1cda6648a41f..811c77743dad 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2026,7 +2026,7 @@ static inline void 
init_sync_kiocb(struct kiocb *kiocb, struct file *filp) .ki_filp = filp, .ki_flags = iocb_flags(filp), .ki_hint = ki_hint_validate(file_write_hint(filp)), - .ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0), + .ki_ioprio = get_current_ioprio(), }; } diff --git a/include/linux/futex.h b/include/linux/futex.h index 821ae502d3d8..ccaef0097785 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -9,9 +9,6 @@ struct inode; struct mm_struct; struct task_struct; -extern int -handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi); - /* * Futexes are matched on equal values of this key. * The key type depends on whether it's a shared or private mapping. @@ -55,11 +52,6 @@ extern void exit_robust_list(struct task_struct *curr); long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); -#ifdef CONFIG_HAVE_FUTEX_CMPXCHG -#define futex_cmpxchg_enabled 1 -#else -extern int futex_cmpxchg_enabled; -#endif #else static inline void exit_robust_list(struct task_struct *curr) { diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 70fc838e6773..06c0fd594097 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -17,6 +17,7 @@ #include <linux/percpu-refcount.h> #include <linux/uuid.h> #include <linux/blk_types.h> +#include <asm/local.h> #ifdef CONFIG_BLOCK @@ -89,6 +90,7 @@ struct disk_stats { unsigned long merges[NR_STAT_GROUPS]; unsigned long io_ticks; unsigned long time_in_queue; + local_t in_flight[2]; }; #define PARTITION_META_INFO_VOLNAMELTH 64 @@ -122,14 +124,13 @@ struct hd_struct { int make_it_fail; #endif unsigned long stamp; - atomic_t in_flight[2]; #ifdef CONFIG_SMP struct disk_stats __percpu *dkstats; #else struct disk_stats dkstats; #endif struct percpu_ref ref; - struct rcu_head rcu_head; + struct rcu_work rcu_work; }; #define GENHD_FL_REMOVABLE 1 @@ -295,8 +296,11 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, #define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) #define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) -#define __part_stat_add(cpu, part, field, addnd) \ - (per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd)) +#define part_stat_get_cpu(part, field, cpu) \ + (per_cpu_ptr((part)->dkstats, (cpu))->field) + +#define part_stat_get(part, field) \ + part_stat_get_cpu(part, field, smp_processor_id()) #define part_stat_read(part, field) \ ({ \ @@ -333,10 +337,9 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_lock() ({ rcu_read_lock(); 0; }) #define part_stat_unlock() rcu_read_unlock() -#define __part_stat_add(cpu, part, field, addnd) \ - ((part)->dkstats.field += addnd) - -#define part_stat_read(part, field) ((part)->dkstats.field) +#define part_stat_get(part, field) ((part)->dkstats.field) +#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field) +#define part_stat_read(part, field) part_stat_get(part, field) static inline void part_stat_set_all(struct hd_struct *part, int value) { @@ -362,22 +365,33 @@ static inline void free_part_stats(struct hd_struct *part) part_stat_read(part, field[STAT_WRITE]) + \ part_stat_read(part, field[STAT_DISCARD])) -#define part_stat_add(cpu, part, field, addnd) do { \ - __part_stat_add((cpu), (part), field, addnd); \ +#define __part_stat_add(part, field, addnd) \ + (part_stat_get(part, field) += (addnd)) + +#define part_stat_add(part, field, addnd) do { \ + __part_stat_add((part), field, addnd); \ if ((part)->partno) \ - __part_stat_add((cpu), 
&part_to_disk((part))->part0, \ + __part_stat_add(&part_to_disk((part))->part0, \ field, addnd); \ } while (0) -#define part_stat_dec(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, -1) -#define part_stat_inc(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, 1) -#define part_stat_sub(cpu, gendiskp, field, subnd) \ - part_stat_add(cpu, gendiskp, field, -subnd) - -void part_in_flight(struct request_queue *q, struct hd_struct *part, - unsigned int inflight[2]); +#define part_stat_dec(gendiskp, field) \ + part_stat_add(gendiskp, field, -1) +#define part_stat_inc(gendiskp, field) \ + part_stat_add(gendiskp, field, 1) +#define part_stat_sub(gendiskp, field, subnd) \ + part_stat_add(gendiskp, field, -subnd) + +#define part_stat_local_dec(gendiskp, field) \ + local_dec(&(part_stat_get(gendiskp, field))) +#define part_stat_local_inc(gendiskp, field) \ + local_inc(&(part_stat_get(gendiskp, field))) +#define part_stat_local_read(gendiskp, field) \ + local_read(&(part_stat_get(gendiskp, field))) +#define part_stat_local_read_cpu(gendiskp, field, cpu) \ + local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) + +unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part); void part_in_flight_rw(struct request_queue *q, struct hd_struct *part, unsigned int inflight[2]); void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, @@ -398,8 +412,7 @@ static inline void free_part_info(struct hd_struct *part) kfree(part->info); } -/* block/blk-core.c */ -extern void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part); +void update_io_ticks(struct hd_struct *part, unsigned long now); /* block/genhd.c */ extern void device_add_disk(struct device *parent, struct gendisk *disk, diff --git a/include/linux/ide.h b/include/linux/ide.h index c74b0321922a..e7d29ae633cd 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -10,7 +10,7 @@ #include <linux/init.h> #include <linux/ioport.h> #include <linux/ata.h> -#include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/proc_fs.h> #include <linux/interrupt.h> #include <linux/bitops.h> @@ -50,6 +50,7 @@ struct ide_request { struct scsi_request sreq; u8 sense[SCSI_SENSE_BUFFERSIZE]; u8 type; + void *special; }; static inline struct ide_request *ide_req(struct request *rq) @@ -529,6 +530,10 @@ struct ide_drive_s { struct request_queue *queue; /* request queue */ + bool (*prep_rq)(struct ide_drive_s *, struct request *); + + struct blk_mq_tag_set tag_set; + struct request *rq; /* current request */ void *driver_data; /* extra driver data */ u16 *id; /* identification info */ @@ -612,6 +617,10 @@ struct ide_drive_s { bool sense_rq_armed; struct request *sense_rq; struct request_sense sense_data; + + /* async sense insertion */ + struct work_struct rq_work; + struct list_head rq_list; }; typedef struct ide_drive_s ide_drive_t; @@ -1089,6 +1098,7 @@ extern int ide_pci_clk; int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int); void ide_kill_rq(ide_drive_t *, struct request *); +void ide_insert_request_head(ide_drive_t *, struct request *); void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int); void ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int); @@ -1208,7 +1218,7 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); extern void ide_timer_expiry(struct timer_list *t); extern irqreturn_t ide_intr(int irq, void *dev_id); -extern void do_ide_request(struct request_queue *); +extern blk_status_t 
ide_queue_rq(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); extern void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq); void ide_init_disk(struct gendisk *, ide_drive_t *); diff --git a/include/linux/init.h b/include/linux/init.h index 9c2aba1dbabf..5255069f5a9f 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -146,7 +146,6 @@ extern unsigned int reset_devices; /* used by init/main.c */ void setup_arch(char **); void prepare_namespace(void); -void __init load_default_modules(void); int __init init_rootfs(void); #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX) diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 9e30ed6443db..e9bfe6972aed 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -71,6 +71,19 @@ static inline int task_nice_ioclass(struct task_struct *task) } /* + * If the calling process has set an I/O priority, use that. Otherwise, return + * the default I/O priority. + */ +static inline int get_current_ioprio(void) +{ + struct io_context *ioc = current->io_context; + + if (ioc) + return ioc->ioprio; + return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); +} + +/* * For inheritance, return the highest of the two given priorities */ extern int ioprio_best(unsigned short aprio, unsigned short bprio); diff --git a/include/linux/key.h b/include/linux/key.h index e58ee10f6e58..7099985e35a9 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -346,6 +346,9 @@ static inline key_serial_t key_serial(const struct key *key) extern void key_set_timeout(struct key *, unsigned); +extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, + key_perm_t perm); + /* * The permissions required on a key that we're looking up. */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 38c95d66ab12..68133842e6d7 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -135,7 +135,6 @@ enum { ATA_SHT_EMULATED = 1, ATA_SHT_THIS_ID = -1, - ATA_SHT_USE_CLUSTERING = 1, /* struct ata_taskfile flags */ ATA_TFLAG_LBA48 = (1 << 0), /* enable 48-bit LBA and "HOB" */ @@ -1360,7 +1359,6 @@ extern struct device_attribute *ata_common_sdev_attrs[]; .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ .this_id = ATA_SHT_THIS_ID, \ .emulated = ATA_SHT_EMULATED, \ - .use_clustering = ATA_SHT_USE_CLUSTERING, \ .proc_name = drv_name, \ .slave_configure = ata_scsi_slave_config, \ .slave_destroy = ata_scsi_slave_destroy, \ diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 097072c5a852..5440f11b0907 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -38,6 +38,10 @@ enum { NDD_UNARMED = 1, /* locked memory devices should not be accessed */ NDD_LOCKED = 2, + /* memory under security wipes should not be accessed */ + NDD_SECURITY_OVERWRITE = 3, + /* tracking whether or not there is a pending device reference */ + NDD_WORK_PENDING = 4, /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, @@ -87,7 +91,7 @@ struct nvdimm_bus_descriptor { ndctl_fn ndctl; int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc, - struct nvdimm *nvdimm, unsigned int cmd); + struct nvdimm *nvdimm, unsigned int cmd, void *data); }; struct nd_cmd_desc { @@ -155,6 +159,46 @@ static inline struct nd_blk_region_desc *to_blk_region_desc( } +enum nvdimm_security_state { + NVDIMM_SECURITY_DISABLED, + NVDIMM_SECURITY_UNLOCKED, + NVDIMM_SECURITY_LOCKED, + NVDIMM_SECURITY_FROZEN, + 
NVDIMM_SECURITY_OVERWRITE, +}; + +#define NVDIMM_PASSPHRASE_LEN 32 +#define NVDIMM_KEY_DESC_LEN 22 + +struct nvdimm_key_data { + u8 data[NVDIMM_PASSPHRASE_LEN]; +}; + +enum nvdimm_passphrase_type { + NVDIMM_USER, + NVDIMM_MASTER, +}; + +struct nvdimm_security_ops { + enum nvdimm_security_state (*state)(struct nvdimm *nvdimm, + enum nvdimm_passphrase_type pass_type); + int (*freeze)(struct nvdimm *nvdimm); + int (*change_key)(struct nvdimm *nvdimm, + const struct nvdimm_key_data *old_data, + const struct nvdimm_key_data *new_data, + enum nvdimm_passphrase_type pass_type); + int (*unlock)(struct nvdimm *nvdimm, + const struct nvdimm_key_data *key_data); + int (*disable)(struct nvdimm *nvdimm, + const struct nvdimm_key_data *key_data); + int (*erase)(struct nvdimm *nvdimm, + const struct nvdimm_key_data *key_data, + enum nvdimm_passphrase_type pass_type); + int (*overwrite)(struct nvdimm *nvdimm, + const struct nvdimm_key_data *key_data); + int (*query_overwrite)(struct nvdimm *nvdimm); +}; + void badrange_init(struct badrange *badrange); int badrange_add(struct badrange *badrange, u64 addr, u64 length); void badrange_forget(struct badrange *badrange, phys_addr_t start, @@ -165,6 +209,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nfit_desc); void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); struct nvdimm_bus *to_nvdimm_bus(struct device *dev); +struct nvdimm_bus *nvdimm_to_bus(struct nvdimm *nvdimm); struct nvdimm *to_nvdimm(struct device *dev); struct nd_region *to_nd_region(struct device *dev); struct device *nd_region_dev(struct nd_region *nd_region); @@ -175,10 +220,21 @@ const char *nvdimm_name(struct nvdimm *nvdimm); struct kobject *nvdimm_kobj(struct nvdimm *nvdimm); unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm); void *nvdimm_provider_data(struct nvdimm *nvdimm); -struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, - const struct attribute_group **groups, unsigned long flags, - unsigned long cmd_mask, int num_flush, - struct resource *flush_wpq); +struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, + void *provider_data, const struct attribute_group **groups, + unsigned long flags, unsigned long cmd_mask, int num_flush, + struct resource *flush_wpq, const char *dimm_id, + const struct nvdimm_security_ops *sec_ops); +static inline struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, + void *provider_data, const struct attribute_group **groups, + unsigned long flags, unsigned long cmd_mask, int num_flush, + struct resource *flush_wpq) +{ + return __nvdimm_create(nvdimm_bus, provider_data, groups, flags, + cmd_mask, num_flush, flush_wpq, NULL, NULL); +} + +int nvdimm_security_setup_events(struct nvdimm *nvdimm); const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd); u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, @@ -204,6 +260,16 @@ u64 nd_fletcher64(void *addr, size_t len, bool le); void nvdimm_flush(struct nd_region *nd_region); int nvdimm_has_flush(struct nd_region *nd_region); int nvdimm_has_cache(struct nd_region *nd_region); +int nvdimm_in_overwrite(struct nvdimm *nvdimm); + +static inline int nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len, int *cmd_rc) +{ + struct nvdimm_bus *nvdimm_bus = nvdimm_to_bus(nvdimm); + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + + return nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, cmd_rc); +} #ifdef CONFIG_ARCH_HAS_PMEM_API 
#define ARCH_MEMREMAP_PMEM MEMREMAP_WB diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 2fdeac1a420d..5d865a5d5cdc 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -90,7 +90,7 @@ typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int, struct nvm_chk_meta *); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *); -typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); +typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int); typedef void (nvm_destroy_dma_pool_fn)(void *); typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t, dma_addr_t *); @@ -357,6 +357,7 @@ struct nvm_geo { u32 clba; /* sectors per chunk */ u16 csecs; /* sector size */ u16 sos; /* out-of-band area size */ + bool ext; /* metadata in extended data buffer */ /* device write constrains */ u32 ws_min; /* minimum write size */ diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 1e70060c92ce..e2687a30e5a1 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -54,12 +54,8 @@ * idle before writing to some registers. */ #define TMIO_MMC_HAS_IDLE_WAIT BIT(4) -/* - * A GPIO is used for card hotplug detection. We need an extra flag for this, - * because 0 is a valid GPIO number too, and requiring users to specify - * cd_gpio < 0 to disable GPIO hotplug would break backwards compatibility. - */ -#define TMIO_MMC_USE_GPIO_CD BIT(5) + +/* BIT(5) is unused */ /* * Some controllers have CMD12 automatically @@ -104,7 +100,6 @@ struct tmio_mmc_data { unsigned long capabilities2; unsigned long flags; u32 ocr_mask; /* available voltages */ - unsigned int cd_gpio; int alignment_shift; dma_addr_t dma_rx_offset; unsigned int max_blk_count; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4d16ba04790e..54299251d40d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -751,8 +751,8 @@ struct mlx5_hca_vport_context { u64 node_guid; u32 cap_mask1; u32 cap_mask1_perm; - u32 cap_mask2; - u32 cap_mask2_perm; + u16 cap_mask2; + u16 cap_mask2_perm; u16 lid; u8 init_type_reply; /* bitmask: see ib spec 14.2.5.6 InitTypeReply */ u8 lmc; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 821b751485fb..35fe5217b244 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -76,13 +76,7 @@ enum { }; enum { - MLX5_GENERAL_OBJ_TYPES_CAP_UCTX = (1ULL << 4), - MLX5_GENERAL_OBJ_TYPES_CAP_UMEM = (1ULL << 5), -}; - -enum { - MLX5_OBJ_TYPE_UCTX = 0x0004, - MLX5_OBJ_TYPE_UMEM = 0x0005, + MLX5_SHARED_RESOURCE_UID = 0xffff, }; enum { @@ -144,6 +138,9 @@ enum { MLX5_CMD_OP_DESTROY_XRQ = 0x718, MLX5_CMD_OP_QUERY_XRQ = 0x719, MLX5_CMD_OP_ARM_XRQ = 0x71a, + MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725, + MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726, + MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, @@ -247,6 +244,7 @@ enum { MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT = 0x93d, MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT = 0x93e, + MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT = 0x93f, MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941, MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942, @@ -259,9 +257,19 @@ enum { MLX5_CMD_OP_MODIFY_GENERAL_OBJECT = 0xa01, 
MLX5_CMD_OP_QUERY_GENERAL_OBJECT = 0xa02, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT = 0xa03, + MLX5_CMD_OP_CREATE_UCTX = 0xa04, + MLX5_CMD_OP_DESTROY_UCTX = 0xa06, + MLX5_CMD_OP_CREATE_UMEM = 0xa08, + MLX5_CMD_OP_DESTROY_UMEM = 0xa0a, MLX5_CMD_OP_MAX }; +/* Valid range for general commands that don't work over an object */ +enum { + MLX5_CMD_OP_GENERAL_START = 0xb00, + MLX5_CMD_OP_GENERAL_END = 0xd00, +}; + struct mlx5_ifc_flow_table_fields_supported_bits { u8 outer_dmac[0x1]; u8 outer_smac[0x1]; @@ -1179,7 +1187,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_440[0x20]; - u8 reserved_at_460[0x10]; + u8 reserved_at_460[0x3]; + u8 log_max_uctx[0x5]; + u8 reserved_at_468[0x3]; + u8 log_max_umem[0x5]; u8 max_num_eqs[0x10]; u8 reserved_at_480[0x3]; @@ -6694,7 +6705,7 @@ struct mlx5_ifc_dealloc_transport_domain_out_bits { struct mlx5_ifc_dealloc_transport_domain_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7547,7 +7558,7 @@ struct mlx5_ifc_alloc_transport_domain_out_bits { struct mlx5_ifc_alloc_transport_domain_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7569,7 +7580,7 @@ struct mlx5_ifc_alloc_q_counter_out_bits { struct mlx5_ifc_alloc_q_counter_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -9390,9 +9401,9 @@ struct mlx5_ifc_general_obj_out_cmd_hdr_bits { }; struct mlx5_ifc_umem_bits { - u8 modify_field_select[0x40]; + u8 reserved_at_0[0x80]; - u8 reserved_at_40[0x5b]; + u8 reserved_at_80[0x1b]; u8 log_page_size[0x5]; u8 page_offset[0x20]; @@ -9403,21 +9414,46 @@ struct mlx5_ifc_umem_bits { }; struct mlx5_ifc_uctx_bits { - u8 modify_field_select[0x40]; - u8 cap[0x20]; - u8 reserved_at_60[0x1a0]; + u8 reserved_at_20[0x160]; }; struct mlx5_ifc_create_umem_in_bits { - struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; - struct mlx5_ifc_umem_bits umem; + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_umem_bits umem; }; struct mlx5_ifc_create_uctx_in_bits { - struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; - struct mlx5_ifc_uctx_bits uctx; + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_uctx_bits uctx; +}; + +struct mlx5_ifc_destroy_uctx_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x10]; + u8 uid[0x10]; + + u8 reserved_at_60[0x20]; }; struct mlx5_ifc_mtrc_string_db_param_bits { diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 2a5fe75dd082..4d35ff36ceff 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -147,6 +147,9 @@ struct mmc_host_ops { /* Prepare HS400 target operating frequency depending host driver */ int (*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios); + /* Prepare switch to DDR during the HS400 init sequence */ + int (*hs400_prepare_ddr)(struct mmc_host *host); + /* Prepare for switching from HS400 to HS200 */ void (*hs400_downgrade)(struct mmc_host *host); @@ -331,7 +334,7 @@ struct mmc_host { #define MMC_CAP_UHS (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | \ MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | \ MMC_CAP_UHS_DDR50) -/* (1 << 21) is free for reuse */ +#define MMC_CAP_SYNC_RUNTIME_PM (1 << 21) /* Synced runtime PM suspends. 
*/ #define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */ #define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */ #define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */ diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index 06607c59c4d0..feebd7aa6f5c 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -17,12 +17,7 @@ struct mmc_host; int mmc_gpio_get_ro(struct mmc_host *host); -int mmc_gpio_request_ro(struct mmc_host *host, unsigned int gpio); - int mmc_gpio_get_cd(struct mmc_host *host); -int mmc_gpio_request_cd(struct mmc_host *host, unsigned int gpio, - unsigned int debounce); - int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, unsigned int idx, bool override_active_level, unsigned int debounce, bool *gpio_invert); diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 496ff759f84c..91745cc3704c 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -403,7 +403,6 @@ struct nvme_fc_port_template { void **handle); void (*delete_queue)(struct nvme_fc_local_port *, unsigned int qidx, void *handle); - void (*poll_queue)(struct nvme_fc_local_port *, void *handle); int (*ls_req)(struct nvme_fc_local_port *, struct nvme_fc_remote_port *, struct nvmefc_ls_req *); @@ -649,22 +648,6 @@ enum { * sequence in one LLDD operation. Errors during Data * sequence transmit must not allow RSP sequence to be sent. */ - NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 1), - /* Bit 2: When 0, the LLDD is calling the cmd rcv handler - * in a non-isr context, allowing the transport to finish - * op completion in the calling context. When 1, the LLDD - * is calling the cmd rcv handler in an ISR context, - * requiring the transport to transition to a workqueue - * for op completion. - */ - NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 2), - /* Bit 3: When 0, the LLDD is calling the op done handler - * in a non-isr context, allowing the transport to finish - * op completion in the calling context. When 1, the LLDD - * is calling the op done handler in an ISR context, - * requiring the transport to transition to a workqueue - * for op completion. - */ }; diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h new file mode 100644 index 000000000000..03d87c0550a9 --- /dev/null +++ b/include/linux/nvme-tcp.h @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVMe over Fabrics TCP protocol header. + * Copyright (c) 2018 Lightbits Labs. All rights reserved. 
+ */ + +#ifndef _LINUX_NVME_TCP_H +#define _LINUX_NVME_TCP_H + +#include <linux/nvme.h> + +#define NVME_TCP_DISC_PORT 8009 +#define NVME_TCP_ADMIN_CCSZ SZ_8K +#define NVME_TCP_DIGEST_LENGTH 4 + +enum nvme_tcp_pfv { + NVME_TCP_PFV_1_0 = 0x0, +}; + +enum nvme_tcp_fatal_error_status { + NVME_TCP_FES_INVALID_PDU_HDR = 0x01, + NVME_TCP_FES_PDU_SEQ_ERR = 0x02, + NVME_TCP_FES_HDR_DIGEST_ERR = 0x03, + NVME_TCP_FES_DATA_OUT_OF_RANGE = 0x04, + NVME_TCP_FES_R2T_LIMIT_EXCEEDED = 0x05, + NVME_TCP_FES_DATA_LIMIT_EXCEEDED = 0x05, + NVME_TCP_FES_UNSUPPORTED_PARAM = 0x06, +}; + +enum nvme_tcp_digest_option { + NVME_TCP_HDR_DIGEST_ENABLE = (1 << 0), + NVME_TCP_DATA_DIGEST_ENABLE = (1 << 1), +}; + +enum nvme_tcp_pdu_type { + nvme_tcp_icreq = 0x0, + nvme_tcp_icresp = 0x1, + nvme_tcp_h2c_term = 0x2, + nvme_tcp_c2h_term = 0x3, + nvme_tcp_cmd = 0x4, + nvme_tcp_rsp = 0x5, + nvme_tcp_h2c_data = 0x6, + nvme_tcp_c2h_data = 0x7, + nvme_tcp_r2t = 0x9, +}; + +enum nvme_tcp_pdu_flags { + NVME_TCP_F_HDGST = (1 << 0), + NVME_TCP_F_DDGST = (1 << 1), + NVME_TCP_F_DATA_LAST = (1 << 2), + NVME_TCP_F_DATA_SUCCESS = (1 << 3), +}; + +/** + * struct nvme_tcp_hdr - nvme tcp pdu common header + * + * @type: pdu type + * @flags: pdu specific flags + * @hlen: pdu header length + * @pdo: pdu data offset + * @plen: pdu wire byte length + */ +struct nvme_tcp_hdr { + __u8 type; + __u8 flags; + __u8 hlen; + __u8 pdo; + __le32 plen; +}; + +/** + * struct nvme_tcp_icreq_pdu - nvme tcp initialize connection request pdu + * + * @hdr: pdu generic header + * @pfv: pdu version format + * @hpda: host pdu data alignment (dwords, 0's based) + * @digest: digest types enabled + * @maxr2t: maximum r2ts per request supported + */ +struct nvme_tcp_icreq_pdu { + struct nvme_tcp_hdr hdr; + __le16 pfv; + __u8 hpda; + __u8 digest; + __le32 maxr2t; + __u8 rsvd2[112]; +}; + +/** + * struct nvme_tcp_icresp_pdu - nvme tcp initialize connection response pdu + * + * @hdr: pdu common header + * @pfv: pdu version format + * @cpda: controller pdu data alignment (dowrds, 0's based) + * @digest: digest types enabled + * @maxdata: maximum data capsules per r2t supported + */ +struct nvme_tcp_icresp_pdu { + struct nvme_tcp_hdr hdr; + __le16 pfv; + __u8 cpda; + __u8 digest; + __le32 maxdata; + __u8 rsvd[112]; +}; + +/** + * struct nvme_tcp_term_pdu - nvme tcp terminate connection pdu + * + * @hdr: pdu common header + * @fes: fatal error status + * @fei: fatal error information + */ +struct nvme_tcp_term_pdu { + struct nvme_tcp_hdr hdr; + __le16 fes; + __le32 fei; + __u8 rsvd[8]; +}; + +/** + * struct nvme_tcp_cmd_pdu - nvme tcp command capsule pdu + * + * @hdr: pdu common header + * @cmd: nvme command + */ +struct nvme_tcp_cmd_pdu { + struct nvme_tcp_hdr hdr; + struct nvme_command cmd; +}; + +/** + * struct nvme_tcp_rsp_pdu - nvme tcp response capsule pdu + * + * @hdr: pdu common header + * @hdr: nvme-tcp generic header + * @cqe: nvme completion queue entry + */ +struct nvme_tcp_rsp_pdu { + struct nvme_tcp_hdr hdr; + struct nvme_completion cqe; +}; + +/** + * struct nvme_tcp_r2t_pdu - nvme tcp ready-to-transfer pdu + * + * @hdr: pdu common header + * @command_id: nvme command identifier which this relates to + * @ttag: transfer tag (controller generated) + * @r2t_offset: offset from the start of the command data + * @r2t_length: length the host is allowed to send + */ +struct nvme_tcp_r2t_pdu { + struct nvme_tcp_hdr hdr; + __u16 command_id; + __u16 ttag; + __le32 r2t_offset; + __le32 r2t_length; + __u8 rsvd[4]; +}; + +/** + * struct nvme_tcp_data_pdu - nvme tcp data pdu 
+ * + * @hdr: pdu common header + * @command_id: nvme command identifier which this relates to + * @ttag: transfer tag (controller generated) + * @data_offset: offset from the start of the command data + * @data_length: length of the data stream + */ +struct nvme_tcp_data_pdu { + struct nvme_tcp_hdr hdr; + __u16 command_id; + __u16 ttag; + __le32 data_offset; + __le32 data_length; + __u8 rsvd[4]; +}; + +union nvme_tcp_pdu { + struct nvme_tcp_icreq_pdu icreq; + struct nvme_tcp_icresp_pdu icresp; + struct nvme_tcp_cmd_pdu cmd; + struct nvme_tcp_rsp_pdu rsp; + struct nvme_tcp_r2t_pdu r2t; + struct nvme_tcp_data_pdu data; +}; + +#endif /* _LINUX_NVME_TCP_H */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 818dbe9331be..bbcc83886899 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -52,15 +52,20 @@ enum { enum { NVMF_TRTYPE_RDMA = 1, /* RDMA */ NVMF_TRTYPE_FC = 2, /* Fibre Channel */ + NVMF_TRTYPE_TCP = 3, /* TCP/IP */ NVMF_TRTYPE_LOOP = 254, /* Reserved for host usage */ NVMF_TRTYPE_MAX, }; /* Transport Requirements codes for Discovery Log Page entry TREQ field */ enum { - NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */ - NVMF_TREQ_REQUIRED = 1, /* Required */ - NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */ + NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */ + NVMF_TREQ_REQUIRED = 1, /* Required */ + NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */ +#define NVME_TREQ_SECURE_CHANNEL_MASK \ + (NVMF_TREQ_REQUIRED | NVMF_TREQ_NOT_REQUIRED) + + NVMF_TREQ_DISABLE_SQFLOW = (1 << 2), /* Supports SQ flow control disable */ }; /* RDMA QP Service Type codes for Discovery Log Page entry TSAS @@ -198,6 +203,11 @@ enum { NVME_PS_FLAGS_NON_OP_STATE = 1 << 1, }; +enum nvme_ctrl_attr { + NVME_CTRL_ATTR_HID_128_BIT = (1 << 0), + NVME_CTRL_ATTR_TBKAS = (1 << 6), +}; + struct nvme_id_ctrl { __le16 vid; __le16 ssvid; @@ -214,7 +224,11 @@ struct nvme_id_ctrl { __le32 rtd3e; __le32 oaes; __le32 ctratt; - __u8 rsvd100[156]; + __u8 rsvd100[28]; + __le16 crdt1; + __le16 crdt2; + __le16 crdt3; + __u8 rsvd134[122]; __le16 oacs; __u8 acl; __u8 aerl; @@ -481,12 +495,21 @@ enum { NVME_AER_NOTICE_NS_CHANGED = 0x00, NVME_AER_NOTICE_FW_ACT_STARTING = 0x01, NVME_AER_NOTICE_ANA = 0x03, + NVME_AER_NOTICE_DISC_CHANGED = 0xf0, }; enum { - NVME_AEN_CFG_NS_ATTR = 1 << 8, - NVME_AEN_CFG_FW_ACT = 1 << 9, - NVME_AEN_CFG_ANA_CHANGE = 1 << 11, + NVME_AEN_BIT_NS_ATTR = 8, + NVME_AEN_BIT_FW_ACT = 9, + NVME_AEN_BIT_ANA_CHANGE = 11, + NVME_AEN_BIT_DISC_CHANGE = 31, +}; + +enum { + NVME_AEN_CFG_NS_ATTR = 1 << NVME_AEN_BIT_NS_ATTR, + NVME_AEN_CFG_FW_ACT = 1 << NVME_AEN_BIT_FW_ACT, + NVME_AEN_CFG_ANA_CHANGE = 1 << NVME_AEN_BIT_ANA_CHANGE, + NVME_AEN_CFG_DISC_CHANGE = 1 << NVME_AEN_BIT_DISC_CHANGE, }; struct nvme_lba_range_type { @@ -639,7 +662,12 @@ struct nvme_common_command { __le32 cdw2[2]; __le64 metadata; union nvme_data_ptr dptr; - __le32 cdw10[6]; + __le32 cdw10; + __le32 cdw11; + __le32 cdw12; + __le32 cdw13; + __le32 cdw14; + __le32 cdw15; }; struct nvme_rw_command { @@ -738,6 +766,15 @@ enum { NVME_HOST_MEM_RETURN = (1 << 1), }; +struct nvme_feat_host_behavior { + __u8 acre; + __u8 resv1[511]; +}; + +enum { + NVME_ENABLE_ACRE = 1, +}; + /* Admin commands */ enum nvme_admin_opcode { @@ -792,6 +829,7 @@ enum { NVME_FEAT_RRL = 0x12, NVME_FEAT_PLM_CONFIG = 0x13, NVME_FEAT_PLM_WINDOW = 0x14, + NVME_FEAT_HOST_BEHAVIOR = 0x16, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, NVME_FEAT_RESV_MASK = 0x82, @@ -1030,6 +1068,10 @@ struct nvmf_disc_rsp_page_hdr { struct nvmf_disc_rsp_page_entry entries[0]; }; 
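Editor's note (illustration only, not part of the patch): the nvme_tcp_* PDU layouts added in <linux/nvme-tcp.h> above map directly onto the NVMe/TCP wire format. The sketch below shows how a host side might populate the initialize-connection request PDU; the helper name, and the choices to leave header/data digests disabled and to advertise a single outstanding R2T (the field is assumed to be 0's based), are assumptions made for the example.

/*
 * Illustrative sketch only -- not part of this patch.
 * Builds an ICReq PDU using the definitions from <linux/nvme-tcp.h>.
 */
#include <linux/nvme-tcp.h>
#include <linux/string.h>

static void example_init_icreq(struct nvme_tcp_icreq_pdu *icreq)
{
	memset(icreq, 0, sizeof(*icreq));

	icreq->hdr.type = nvme_tcp_icreq;
	icreq->hdr.hlen = sizeof(*icreq);
	icreq->hdr.pdo  = 0;				/* no data carried by an ICReq */
	icreq->hdr.plen = cpu_to_le32(sizeof(*icreq));	/* header-only PDU on the wire */

	icreq->pfv    = cpu_to_le16(NVME_TCP_PFV_1_0);
	icreq->hpda   = 0;				/* no extra controller-to-host data alignment */
	icreq->digest = 0;				/* neither NVME_TCP_HDR_DIGEST_ENABLE nor
							 * NVME_TCP_DATA_DIGEST_ENABLE requested */
	icreq->maxr2t = cpu_to_le32(0);			/* assumed 0's based: one outstanding R2T */
}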
+enum { + NVME_CONNECT_DISABLE_SQFLOW = (1 << 2), +}; + struct nvmf_connect_command { __u8 opcode; __u8 resv1; @@ -1126,6 +1168,20 @@ struct nvme_command { }; }; +struct nvme_error_slot { + __le64 error_count; + __le16 sqid; + __le16 cmdid; + __le16 status_field; + __le16 param_error_location; + __le64 lba; + __le32 nsid; + __u8 vs; + __u8 resv[3]; + __le64 cs; + __u8 resv2[24]; +}; + static inline bool nvme_is_write(struct nvme_command *cmd) { /* @@ -1243,6 +1299,7 @@ enum { NVME_SC_ANA_TRANSITION = 0x303, NVME_SC_HOST_PATH_ERROR = 0x370, + NVME_SC_CRD = 0x1800, NVME_SC_DNR = 0x4000, }; diff --git a/include/linux/platform_data/mmc-esdhc-imx.h b/include/linux/platform_data/mmc-esdhc-imx.h index 640dec8b5b0c..b606ca4197df 100644 --- a/include/linux/platform_data/mmc-esdhc-imx.h +++ b/include/linux/platform_data/mmc-esdhc-imx.h @@ -30,15 +30,11 @@ enum cd_types { * * ESDHC_WP(CD)_CONTROLLER type is not available on i.MX25/35. * - * @wp_gpio: gpio for write_protect - * @cd_gpio: gpio for card_detect interrupt * @wp_type: type of write_protect method (see wp_types enum above) * @cd_type: type of card_detect method (see cd_types enum above) */ struct esdhc_platform_data { - unsigned int wp_gpio; - unsigned int cd_gpio; enum wp_types wp_type; enum cd_types cd_type; int max_bus_width; diff --git a/include/linux/platform_data/mmc-pxamci.h b/include/linux/platform_data/mmc-pxamci.h index 752f97c62ef2..7e44e84e7150 100644 --- a/include/linux/platform_data/mmc-pxamci.h +++ b/include/linux/platform_data/mmc-pxamci.h @@ -15,11 +15,7 @@ struct pxamci_platform_data { int (*get_ro)(struct device *); int (*setpower)(struct device *, unsigned int); void (*exit)(struct device *, void *); - int gpio_card_detect; /* gpio detecting card insertion */ - int gpio_card_ro; /* gpio detecting read only toggle */ bool gpio_card_ro_invert; /* gpio ro is inverted */ - int gpio_power; /* gpio powering up MMC bus */ - bool gpio_power_invert; /* gpio power is inverted */ }; extern void pxa_set_mci_info(struct pxamci_platform_data *info); diff --git a/include/linux/platform_data/mmc-s3cmci.h b/include/linux/platform_data/mmc-s3cmci.h index b68d9f0bdd9e..33310b11cbdd 100644 --- a/include/linux/platform_data/mmc-s3cmci.h +++ b/include/linux/platform_data/mmc-s3cmci.h @@ -7,7 +7,6 @@ * @no_wprotect: Set this to indicate there is no write-protect switch. * @no_detect: Set this if there is no detect switch. * @wprotect_invert: Invert the default sense of the write protect switch. - * @detect_invert: Invert the default sense of the write protect switch. * @use_dma: Set to allow the use of DMA. * @gpio_detect: GPIO number for the card detect line. * @gpio_wprotect: GPIO number for the write protect line. @@ -31,11 +30,8 @@ struct s3c24xx_mci_pdata { unsigned int no_wprotect:1; unsigned int no_detect:1; unsigned int wprotect_invert:1; - unsigned int detect_invert:1; /* set => detect active high */ unsigned int use_dma:1; - unsigned int gpio_detect; - unsigned int gpio_wprotect; unsigned long ocr_avail; void (*set_power)(unsigned char power_mode, unsigned short vdd); diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 804a50983ec5..14d558146aea 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -30,14 +30,24 @@ struct seq_file; */ struct sbitmap_word { /** - * @word: The bitmap word itself. + * @depth: Number of bits being used in @word/@cleared */ - unsigned long word; + unsigned long depth; /** - * @depth: Number of bits being used in @word. 
+ * @word: word holding free bits */ - unsigned long depth; + unsigned long word ____cacheline_aligned_in_smp; + + /** + * @cleared: word holding cleared bits + */ + unsigned long cleared ____cacheline_aligned_in_smp; + + /** + * @swap_lock: Held while swapping word <-> cleared + */ + spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** @@ -125,6 +135,11 @@ struct sbitmap_queue { */ struct sbq_wait_state *ws; + /* + * @ws_active: count of currently active ws waitqueues + */ + atomic_t ws_active; + /** * @round_robin: Allocate bits in strict round-robin order. */ @@ -250,12 +265,14 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb, nr = SB_NR_TO_BIT(sb, start); while (scanned < sb->depth) { - struct sbitmap_word *word = &sb->map[index]; - unsigned int depth = min_t(unsigned int, word->depth - nr, + unsigned long word; + unsigned int depth = min_t(unsigned int, + sb->map[index].depth - nr, sb->depth - scanned); scanned += depth; - if (!word->word) + word = sb->map[index].word & ~sb->map[index].cleared; + if (!word) goto next; /* @@ -265,7 +282,7 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb, */ depth += nr; while (1) { - nr = find_next_bit(&word->word, depth, nr); + nr = find_next_bit(&word, depth, nr); if (nr >= depth) break; if (!fn(sb, (index << sb->shift) + nr, data)) @@ -310,6 +327,19 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr) clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } +/* + * This one is special, since it doesn't actually clear the bit, rather it + * sets the corresponding bit in the ->cleared mask instead. Paired with + * the caller doing sbitmap_batch_clear() if a given index is full, which + * will clear the previously freed entries in the corresponding ->word. + */ +static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr) +{ + unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared; + + set_bit(SB_NR_TO_BIT(sb, bitnr), addr); +} + static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb, unsigned int bitnr) { @@ -321,8 +351,6 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } -unsigned int sbitmap_weight(const struct sbitmap *sb); - /** * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file. * @sb: Bitmap to show. @@ -531,4 +559,45 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq); */ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m); +struct sbq_wait { + struct sbitmap_queue *sbq; /* if set, sbq_wait is accounted */ + struct wait_queue_entry wait; +}; + +#define DEFINE_SBQ_WAIT(name) \ + struct sbq_wait name = { \ + .sbq = NULL, \ + .wait = { \ + .private = current, \ + .func = autoremove_wake_function, \ + .entry = LIST_HEAD_INIT((name).wait.entry), \ + } \ + } + +/* + * Wrapper around prepare_to_wait_exclusive(), which maintains some extra + * internal state. + */ +void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, + struct sbq_wait_state *ws, + struct sbq_wait *sbq_wait, int state); + +/* + * Must be paired with sbitmap_prepare_to_wait(). 
+ */ +void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, + struct sbq_wait *sbq_wait); + +/* + * Wrapper around add_wait_queue(), which maintains some extra internal state + */ +void sbitmap_add_wait_queue(struct sbitmap_queue *sbq, + struct sbq_wait_state *ws, + struct sbq_wait *sbq_wait); + +/* + * Must be paired with sbitmap_add_wait_queue() + */ +void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait); + #endif /* __LINUX_SCALE_BITMAP_H */ diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 093aa57120b0..b96f0d0b5b8f 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -324,10 +324,10 @@ size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents, * Like SG_CHUNK_SIZE, but for archs that have sg chaining. This limit * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. */ -#ifdef CONFIG_ARCH_HAS_SG_CHAIN -#define SG_MAX_SEGMENTS 2048 -#else +#ifdef CONFIG_ARCH_NO_SG_CHAIN #define SG_MAX_SEGMENTS SG_CHUNK_SIZE +#else +#define SG_MAX_SEGMENTS 2048 #endif #ifdef CONFIG_SG_POOL diff --git a/include/linux/signal.h b/include/linux/signal.h index f428e86f4800..cc7e2c1cd444 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -273,6 +273,10 @@ extern int group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); extern int __group_send_sig_info(int, struct kernel_siginfo *, struct task_struct *); extern int sigprocmask(int, sigset_t *, sigset_t *); +extern int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set, + sigset_t *oldset, size_t sigsetsize); +extern void restore_user_sigmask(const void __user *usigmask, + sigset_t *sigsaved); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2a57a365c711..93f56fddd92a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3339,6 +3339,9 @@ static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, } int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, struct msghdr *msg); +int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset, + struct iov_iter *to, int len, + struct ahash_request *hash); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); diff --git a/include/linux/socket.h b/include/linux/socket.h index 84c48a3c0227..ab2041a00e01 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -349,7 +349,8 @@ struct ucred { extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); -struct timespec64; +struct __kernel_timespec; +struct old_timespec32; /* The __sys_...msg variants allow MSG_CMSG_COMPAT iff * forbid_cmsg_compat==false @@ -358,8 +359,10 @@ extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, bool forbid_cmsg_compat); extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, bool forbid_cmsg_compat); -extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, - unsigned int flags, struct timespec64 *timeout); +extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, + unsigned int vlen, unsigned int flags, + struct __kernel_timespec 
__user *timeout, + struct old_timespec32 __user *timeout32); extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags, bool forbid_cmsg_compat); diff --git a/include/linux/spi/mmc_spi.h b/include/linux/spi/mmc_spi.h index bfde741a543d..778ae8eb1f3e 100644 --- a/include/linux/spi/mmc_spi.h +++ b/include/linux/spi/mmc_spi.h @@ -8,11 +8,6 @@ struct device; struct mmc_host; -#define MMC_SPI_USE_CD_GPIO (1 << 0) -#define MMC_SPI_USE_RO_GPIO (1 << 1) -#define MMC_SPI_CD_GPIO_ACTIVE_LOW (1 << 2) -#define MMC_SPI_RO_GPIO_ACTIVE_LOW (1 << 3) - /* Put this in platform_data of a device being used to manage an MMC/SD * card slot. (Modeled after PXA mmc glue; see that for usage examples.) * @@ -27,16 +22,6 @@ struct mmc_spi_platform_data { void *); void (*exit)(struct device *, void *); - /* - * Card Detect and Read Only GPIOs. To enable debouncing on the card - * detect GPIO, set the cd_debounce to the debounce time in - * microseconds. - */ - unsigned int flags; - unsigned int cd_gpio; - unsigned int cd_debounce; - unsigned int ro_gpio; - /* Capabilities to pass into mmc core (e.g. MMC_CAP_NEEDS_POLL). */ unsigned long caps; unsigned long caps2; diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index a387b59640a4..7c007ed7505f 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -16,8 +16,6 @@ enum swiotlb_force { SWIOTLB_NO_FORCE, /* swiotlb=noforce */ }; -extern enum swiotlb_force swiotlb_force; - /* * Maximum allowable number of contiguous slabs to map, * must be a power of 2. What is the appropriate value ? @@ -46,9 +44,6 @@ enum dma_sync_target { SYNC_FOR_DEVICE = 1, }; -/* define the last possible byte of physical address space as a mapping error */ -#define SWIOTLB_MAP_ERROR (~(phys_addr_t)0x0) - extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, phys_addr_t phys, size_t size, @@ -65,56 +60,44 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev, size_t size, enum dma_data_direction dir, enum dma_sync_target target); -/* Accessory functions. 
*/ - -extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs); -extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs); - -extern int -swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, - enum dma_data_direction dir, - unsigned long attrs); - -extern void -swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - unsigned long attrs); - -extern void -swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir); - -extern void -swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir); - -extern void -swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir); - -extern void -swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir); - extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); #ifdef CONFIG_SWIOTLB -extern void __init swiotlb_exit(void); +extern enum swiotlb_force swiotlb_force; +extern phys_addr_t io_tlb_start, io_tlb_end; + +static inline bool is_swiotlb_buffer(phys_addr_t paddr) +{ + return paddr >= io_tlb_start && paddr < io_tlb_end; +} + +bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs); +void __init swiotlb_exit(void); unsigned int swiotlb_max_segment(void); #else -static inline void swiotlb_exit(void) { } -static inline unsigned int swiotlb_max_segment(void) { return 0; } -#endif +#define swiotlb_force SWIOTLB_NO_FORCE +static inline bool is_swiotlb_buffer(phys_addr_t paddr) +{ + return false; +} +static inline bool swiotlb_map(struct device *dev, phys_addr_t *phys, + dma_addr_t *dma_addr, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + return false; +} +static inline void swiotlb_exit(void) +{ +} +static inline unsigned int swiotlb_max_segment(void) +{ + return 0; +} +#endif /* CONFIG_SWIOTLB */ extern void swiotlb_print_info(void); extern void swiotlb_set_max_segment(unsigned int); -extern const struct dma_map_ops swiotlb_dma_ops; - #endif /* __LINUX_SWIOTLB_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2ac3d13a915b..251979d2e709 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -296,12 +296,18 @@ asmlinkage long sys_io_getevents(aio_context_t ctx_id, long min_nr, long nr, struct io_event __user *events, - struct timespec __user *timeout); + struct __kernel_timespec __user *timeout); asmlinkage long sys_io_pgetevents(aio_context_t ctx_id, long min_nr, long nr, struct io_event __user *events, - struct timespec __user *timeout, + struct __kernel_timespec __user *timeout, + const struct __aio_sigset *sig); +asmlinkage long sys_io_pgetevents_time32(aio_context_t ctx_id, + long min_nr, + long nr, + struct io_event __user *events, + struct old_timespec32 __user *timeout, const struct __aio_sigset *sig); /* fs/xattr.c */ @@ -466,10 +472,16 @@ asmlinkage long sys_sendfile64(int out_fd, int in_fd, /* fs/select.c */ asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *, - fd_set __user *, struct timespec __user *, + fd_set __user *, struct __kernel_timespec __user *, + void __user *); 
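The pselect6/ppoll prototypes just above follow the same 64-bit time_t pattern visible elsewhere in this diff: the native entry points now take struct __kernel_timespec, while new *_time32 variants accept the legacy struct old_timespec32 from 32-bit callers and widen it before the common code runs. A minimal sketch of that widening step follows; the helper name is an assumption, while copy_from_user() and the two timespec types are existing kernel interfaces.

#include <linux/uaccess.h>
#include <linux/time32.h>
#include <linux/time64.h>

/* Sketch only: widen a 32-bit userspace timeout into a timespec64. */
static int example_widen_timeout(struct timespec64 *to,
				 const struct old_timespec32 __user *from)
{
	struct old_timespec32 ts32;

	if (copy_from_user(&ts32, from, sizeof(ts32)))
		return -EFAULT;

	to->tv_sec  = ts32.tv_sec;	/* seconds widen losslessly to 64 bits */
	to->tv_nsec = ts32.tv_nsec;
	return 0;
}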
+asmlinkage long sys_pselect6_time32(int, fd_set __user *, fd_set __user *, + fd_set __user *, struct old_timespec32 __user *, void __user *); asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, - struct timespec __user *, const sigset_t __user *, + struct __kernel_timespec __user *, const sigset_t __user *, + size_t); +asmlinkage long sys_ppoll_time32(struct pollfd __user *, unsigned int, + struct old_timespec32 __user *, const sigset_t __user *, size_t); /* fs/signalfd.c */ @@ -541,7 +553,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags); /* kernel/futex.c */ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, - struct timespec __user *utime, u32 __user *uaddr2, + struct __kernel_timespec __user *utime, u32 __user *uaddr2, u32 val3); asmlinkage long sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, @@ -637,6 +649,10 @@ asmlinkage long sys_rt_sigtimedwait(const sigset_t __user *uthese, siginfo_t __user *uinfo, const struct __kernel_timespec __user *uts, size_t sigsetsize); +asmlinkage long sys_rt_sigtimedwait_time32(const sigset_t __user *uthese, + siginfo_t __user *uinfo, + const struct old_timespec32 __user *uts, + size_t sigsetsize); asmlinkage long sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo); /* kernel/sys.c */ @@ -831,6 +847,9 @@ asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int); asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg, unsigned int vlen, unsigned flags, struct __kernel_timespec __user *timeout); +asmlinkage long sys_recvmmsg_time32(int fd, struct mmsghdr __user *msg, + unsigned int vlen, unsigned flags, + struct old_timespec32 __user *timeout); asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr, int options, struct rusage __user *ru); diff --git a/include/linux/time32.h b/include/linux/time32.h index 61904a6c098f..118b9977080c 100644 --- a/include/linux/time32.h +++ b/include/linux/time32.h @@ -96,31 +96,6 @@ static inline int timespec_compare(const struct timespec *lhs, const struct time return lhs->tv_nsec - rhs->tv_nsec; } -extern void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec); - -static inline struct timespec timespec_add(struct timespec lhs, - struct timespec rhs) -{ - struct timespec ts_delta; - - set_normalized_timespec(&ts_delta, lhs.tv_sec + rhs.tv_sec, - lhs.tv_nsec + rhs.tv_nsec); - return ts_delta; -} - -/* - * sub = lhs - rhs, in normalized form - */ -static inline struct timespec timespec_sub(struct timespec lhs, - struct timespec rhs) -{ - struct timespec ts_delta; - - set_normalized_timespec(&ts_delta, lhs.tv_sec - rhs.tv_sec, - lhs.tv_nsec - rhs.tv_nsec); - return ts_delta; -} - /* * Returns true if the timespec is norm, false if denorm: */ diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 29975e93fcb8..a8ab0f143ac4 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -262,18 +262,4 @@ void read_persistent_wall_and_boot_offset(struct timespec64 *wall_clock, struct timespec64 *boot_offset); extern int update_persistent_clock64(struct timespec64 now); -/* - * deprecated aliases, don't use in new code - */ -#define getnstimeofday64(ts) ktime_get_real_ts64(ts) - -static inline struct timespec64 current_kernel_time64(void) -{ - struct timespec64 ts; - - ktime_get_coarse_real_ts64(&ts); - - return ts; -} - #endif diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h index a502616f7e1c..cc59cc9e0e84 100644 --- 
a/include/linux/timekeeping32.h +++ b/include/linux/timekeeping32.h @@ -6,15 +6,6 @@ * over time so we can remove the file here. */ -static inline void do_gettimeofday(struct timeval *tv) -{ - struct timespec64 now; - - ktime_get_real_ts64(&now); - tv->tv_sec = now.tv_sec; - tv->tv_usec = now.tv_nsec/1000; -} - static inline unsigned long get_seconds(void) { return ktime_get_real_seconds(); @@ -52,10 +43,4 @@ static inline void getboottime(struct timespec *ts) *ts = timespec64_to_timespec(ts64); } -/* - * Persistent clock related interfaces - */ -extern void read_persistent_clock(struct timespec *ts); -extern int update_persistent_clock(struct timespec now); - #endif diff --git a/include/linux/uio.h b/include/linux/uio.h index 55ce99ddb912..ecf584f6b82d 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/thread_info.h> +#include <crypto/hash.h> #include <uapi/linux/uio.h> struct page; @@ -266,9 +267,11 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) { i->count = count; } -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); +size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); +size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, + struct iov_iter *i); int import_iovec(int type, const struct iovec __user * uvector, unsigned nr_segs, unsigned fast_segs, diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fdfd04e348f6..738a0c24874f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -246,7 +246,8 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, * * @bio is a part of the writeback in progress controlled by @wbc. Perform * writeback specific initialization. This is used to apply the cgroup - * writeback context. + * writeback context. Must be called after the bio has been associated with + * a device. */ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { @@ -257,7 +258,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) * regular writeback instead of writing things out itself. 
*/ if (wbc->wb) - bio_associate_blkcg(bio, wbc->wb->blkcg_css); + bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); } #else /* CONFIG_CGROUP_WRITEBACK */ diff --git a/include/rdma/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h index f62b842e6596..f8982e4e9702 100644 --- a/include/rdma/ib_fmr_pool.h +++ b/include/rdma/ib_fmr_pool.h @@ -88,6 +88,6 @@ struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, int list_len, u64 io_virtual_address); -int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr); +void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr); #endif /* IB_FMR_POOL_H */ diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index f6ba366051c7..fdef558e3a2d 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -277,6 +277,7 @@ enum ib_port_capability_mask_bits { IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14, + IB_PORT_CAP_MASK2_SUP = 1 << 15, IB_PORT_CM_SUP = 1 << 16, IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, IB_PORT_REINIT_SUP = 1 << 18, @@ -295,6 +296,15 @@ enum ib_port_capability_mask_bits { IB_PORT_HIERARCHY_INFO_SUP = 1ULL << 31, }; +enum ib_port_capability_mask2_bits { + IB_PORT_SET_NODE_DESC_SUP = 1 << 0, + IB_PORT_EX_PORT_INFO_EX_SUP = 1 << 1, + IB_PORT_VIRT_SUP = 1 << 2, + IB_PORT_SWITCH_PORT_STATE_TABLE_SUP = 1 << 3, + IB_PORT_LINK_WIDTH_2X_SUP = 1 << 4, + IB_PORT_LINK_SPEED_HDR_SUP = 1 << 5, +}; + #define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26) struct opa_class_port_info { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9c0c2132a2d6..a3ceed3a040a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -41,14 +41,11 @@ #include <linux/types.h> #include <linux/device.h> -#include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/rwsem.h> -#include <linux/scatterlist.h> #include <linux/workqueue.h> -#include <linux/socket.h> #include <linux/irq_poll.h> #include <uapi/linux/if_ether.h> #include <net/ipv6.h> @@ -56,7 +53,7 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/netdevice.h> - +#include <linux/refcount.h> #include <linux/if_link.h> #include <linux/atomic.h> #include <linux/mmu_notifier.h> @@ -437,6 +434,7 @@ enum ib_port_state { enum ib_port_width { IB_WIDTH_1X = 1, + IB_WIDTH_2X = 16, IB_WIDTH_4X = 2, IB_WIDTH_8X = 4, IB_WIDTH_12X = 8 @@ -446,6 +444,7 @@ static inline int ib_width_enum_to_int(enum ib_port_width width) { switch (width) { case IB_WIDTH_1X: return 1; + case IB_WIDTH_2X: return 2; case IB_WIDTH_4X: return 4; case IB_WIDTH_8X: return 8; case IB_WIDTH_12X: return 12; @@ -595,6 +594,7 @@ struct ib_port_attr { u8 active_width; u8 active_speed; u8 phys_state; + u16 port_cap_flags2; }; enum ib_device_modify_flags { @@ -732,7 +732,11 @@ enum ib_rate { IB_RATE_25_GBPS = 15, IB_RATE_100_GBPS = 16, IB_RATE_200_GBPS = 17, - IB_RATE_300_GBPS = 18 + IB_RATE_300_GBPS = 18, + IB_RATE_28_GBPS = 19, + IB_RATE_50_GBPS = 20, + IB_RATE_400_GBPS = 21, + IB_RATE_600_GBPS = 22, }; /** @@ -1508,6 +1512,10 @@ struct ib_ucontext { #endif struct ib_rdmacg_object cg_obj; + /* + * Implementation details of the RDMA core, don't use in drivers: + */ + struct rdma_restrack_entry res; }; struct ib_uobject { @@ -2256,82 +2264,86 @@ struct ib_counters_read_attr { struct uverbs_attr_bundle; -struct ib_device { - /* Do not access @dma_device directly from ULP nor from HW drivers. 
*/ - struct device *dma_device; - - char name[IB_DEVICE_NAME_MAX]; - - struct list_head event_handler_list; - spinlock_t event_handler_lock; - - rwlock_t client_data_lock; - struct list_head core_list; - /* Access to the client_data_list is protected by the client_data_lock - * rwlock and the lists_rwsem read-write semaphore - */ - struct list_head client_data_list; - - struct ib_cache cache; - /** - * port_immutable is indexed by port number - */ - struct ib_port_immutable *port_immutable; - - int num_comp_vectors; - - struct ib_port_pkey_list *port_pkey_list; - - struct iw_cm_verbs *iwcm; - +/** + * struct ib_device_ops - InfiniBand device operations + * This structure defines all the InfiniBand device operations, providers will + * need to define the supported operations, otherwise they will be set to null. + */ +struct ib_device_ops { + int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr, + const struct ib_send_wr **bad_send_wr); + int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); + void (*drain_rq)(struct ib_qp *qp); + void (*drain_sq)(struct ib_qp *qp); + int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc); + int (*peek_cq)(struct ib_cq *cq, int wc_cnt); + int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags); + int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt); + int (*post_srq_recv)(struct ib_srq *srq, + const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); + int (*process_mad)(struct ib_device *device, int process_mad_flags, + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in_mad, size_t in_mad_size, + struct ib_mad_hdr *out_mad, size_t *out_mad_size, + u16 *out_mad_pkey_index); + int (*query_device)(struct ib_device *device, + struct ib_device_attr *device_attr, + struct ib_udata *udata); + int (*modify_device)(struct ib_device *device, int device_modify_mask, + struct ib_device_modify *device_modify); + void (*get_dev_fw_str)(struct ib_device *device, char *str); + const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, + int comp_vector); + int (*query_port)(struct ib_device *device, u8 port_num, + struct ib_port_attr *port_attr); + int (*modify_port)(struct ib_device *device, u8 port_num, + int port_modify_mask, + struct ib_port_modify *port_modify); /** - * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the - * driver initialized data. The struct is kfree()'ed by the sysfs - * core when the device is removed. A lifespan of -1 in the return - * struct tells the core to set a default lifespan. + * The following mandatory functions are used only at device + * registration. Keep functions such as these at the end of this + * structure to avoid cache line misses when accessing struct ib_device + * in fast paths. */ - struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device, - u8 port_num); + int (*get_port_immutable)(struct ib_device *device, u8 port_num, + struct ib_port_immutable *immutable); + enum rdma_link_layer (*get_link_layer)(struct ib_device *device, + u8 port_num); /** - * get_hw_stats - Fill in the counter value(s) in the stats struct. 
- * @index - The index in the value array we wish to have updated, or - * num_counters if we want all stats updated - * Return codes - - * < 0 - Error, no counters updated - * index - Updated the single counter pointed to by index - * num_counters - Updated all counters (will reset the timestamp - * and prevent further calls for lifespan milliseconds) - * Drivers are allowed to update all counters in leiu of just the - * one given in index at their option - */ - int (*get_hw_stats)(struct ib_device *device, - struct rdma_hw_stats *stats, - u8 port, int index); - int (*query_device)(struct ib_device *device, - struct ib_device_attr *device_attr, - struct ib_udata *udata); - int (*query_port)(struct ib_device *device, - u8 port_num, - struct ib_port_attr *port_attr); - enum rdma_link_layer (*get_link_layer)(struct ib_device *device, - u8 port_num); - /* When calling get_netdev, the HW vendor's driver should return the + * When calling get_netdev, the HW vendor's driver should return the * net device of device @device at port @port_num or NULL if such * a net device doesn't exist. The vendor driver should call dev_hold * on this net device. The HW vendor's device driver must guarantee * that this function returns NULL before the net device has finished * NETDEV_UNREGISTER state. */ - struct net_device *(*get_netdev)(struct ib_device *device, - u8 port_num); - /* query_gid should be return GID value for @device, when @port_num + struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num); + /** + * rdma netdev operation + * + * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params + * must return -EOPNOTSUPP if it doesn't support the specified type. + */ + struct net_device *(*alloc_rdma_netdev)( + struct ib_device *device, u8 port_num, enum rdma_netdev_t type, + const char *name, unsigned char name_assign_type, + void (*setup)(struct net_device *)); + + int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num, + enum rdma_netdev_t type, + struct rdma_netdev_alloc_params *params); + /** + * query_gid should be return GID value for @device, when @port_num * link layer is either IB or iWarp. It is no-op if @port_num port * is RoCE link layer. */ - int (*query_gid)(struct ib_device *device, - u8 port_num, int index, - union ib_gid *gid); - /* When calling add_gid, the HW vendor's driver should add the gid + int (*query_gid)(struct ib_device *device, u8 port_num, int index, + union ib_gid *gid); + /** + * When calling add_gid, the HW vendor's driver should add the gid * of device of port at gid index available at @attr. Meta-info of * that gid (for example, the network device related to this gid) is * available at @attr. @context allows the HW vendor driver to store @@ -2343,213 +2355,186 @@ struct ib_device { * concurrently for different ports. This function is only called when * roce_gid_table is used. */ - int (*add_gid)(const struct ib_gid_attr *attr, - void **context); - /* When calling del_gid, the HW vendor's driver should delete the + int (*add_gid)(const struct ib_gid_attr *attr, void **context); + /** + * When calling del_gid, the HW vendor's driver should delete the * gid of device @device at gid index gid_index of port port_num * available in @attr. * Upon the deletion of a GID entry, the HW vendor must free any * allocated memory. The caller will clear @context afterwards. * This function is only called when roce_gid_table is used. 
*/ - int (*del_gid)(const struct ib_gid_attr *attr, - void **context); - int (*query_pkey)(struct ib_device *device, - u8 port_num, u16 index, u16 *pkey); - int (*modify_device)(struct ib_device *device, - int device_modify_mask, - struct ib_device_modify *device_modify); - int (*modify_port)(struct ib_device *device, - u8 port_num, int port_modify_mask, - struct ib_port_modify *port_modify); - struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device, - struct ib_udata *udata); - int (*dealloc_ucontext)(struct ib_ucontext *context); - int (*mmap)(struct ib_ucontext *context, - struct vm_area_struct *vma); - struct ib_pd * (*alloc_pd)(struct ib_device *device, - struct ib_ucontext *context, - struct ib_udata *udata); - int (*dealloc_pd)(struct ib_pd *pd); - struct ib_ah * (*create_ah)(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); - int (*modify_ah)(struct ib_ah *ah, - struct rdma_ah_attr *ah_attr); - int (*query_ah)(struct ib_ah *ah, - struct rdma_ah_attr *ah_attr); - int (*destroy_ah)(struct ib_ah *ah); - struct ib_srq * (*create_srq)(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata); - int (*modify_srq)(struct ib_srq *srq, - struct ib_srq_attr *srq_attr, - enum ib_srq_attr_mask srq_attr_mask, - struct ib_udata *udata); - int (*query_srq)(struct ib_srq *srq, - struct ib_srq_attr *srq_attr); - int (*destroy_srq)(struct ib_srq *srq); - int (*post_srq_recv)(struct ib_srq *srq, - const struct ib_recv_wr *recv_wr, - const struct ib_recv_wr **bad_recv_wr); - struct ib_qp * (*create_qp)(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr, - struct ib_udata *udata); - int (*modify_qp)(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, - int qp_attr_mask, - struct ib_udata *udata); - int (*query_qp)(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, - int qp_attr_mask, - struct ib_qp_init_attr *qp_init_attr); - int (*destroy_qp)(struct ib_qp *qp); - int (*post_send)(struct ib_qp *qp, - const struct ib_send_wr *send_wr, - const struct ib_send_wr **bad_send_wr); - int (*post_recv)(struct ib_qp *qp, - const struct ib_recv_wr *recv_wr, - const struct ib_recv_wr **bad_recv_wr); - struct ib_cq * (*create_cq)(struct ib_device *device, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata); - int (*modify_cq)(struct ib_cq *cq, u16 cq_count, - u16 cq_period); - int (*destroy_cq)(struct ib_cq *cq); - int (*resize_cq)(struct ib_cq *cq, int cqe, - struct ib_udata *udata); - int (*poll_cq)(struct ib_cq *cq, int num_entries, - struct ib_wc *wc); - int (*peek_cq)(struct ib_cq *cq, int wc_cnt); - int (*req_notify_cq)(struct ib_cq *cq, - enum ib_cq_notify_flags flags); - int (*req_ncomp_notif)(struct ib_cq *cq, - int wc_cnt); - struct ib_mr * (*get_dma_mr)(struct ib_pd *pd, - int mr_access_flags); - struct ib_mr * (*reg_user_mr)(struct ib_pd *pd, - u64 start, u64 length, - u64 virt_addr, - int mr_access_flags, - struct ib_udata *udata); - int (*rereg_user_mr)(struct ib_mr *mr, - int flags, - u64 start, u64 length, - u64 virt_addr, - int mr_access_flags, - struct ib_pd *pd, - struct ib_udata *udata); - int (*dereg_mr)(struct ib_mr *mr); - struct ib_mr * (*alloc_mr)(struct ib_pd *pd, - enum ib_mr_type mr_type, - u32 max_num_sg); - int (*map_mr_sg)(struct ib_mr *mr, - struct scatterlist *sg, - int sg_nents, - unsigned int *sg_offset); - struct ib_mw * (*alloc_mw)(struct ib_pd *pd, - enum ib_mw_type type, - struct ib_udata *udata); - int (*dealloc_mw)(struct ib_mw *mw); - struct ib_fmr * (*alloc_fmr)(struct 
ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr); - int (*map_phys_fmr)(struct ib_fmr *fmr, - u64 *page_list, int list_len, - u64 iova); - int (*unmap_fmr)(struct list_head *fmr_list); - int (*dealloc_fmr)(struct ib_fmr *fmr); - int (*attach_mcast)(struct ib_qp *qp, - union ib_gid *gid, - u16 lid); - int (*detach_mcast)(struct ib_qp *qp, - union ib_gid *gid, - u16 lid); - int (*process_mad)(struct ib_device *device, - int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, - size_t in_mad_size, - struct ib_mad_hdr *out_mad, - size_t *out_mad_size, - u16 *out_mad_pkey_index); - struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device, - struct ib_ucontext *ucontext, - struct ib_udata *udata); - int (*dealloc_xrcd)(struct ib_xrcd *xrcd); - struct ib_flow * (*create_flow)(struct ib_qp *qp, - struct ib_flow_attr - *flow_attr, - int domain, - struct ib_udata *udata); - int (*destroy_flow)(struct ib_flow *flow_id); - int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, - struct ib_mr_status *mr_status); - void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); - void (*drain_rq)(struct ib_qp *qp); - void (*drain_sq)(struct ib_qp *qp); - int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port, - int state); - int (*get_vf_config)(struct ib_device *device, int vf, u8 port, - struct ifla_vf_info *ivf); - int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, - struct ifla_vf_stats *stats); - int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, - int type); - struct ib_wq * (*create_wq)(struct ib_pd *pd, - struct ib_wq_init_attr *init_attr, - struct ib_udata *udata); - int (*destroy_wq)(struct ib_wq *wq); - int (*modify_wq)(struct ib_wq *wq, - struct ib_wq_attr *attr, - u32 wq_attr_mask, - struct ib_udata *udata); - struct ib_rwq_ind_table * (*create_rwq_ind_table)(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr, - struct ib_udata *udata); - int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); - struct ib_flow_action * (*create_flow_action_esp)(struct ib_device *device, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs); - int (*destroy_flow_action)(struct ib_flow_action *action); - int (*modify_flow_action_esp)(struct ib_flow_action *action, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs); - struct ib_dm * (*alloc_dm)(struct ib_device *device, - struct ib_ucontext *context, - struct ib_dm_alloc_attr *attr, - struct uverbs_attr_bundle *attrs); - int (*dealloc_dm)(struct ib_dm *dm); - struct ib_mr * (*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm, - struct ib_dm_mr_attr *attr, - struct uverbs_attr_bundle *attrs); - struct ib_counters * (*create_counters)(struct ib_device *device, - struct uverbs_attr_bundle *attrs); - int (*destroy_counters)(struct ib_counters *counters); - int (*read_counters)(struct ib_counters *counters, - struct ib_counters_read_attr *counters_read_attr, - struct uverbs_attr_bundle *attrs); + int (*del_gid)(const struct ib_gid_attr *attr, void **context); + int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index, + u16 *pkey); + struct ib_ucontext *(*alloc_ucontext)(struct ib_device *device, + struct ib_udata *udata); + int (*dealloc_ucontext)(struct ib_ucontext *context); + int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); + void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); + struct ib_pd 
*(*alloc_pd)(struct ib_device *device, + struct ib_ucontext *context, + struct ib_udata *udata); + int (*dealloc_pd)(struct ib_pd *pd); + struct ib_ah *(*create_ah)(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, u32 flags, + struct ib_udata *udata); + int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); + int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); + int (*destroy_ah)(struct ib_ah *ah, u32 flags); + struct ib_srq *(*create_srq)(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); + int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); + int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); + int (*destroy_srq)(struct ib_srq *srq); + struct ib_qp *(*create_qp)(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata); + int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_udata *udata); + int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); + int (*destroy_qp)(struct ib_qp *qp); + struct ib_cq *(*create_cq)(struct ib_device *device, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata); + int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); + int (*destroy_cq)(struct ib_cq *cq); + int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); + struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); + struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, + struct ib_udata *udata); + int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, + struct ib_pd *pd, struct ib_udata *udata); + int (*dereg_mr)(struct ib_mr *mr); + struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg); + int (*advise_mr)(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, u32 flags, + struct ib_sge *sg_list, u32 num_sge, + struct uverbs_attr_bundle *attrs); + int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, + unsigned int *sg_offset); + int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, + struct ib_mr_status *mr_status); + struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type, + struct ib_udata *udata); + int (*dealloc_mw)(struct ib_mw *mw); + struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len, + u64 iova); + int (*unmap_fmr)(struct list_head *fmr_list); + int (*dealloc_fmr)(struct ib_fmr *fmr); + int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); + int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); + struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, + struct ib_ucontext *ucontext, + struct ib_udata *udata); + int (*dealloc_xrcd)(struct ib_xrcd *xrcd); + struct ib_flow *(*create_flow)(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain, struct ib_udata *udata); + int (*destroy_flow)(struct ib_flow *flow_id); + struct ib_flow_action *(*create_flow_action_esp)( + struct ib_device *device, + const struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs); + int (*destroy_flow_action)(struct ib_flow_action *action); + int (*modify_flow_action_esp)( + struct ib_flow_action *action, + const 
struct ib_flow_action_attrs_esp *attr, + struct uverbs_attr_bundle *attrs); + int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port, + int state); + int (*get_vf_config)(struct ib_device *device, int vf, u8 port, + struct ifla_vf_info *ivf); + int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, + struct ifla_vf_stats *stats); + int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, + int type); + struct ib_wq *(*create_wq)(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata); + int (*destroy_wq)(struct ib_wq *wq); + int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr, + u32 wq_attr_mask, struct ib_udata *udata); + struct ib_rwq_ind_table *(*create_rwq_ind_table)( + struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); + int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); + struct ib_dm *(*alloc_dm)(struct ib_device *device, + struct ib_ucontext *context, + struct ib_dm_alloc_attr *attr, + struct uverbs_attr_bundle *attrs); + int (*dealloc_dm)(struct ib_dm *dm); + struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm, + struct ib_dm_mr_attr *attr, + struct uverbs_attr_bundle *attrs); + struct ib_counters *(*create_counters)( + struct ib_device *device, struct uverbs_attr_bundle *attrs); + int (*destroy_counters)(struct ib_counters *counters); + int (*read_counters)(struct ib_counters *counters, + struct ib_counters_read_attr *counters_read_attr, + struct uverbs_attr_bundle *attrs); + /** + * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the + * driver initialized data. The struct is kfree()'ed by the sysfs + * core when the device is removed. A lifespan of -1 in the return + * struct tells the core to set a default lifespan. + */ + struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device, + u8 port_num); + /** + * get_hw_stats - Fill in the counter value(s) in the stats struct. + * @index - The index in the value array we wish to have updated, or + * num_counters if we want all stats updated + * Return codes - + * < 0 - Error, no counters updated + * index - Updated the single counter pointed to by index + * num_counters - Updated all counters (will reset the timestamp + * and prevent further calls for lifespan milliseconds) + * Drivers are allowed to update all counters in leiu of just the + * one given in index at their option + */ + int (*get_hw_stats)(struct ib_device *device, + struct rdma_hw_stats *stats, u8 port, int index); +}; + +struct ib_device { + /* Do not access @dma_device directly from ULP nor from HW drivers. */ + struct device *dma_device; + struct ib_device_ops ops; + char name[IB_DEVICE_NAME_MAX]; + + struct list_head event_handler_list; + spinlock_t event_handler_lock; + + rwlock_t client_data_lock; + struct list_head core_list; + /* Access to the client_data_list is protected by the client_data_lock + * rwlock and the lists_rwsem read-write semaphore + */ + struct list_head client_data_list; + struct ib_cache cache; /** - * rdma netdev operation - * - * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params - * must return -EOPNOTSUPP if it doesn't support the specified type. 
+ * port_immutable is indexed by port number */ - struct net_device *(*alloc_rdma_netdev)( - struct ib_device *device, - u8 port_num, - enum rdma_netdev_t type, - const char *name, - unsigned char name_assign_type, - void (*setup)(struct net_device *)); + struct ib_port_immutable *port_immutable; - int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num, - enum rdma_netdev_t type, - struct rdma_netdev_alloc_params *params); + int num_comp_vectors; + + struct ib_port_pkey_list *port_pkey_list; + + struct iw_cm_verbs *iwcm; struct module *owner; struct device dev; @@ -2592,19 +2577,14 @@ struct ib_device { */ struct rdma_restrack_root res; - /** - * The following mandatory functions are used only at device - * registration. Keep functions such as these at the end of this - * structure to avoid cache line misses when accessing struct ib_device - * in fast paths. - */ - int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *); - void (*get_dev_fw_str)(struct ib_device *, char *str); - const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, - int comp_vector); - - const struct uverbs_object_tree_def *const *driver_specs; + const struct uapi_definition *driver_def; enum rdma_driver_id driver_id; + /* + * Provides synchronization between device unregistration and netlink + * commands on a device. To be used only by core. + */ + refcount_t refcount; + struct completion unreg_completion; }; struct ib_client { @@ -2653,6 +2633,8 @@ void ib_unregister_client(struct ib_client *client); void *ib_get_client_data(struct ib_device *device, struct ib_client *client); void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data); +void ib_set_device_ops(struct ib_device *device, + const struct ib_device_ops *ops); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, @@ -3109,7 +3091,7 @@ static inline bool rdma_cap_roce_gid_table(const struct ib_device *device, u8 port_num) { return rdma_protocol_roce(device, port_num) && - device->add_gid && device->del_gid; + device->ops.add_gid && device->ops.del_gid; } /* @@ -3169,15 +3151,22 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, __ib_alloc_pd((device), (flags), KBUILD_MODNAME) void ib_dealloc_pd(struct ib_pd *pd); +enum rdma_create_ah_flags { + /* In a sleepable context */ + RDMA_CREATE_AH_SLEEPABLE = BIT(0), +}; + /** * rdma_create_ah - Creates an address handle for the given address vector. * @pd: The protection domain associated with the address handle. * @ah_attr: The attributes of the address vector. + * @flags: Create address handle flags (see enum rdma_create_ah_flags). * * The address handle is used to reference a local or global destination * in all UD QP post sends. */ -struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr); +struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags); /** * rdma_create_user_ah - Creates an address handle for the given address vector. @@ -3267,11 +3256,17 @@ int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); */ int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); +enum rdma_destroy_ah_flags { + /* In a sleepable context */ + RDMA_DESTROY_AH_SLEEPABLE = BIT(0), +}; + /** * rdma_destroy_ah - Destroys an address handle. * @ah: The address handle to destroy. + * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags). 
*/ -int rdma_destroy_ah(struct ib_ah *ah); +int rdma_destroy_ah(struct ib_ah *ah, u32 flags); /** * ib_create_srq - Creates a SRQ associated with the specified protection @@ -3333,7 +3328,8 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, { const struct ib_recv_wr *dummy; - return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy); + return srq->device->ops.post_srq_recv(srq, recv_wr, + bad_recv_wr ? : &dummy); } /** @@ -3436,7 +3432,7 @@ static inline int ib_post_send(struct ib_qp *qp, { const struct ib_send_wr *dummy; - return qp->device->post_send(qp, send_wr, bad_send_wr ? : &dummy); + return qp->device->ops.post_send(qp, send_wr, bad_send_wr ? : &dummy); } /** @@ -3453,7 +3449,7 @@ static inline int ib_post_recv(struct ib_qp *qp, { const struct ib_recv_wr *dummy; - return qp->device->post_recv(qp, recv_wr, bad_recv_wr ? : &dummy); + return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy); } struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, @@ -3526,7 +3522,7 @@ int ib_destroy_cq(struct ib_cq *cq); static inline int ib_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) { - return cq->device->poll_cq(cq, num_entries, wc); + return cq->device->ops.poll_cq(cq, num_entries, wc); } /** @@ -3559,7 +3555,7 @@ static inline int ib_poll_cq(struct ib_cq *cq, int num_entries, static inline int ib_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags) { - return cq->device->req_notify_cq(cq, flags); + return cq->device->ops.req_notify_cq(cq, flags); } /** @@ -3571,8 +3567,8 @@ static inline int ib_req_notify_cq(struct ib_cq *cq, */ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) { - return cq->device->req_ncomp_notif ? - cq->device->req_ncomp_notif(cq, wc_cnt) : + return cq->device->ops.req_ncomp_notif ? + cq->device->ops.req_ncomp_notif(cq, wc_cnt) : -ENOSYS; } @@ -3836,7 +3832,7 @@ static inline int ib_map_phys_fmr(struct ib_fmr *fmr, u64 *page_list, int list_len, u64 iova) { - return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova); + return fmr->device->ops.map_phys_fmr(fmr, page_list, list_len, iova); } /** @@ -4189,10 +4185,10 @@ static inline const struct cpumask * ib_get_vector_affinity(struct ib_device *device, int comp_vector) { if (comp_vector < 0 || comp_vector >= device->num_comp_vectors || - !device->get_vector_affinity) + !device->ops.get_vector_affinity) return NULL; - return device->get_vector_affinity(device, comp_vector); + return device->ops.get_vector_affinity(device, comp_vector); } @@ -4204,10 +4200,10 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector) */ void rdma_roce_rescan_device(struct ib_device *ibdev); -struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile); +struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); + -int uverbs_destroy_def_handler(struct ib_uverbs_file *file, - struct uverbs_attr_bundle *attrs); +int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs); struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num, enum rdma_netdev_t type, const char *name, diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 3584d0816fcd..dd0ed8048bb4 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -269,6 +269,13 @@ struct rvt_driver_provided { void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp); /* + * Init a struture allocated with qp_priv_alloc(). This should be + * called after all qp fields have been initialized in rdmavt. 
+ */ + int (*qp_priv_init)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_init_attr *init_attr); + + /* * Free the driver's private qp structure. */ void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 2638fa7cd702..8f179be9d9a9 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -39,6 +39,10 @@ enum rdma_restrack_type { */ RDMA_RESTRACK_MR, /** + * @RDMA_RESTRACK_CTX: Verbs contexts (CTX) + */ + RDMA_RESTRACK_CTX, + /** * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations */ RDMA_RESTRACK_MAX @@ -112,6 +116,10 @@ struct rdma_restrack_entry { * @type: various objects in restrack database */ enum rdma_restrack_type type; + /** + * @user: user resource + */ + bool user; }; /** @@ -136,11 +144,8 @@ int rdma_restrack_count(struct rdma_restrack_root *res, enum rdma_restrack_type type, struct pid_namespace *ns); -/** - * rdma_restrack_add() - add object to the reource tracking database - * @res: resource entry - */ -void rdma_restrack_add(struct rdma_restrack_entry *res); +void rdma_restrack_kadd(struct rdma_restrack_entry *res); +void rdma_restrack_uadd(struct rdma_restrack_entry *res); /** * rdma_restrack_del() - delete object from the reource tracking database @@ -155,7 +160,7 @@ void rdma_restrack_del(struct rdma_restrack_entry *res); */ static inline bool rdma_is_kernel_res(struct rdma_restrack_entry *res) { - return !res->task; + return !res->user; } /** diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 84d3d15f1f38..27da906beea7 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -79,6 +79,8 @@ struct uverbs_attr_spec { */ u8 alloc_and_copy:1; u8 mandatory:1; + /* True if this is from UVERBS_ATTR_UHW */ + u8 is_udata:1; union { struct { @@ -140,6 +142,13 @@ struct uverbs_attr_spec { * * The tree encodes multiple types, and uses a scheme where OBJ_ID,0,0 returns * the object slot, and OBJ_ID,METH_ID,0 and returns the method slot. + * + * This also encodes the tables for the write() and write() extended commands + * using the coding + * OBJ_ID,UVERBS_API_METHOD_IS_WRITE,command # + * OBJ_ID,UVERBS_API_METHOD_IS_WRITE_EX,command_ex # + * ie the WRITE path is treated as a special method type in the ioctl + * framework. 
*/ enum uapi_radix_data { UVERBS_API_NS_FLAG = 1U << UVERBS_ID_NS_SHIFT, @@ -147,12 +156,16 @@ enum uapi_radix_data { UVERBS_API_ATTR_KEY_BITS = 6, UVERBS_API_ATTR_KEY_MASK = GENMASK(UVERBS_API_ATTR_KEY_BITS - 1, 0), UVERBS_API_ATTR_BKEY_LEN = (1 << UVERBS_API_ATTR_KEY_BITS) - 1, + UVERBS_API_WRITE_KEY_NUM = 1 << UVERBS_API_ATTR_KEY_BITS, UVERBS_API_METHOD_KEY_BITS = 5, UVERBS_API_METHOD_KEY_SHIFT = UVERBS_API_ATTR_KEY_BITS, - UVERBS_API_METHOD_KEY_NUM_CORE = 24, - UVERBS_API_METHOD_KEY_NUM_DRIVER = (1 << UVERBS_API_METHOD_KEY_BITS) - - UVERBS_API_METHOD_KEY_NUM_CORE, + UVERBS_API_METHOD_KEY_NUM_CORE = 22, + UVERBS_API_METHOD_IS_WRITE = 30 << UVERBS_API_METHOD_KEY_SHIFT, + UVERBS_API_METHOD_IS_WRITE_EX = 31 << UVERBS_API_METHOD_KEY_SHIFT, + UVERBS_API_METHOD_KEY_NUM_DRIVER = + (UVERBS_API_METHOD_IS_WRITE >> UVERBS_API_METHOD_KEY_SHIFT) - + UVERBS_API_METHOD_KEY_NUM_CORE, UVERBS_API_METHOD_KEY_MASK = GENMASK( UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT - 1, UVERBS_API_METHOD_KEY_SHIFT), @@ -205,7 +218,22 @@ static inline __attribute_const__ u32 uapi_key_ioctl_method(u32 id) return id << UVERBS_API_METHOD_KEY_SHIFT; } -static inline __attribute_const__ u32 uapi_key_attr_to_method(u32 attr_key) +static inline __attribute_const__ u32 uapi_key_write_method(u32 id) +{ + if (id >= UVERBS_API_WRITE_KEY_NUM) + return UVERBS_API_KEY_ERR; + return UVERBS_API_METHOD_IS_WRITE | id; +} + +static inline __attribute_const__ u32 uapi_key_write_ex_method(u32 id) +{ + if (id >= UVERBS_API_WRITE_KEY_NUM) + return UVERBS_API_KEY_ERR; + return UVERBS_API_METHOD_IS_WRITE_EX | id; +} + +static inline __attribute_const__ u32 +uapi_key_attr_to_ioctl_method(u32 attr_key) { return attr_key & (UVERBS_API_OBJ_KEY_MASK | UVERBS_API_METHOD_KEY_MASK); @@ -213,10 +241,23 @@ static inline __attribute_const__ u32 uapi_key_attr_to_method(u32 attr_key) static inline __attribute_const__ bool uapi_key_is_ioctl_method(u32 key) { - return (key & UVERBS_API_METHOD_KEY_MASK) != 0 && + unsigned int method = key & UVERBS_API_METHOD_KEY_MASK; + + return method != 0 && method < UVERBS_API_METHOD_IS_WRITE && (key & UVERBS_API_ATTR_KEY_MASK) == 0; } +static inline __attribute_const__ bool uapi_key_is_write_method(u32 key) +{ + return (key & UVERBS_API_METHOD_KEY_MASK) == UVERBS_API_METHOD_IS_WRITE; +} + +static inline __attribute_const__ bool uapi_key_is_write_ex_method(u32 key) +{ + return (key & UVERBS_API_METHOD_KEY_MASK) == + UVERBS_API_METHOD_IS_WRITE_EX; +} + static inline __attribute_const__ u32 uapi_key_attrs_start(u32 ioctl_method_key) { /* 0 is the method slot itself */ @@ -246,9 +287,12 @@ static inline __attribute_const__ u32 uapi_key_attr(u32 id) return id; } +/* Only true for ioctl methods */ static inline __attribute_const__ bool uapi_key_is_attr(u32 key) { - return (key & UVERBS_API_METHOD_KEY_MASK) != 0 && + unsigned int method = key & UVERBS_API_METHOD_KEY_MASK; + + return method != 0 && method < UVERBS_API_METHOD_IS_WRITE && (key & UVERBS_API_ATTR_KEY_MASK) != 0; } @@ -285,8 +329,7 @@ struct uverbs_method_def { u32 flags; size_t num_attrs; const struct uverbs_attr_def * const (*attrs)[]; - int (*handler)(struct ib_uverbs_file *ufile, - struct uverbs_attr_bundle *ctx); + int (*handler)(struct uverbs_attr_bundle *attrs); }; struct uverbs_object_def { @@ -296,11 +339,132 @@ struct uverbs_object_def { const struct uverbs_method_def * const (*methods)[]; }; -struct uverbs_object_tree_def { - size_t num_objects; - const struct uverbs_object_def * const (*objects)[]; +enum uapi_definition_kind { + UAPI_DEF_END = 
0, + UAPI_DEF_OBJECT_START, + UAPI_DEF_WRITE, + UAPI_DEF_CHAIN_OBJ_TREE, + UAPI_DEF_CHAIN, + UAPI_DEF_IS_SUPPORTED_FUNC, + UAPI_DEF_IS_SUPPORTED_DEV_FN, +}; + +enum uapi_definition_scope { + UAPI_SCOPE_OBJECT = 1, + UAPI_SCOPE_METHOD = 2, }; +struct uapi_definition { + u8 kind; + u8 scope; + union { + struct { + u16 object_id; + } object_start; + struct { + u16 command_num; + u8 is_ex:1; + u8 has_udata:1; + u8 has_resp:1; + u8 req_size; + u8 resp_size; + } write; + }; + + union { + bool (*func_is_supported)(struct ib_device *device); + int (*func_write)(struct uverbs_attr_bundle *attrs); + const struct uapi_definition *chain; + const struct uverbs_object_def *chain_obj_tree; + size_t needs_fn_offset; + }; +}; + +/* Define things connected to object_id */ +#define DECLARE_UVERBS_OBJECT(_object_id, ...) \ + { \ + .kind = UAPI_DEF_OBJECT_START, \ + .object_start = { .object_id = _object_id }, \ + }, \ + ##__VA_ARGS__ + +/* Use in a var_args of DECLARE_UVERBS_OBJECT */ +#define DECLARE_UVERBS_WRITE(_command_num, _func, _cmd_desc, ...) \ + { \ + .kind = UAPI_DEF_WRITE, \ + .scope = UAPI_SCOPE_OBJECT, \ + .write = { .is_ex = 0, .command_num = _command_num }, \ + .func_write = _func, \ + _cmd_desc, \ + }, \ + ##__VA_ARGS__ + +/* Use in a var_args of DECLARE_UVERBS_OBJECT */ +#define DECLARE_UVERBS_WRITE_EX(_command_num, _func, _cmd_desc, ...) \ + { \ + .kind = UAPI_DEF_WRITE, \ + .scope = UAPI_SCOPE_OBJECT, \ + .write = { .is_ex = 1, .command_num = _command_num }, \ + .func_write = _func, \ + _cmd_desc, \ + }, \ + ##__VA_ARGS__ + +/* + * Object is only supported if the function pointer named ibdev_fn in struct + * ib_device is not NULL. + */ +#define UAPI_DEF_OBJ_NEEDS_FN(ibdev_fn) \ + { \ + .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \ + .scope = UAPI_SCOPE_OBJECT, \ + .needs_fn_offset = \ + offsetof(struct ib_device_ops, ibdev_fn) + \ + BUILD_BUG_ON_ZERO( \ + sizeof(((struct ib_device_ops *)0)->ibdev_fn) != \ + sizeof(void *)), \ + } + +/* + * Method is only supported if the function pointer named ibdev_fn in struct + * ib_device is not NULL. + */ +#define UAPI_DEF_METHOD_NEEDS_FN(ibdev_fn) \ + { \ + .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \ + .scope = UAPI_SCOPE_METHOD, \ + .needs_fn_offset = \ + offsetof(struct ib_device_ops, ibdev_fn) + \ + BUILD_BUG_ON_ZERO( \ + sizeof(((struct ib_device_ops *)0)->ibdev_fn) != \ + sizeof(void *)), \ + } + +/* Call a function to determine if the entire object is supported or not */ +#define UAPI_DEF_IS_OBJ_SUPPORTED(_func) \ + { \ + .kind = UAPI_DEF_IS_SUPPORTED_FUNC, \ + .scope = UAPI_SCOPE_OBJECT, .func_is_supported = _func, \ + } + +/* Include another struct uapi_definition in this one */ +#define UAPI_DEF_CHAIN(_def_var) \ + { \ + .kind = UAPI_DEF_CHAIN, .chain = _def_var, \ + } + +/* Temporary until the tree base description is replaced */ +#define UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, _object_ptr, ...) \ + { \ + .kind = UAPI_DEF_CHAIN_OBJ_TREE, \ + .object_start = { .object_id = _object_enum }, \ + .chain_obj_tree = _object_ptr, \ + }, \ + ##__VA_ARGS__ +#define UAPI_DEF_CHAIN_OBJ_TREE_NAMED(_object_enum, ...) \ + UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, &UVERBS_OBJECT(_object_enum), \ + ##__VA_ARGS__) + /* * ======================================= * Attribute Specifications @@ -361,6 +525,12 @@ struct uverbs_object_tree_def { .u2.objs_arr.max_len = _max_len, \ __VA_ARGS__ } }) +/* + * Only for use with UVERBS_ATTR_IDR, allows any uobject type to be accepted, + * the user must validate the type of the uobject instead. 
+ */ +#define UVERBS_IDR_ANY_OBJECT 0xFFFF + #define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \ (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ @@ -433,25 +603,12 @@ struct uverbs_object_tree_def { #define UVERBS_ATTR_UHW() \ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \ UVERBS_ATTR_MIN_SIZE(0), \ - UA_OPTIONAL), \ + UA_OPTIONAL, \ + .is_udata = 1), \ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \ UVERBS_ATTR_MIN_SIZE(0), \ - UA_OPTIONAL) - -/* - * ======================================= - * Declaration helpers - * ======================================= - */ - -#define DECLARE_UVERBS_OBJECT_TREE(_name, ...) \ - static const struct uverbs_object_def *const _name##_ptr[] = { \ - __VA_ARGS__, \ - }; \ - static const struct uverbs_object_tree_def _name = { \ - .num_objects = ARRAY_SIZE(_name##_ptr), \ - .objects = &_name##_ptr, \ - } + UA_OPTIONAL, \ + .is_udata = 1) /* ================================================= * Parsing infrastructure @@ -492,6 +649,8 @@ struct uverbs_attr { }; struct uverbs_attr_bundle { + struct ib_udata driver_udata; + struct ib_udata ucore; struct ib_uverbs_file *ufile; DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN); struct uverbs_attr attrs[]; @@ -560,6 +719,28 @@ uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) return attr->ptr_attr.len; } +/* + * uverbs_attr_ptr_get_array_size() - Get array size pointer by a ptr + * attribute. + * @attrs: The attribute bundle + * @idx: The ID of the attribute + * @elem_size: The size of the element in the array + */ +static inline int +uverbs_attr_ptr_get_array_size(struct uverbs_attr_bundle *attrs, u16 idx, + size_t elem_size) +{ + int size = uverbs_attr_get_len(attrs, idx); + + if (size < 0) + return size; + + if (size % elem_size) + return -EINVAL; + + return size / elem_size; +} + /** * uverbs_attr_get_uobjs_arr() - Provides array's properties for attribute for * UVERBS_ATTR_TYPE_IDRS_ARRAY. 
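As a rough, hedged illustration of how a method handler might consume the uverbs_attr_ptr_get_array_size() helper added in the hunk above: the handler signature and the helper's behaviour (negative return, -EINVAL when the attribute length is not a multiple of the element size) come from this patch, while the handler name and the EXAMPLE_ATTR_SGE_LIST attribute ID below are made up for the sketch.

#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>

/* Made-up attribute ID, standing in for a real ptr attribute that carries
 * an array of struct ib_sge elements.
 */
enum { EXAMPLE_ATTR_SGE_LIST = 1 };

static int example_handler(struct uverbs_attr_bundle *attrs)
{
	int num_sge;

	/* Length of the attribute divided by the element size, or a
	 * negative errno (-EINVAL if the length is not a whole multiple).
	 */
	num_sge = uverbs_attr_ptr_get_array_size(attrs, EXAMPLE_ATTR_SGE_LIST,
						 sizeof(struct ib_sge));
	if (num_sge < 0)
		return num_sge;

	/* ... copy in and validate num_sge elements here ... */
	return 0;
}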
@@ -660,6 +841,12 @@ static inline int _uverbs_copy_from_or_zero(void *to, #define uverbs_copy_from_or_zero(to, attrs_bundle, idx) \ _uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to)) +static inline struct ib_ucontext * +ib_uverbs_get_ucontext(const struct uverbs_attr_bundle *attrs) +{ + return ib_uverbs_get_ucontext_file(attrs->ufile); +} + #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 allowed_bits); @@ -684,6 +871,8 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, s64 lower_bound, u64 upper_bound, s64 *def_val); +int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, + size_t idx, const void *from, size_t size); #else static inline int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, @@ -719,6 +908,12 @@ _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, { return -EINVAL; } +static inline int +uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, + size_t idx, const void *from, size_t size) +{ + return -EINVAL; +} #endif #define uverbs_get_const(_to, _attrs_bundle, _idx) \ diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h index b3b21733cc55..3447bfe356d6 100644 --- a/include/rdma/uverbs_named_ioctl.h +++ b/include/rdma/uverbs_named_ioctl.h @@ -43,7 +43,7 @@ #define _UVERBS_NAME(x, y) _UVERBS_PASTE(x, y) #define UVERBS_METHOD(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id) #define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id) -#define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MOUDLE_NAME, _object_##id) +#define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _object_##id) /* These are static so they do not need to be qualified */ #define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id @@ -102,18 +102,11 @@ #define ADD_UVERBS_METHODS(_name, _object_id, ...) \ static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ _object_id)[] = { __VA_ARGS__ }; \ - static const struct uverbs_object_def _name##_struct = { \ + static const struct uverbs_object_def _name = { \ .id = _object_id, \ .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ .methods = &UVERBS_OBJECT_METHODS(_object_id) \ - }; \ - static const struct uverbs_object_def *const _name##_ptrs[] = { \ - &_name##_struct, \ - }; \ - static const struct uverbs_object_tree_def _name = { \ - .num_objects = 1, \ - .objects = &_name##_ptrs, \ - } + }; /* Used by drivers to declare a complete parsing tree for a single method that * differs only in having additional driver specific attributes. diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 3db2802fbc68..883abcf6d36e 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -37,15 +37,6 @@ #include <rdma/uverbs_ioctl.h> #include <rdma/ib_user_ioctl_verbs.h> -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) -const struct uverbs_object_tree_def *uverbs_default_get_objects(void); -#else -static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(void) -{ - return NULL; -} -#endif - /* Returns _id, or causes a compile error if _id is not a u32. 
* * The uobj APIs should only be used with the write based uAPI to access @@ -54,15 +45,15 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo */ #define _uobj_check_id(_id) ((_id) * typecheck(u32, _id)) -#define uobj_get_type(_ufile, _object) \ - uapi_get_object((_ufile)->device->uapi, _object) +#define uobj_get_type(_attrs, _object) \ + uapi_get_object((_attrs)->ufile->device->uapi, _object) -#define uobj_get_read(_type, _id, _ufile) \ - rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \ +#define uobj_get_read(_type, _id, _attrs) \ + rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ _uobj_check_id(_id), UVERBS_LOOKUP_READ) -#define ufd_get_read(_type, _fdnum, _ufile) \ - rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \ +#define ufd_get_read(_type, _fdnum, _attrs) \ + rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ (_fdnum)*typecheck(s32, _fdnum), \ UVERBS_LOOKUP_READ) @@ -72,26 +63,27 @@ static inline void *_uobj_get_obj_read(struct ib_uobject *uobj) return NULL; return uobj->object; } -#define uobj_get_obj_read(_object, _type, _id, _ufile) \ +#define uobj_get_obj_read(_object, _type, _id, _attrs) \ ((struct ib_##_object *)_uobj_get_obj_read( \ - uobj_get_read(_type, _id, _ufile))) + uobj_get_read(_type, _id, _attrs))) -#define uobj_get_write(_type, _id, _ufile) \ - rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \ +#define uobj_get_write(_type, _id, _attrs) \ + rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \ _uobj_check_id(_id), UVERBS_LOOKUP_WRITE) int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, - struct ib_uverbs_file *ufile, int success_res); -#define uobj_perform_destroy(_type, _id, _ufile, _success_res) \ - __uobj_perform_destroy(uobj_get_type(_ufile, _type), \ - _uobj_check_id(_id), _ufile, _success_res) + const struct uverbs_attr_bundle *attrs); +#define uobj_perform_destroy(_type, _id, _attrs) \ + __uobj_perform_destroy(uobj_get_type(_attrs, _type), \ + _uobj_check_id(_id), _attrs) struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, - u32 id, struct ib_uverbs_file *ufile); + u32 id, + const struct uverbs_attr_bundle *attrs); -#define uobj_get_destroy(_type, _id, _ufile) \ - __uobj_get_destroy(uobj_get_type(_ufile, _type), _uobj_check_id(_id), \ - _ufile) +#define uobj_get_destroy(_type, _id, _attrs) \ + __uobj_get_destroy(uobj_get_type(_attrs, _type), _uobj_check_id(_id), \ + _attrs) static inline void uobj_put_destroy(struct ib_uobject *uobj) { @@ -111,14 +103,13 @@ static inline void uobj_put_write(struct ib_uobject *uobj) rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); } -static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj, - int success_res) +static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj) { int ret = rdma_alloc_commit_uobject(uobj); if (ret) return ret; - return success_res; + return 0; } static inline void uobj_alloc_abort(struct ib_uobject *uobj) @@ -127,18 +118,18 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj) } static inline struct ib_uobject * -__uobj_alloc(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile, - struct ib_device **ib_dev) +__uobj_alloc(const struct uverbs_api_object *obj, + struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev) { - struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, ufile); + struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs->ufile); if 
(!IS_ERR(uobj)) *ib_dev = uobj->context->device; return uobj; } -#define uobj_alloc(_type, _ufile, _ib_dev) \ - __uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev) +#define uobj_alloc(_type, _attrs, _ib_dev) \ + __uobj_alloc(uobj_get_type(_attrs, _type), _attrs, _ib_dev) static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action, struct ib_uobject *uobj, @@ -191,5 +182,17 @@ static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow, uflow->resources = uflow_res; } +struct uverbs_api_object { + const struct uverbs_obj_type *type_attrs; + const struct uverbs_obj_type_class *type_class; + u8 disabled:1; + u32 id; +}; + +static inline u32 uobj_get_object_id(struct ib_uobject *uobj) +{ + return uobj->uapi_object->id; +} + #endif diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index c891ada3c5c2..d85e6befa26b 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -61,6 +61,9 @@ struct scsi_pointer { /* flags preserved across unprep / reprep */ #define SCMD_PRESERVED_FLAGS (SCMD_UNCHECKED_ISA_DMA | SCMD_INITIALIZED) +/* for scmd->state */ +#define SCMD_STATE_COMPLETE 0 + struct scsi_cmnd { struct scsi_request req; struct scsi_device *device; @@ -145,6 +148,7 @@ struct scsi_cmnd { int result; /* Status code from lower level driver */ int flags; /* Command flags */ + unsigned long state; /* Command completion state */ unsigned char tag; /* SCSI-II queued command tag */ }; @@ -171,7 +175,7 @@ extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count, size_t *offset, size_t *len); extern void scsi_kunmap_atomic_sg(void *virt); -extern int scsi_init_io(struct scsi_cmnd *cmd); +extern blk_status_t scsi_init_io(struct scsi_cmnd *cmd); #ifdef CONFIG_SCSI_DMA extern int scsi_dma_map(struct scsi_cmnd *cmd); diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h index c7bba2b24849..a862dc23c68d 100644 --- a/include/scsi/scsi_dh.h +++ b/include/scsi/scsi_dh.h @@ -69,7 +69,7 @@ struct scsi_device_handler { int (*attach)(struct scsi_device *); void (*detach)(struct scsi_device *); int (*activate)(struct scsi_device *, activate_complete, void *); - int (*prep_fn)(struct scsi_device *, struct request *); + blk_status_t (*prep_fn)(struct scsi_device *, struct request *); int (*set_params)(struct scsi_device *, const char *); void (*rescan)(struct scsi_device *); }; diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index fae8b465233e..6dffa8555a39 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -2,6 +2,7 @@ #ifndef _SCSI_SCSI_DRIVER_H #define _SCSI_SCSI_DRIVER_H +#include <linux/blk_types.h> #include <linux/device.h> struct module; @@ -13,7 +14,7 @@ struct scsi_driver { struct device_driver gendrv; void (*rescan)(struct device *); - int (*init_command)(struct scsi_cmnd *); + blk_status_t (*init_command)(struct scsi_cmnd *); void (*uninit_command)(struct scsi_cmnd *); int (*done)(struct scsi_cmnd *); int (*eh_action)(struct scsi_cmnd *, int); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 5ea06d310a25..6ca954e9f752 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -11,7 +11,6 @@ #include <linux/blk-mq.h> #include <scsi/scsi.h> -struct request_queue; struct block_device; struct completion; struct module; @@ -22,7 +21,6 @@ struct scsi_target; struct Scsi_Host; struct scsi_host_cmd_pool; struct scsi_transport_template; -struct blk_queue_tags; /* @@ -44,9 +42,6 @@ struct blk_queue_tags; #define MODE_INITIATOR 0x01 #define 
MODE_TARGET 0x02 -#define DISABLE_CLUSTERING 0 -#define ENABLE_CLUSTERING 1 - struct scsi_host_template { struct module *module; const char *name; @@ -366,6 +361,11 @@ struct scsi_host_template { unsigned int max_sectors; /* + * Maximum size in bytes of a single segment. + */ + unsigned int max_segment_size; + + /* * DMA scatter gather segment boundary limit. A segment crossing this * boundary will be split in two. */ @@ -415,16 +415,6 @@ struct scsi_host_template { unsigned unchecked_isa_dma:1; /* - * True if this host adapter can make good use of clustering. - * I originally thought that if the tablesize was large that it - * was a waste of CPU cycles to prepare a cluster list, but - * it works out that the Buslogic is faster if you use a smaller - * number of segments (i.e. use clustering). I guess it is - * inefficient. - */ - unsigned use_clustering:1; - - /* * True for emulated SCSI host adapters (e.g. ATAPI). */ unsigned emulated:1; @@ -547,14 +537,8 @@ struct Scsi_Host { struct scsi_host_template *hostt; struct scsi_transport_template *transportt; - /* - * Area to keep a shared tag map (if needed, will be - * NULL if not). - */ - union { - struct blk_queue_tag *bqt; - struct blk_mq_tag_set tag_set; - }; + /* Area to keep a shared tag map */ + struct blk_mq_tag_set tag_set; atomic_t host_busy; /* commands actually active on low-level */ atomic_t host_blocked; @@ -604,6 +588,7 @@ struct Scsi_Host { short unsigned int sg_tablesize; short unsigned int sg_prot_tablesize; unsigned int max_sectors; + unsigned int max_segment_size; unsigned long dma_boundary; /* * In scsi-mq mode, the number of hardware queues supported by the LLD. @@ -621,7 +606,6 @@ struct Scsi_Host { unsigned active_mode:2; unsigned unchecked_isa_dma:1; - unsigned use_clustering:1; /* * Host has requested that no further requests come through for the @@ -648,7 +632,6 @@ struct Scsi_Host { /* The controller does not support WRITE SAME */ unsigned no_write_same:1; - unsigned use_blk_mq:1; unsigned use_cmd_list:1; /* Host responded with short (<36 bytes) INQUIRY result */ @@ -742,11 +725,6 @@ static inline int scsi_host_in_recovery(struct Scsi_Host *shost) shost->tmf_in_progress; } -static inline bool shost_use_blk_mq(struct Scsi_Host *shost) -{ - return shost->use_blk_mq; -} - extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *); extern void scsi_flush_work(struct Scsi_Host *); diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index e192a0caa850..6053d46e794e 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -23,19 +23,15 @@ static inline struct scsi_cmnd *scsi_host_find_tag(struct Scsi_Host *shost, int tag) { struct request *req = NULL; + u16 hwq; if (tag == SCSI_NO_TAG) return NULL; - if (shost_use_blk_mq(shost)) { - u16 hwq = blk_mq_unique_tag_to_hwq(tag); - - if (hwq < shost->tag_set.nr_hw_queues) { - req = blk_mq_tag_to_rq(shost->tag_set.tags[hwq], - blk_mq_unique_tag_to_tag(tag)); - } - } else { - req = blk_map_queue_find_tag(shost->bqt, tag); + hwq = blk_mq_unique_tag_to_hwq(tag); + if (hwq < shost->tag_set.nr_hw_queues) { + req = blk_mq_tag_to_rq(shost->tag_set.tags[hwq], + blk_mq_unique_tag_to_tag(tag)); } if (!req) diff --git a/include/scsi/srp.h b/include/scsi/srp.h index c16a3c9a4d9b..9220758d5087 100644 --- a/include/scsi/srp.h +++ b/include/scsi/srp.h @@ -67,7 +67,8 @@ enum { enum { SRP_NO_DATA_DESC = 0, SRP_DATA_DESC_DIRECT = 1, - SRP_DATA_DESC_INDIRECT = 2 + SRP_DATA_DESC_INDIRECT = 2, + SRP_DATA_DESC_IMM = 3, /* new in SRP2 */ }; enum { @@ -111,9 +112,16 @@ 
struct srp_indirect_buf { struct srp_direct_buf desc_list[0]; } __attribute__((packed)); +/* Immediate data buffer descriptor as defined in SRP2. */ +struct srp_imm_buf { + __be32 len; +}; + +/* srp_login_req.flags */ enum { SRP_MULTICHAN_SINGLE = 0, - SRP_MULTICHAN_MULTI = 1 + SRP_MULTICHAN_MULTI = 1, + SRP_IMMED_REQUESTED = 0x80, /* new in SRP2 */ }; struct srp_login_req { @@ -124,7 +132,9 @@ struct srp_login_req { u8 reserved2[4]; __be16 req_buf_fmt; u8 req_flags; - u8 reserved3[5]; + u8 reserved3[1]; + __be16 imm_data_offset; /* new in SRP2 */ + u8 reserved4[2]; u8 initiator_port_id[16]; u8 target_port_id[16]; }; @@ -144,6 +154,16 @@ struct srp_login_req_rdma { __be32 req_it_iu_len; u8 initiator_port_id[16]; u8 target_port_id[16]; + __be16 imm_data_offset; + u8 reserved[6]; +}; + +/* srp_login_rsp.rsp_flags */ +enum { + SRP_LOGIN_RSP_MULTICHAN_NO_CHAN = 0x0, + SRP_LOGIN_RSP_MULTICHAN_TERMINATED = 0x1, + SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2, + SRP_LOGIN_RSP_IMMED_SUPP = 0x80, /* new in SRP2 */ }; /* diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index e3bdb0550a59..69b7b955902c 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -46,6 +46,10 @@ /* Used by transport_get_inquiry_vpd_device_ident() */ #define INQUIRY_VPD_DEVICE_IDENTIFIER_LEN 254 +#define INQUIRY_VENDOR_LEN 8 +#define INQUIRY_MODEL_LEN 16 +#define INQUIRY_REVISION_LEN 4 + /* Attempts before moving from SHORT to LONG */ #define PYX_TRANSPORT_WINDOW_CLOSED_THRESHOLD 3 #define PYX_TRANSPORT_WINDOW_CLOSED_WAIT_SHORT 3 /* In milliseconds */ @@ -87,6 +91,8 @@ #define DA_EMULATE_3PC 1 /* No Emulation for PSCSI by default */ #define DA_EMULATE_ALUA 0 +/* Emulate SCSI2 RESERVE/RELEASE and Persistent Reservations by default */ +#define DA_EMULATE_PR 1 /* Enforce SCSI Initiator Port TransportID with 'ISID' for PR */ #define DA_ENFORCE_PR_ISIDS 1 /* Force SPC-3 PR Activate Persistence across Target Power Loss */ @@ -134,7 +140,6 @@ enum se_cmd_flags_table { SCF_SENT_CHECK_CONDITION = 0x00000800, SCF_OVERFLOW_BIT = 0x00001000, SCF_UNDERFLOW_BIT = 0x00002000, - SCF_SEND_DELAYED_TAS = 0x00004000, SCF_ALUA_NON_OPTIMIZED = 0x00008000, SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC = 0x00020000, SCF_COMPARE_AND_WRITE = 0x00080000, @@ -314,9 +319,13 @@ struct t10_vpd { }; struct t10_wwn { - char vendor[8]; - char model[16]; - char revision[4]; + /* + * SCSI left aligned strings may not be null terminated. +1 to ensure a + * null terminator is always present. 
+ */ + char vendor[INQUIRY_VENDOR_LEN + 1]; + char model[INQUIRY_MODEL_LEN + 1]; + char revision[INQUIRY_REVISION_LEN + 1]; char unit_serial[INQUIRY_VPD_SERIAL_LEN]; spinlock_t t10_vpd_lock; struct se_device *t10_dev; @@ -474,7 +483,8 @@ struct se_cmd { struct se_session *se_sess; struct se_tmr_req *se_tmr_req; struct list_head se_cmd_list; - struct completion *compl; + struct completion *free_compl; + struct completion *abrt_compl; const struct target_core_fabric_ops *se_tfo; sense_reason_t (*execute_cmd)(struct se_cmd *); sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool, int *); @@ -601,6 +611,7 @@ struct se_session { struct se_node_acl *se_node_acl; struct se_portal_group *se_tpg; void *fabric_sess_ptr; + struct percpu_ref cmd_count; struct list_head sess_list; struct list_head sess_acl_list; struct list_head sess_cmd_list; @@ -664,7 +675,7 @@ struct se_dev_attrib { int emulate_tpws; int emulate_caw; int emulate_3pc; - int pi_prot_format; + int emulate_pr; enum target_prot_type pi_prot_type; enum target_prot_type hw_pi_prot_type; int pi_prot_verify; @@ -731,7 +742,6 @@ struct se_lun { struct scsi_port_stats lun_stats; struct config_group lun_group; struct se_port_stat_grps port_stat_grps; - struct completion lun_ref_comp; struct completion lun_shutdown_comp; struct percpu_ref lun_ref; struct list_head lun_dev_link; @@ -794,7 +804,6 @@ struct se_device { struct t10_pr_registration *dev_pr_res_holder; struct list_head dev_sep_list; struct list_head dev_tmr_list; - struct workqueue_struct *tmr_wq; struct work_struct qf_work_queue; struct list_head delayed_cmd_list; struct list_head state_list; diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index f4147b398431..ee5ddd81cd8d 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -8,7 +8,18 @@ struct target_core_fabric_ops { struct module *module; - const char *name; + /* + * XXX: Special case for iscsi/iSCSI... + * If non-null, fabric_alias is used for matching target/$fabric + * ConfigFS paths. If null, fabric_name is used for this (see below). + */ + const char *fabric_alias; + /* + * fabric_name is used for matching target/$fabric ConfigFS paths + * without a fabric_alias (see above). It's also used for the ALUA state + * path and is stored on disk with PR state. + */ + const char *fabric_name; size_t node_acl_size; /* * Limits number of scatterlist entries per SCF_SCSI_DATA_CDB payload. @@ -23,7 +34,6 @@ struct target_core_fabric_ops { * XXX: Currently assumes single PAGE_SIZE per scatterlist entry */ u32 max_data_sg_nents; - char *(*get_fabric_name)(void); char *(*tpg_get_wwn)(struct se_portal_group *); u16 (*tpg_get_tag)(struct se_portal_group *); u32 (*tpg_get_default_depth)(struct se_portal_group *); @@ -101,6 +111,13 @@ struct target_core_fabric_ops { struct configfs_attribute **tfc_tpg_nacl_attrib_attrs; struct configfs_attribute **tfc_tpg_nacl_auth_attrs; struct configfs_attribute **tfc_tpg_nacl_param_attrs; + + /* + * Set this member variable to true if the SCSI transport protocol + * (e.g. iSCSI) requires that the Data-Out buffer is transferred in + * its entirety before a command is aborted. 
+ */ + bool write_pending_must_be_called; }; int target_register_template(const struct target_core_fabric_ops *fo); @@ -116,7 +133,7 @@ struct se_session *target_setup_session(struct se_portal_group *, struct se_session *, void *)); void target_remove_session(struct se_session *); -void transport_init_session(struct se_session *); +int transport_init_session(struct se_session *se_sess); struct se_session *transport_alloc_session(enum target_prot_op); int transport_alloc_session_tags(struct se_session *, unsigned int, unsigned int); @@ -149,12 +166,12 @@ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess, int transport_handle_cdb_direct(struct se_cmd *); sense_reason_t transport_generic_new_cmd(struct se_cmd *); +void target_put_cmd_and_wait(struct se_cmd *cmd); void target_execute_cmd(struct se_cmd *cmd); int transport_generic_free_cmd(struct se_cmd *, int); bool transport_wait_for_tasks(struct se_cmd *); -int transport_check_aborted_status(struct se_cmd *, int); int transport_send_check_condition_and_sense(struct se_cmd *, sense_reason_t, int); int target_get_sess_cmd(struct se_cmd *, bool); diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index 2cbd6e42ad83..e4526f85c19d 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -221,9 +221,30 @@ DEFINE_EVENT(cache_set, bcache_journal_entry_full, TP_ARGS(c) ); -DEFINE_EVENT(bcache_bio, bcache_journal_write, - TP_PROTO(struct bio *bio), - TP_ARGS(bio) +TRACE_EVENT(bcache_journal_write, + TP_PROTO(struct bio *bio, u32 keys), + TP_ARGS(bio, keys), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(sector_t, sector ) + __field(unsigned int, nr_sector ) + __array(char, rwbs, 6 ) + __field(u32, nr_keys ) + ), + + TP_fast_assign( + __entry->dev = bio_dev(bio); + __entry->sector = bio->bi_iter.bi_sector; + __entry->nr_sector = bio->bi_iter.bi_size >> 9; + __entry->nr_keys = keys; + blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); + ), + + TP_printk("%d,%d %s %llu + %u keys %u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, __entry->nr_sector, + __entry->nr_keys) ); /* Btree */ diff --git a/include/trace/events/iscsi.h b/include/trace/events/iscsi.h new file mode 100644 index 000000000000..87408faf6e4e --- /dev/null +++ b/include/trace/events/iscsi.h @@ -0,0 +1,107 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM iscsi + +#if !defined(_TRACE_ISCSI_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_ISCSI_H + +#include <linux/tracepoint.h> + +/* max debug message length */ +#define ISCSI_MSG_MAX 256 + +/* + * Declare tracepoint helper function. + */ +void iscsi_dbg_trace(void (*trace)(struct device *dev, struct va_format *), + struct device *dev, const char *fmt, ...); + +/* + * Declare event class for iscsi debug messages. + */ +DECLARE_EVENT_CLASS(iscsi_log_msg, + + TP_PROTO(struct device *dev, struct va_format *vaf), + + TP_ARGS(dev, vaf), + + TP_STRUCT__entry( + __string(dname, dev_name(dev) ) + __dynamic_array(char, msg, ISCSI_MSG_MAX ) + ), + + TP_fast_assign( + __assign_str(dname, dev_name(dev)); + vsnprintf(__get_str(msg), ISCSI_MSG_MAX, vaf->fmt, *vaf->va); + ), + + TP_printk("%s: %s",__get_str(dname), __get_str(msg) + ) +); + +/* + * Define event to capture iscsi connection debug messages. + */ +DEFINE_EVENT(iscsi_log_msg, iscsi_dbg_conn, + TP_PROTO(struct device *dev, struct va_format *vaf), + + TP_ARGS(dev, vaf) +); + +/* + * Define event to capture iscsi session debug messages. 
+ */ +DEFINE_EVENT(iscsi_log_msg, iscsi_dbg_session, + TP_PROTO(struct device *dev, struct va_format *vaf), + + TP_ARGS(dev, vaf) +); + +/* + * Define event to capture iscsi error handling debug messages. + */ +DEFINE_EVENT(iscsi_log_msg, iscsi_dbg_eh, + TP_PROTO(struct device *dev, struct va_format *vaf), + + TP_ARGS(dev, vaf) +); + +/* + * Define event to capture iscsi tcp debug messages. + */ +DEFINE_EVENT(iscsi_log_msg, iscsi_dbg_tcp, + TP_PROTO(struct device *dev, struct va_format *vaf), + + TP_ARGS(dev, vaf) +); + +/* + * Define event to capture iscsi sw tcp debug messages. + */ +DEFINE_EVENT(iscsi_log_msg, iscsi_dbg_sw_tcp, + TP_PROTO(struct device *dev, struct va_format *vaf), + + TP_ARGS(dev, vaf) +); + +/* + * Define event to capture iscsi transport session debug messages. + */ +DEFINE_EVENT(iscsi_log_msg, iscsi_dbg_trans_session, + TP_PROTO(struct device *dev, struct va_format *vaf), + + TP_ARGS(dev, vaf) +); + +/* + * Define event to capture iscsi transport connection debug messages. + */ +DEFINE_EVENT(iscsi_log_msg, iscsi_dbg_trans_conn, + TP_PROTO(struct device *dev, struct va_format *vaf), + + TP_ARGS(dev, vaf) +); + +#endif /* _TRACE_ISCSI_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index ce43d340f010..8387e0af0f76 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -50,6 +50,8 @@ enum { * * IOCB_FLAG_RESFD - Set if the "aio_resfd" member of the "struct iocb" * is valid. + * IOCB_FLAG_IOPRIO - Set if the "aio_reqprio" member of the "struct iocb" + * is valid. */ #define IOCB_FLAG_RESFD (1 << 0) #define IOCB_FLAG_IOPRIO (1 << 1) diff --git a/include/uapi/linux/mmc/ioctl.h b/include/uapi/linux/mmc/ioctl.h index 45f369dc0a42..00c08120f3ba 100644 --- a/include/uapi/linux/mmc/ioctl.h +++ b/include/uapi/linux/mmc/ioctl.h @@ -5,7 +5,10 @@ #include <linux/types.h> struct mmc_ioc_cmd { - /* Implies direction of data. true = write, false = read */ + /* + * Direction of data: nonzero = write, zero = read. + * Bit 31 selects 'Reliable Write' for RPMB. + */ int write_flag; /* Application-specific command. true = precede with CMD55 */ diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index c6a984c0c881..01ac5853d9ac 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -6,7 +6,7 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -95,7 +95,7 @@ #define HFI1_CAP_SDMA_AHG (1UL << 2) /* Enable SDMA AHG support */ #define HFI1_CAP_EXTENDED_PSN (1UL << 3) /* Enable Extended PSN support */ #define HFI1_CAP_HDRSUPP (1UL << 4) /* Enable Header Suppression */ -/* 1UL << 5 unused */ +#define HFI1_CAP_TID_RDMA (1UL << 5) /* Enable TID RDMA operations */ #define HFI1_CAP_USE_SDMA_HEAD (1UL << 6) /* DMA Hdr Q tail vs. 
use CSR */ #define HFI1_CAP_MULTI_PKT_EGR (1UL << 7) /* Enable multi-packet Egr buffs*/ #define HFI1_CAP_NODROP_RHQ_FULL (1UL << 8) /* Don't drop on Hdr Q full */ @@ -106,7 +106,7 @@ #define HFI1_CAP_NO_INTEGRITY (1UL << 13) /* Enable ctxt integrity checks */ #define HFI1_CAP_PKEY_CHECK (1UL << 14) /* Enable ctxt PKey checking */ #define HFI1_CAP_STATIC_RATE_CTRL (1UL << 15) /* Allow PBC.StaticRateControl */ -/* 1UL << 16 unused */ +#define HFI1_CAP_OPFN (1UL << 16) /* Enable the OPFN protocol */ #define HFI1_CAP_SDMA_HEAD_CHECK (1UL << 17) /* SDMA head checking */ #define HFI1_CAP_EARLY_CREDIT_RETURN (1UL << 18) /* early credit return */ diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index c1f87735514f..ef3c7ec793a7 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -46,6 +46,12 @@ struct hns_roce_ib_create_cq_resp { __aligned_u64 cap_flags; }; +struct hns_roce_ib_create_srq { + __aligned_u64 buf_addr; + __aligned_u64 db_addr; + __aligned_u64 que_addr; +}; + struct hns_roce_ib_create_qp { __aligned_u64 buf_addr; __aligned_u64 db_addr; diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index 2c881aaf05c2..64f0e3aacd3f 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -63,6 +63,23 @@ enum { UVERBS_ATTR_UHW_OUT, }; +enum uverbs_methods_device { + UVERBS_METHOD_INVOKE_WRITE, + UVERBS_METHOD_INFO_HANDLES, + UVERBS_METHOD_QUERY_PORT, +}; + +enum uverbs_attrs_invoke_write_cmd_attr_ids { + UVERBS_ATTR_CORE_IN, + UVERBS_ATTR_CORE_OUT, + UVERBS_ATTR_WRITE_CMD, +}; + +enum uverbs_attrs_query_port_cmd_attr_ids { + UVERBS_ATTR_QUERY_PORT_PORT_NUM, + UVERBS_ATTR_QUERY_PORT_RESP, +}; + enum uverbs_attrs_create_cq_cmd_attr_ids { UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_ATTR_CREATE_CQ_CQE, @@ -135,6 +152,19 @@ enum uverbs_attrs_reg_dm_mr_cmd_attr_ids { enum uverbs_methods_mr { UVERBS_METHOD_DM_MR_REG, + UVERBS_METHOD_MR_DESTROY, + UVERBS_METHOD_ADVISE_MR, +}; + +enum uverbs_attrs_mr_destroy_ids { + UVERBS_ATTR_DESTROY_MR_HANDLE, +}; + +enum uverbs_attrs_advise_mr_cmd_attr_ids { + UVERBS_ATTR_ADVISE_MR_PD_HANDLE, + UVERBS_ATTR_ADVISE_MR_ADVICE, + UVERBS_ATTR_ADVISE_MR_FLAGS, + UVERBS_ATTR_ADVISE_MR_SGE_LIST, }; enum uverbs_attrs_create_counters_cmd_attr_ids { @@ -157,4 +187,58 @@ enum uverbs_methods_actions_counters_ops { UVERBS_METHOD_COUNTERS_READ, }; +enum uverbs_attrs_info_handles_id { + UVERBS_ATTR_INFO_OBJECT_ID, + UVERBS_ATTR_INFO_TOTAL_HANDLES, + UVERBS_ATTR_INFO_HANDLES_LIST, +}; + +enum uverbs_methods_pd { + UVERBS_METHOD_PD_DESTROY, +}; + +enum uverbs_attrs_pd_destroy_ids { + UVERBS_ATTR_DESTROY_PD_HANDLE, +}; + +enum uverbs_methods_mw { + UVERBS_METHOD_MW_DESTROY, +}; + +enum uverbs_attrs_mw_destroy_ids { + UVERBS_ATTR_DESTROY_MW_HANDLE, +}; + +enum uverbs_methods_xrcd { + UVERBS_METHOD_XRCD_DESTROY, +}; + +enum uverbs_attrs_xrcd_destroy_ids { + UVERBS_ATTR_DESTROY_XRCD_HANDLE, +}; + +enum uverbs_methods_ah { + UVERBS_METHOD_AH_DESTROY, +}; + +enum uverbs_attrs_ah_destroy_ids { + UVERBS_ATTR_DESTROY_AH_HANDLE, +}; + +enum uverbs_methods_rwq_ind_tbl { + UVERBS_METHOD_RWQ_IND_TBL_DESTROY, +}; + +enum uverbs_attrs_rwq_ind_tbl_destroy_ids { + UVERBS_ATTR_DESTROY_RWQ_IND_TBL_HANDLE, +}; + +enum uverbs_methods_flow { + UVERBS_METHOD_FLOW_DESTROY, +}; + +enum uverbs_attrs_flow_destroy_ids { + UVERBS_ATTR_DESTROY_FLOW_HANDLE, +}; + #endif diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 6cdf192070a2..72c7fc75f960 
100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -35,6 +35,7 @@ #define IB_USER_IOCTL_VERBS_H #include <linux/types.h> +#include <rdma/ib_user_verbs.h> #ifndef RDMA_UAPI_PTR #define RDMA_UAPI_PTR(_type, _name) __aligned_u64 _name @@ -157,4 +158,19 @@ enum ib_uverbs_read_counters_flags { IB_UVERBS_READ_COUNTERS_PREFER_CACHED = 1 << 0, }; +enum ib_uverbs_advise_mr_advice { + IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH, + IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE, +}; + +enum ib_uverbs_advise_mr_flag { + IB_UVERBS_ADVISE_MR_FLAG_FLUSH = 1 << 0, +}; + +struct ib_uverbs_query_port_resp_ex { + struct ib_uverbs_query_port_resp legacy_resp; + __u16 port_cap_flags2; + __u8 reserved[6]; +}; + #endif diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 1254b51a551a..480d9a60b68e 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -46,7 +46,7 @@ #define IB_USER_VERBS_ABI_VERSION 6 #define IB_USER_VERBS_CMD_THRESHOLD 50 -enum { +enum ib_uverbs_write_cmds { IB_USER_VERBS_CMD_GET_CONTEXT, IB_USER_VERBS_CMD_QUERY_DEVICE, IB_USER_VERBS_CMD_QUERY_PORT, @@ -164,6 +164,7 @@ struct ib_uverbs_get_context { struct ib_uverbs_get_context_resp { __u32 async_fd; __u32 num_comp_vectors; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_query_device { @@ -310,6 +311,7 @@ struct ib_uverbs_alloc_pd { struct ib_uverbs_alloc_pd_resp { __u32 pd_handle; + __u32 driver_data[0]; }; struct ib_uverbs_dealloc_pd { @@ -325,6 +327,7 @@ struct ib_uverbs_open_xrcd { struct ib_uverbs_open_xrcd_resp { __u32 xrcd_handle; + __u32 driver_data[0]; }; struct ib_uverbs_close_xrcd { @@ -345,6 +348,7 @@ struct ib_uverbs_reg_mr_resp { __u32 mr_handle; __u32 lkey; __u32 rkey; + __u32 driver_data[0]; }; struct ib_uverbs_rereg_mr { @@ -356,11 +360,13 @@ struct ib_uverbs_rereg_mr { __aligned_u64 hca_va; __u32 pd_handle; __u32 access_flags; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_rereg_mr_resp { __u32 lkey; __u32 rkey; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_dereg_mr { @@ -372,11 +378,13 @@ struct ib_uverbs_alloc_mw { __u32 pd_handle; __u8 mw_type; __u8 reserved[3]; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_alloc_mw_resp { __u32 mw_handle; __u32 rkey; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_dealloc_mw { @@ -419,6 +427,7 @@ struct ib_uverbs_ex_create_cq { struct ib_uverbs_create_cq_resp { __u32 cq_handle; __u32 cqe; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_ex_create_cq_resp { @@ -629,6 +638,7 @@ struct ib_uverbs_create_qp_resp { __u32 max_recv_sge; __u32 max_inline_data; __u32 reserved; + __u32 driver_data[0]; }; struct ib_uverbs_ex_create_qp_resp { @@ -733,9 +743,6 @@ struct ib_uverbs_ex_modify_qp { __u32 reserved; }; -struct ib_uverbs_modify_qp_resp { -}; - struct ib_uverbs_ex_modify_qp_resp { __u32 comp_mask; __u32 response_length; @@ -863,10 +870,12 @@ struct ib_uverbs_create_ah { __u32 pd_handle; __u32 reserved; struct ib_uverbs_ah_attr attr; + __aligned_u64 driver_data[0]; }; struct ib_uverbs_create_ah_resp { __u32 ah_handle; + __u32 driver_data[0]; }; struct ib_uverbs_destroy_ah { @@ -1175,6 +1184,7 @@ struct ib_uverbs_create_srq_resp { __u32 max_wr; __u32 max_sge; __u32 srqn; + __u32 driver_data[0]; }; struct ib_uverbs_modify_srq { diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 8fa9f90e2bb1..87b3198f4b5d 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -48,6 +48,7 @@ enum { 
MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC = 1 << 6, MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC = 1 << 7, MLX5_QP_FLAG_ALLOW_SCATTER_CQE = 1 << 8, + MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE = 1 << 9, }; enum { @@ -236,6 +237,7 @@ enum mlx5_ib_query_dev_resp_flags { /* Support 128B CQE compression */ MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0, MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1, + MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2, }; enum mlx5_ib_tunnel_offloads { diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 408e220034de..b8d121d457f1 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -158,6 +158,7 @@ enum mlx5_ib_create_flow_attrs { MLX5_IB_ATTR_CREATE_FLOW_MATCHER, MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, MLX5_IB_ATTR_CREATE_FLOW_TAG, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, }; enum mlx5_ib_destoy_flow_attrs { diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index f9c41bf59efc..2e18b77a817f 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -283,6 +283,9 @@ enum rdma_nldev_attr { /* * Device and port capabilities + * + * When used for port info, the first 32 bits are the CapabilityMask, + * followed by the 16-bit CapabilityMask2. */ RDMA_NLDEV_ATTR_CAP_FLAGS, /* u64 */
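To make the CapabilityMask/CapabilityMask2 packing described in the comment above concrete, here is a minimal userspace-style sketch; the helper name is invented, and the bit layout (CapabilityMask in bits 0-31, CapabilityMask2 in bits 32-47) is an assumption following the comment's ordering rather than something this header guarantees by itself.

#include <linux/types.h>

/* Hypothetical helper for a netlink consumer: splits the u64 reported in
 * RDMA_NLDEV_ATTR_CAP_FLAGS for port info back into its two parts, assuming
 * CapabilityMask occupies the low 32 bits and CapabilityMask2 the 16 bits
 * directly above them.
 */
static void example_split_port_caps(__u64 cap_flags, __u32 *cap_mask,
				    __u16 *cap_mask2)
{
	*cap_mask = (__u32)(cap_flags & 0xffffffff);
	*cap_mask2 = (__u16)((cap_flags >> 32) & 0xffff);
}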