summary | refs | log | tree | commit | diff | stats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r-- include/acpi/platform/acgcc.h 9
-rw-r--r-- include/asm-generic/cacheflush.h 6
-rw-r--r-- include/linux/ata.h 1
-rw-r--r-- include/linux/backing-dev.h 6
-rw-r--r-- include/linux/bio.h 1
-rw-r--r-- include/linux/blk-crypto-profile.h 166
-rw-r--r-- include/linux/blk-mq.h 10
-rw-r--r-- include/linux/blk_types.h 18
-rw-r--r-- include/linux/blkdev.h 94
-rw-r--r-- include/linux/bpf.h 7
-rw-r--r-- include/linux/bpf_types.h 8
-rw-r--r-- include/linux/cdrom.h 1
-rw-r--r-- include/linux/cpuhotplug.h 4
-rw-r--r-- include/linux/device-mapper.h 4
-rw-r--r-- include/linux/elfcore.h 2
-rw-r--r-- include/linux/filter.h 1
-rw-r--r-- include/linux/flex_proportions.h 9
-rw-r--r-- include/linux/genhd.h 15
-rw-r--r-- include/linux/gfp.h 22
-rw-r--r-- include/linux/highmem-internal.h 11
-rw-r--r-- include/linux/highmem.h 37
-rw-r--r-- include/linux/huge_mm.h 15
-rw-r--r-- include/linux/keyslot-manager.h 120
-rw-r--r-- include/linux/ksm.h 4
-rw-r--r-- include/linux/libata.h 15
-rw-r--r-- include/linux/memcontrol.h 264
-rw-r--r-- include/linux/memory.h 5
-rw-r--r-- include/linux/migrate.h 4
-rw-r--r-- include/linux/mlx5/driver.h 1
-rw-r--r-- include/linux/mm.h 239
-rw-r--r-- include/linux/mm_inline.h 103
-rw-r--r-- include/linux/mm_types.h 77
-rw-r--r-- include/linux/mmc/host.h 4
-rw-r--r-- include/linux/mmdebug.h 20
-rw-r--r-- include/linux/netfs.h 77
-rw-r--r-- include/linux/nvme-fc-driver.h 7
-rw-r--r-- include/linux/nvme-rdma.h 2
-rw-r--r-- include/linux/nvme.h 30
-rw-r--r-- include/linux/page-flags.h 290
-rw-r--r-- include/linux/page_idle.h 99
-rw-r--r-- include/linux/page_owner.h 8
-rw-r--r-- include/linux/page_ref.h 158
-rw-r--r-- include/linux/pagemap.h 585
-rw-r--r-- include/linux/rmap.h 10
-rw-r--r-- include/linux/sched.h 2
-rw-r--r-- include/linux/secretmem.h 2
-rw-r--r-- include/linux/skmsg.h 1
-rw-r--r-- include/linux/swap.h 17
-rw-r--r-- include/linux/tpm.h 1
-rw-r--r-- include/linux/trace_recursion.h 49
-rw-r--r-- include/linux/user_namespace.h 2
-rw-r--r-- include/linux/vmstat.h 113
-rw-r--r-- include/linux/writeback.h 9
-rw-r--r-- include/net/cfg80211.h 2
-rw-r--r-- include/net/mctp.h 2
-rw-r--r-- include/net/mptcp.h 4
-rw-r--r-- include/net/sctp/sm.h 6
-rw-r--r-- include/net/sock.h 8
-rw-r--r-- include/net/tcp.h 5
-rw-r--r-- include/net/tls.h 11
-rw-r--r-- include/net/udp.h 5
-rw-r--r-- include/scsi/scsi_cmnd.h 3
-rw-r--r-- include/trace/events/io_uring.h 61
-rw-r--r-- include/trace/events/pagemap.h 46
-rw-r--r-- include/trace/events/writeback.h 28
-rw-r--r-- include/uapi/asm-generic/fcntl.h 4
-rw-r--r-- include/uapi/linux/bcache.h 445
-rw-r--r-- include/uapi/linux/cdrom.h 19
-rw-r--r-- include/uapi/linux/io_uring.h 1
-rw-r--r-- include/uapi/linux/mctp.h 7
70 files changed, 2081 insertions, 1341 deletions
diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h
index fb172a03a753..20ecb004f5a4 100644
--- a/include/acpi/platform/acgcc.h
+++ b/include/acpi/platform/acgcc.h
@@ -22,9 +22,14 @@ typedef __builtin_va_list va_list;
#define va_arg(v, l) __builtin_va_arg(v, l)
#define va_copy(d, s) __builtin_va_copy(d, s)
#else
+#ifdef __KERNEL__
#include <linux/stdarg.h>
-#endif
-#endif
+#else
+/* Used to build acpi tools */
+#include <stdarg.h>
+#endif /* __KERNEL__ */
+#endif /* ACPI_USE_BUILTIN_STDARG */
+#endif /* ! va_arg */
#define ACPI_INLINE __inline__
diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h
index 4a674db4e1fa..fedc0dfa4877 100644
--- a/include/asm-generic/cacheflush.h
+++ b/include/asm-generic/cacheflush.h
@@ -49,9 +49,15 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
static inline void flush_dcache_page(struct page *page)
{
}
+
+static inline void flush_dcache_folio(struct folio *folio) { }
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
#endif
+#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
+void flush_dcache_folio(struct folio *folio);
+#endif
#ifndef flush_dcache_mmap_lock
static inline void flush_dcache_mmap_lock(struct address_space *mapping)
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 1b44f40c7700..199e47e97d64 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -329,6 +329,7 @@ enum {
ATA_LOG_SECURITY = 0x06,
ATA_LOG_SATA_SETTINGS = 0x08,
ATA_LOG_ZONED_INFORMATION = 0x09,
+ ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47,
/* Identify device SATA settings log:*/
ATA_LOG_DEVSLP_OFFSET = 0x30,
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 4ac7ce096013..9c14f0a8dbe5 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -64,7 +64,7 @@ static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
return atomic_long_read(&bdi->tot_write_bandwidth);
}
-static inline void __add_wb_stat(struct bdi_writeback *wb,
+static inline void wb_stat_mod(struct bdi_writeback *wb,
enum wb_stat_item item, s64 amount)
{
percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
@@ -72,12 +72,12 @@ static inline void __add_wb_stat(struct bdi_writeback *wb,
static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
- __add_wb_stat(wb, item, 1);
+ wb_stat_mod(wb, item, 1);
}
static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
- __add_wb_stat(wb, item, -1);
+ wb_stat_mod(wb, item, -1);
}
static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index c88700d1bdc3..fe6bdfbbef66 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -417,6 +417,7 @@ int bio_add_zone_append_page(struct bio *bio, struct page *page,
void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off);
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
+void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter);
void __bio_release_pages(struct bio *bio, bool mark_dirty);
extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio);
diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h
new file mode 100644
index 000000000000..bbab65bd5428
--- /dev/null
+++ b/include/linux/blk-crypto-profile.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2019 Google LLC
+ */
+
+#ifndef __LINUX_BLK_CRYPTO_PROFILE_H
+#define __LINUX_BLK_CRYPTO_PROFILE_H
+
+#include <linux/bio.h>
+#include <linux/blk-crypto.h>
+
+struct blk_crypto_profile;
+
+/**
+ * struct blk_crypto_ll_ops - functions to control inline encryption hardware
+ *
+ * Low-level operations for controlling inline encryption hardware. This
+ * interface must be implemented by storage drivers that support inline
+ * encryption. All functions may sleep, are serialized by profile->lock, and
+ * are never called while profile->dev (if set) is runtime-suspended.
+ */
+struct blk_crypto_ll_ops {
+
+ /**
+ * @keyslot_program: Program a key into the inline encryption hardware.
+ *
+ * Program @key into the specified @slot in the inline encryption
+ * hardware, overwriting any key that the keyslot may already contain.
+ * The keyslot is guaranteed to not be in-use by any I/O.
+ *
+ * This is required if the device has keyslots. Otherwise (i.e. if the
+ * device is a layered device, or if the device is real hardware that
+ * simply doesn't have the concept of keyslots) it is never called.
+ *
+ * Must return 0 on success, or -errno on failure.
+ */
+ int (*keyslot_program)(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key,
+ unsigned int slot);
+
+ /**
+ * @keyslot_evict: Evict a key from the inline encryption hardware.
+ *
+ * If the device has keyslots, this function must evict the key from the
+ * specified @slot. The slot will contain @key, but there should be no
+ * need for the @key argument to be used as @slot should be sufficient.
+ * The keyslot is guaranteed to not be in-use by any I/O.
+ *
+ * If the device doesn't have keyslots itself, this function must evict
+ * @key from any underlying devices. @slot won't be valid in this case.
+ *
+ * If there are no keyslots and no underlying devices, this function
+ * isn't required.
+ *
+ * Must return 0 on success, or -errno on failure.
+ */
+ int (*keyslot_evict)(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key,
+ unsigned int slot);
+};
+
+/**
+ * struct blk_crypto_profile - inline encryption profile for a device
+ *
+ * This struct contains a storage device's inline encryption capabilities (e.g.
+ * the supported crypto algorithms), driver-provided functions to control the
+ * inline encryption hardware (e.g. programming and evicting keys), and optional
+ * device-independent keyslot management data.
+ */
+struct blk_crypto_profile {
+
+ /* public: Drivers must initialize the following fields. */
+
+ /**
+ * @ll_ops: Driver-provided functions to control the inline encryption
+ * hardware, e.g. program and evict keys.
+ */
+ struct blk_crypto_ll_ops ll_ops;
+
+ /**
+ * @max_dun_bytes_supported: The maximum number of bytes supported for
+ * specifying the data unit number (DUN). Specifically, the range of
+ * supported DUNs is 0 through (1 << (8 * max_dun_bytes_supported)) - 1.
+ */
+ unsigned int max_dun_bytes_supported;
+
+ /**
+ * @modes_supported: Array of bitmasks that specifies whether each
+ * combination of crypto mode and data unit size is supported.
+ * Specifically, the i'th bit of modes_supported[crypto_mode] is set if
+ * crypto_mode can be used with a data unit size of (1 << i). Note that
+ * only data unit sizes that are powers of 2 can be supported.
+ */
+ unsigned int modes_supported[BLK_ENCRYPTION_MODE_MAX];
+
+ /**
+ * @dev: An optional device for runtime power management. If the driver
+ * provides this device, it will be runtime-resumed before any function
+ * in @ll_ops is called and will remain resumed during the call.
+ */
+ struct device *dev;
+
+ /* private: The following fields shouldn't be accessed by drivers. */
+
+ /* Number of keyslots, or 0 if not applicable */
+ unsigned int num_slots;
+
+ /*
+ * Serializes all calls to functions in @ll_ops as well as all changes
+ * to @slot_hashtable. This can also be taken in read mode to look up
+ * keyslots while ensuring that they can't be changed concurrently.
+ */
+ struct rw_semaphore lock;
+
+ /* List of idle slots, with least recently used slot at front */
+ wait_queue_head_t idle_slots_wait_queue;
+ struct list_head idle_slots;
+ spinlock_t idle_slots_lock;
+
+ /*
+ * Hash table which maps struct blk_crypto_key pointers to keyslots, so
+ * that we can find a key's keyslot in O(1) time rather than O(num_slots).
+ * Protected by 'lock'.
+ */
+ struct hlist_head *slot_hashtable;
+ unsigned int log_slot_ht_size;
+
+ /* Per-keyslot data */
+ struct blk_crypto_keyslot *slots;
+};
+
+int blk_crypto_profile_init(struct blk_crypto_profile *profile,
+ unsigned int num_slots);
+
+int devm_blk_crypto_profile_init(struct device *dev,
+ struct blk_crypto_profile *profile,
+ unsigned int num_slots);
+
+unsigned int blk_crypto_keyslot_index(struct blk_crypto_keyslot *slot);
+
+blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key,
+ struct blk_crypto_keyslot **slot_ptr);
+
+void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot);
+
+bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile,
+ const struct blk_crypto_config *cfg);
+
+int __blk_crypto_evict_key(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key);
+
+void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile);
+
+void blk_crypto_profile_destroy(struct blk_crypto_profile *profile);
+
+void blk_crypto_intersect_capabilities(struct blk_crypto_profile *parent,
+ const struct blk_crypto_profile *child);
+
+bool blk_crypto_has_capabilities(const struct blk_crypto_profile *target,
+ const struct blk_crypto_profile *reference);
+
+void blk_crypto_update_capabilities(struct blk_crypto_profile *dst,
+ const struct blk_crypto_profile *src);
+
+#endif /* __LINUX_BLK_CRYPTO_PROFILE_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e13780236550..8682663e7368 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -133,7 +133,7 @@ struct request {
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
struct bio_crypt_ctx *crypt_ctx;
- struct blk_ksm_keyslot *crypt_keyslot;
+ struct blk_crypto_keyslot *crypt_keyslot;
#endif
unsigned short write_hint;
@@ -567,11 +567,6 @@ struct blk_mq_ops {
unsigned int);
/**
- * @initialize_rq_fn: Called from inside blk_get_request().
- */
- void (*initialize_rq_fn)(struct request *rq);
-
- /**
* @cleanup_rq: Called before freeing one request which isn't completed
* yet, and usually for freeing the driver private data.
*/
@@ -897,9 +892,6 @@ static inline bool rq_is_sync(struct request *rq)
}
void blk_rq_init(struct request_queue *q, struct request *rq);
-void blk_put_request(struct request *rq);
-struct request *blk_get_request(struct request_queue *q, unsigned int op,
- blk_mq_req_flags_t flags);
int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
struct bio_set *bs, gfp_t gfp_mask,
int (*bio_ctr)(struct bio *, struct bio *, void *), void *data);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 1e370929c89e..fe065c394fff 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -20,8 +20,26 @@ struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);
struct bio_crypt_ctx;
+/*
+ * The basic unit of block I/O is a sector. It is used in a number of contexts
+ * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9
+ * bytes. Variables of type sector_t represent an offset or size that is a
+ * multiple of 512 bytes. Hence these two constants.
+ */
+#ifndef SECTOR_SHIFT
+#define SECTOR_SHIFT 9
+#endif
+#ifndef SECTOR_SIZE
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+#endif
+
+#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
+#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
+#define SECTOR_MASK (PAGE_SECTORS - 1)
+
struct block_device {
sector_t bd_start_sect;
+ sector_t bd_nr_sectors;
struct disk_stats __percpu *bd_stats;
unsigned long bd_stamp;
bool bd_read_only; /* read-only policy */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c7b1e9355123..9a8317757fc1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -30,7 +30,7 @@ struct pr_ops;
struct rq_qos;
struct blk_queue_stats;
struct blk_stat_callback;
-struct blk_keyslot_manager;
+struct blk_crypto_profile;
/* Must be consistent with blk_mq_poll_stats_bkt() */
#define BLK_MQ_POLL_STATS_BKTS 16
@@ -44,6 +44,14 @@ struct blk_keyslot_manager;
*/
#define BLKCG_MAX_POLS 6
+static inline int blk_validate_block_size(unsigned int bsize)
+{
+ if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
+ return -EINVAL;
+
+ return 0;
+}
+
static inline bool blk_op_is_passthrough(unsigned int op)
{
op &= REQ_OP_MASK;
@@ -150,6 +158,34 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
#endif /* CONFIG_BLK_DEV_ZONED */
+/*
+ * Independent access ranges: struct blk_independent_access_range describes
+ * a range of contiguous sectors that can be accessed using device command
+ * execution resources that are independent from the resources used for
+ * other access ranges. This is typically found with single-LUN multi-actuator
+ * HDDs where each access range is served by a different set of heads.
+ * The set of independent ranges supported by the device is defined using
+ * struct blk_independent_access_ranges. The independent ranges must not overlap
+ * and must include all sectors within the disk capacity (no sector holes
+ * allowed).
+ * For a device with multiple ranges, requests targeting sectors in different
+ * ranges can be executed in parallel. A request can straddle an access range
+ * boundary.
+ */
+struct blk_independent_access_range {
+ struct kobject kobj;
+ struct request_queue *queue;
+ sector_t sector;
+ sector_t nr_sectors;
+};
+
+struct blk_independent_access_ranges {
+ struct kobject kobj;
+ bool sysfs_registered;
+ unsigned int nr_ia_ranges;
+ struct blk_independent_access_range ia_range[];
+};
+
struct request_queue {
struct request *last_merge;
struct elevator_queue *elevator;
@@ -224,8 +260,7 @@ struct request_queue {
unsigned int dma_alignment;
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
- /* Inline crypto capabilities */
- struct blk_keyslot_manager *ksm;
+ struct blk_crypto_profile *crypto_profile;
#endif
unsigned int rq_timeout;
@@ -332,6 +367,12 @@ struct request_queue {
#define BLK_MAX_WRITE_HINTS 5
u64 write_hints[BLK_MAX_WRITE_HINTS];
+
+ /*
+ * Independent sector access ranges. This is always NULL for
+ * devices that do not have multiple independent access ranges.
+ */
+ struct blk_independent_access_ranges *ia_ranges;
};
/* Keep blk_queue_flag_name[] in sync with the definitions below */
@@ -358,7 +399,6 @@ struct request_queue {
#define QUEUE_FLAG_STATS 20 /* track IO start and completion times */
#define QUEUE_FLAG_POLL_STATS 21 /* collecting stats for hybrid polling */
#define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */
-#define QUEUE_FLAG_SCSI_PASSTHROUGH 23 /* queue supports SCSI commands */
#define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */
#define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */
#define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */
@@ -392,8 +432,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_secure_erase(q) \
(test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
-#define blk_queue_scsi_passthrough(q) \
- test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
#define blk_queue_pci_p2pdma(q) \
test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
@@ -580,23 +618,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
return bdev->bd_queue; /* this is never NULL */
}
-/*
- * The basic unit of block I/O is a sector. It is used in a number of contexts
- * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9
- * bytes. Variables of type sector_t represent an offset or size that is a
- * multiple of 512 bytes. Hence these two constants.
- */
-#ifndef SECTOR_SHIFT
-#define SECTOR_SHIFT 9
-#endif
-#ifndef SECTOR_SIZE
-#define SECTOR_SIZE (1 << SECTOR_SHIFT)
-#endif
-
-#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
-#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
-#define SECTOR_MASK (PAGE_SECTORS - 1)
-
#ifdef CONFIG_BLK_DEV_ZONED
/* Helper to convert BLK_ZONE_ZONE_XXX to its string format XXX */
@@ -699,6 +720,11 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int);
extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
+struct blk_independent_access_ranges *
+disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges);
+void disk_set_independent_access_ranges(struct gendisk *disk,
+ struct blk_independent_access_ranges *iars);
+
/*
* Elevator features for blk_queue_required_elevator_features:
*/
@@ -1142,22 +1168,31 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned lo
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
-bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q);
+bool blk_crypto_register(struct blk_crypto_profile *profile,
+ struct request_queue *q);
-void blk_ksm_unregister(struct request_queue *q);
+void blk_crypto_unregister(struct request_queue *q);
#else /* CONFIG_BLK_INLINE_ENCRYPTION */
-static inline bool blk_ksm_register(struct blk_keyslot_manager *ksm,
- struct request_queue *q)
+static inline bool blk_crypto_register(struct blk_crypto_profile *profile,
+ struct request_queue *q)
{
return true;
}
-static inline void blk_ksm_unregister(struct request_queue *q) { }
+static inline void blk_crypto_unregister(struct request_queue *q) { }
#endif /* CONFIG_BLK_INLINE_ENCRYPTION */
+enum blk_unique_id {
+ /* these match the Designator Types specified in SPC */
+ BLK_UID_T10 = 1,
+ BLK_UID_EUI64 = 2,
+ BLK_UID_NAA = 3,
+};
+
+#define NFL4_UFLG_MASK 0x0000003F
struct block_device_operations {
void (*submit_bio)(struct bio *bio);
@@ -1176,6 +1211,9 @@ struct block_device_operations {
int (*report_zones)(struct gendisk *, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
char *(*devnode)(struct gendisk *disk, umode_t *mode);
+ /* returns the length of the identifier or a negative errno: */
+ int (*get_unique_id)(struct gendisk *disk, u8 id[16],
+ enum blk_unique_id id_type);
struct module *owner;
const struct pr_ops *pr_ops;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 020a7d5bf470..3db6f6c95489 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -929,8 +929,11 @@ struct bpf_array_aux {
* stored in the map to make sure that all callers and callees have
* the same prog type and JITed flag.
*/
- enum bpf_prog_type type;
- bool jited;
+ struct {
+ spinlock_t lock;
+ enum bpf_prog_type type;
+ bool jited;
+ } owner;
/* Programs with direct jumps into programs part of this array. */
struct list_head poke_progs;
struct bpf_map *map;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 9c81724e4b98..bbe1eefa4c8a 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -101,14 +101,14 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
-#ifdef CONFIG_NET
-BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
#ifdef CONFIG_BPF_LSM
BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
+#ifdef CONFIG_NET
+BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#if defined(CONFIG_XDP_SOCKETS)
BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index c4fef00abdf3..0a89f111e00e 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -64,6 +64,7 @@ struct cdrom_device_info {
int for_data;
int (*exit)(struct cdrom_device_info *);
int mrw_mode_page;
+ __s64 last_media_change_ms;
};
struct cdrom_device_ops {
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 832d8a74fa59..991911048857 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -72,6 +72,8 @@ enum cpuhp_state {
CPUHP_SLUB_DEAD,
CPUHP_DEBUG_OBJ_DEAD,
CPUHP_MM_WRITEBACK_DEAD,
+ /* Must be after CPUHP_MM_VMSTAT_DEAD */
+ CPUHP_MM_DEMOTION_DEAD,
CPUHP_MM_VMSTAT_DEAD,
CPUHP_SOFTIRQ_DEAD,
CPUHP_NET_MVNETA_DEAD,
@@ -240,6 +242,8 @@ enum cpuhp_state {
CPUHP_AP_BASE_CACHEINFO_ONLINE,
CPUHP_AP_ONLINE_DYN,
CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30,
+ /* Must be after CPUHP_AP_ONLINE_DYN for node_states[N_CPU] update */
+ CPUHP_AP_MM_DEMOTION_ONLINE,
CPUHP_AP_X86_HPET_ONLINE,
CPUHP_AP_X86_KVM_CLK_ONLINE,
CPUHP_AP_DTPM_CPU_ONLINE,
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 114553b487ef..a7df155ea49b 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -576,9 +576,9 @@ struct dm_table *dm_swap_table(struct mapped_device *md,
struct dm_table *t);
/*
- * Table keyslot manager functions
+ * Table blk_crypto_profile functions
*/
-void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm);
+void dm_destroy_crypto_profile(struct blk_crypto_profile *profile);
/*-----------------------------------------------------------------
* Macros.
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 2aaa15779d50..957ebec35aad 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -109,7 +109,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg
#endif
}
-#if defined(CONFIG_UM) || defined(CONFIG_IA64)
+#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64)
/*
* These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out
* extra segments containing the gate DSO contents. Dumping its
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4a93c12543ee..ef03ff34234d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1051,6 +1051,7 @@ extern int bpf_jit_enable;
extern int bpf_jit_harden;
extern int bpf_jit_kallsyms;
extern long bpf_jit_limit;
+extern long bpf_jit_limit_max;
typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h
index c12df59d3f5f..3e378b1fb0bc 100644
--- a/include/linux/flex_proportions.h
+++ b/include/linux/flex_proportions.h
@@ -83,9 +83,10 @@ struct fprop_local_percpu {
int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp);
void fprop_local_destroy_percpu(struct fprop_local_percpu *pl);
-void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl);
-void __fprop_inc_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl,
- int max_frac);
+void __fprop_add_percpu(struct fprop_global *p, struct fprop_local_percpu *pl,
+ long nr);
+void __fprop_add_percpu_max(struct fprop_global *p,
+ struct fprop_local_percpu *pl, int max_frac, long nr);
void fprop_fraction_percpu(struct fprop_global *p,
struct fprop_local_percpu *pl, unsigned long *numerator,
unsigned long *denominator);
@@ -96,7 +97,7 @@ void fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl)
unsigned long flags;
local_irq_save(flags);
- __fprop_inc_percpu(p, pl);
+ __fprop_add_percpu(p, pl, 1);
local_irq_restore(flags);
}
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index c70bc5fce4db..59eabbc3a36b 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -213,6 +213,8 @@ static inline int add_disk(struct gendisk *disk)
}
extern void del_gendisk(struct gendisk *gp);
+void invalidate_disk(struct gendisk *disk);
+
void set_disk_ro(struct gendisk *disk, bool read_only);
static inline int get_disk_ro(struct gendisk *disk)
@@ -243,7 +245,12 @@ static inline sector_t get_start_sect(struct block_device *bdev)
static inline sector_t bdev_nr_sectors(struct block_device *bdev)
{
- return i_size_read(bdev->bd_inode) >> 9;
+ return bdev->bd_nr_sectors;
+}
+
+static inline loff_t bdev_nr_bytes(struct block_device *bdev)
+{
+ return bdev_nr_sectors(bdev) << SECTOR_SHIFT;
}
static inline sector_t get_capacity(struct gendisk *disk)
@@ -251,6 +258,12 @@ static inline sector_t get_capacity(struct gendisk *disk)
return bdev_nr_sectors(disk->part0);
}
+static inline u64 sb_bdev_nr_blocks(struct super_block *sb)
+{
+ return bdev_nr_sectors(sb->s_bdev) >>
+ (sb->s_blocksize_bits - SECTOR_SHIFT);
+}
+
int bdev_disk_changed(struct gendisk *disk, bool invalidate);
void blk_drop_partitions(struct gendisk *disk);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 55b2ec1f965a..3745efd21cf6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -520,15 +520,11 @@ static inline void arch_free_page(struct page *page, int order) { }
#ifndef HAVE_ARCH_ALLOC_PAGE
static inline void arch_alloc_page(struct page *page, int order) { }
#endif
-#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
-static inline int arch_make_page_accessible(struct page *page)
-{
- return 0;
-}
-#endif
struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
nodemask_t *nodemask);
+struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid,
+ nodemask_t *nodemask);
unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
nodemask_t *nodemask, int nr_pages,
@@ -570,6 +566,15 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
return __alloc_pages(gfp_mask, order, nid, NULL);
}
+static inline
+struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid)
+{
+ VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
+ VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid));
+
+ return __folio_alloc(gfp, order, nid, NULL);
+}
+
/*
* Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE,
* prefer the current CPU's closest node. Otherwise node must be valid and
@@ -586,6 +591,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
#ifdef CONFIG_NUMA
struct page *alloc_pages(gfp_t gfp, unsigned int order);
+struct folio *folio_alloc(gfp_t gfp, unsigned order);
extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
struct vm_area_struct *vma, unsigned long addr,
int node, bool hugepage);
@@ -596,6 +602,10 @@ static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
{
return alloc_pages_node(numa_node_id(), gfp_mask, order);
}
+static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
+{
+ return __folio_alloc_node(gfp, order, numa_node_id());
+}
#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
alloc_pages(gfp_mask, order)
#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index 4aa1031d3e4c..0a0b2b09b1b8 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -73,6 +73,12 @@ static inline void *kmap_local_page(struct page *page)
return __kmap_local_page_prot(page, kmap_prot);
}
+static inline void *kmap_local_folio(struct folio *folio, size_t offset)
+{
+ struct page *page = folio_page(folio, offset / PAGE_SIZE);
+ return __kmap_local_page_prot(page, kmap_prot) + offset % PAGE_SIZE;
+}
+
static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot)
{
return __kmap_local_page_prot(page, prot);
@@ -171,6 +177,11 @@ static inline void *kmap_local_page(struct page *page)
return page_address(page);
}
+static inline void *kmap_local_folio(struct folio *folio, size_t offset)
+{
+ return page_address(&folio->page) + offset;
+}
+
static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot)
{
return kmap_local_page(page);
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index b4c49f9cc379..27cdd715c5f9 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -97,6 +97,43 @@ static inline void kmap_flush_unused(void);
static inline void *kmap_local_page(struct page *page);
/**
+ * kmap_local_folio - Map a page in this folio for temporary usage
+ * @folio: The folio containing the page.
+ * @offset: The byte offset within the folio which identifies the page.
+ *
+ * Requires careful handling when nesting multiple mappings because the map
+ * management is stack based. The unmap has to be in the reverse order of
+ * the map operation::
+ *
+ * addr1 = kmap_local_folio(folio1, offset1);
+ * addr2 = kmap_local_folio(folio2, offset2);
+ * ...
+ * kunmap_local(addr2);
+ * kunmap_local(addr1);
+ *
+ * Unmapping addr1 before addr2 is invalid and causes malfunction.
+ *
+ * Contrary to kmap() mappings the mapping is only valid in the context of
+ * the caller and cannot be handed to other contexts.
+ *
+ * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the
+ * virtual address of the direct mapping. Only real highmem pages are
+ * temporarily mapped.
+ *
+ * While it is significantly faster than kmap() for the highmem case it
+ * comes with restrictions about the pointer validity. Only use when really
+ * necessary.
+ *
+ * On HIGHMEM enabled systems mapping a highmem page has the side effect of
+ * disabling migration in order to keep the virtual address stable across
+ * preemption. No caller of kmap_local_folio() can rely on this side effect.
+ *
+ * Context: Can be invoked from any context.
+ * Return: The virtual address of @offset.
+ */
+static inline void *kmap_local_folio(struct folio *folio, size_t offset);
+
+/**
* kmap_atomic - Atomically map a page for temporary usage - Deprecated!
* @page: Pointer to the page to be mapped
*
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f123e15d966e..f280f33ff223 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -251,15 +251,6 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
}
/**
- * thp_head - Head page of a transparent huge page.
- * @page: Any page (tail, head or regular) found in the page cache.
- */
-static inline struct page *thp_head(struct page *page)
-{
- return compound_head(page);
-}
-
-/**
* thp_order - Order of a transparent huge page.
* @page: Head page of a transparent huge page.
*/
@@ -336,12 +327,6 @@ static inline struct list_head *page_deferred_list(struct page *page)
#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
-static inline struct page *thp_head(struct page *page)
-{
- VM_BUG_ON_PGFLAGS(PageTail(page), page);
- return page;
-}
-
static inline unsigned int thp_order(struct page *page)
{
VM_BUG_ON_PGFLAGS(PageTail(page), page);
diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h
deleted file mode 100644
index a27605e2f826..000000000000
--- a/include/linux/keyslot-manager.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright 2019 Google LLC
- */
-
-#ifndef __LINUX_KEYSLOT_MANAGER_H
-#define __LINUX_KEYSLOT_MANAGER_H
-
-#include <linux/bio.h>
-#include <linux/blk-crypto.h>
-
-struct blk_keyslot_manager;
-
-/**
- * struct blk_ksm_ll_ops - functions to manage keyslots in hardware
- * @keyslot_program: Program the specified key into the specified slot in the
- * inline encryption hardware.
- * @keyslot_evict: Evict key from the specified keyslot in the hardware.
- * The key is provided so that e.g. dm layers can evict
- * keys from the devices that they map over.
- * Returns 0 on success, -errno otherwise.
- *
- * This structure should be provided by storage device drivers when they set up
- * a keyslot manager - this structure holds the function ptrs that the keyslot
- * manager will use to manipulate keyslots in the hardware.
- */
-struct blk_ksm_ll_ops {
- int (*keyslot_program)(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_key *key,
- unsigned int slot);
- int (*keyslot_evict)(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_key *key,
- unsigned int slot);
-};
-
-struct blk_keyslot_manager {
- /*
- * The struct blk_ksm_ll_ops that this keyslot manager will use
- * to perform operations like programming and evicting keys on the
- * device
- */
- struct blk_ksm_ll_ops ksm_ll_ops;
-
- /*
- * The maximum number of bytes supported for specifying the data unit
- * number.
- */
- unsigned int max_dun_bytes_supported;
-
- /*
- * Array of size BLK_ENCRYPTION_MODE_MAX of bitmasks that represents
- * whether a crypto mode and data unit size are supported. The i'th
- * bit of crypto_mode_supported[crypto_mode] is set iff a data unit
- * size of (1 << i) is supported. We only support data unit sizes
- * that are powers of 2.
- */
- unsigned int crypto_modes_supported[BLK_ENCRYPTION_MODE_MAX];
-
- /* Device for runtime power management (NULL if none) */
- struct device *dev;
-
- /* Here onwards are *private* fields for internal keyslot manager use */
-
- unsigned int num_slots;
-
- /* Protects programming and evicting keys from the device */
- struct rw_semaphore lock;
-
- /* List of idle slots, with least recently used slot at front */
- wait_queue_head_t idle_slots_wait_queue;
- struct list_head idle_slots;
- spinlock_t idle_slots_lock;
-
- /*
- * Hash table which maps struct *blk_crypto_key to keyslots, so that we
- * can find a key's keyslot in O(1) time rather than O(num_slots).
- * Protected by 'lock'.
- */
- struct hlist_head *slot_hashtable;
- unsigned int log_slot_ht_size;
-
- /* Per-keyslot data */
- struct blk_ksm_keyslot *slots;
-};
-
-int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots);
-
-int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm,
- unsigned int num_slots);
-
-blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_key *key,
- struct blk_ksm_keyslot **slot_ptr);
-
-unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot);
-
-void blk_ksm_put_slot(struct blk_ksm_keyslot *slot);
-
-bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_config *cfg);
-
-int blk_ksm_evict_key(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_key *key);
-
-void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm);
-
-void blk_ksm_destroy(struct blk_keyslot_manager *ksm);
-
-void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent,
- const struct blk_keyslot_manager *child);
-
-void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm);
-
-bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset,
- struct blk_keyslot_manager *ksm_subset);
-
-void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm,
- struct blk_keyslot_manager *reference_ksm);
-
-#endif /* __LINUX_KEYSLOT_MANAGER_H */
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 161e8164abcf..a38a5bca1ba5 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -52,7 +52,7 @@ struct page *ksm_might_need_to_copy(struct page *page,
struct vm_area_struct *vma, unsigned long address);
void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
-void ksm_migrate_page(struct page *newpage, struct page *oldpage);
+void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
#else /* !CONFIG_KSM */
@@ -83,7 +83,7 @@ static inline void rmap_walk_ksm(struct page *page,
{
}
-static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
+static inline void folio_migrate_ksm(struct folio *newfolio, struct folio *old)
{
}
#endif /* CONFIG_MMU */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index c0c64f03e107..236ec689056a 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -676,6 +676,18 @@ struct ata_ering {
struct ata_ering_entry ring[ATA_ERING_SIZE];
};
+struct ata_cpr {
+ u8 num;
+ u8 num_storage_elements;
+ u64 start_lba;
+ u64 num_lbas;
+};
+
+struct ata_cpr_log {
+ u8 nr_cpr;
+ struct ata_cpr cpr[];
+};
+
struct ata_device {
struct ata_link *link;
unsigned int devno; /* 0 or 1 */
@@ -735,6 +747,9 @@ struct ata_device {
u32 zac_zones_optimal_nonseq;
u32 zac_zones_max_open;
+ /* Concurrent positioning ranges */
+ struct ata_cpr_log *cpr_log;
+
/* error history */
int spdn_cnt;
/* ering is CLEAR_END, read comment above CLEAR_END */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3096c9a0ee01..e34bf0cbdf55 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -369,7 +369,7 @@ enum page_memcg_data_flags {
#define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1)
-static inline bool PageMemcgKmem(struct page *page);
+static inline bool folio_memcg_kmem(struct folio *folio);
/*
* After the initialization objcg->memcg is always pointing at
@@ -384,89 +384,95 @@ static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
}
/*
- * __page_memcg - get the memory cgroup associated with a non-kmem page
- * @page: a pointer to the page struct
+ * __folio_memcg - Get the memory cgroup associated with a non-kmem folio
+ * @folio: Pointer to the folio.
*
- * Returns a pointer to the memory cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * Returns a pointer to the memory cgroup associated with the folio,
+ * or NULL. This function assumes that the folio is known to have a
* proper memory cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages or
- * kmem pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios or
+ * kmem folios.
*/
-static inline struct mem_cgroup *__page_memcg(struct page *page)
+static inline struct mem_cgroup *__folio_memcg(struct folio *folio)
{
- unsigned long memcg_data = page->memcg_data;
+ unsigned long memcg_data = folio->memcg_data;
- VM_BUG_ON_PAGE(PageSlab(page), page);
- VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
- VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
+ VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
+ VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio);
+ VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio);
return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
- * __page_objcg - get the object cgroup associated with a kmem page
- * @page: a pointer to the page struct
+ * __folio_objcg - get the object cgroup associated with a kmem folio.
+ * @folio: Pointer to the folio.
*
- * Returns a pointer to the object cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * Returns a pointer to the object cgroup associated with the folio,
+ * or NULL. This function assumes that the folio is known to have a
* proper object cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages or
- * LRU pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios or
+ * LRU folios.
*/
-static inline struct obj_cgroup *__page_objcg(struct page *page)
+static inline struct obj_cgroup *__folio_objcg(struct folio *folio)
{
- unsigned long memcg_data = page->memcg_data;
+ unsigned long memcg_data = folio->memcg_data;
- VM_BUG_ON_PAGE(PageSlab(page), page);
- VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
- VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page);
+ VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
+ VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio);
+ VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio);
return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
}
/*
- * page_memcg - get the memory cgroup associated with a page
- * @page: a pointer to the page struct
+ * folio_memcg - Get the memory cgroup associated with a folio.
+ * @folio: Pointer to the folio.
*
- * Returns a pointer to the memory cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * Returns a pointer to the memory cgroup associated with the folio,
+ * or NULL. This function assumes that the folio is known to have a
* proper memory cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios.
*
- * For a non-kmem page any of the following ensures page and memcg binding
+ * For a non-kmem folio any of the following ensures folio and memcg binding
* stability:
*
- * - the page lock
+ * - the folio lock
* - LRU isolation
* - lock_page_memcg()
* - exclusive reference
*
- * For a kmem page a caller should hold an rcu read lock to protect memcg
- * associated with a kmem page from being released.
+ * For a kmem folio a caller should hold an rcu read lock to protect memcg
+ * associated with a kmem folio from being released.
*/
+static inline struct mem_cgroup *folio_memcg(struct folio *folio)
+{
+ if (folio_memcg_kmem(folio))
+ return obj_cgroup_memcg(__folio_objcg(folio));
+ return __folio_memcg(folio);
+}
+
static inline struct mem_cgroup *page_memcg(struct page *page)
{
- if (PageMemcgKmem(page))
- return obj_cgroup_memcg(__page_objcg(page));
- else
- return __page_memcg(page);
+ return folio_memcg(page_folio(page));
}
-/*
- * page_memcg_rcu - locklessly get the memory cgroup associated with a page
- * @page: a pointer to the page struct
+/**
+ * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio.
+ * @folio: Pointer to the folio.
*
- * Returns a pointer to the memory cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * This function assumes that the folio is known to have a
* proper memory cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios.
+ *
+ * Return: A pointer to the memory cgroup associated with the folio,
+ * or NULL.
*/
-static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
{
- unsigned long memcg_data = READ_ONCE(page->memcg_data);
+ unsigned long memcg_data = READ_ONCE(folio->memcg_data);
- VM_BUG_ON_PAGE(PageSlab(page), page);
+ VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
WARN_ON_ONCE(!rcu_read_lock_held());
if (memcg_data & MEMCG_DATA_KMEM) {
@@ -523,17 +529,18 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
#ifdef CONFIG_MEMCG_KMEM
/*
- * PageMemcgKmem - check if the page has MemcgKmem flag set
- * @page: a pointer to the page struct
+ * folio_memcg_kmem - Check if the folio has the memcg_kmem flag set.
+ * @folio: Pointer to the folio.
*
- * Checks if the page has MemcgKmem flag set. The caller must ensure that
- * the page has an associated memory cgroup. It's not safe to call this function
- * against some types of pages, e.g. slab pages.
+ * Checks if the folio has MemcgKmem flag set. The caller must ensure
+ * that the folio has an associated memory cgroup. It's not safe to call
+ * this function against some types of folios, e.g. slab folios.
*/
-static inline bool PageMemcgKmem(struct page *page)
+static inline bool folio_memcg_kmem(struct folio *folio)
{
- VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page);
- return page->memcg_data & MEMCG_DATA_KMEM;
+ VM_BUG_ON_PGFLAGS(PageTail(&folio->page), &folio->page);
+ VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJCGS, folio);
+ return folio->memcg_data & MEMCG_DATA_KMEM;
}
/*
@@ -577,7 +584,7 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
}
#else
-static inline bool PageMemcgKmem(struct page *page)
+static inline bool folio_memcg_kmem(struct folio *folio)
{
return false;
}
@@ -593,6 +600,11 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
}
#endif
+static inline bool PageMemcgKmem(struct page *page)
+{
+ return folio_memcg_kmem(page_folio(page));
+}
+
static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
{
return (memcg == root_mem_cgroup);
@@ -684,26 +696,47 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
page_counter_read(&memcg->memory);
}
-int __mem_cgroup_charge(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask);
-static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask)
+int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
+
+/**
+ * mem_cgroup_charge - Charge a newly allocated folio to a cgroup.
+ * @folio: Folio to charge.
+ * @mm: mm context of the allocating task.
+ * @gfp: Reclaim mode.
+ *
+ * Try to charge @folio to the memcg that @mm belongs to, reclaiming
+ * pages according to @gfp if necessary. If @mm is NULL, try to
+ * charge to the active memcg.
+ *
+ * Do not use this for folios allocated for swapin.
+ *
+ * Return: 0 on success. Otherwise, an error code is returned.
+ */
+static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
+ gfp_t gfp)
{
if (mem_cgroup_disabled())
return 0;
- return __mem_cgroup_charge(page, mm, gfp_mask);
+ return __mem_cgroup_charge(folio, mm, gfp);
}
int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
gfp_t gfp, swp_entry_t entry);
void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
-void __mem_cgroup_uncharge(struct page *page);
-static inline void mem_cgroup_uncharge(struct page *page)
+void __mem_cgroup_uncharge(struct folio *folio);
+
+/**
+ * mem_cgroup_uncharge - Uncharge a folio.
+ * @folio: Folio to uncharge.
+ *
+ * Uncharge a folio previously charged with mem_cgroup_charge().
+ */
+static inline void mem_cgroup_uncharge(struct folio *folio)
{
if (mem_cgroup_disabled())
return;
- __mem_cgroup_uncharge(page);
+ __mem_cgroup_uncharge(folio);
}
void __mem_cgroup_uncharge_list(struct list_head *page_list);
@@ -714,7 +747,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
__mem_cgroup_uncharge_list(page_list);
}
-void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
+void mem_cgroup_migrate(struct folio *old, struct folio *new);
/**
* mem_cgroup_lruvec - get the lru list vector for a memcg & node
@@ -753,33 +786,33 @@ out:
}
/**
- * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
- * @page: the page
+ * folio_lruvec - return lruvec for isolating/putting an LRU folio
+ * @folio: Pointer to the folio.
*
- * This function relies on page->mem_cgroup being stable.
+ * This function relies on folio->memcg_data being stable.
*/
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
+static inline struct lruvec *folio_lruvec(struct folio *folio)
{
- pg_data_t *pgdat = page_pgdat(page);
- struct mem_cgroup *memcg = page_memcg(page);
+ struct mem_cgroup *memcg = folio_memcg(folio);
- VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page);
- return mem_cgroup_lruvec(memcg, pgdat);
+ VM_WARN_ON_ONCE_FOLIO(!memcg && !mem_cgroup_disabled(), folio);
+ return mem_cgroup_lruvec(memcg, folio_pgdat(folio));
}
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
-struct lruvec *lock_page_lruvec(struct page *page);
-struct lruvec *lock_page_lruvec_irq(struct page *page);
-struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+struct lruvec *folio_lruvec_lock(struct folio *folio);
+struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
+struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
unsigned long *flags);
#ifdef CONFIG_DEBUG_VM
-void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page);
+void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio);
#else
-static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+static inline
+void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
{
}
#endif
@@ -947,6 +980,8 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
extern bool cgroup_memory_noswap;
#endif
+void folio_memcg_lock(struct folio *folio);
+void folio_memcg_unlock(struct folio *folio);
void lock_page_memcg(struct page *page);
void unlock_page_memcg(struct page *page);
@@ -1115,12 +1150,17 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
#define MEM_CGROUP_ID_SHIFT 0
#define MEM_CGROUP_ID_MAX 0
+static inline struct mem_cgroup *folio_memcg(struct folio *folio)
+{
+ return NULL;
+}
+
static inline struct mem_cgroup *page_memcg(struct page *page)
{
return NULL;
}
-static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
{
WARN_ON_ONCE(!rcu_read_lock_held());
return NULL;
@@ -1131,6 +1171,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
return NULL;
}
+static inline bool folio_memcg_kmem(struct folio *folio)
+{
+ return false;
+}
+
static inline bool PageMemcgKmem(struct page *page)
{
return false;
@@ -1179,8 +1224,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
return false;
}
-static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask)
+static inline int mem_cgroup_charge(struct folio *folio,
+ struct mm_struct *mm, gfp_t gfp)
{
return 0;
}
@@ -1195,7 +1240,7 @@ static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry)
{
}
-static inline void mem_cgroup_uncharge(struct page *page)
+static inline void mem_cgroup_uncharge(struct folio *folio)
{
}
@@ -1203,7 +1248,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
{
}
-static inline void mem_cgroup_migrate(struct page *old, struct page *new)
+static inline void mem_cgroup_migrate(struct folio *old, struct folio *new)
{
}
@@ -1213,14 +1258,14 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
return &pgdat->__lruvec;
}
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
+static inline struct lruvec *folio_lruvec(struct folio *folio)
{
- pg_data_t *pgdat = page_pgdat(page);
-
+ struct pglist_data *pgdat = folio_pgdat(folio);
return &pgdat->__lruvec;
}
-static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+static inline
+void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
{
}
@@ -1250,26 +1295,26 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
}
-static inline struct lruvec *lock_page_lruvec(struct page *page)
+static inline struct lruvec *folio_lruvec_lock(struct folio *folio)
{
- struct pglist_data *pgdat = page_pgdat(page);
+ struct pglist_data *pgdat = folio_pgdat(folio);
spin_lock(&pgdat->__lruvec.lru_lock);
return &pgdat->__lruvec;
}
-static inline struct lruvec *lock_page_lruvec_irq(struct page *page)
+static inline struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
{
- struct pglist_data *pgdat = page_pgdat(page);
+ struct pglist_data *pgdat = folio_pgdat(folio);
spin_lock_irq(&pgdat->__lruvec.lru_lock);
return &pgdat->__lruvec;
}
-static inline struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
unsigned long *flagsp)
{
- struct pglist_data *pgdat = page_pgdat(page);
+ struct pglist_data *pgdat = folio_pgdat(folio);
spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp);
return &pgdat->__lruvec;
@@ -1356,6 +1401,14 @@ static inline void unlock_page_memcg(struct page *page)
{
}
+static inline void folio_memcg_lock(struct folio *folio)
+{
+}
+
+static inline void folio_memcg_unlock(struct folio *folio)
+{
+}
+
static inline void mem_cgroup_handle_over_high(void)
{
}
@@ -1517,38 +1570,39 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
}
/* Test requires a stable page->memcg binding, see page_memcg() */
-static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec)
+static inline bool folio_matches_lruvec(struct folio *folio,
+ struct lruvec *lruvec)
{
- return lruvec_pgdat(lruvec) == page_pgdat(page) &&
- lruvec_memcg(lruvec) == page_memcg(page);
+ return lruvec_pgdat(lruvec) == folio_pgdat(folio) &&
+ lruvec_memcg(lruvec) == folio_memcg(folio);
}
/* Don't lock again iff page's lruvec locked */
-static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
+static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio,
struct lruvec *locked_lruvec)
{
if (locked_lruvec) {
- if (page_matches_lruvec(page, locked_lruvec))
+ if (folio_matches_lruvec(folio, locked_lruvec))
return locked_lruvec;
unlock_page_lruvec_irq(locked_lruvec);
}
- return lock_page_lruvec_irq(page);
+ return folio_lruvec_lock_irq(folio);
}
/* Don't lock again iff page's lruvec locked */
-static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
+static inline struct lruvec *folio_lruvec_relock_irqsave(struct folio *folio,
struct lruvec *locked_lruvec, unsigned long *flags)
{
if (locked_lruvec) {
- if (page_matches_lruvec(page, locked_lruvec))
+ if (folio_matches_lruvec(folio, locked_lruvec))
return locked_lruvec;
unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
}
- return lock_page_lruvec_irqsave(page, flags);
+ return folio_lruvec_lock_irqsave(folio, flags);
}
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -1558,17 +1612,17 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
unsigned long *pheadroom, unsigned long *pdirty,
unsigned long *pwriteback);
-void mem_cgroup_track_foreign_dirty_slowpath(struct page *page,
+void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
struct bdi_writeback *wb);
-static inline void mem_cgroup_track_foreign_dirty(struct page *page,
+static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
struct bdi_writeback *wb)
{
if (mem_cgroup_disabled())
return;
- if (unlikely(&page_memcg(page)->css != wb->memcg_css))
- mem_cgroup_track_foreign_dirty_slowpath(page, wb);
+ if (unlikely(&folio_memcg(folio)->css != wb->memcg_css))
+ mem_cgroup_track_foreign_dirty_slowpath(folio, wb);
}
void mem_cgroup_flush_foreign(struct bdi_writeback *wb);
@@ -1588,7 +1642,7 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
{
}
-static inline void mem_cgroup_track_foreign_dirty(struct page *page,
+static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
struct bdi_writeback *wb)
{
}
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 7efc0a7c14c9..182c606adb06 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -160,7 +160,10 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
#define register_hotmemory_notifier(nb) register_memory_notifier(nb)
#define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb)
#else
-#define hotplug_memory_notifier(fn, pri) ({ 0; })
+static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
+{
+ return 0;
+}
/* These aren't inline functions due to a GCC bug. */
#define register_hotmemory_notifier(nb) ({ (void)(nb); 0; })
#define unregister_hotmemory_notifier(nb) ({ (void)(nb); })
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index c8077e936691..0d2aeb9b0f66 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -57,6 +57,10 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page);
extern int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page, int extra_count);
+void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
+void folio_migrate_copy(struct folio *newfolio, struct folio *folio);
+int folio_migrate_mapping(struct address_space *mapping,
+ struct folio *newfolio, struct folio *folio, int extra_count);
#else
static inline void putback_movable_pages(struct list_head *l) {}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e23417424373..f17d2101af7a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1138,7 +1138,6 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
-bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 73a52aba448f..40ff114aaf9e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -36,10 +36,7 @@
struct mempolicy;
struct anon_vma;
struct anon_vma_chain;
-struct file_ra_state;
struct user_struct;
-struct writeback_control;
-struct bdi_writeback;
struct pt_regs;
extern int sysctl_page_lock_unfairness;
@@ -216,13 +213,6 @@ int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
loff_t *);
int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
loff_t *);
-/*
- * Any attempt to mark this function as static leads to build failure
- * when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked()
- * is referred to by BPF code. This must be visible for error injection.
- */
-int __add_to_page_cache_locked(struct page *page, struct address_space *mapping,
- pgoff_t index, gfp_t gfp, void **shadowp);
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
@@ -748,13 +738,18 @@ static inline int put_page_testzero(struct page *page)
return page_ref_dec_and_test(page);
}
+static inline int folio_put_testzero(struct folio *folio)
+{
+ return put_page_testzero(&folio->page);
+}
+
/*
* Try to grab a ref unless the page has a refcount of zero, return false if
* that is the case.
* This can be called when MMU is off so it must not access
* any of the virtual mappings.
*/
-static inline int get_page_unless_zero(struct page *page)
+static inline bool get_page_unless_zero(struct page *page)
{
return page_ref_add_unless(page, 1, 0);
}
@@ -907,7 +902,7 @@ void __put_page(struct page *page);
void put_pages_list(struct list_head *pages);
void split_page(struct page *page, unsigned int order);
-void copy_huge_page(struct page *dst, struct page *src);
+void folio_copy(struct folio *dst, struct folio *src);
/*
* Compound pages have a destructor function. Provide a
@@ -950,6 +945,20 @@ static inline unsigned int compound_order(struct page *page)
return page[1].compound_order;
}
+/**
+ * folio_order - The allocation order of a folio.
+ * @folio: The folio.
+ *
+ * A folio is composed of 2^order pages. See get_order() for the definition
+ * of order.
+ *
+ * Return: The order of the folio.
+ */
+static inline unsigned int folio_order(struct folio *folio)
+{
+ return compound_order(&folio->page);
+}
+
static inline bool hpage_pincount_available(struct page *page)
{
/*
@@ -1131,6 +1140,11 @@ static inline enum zone_type page_zonenum(const struct page *page)
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}
+static inline enum zone_type folio_zonenum(const struct folio *folio)
+{
+ return page_zonenum(&folio->page);
+}
+
#ifdef CONFIG_ZONE_DEVICE
static inline bool is_zone_device_page(const struct page *page)
{
@@ -1200,18 +1214,26 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
}
/* 127: arbitrary random number, small enough to assemble well */
-#define page_ref_zero_or_close_to_overflow(page) \
- ((unsigned int) page_ref_count(page) + 127u <= 127u)
+#define folio_ref_zero_or_close_to_overflow(folio) \
+ ((unsigned int) folio_ref_count(folio) + 127u <= 127u)
+
+/**
+ * folio_get - Increment the reference count on a folio.
+ * @folio: The folio.
+ *
+ * Context: May be called in any context, as long as you know that
+ * you have a refcount on the folio. If you do not already have one,
+ * folio_try_get() may be the right interface for you to use.
+ */
+static inline void folio_get(struct folio *folio)
+{
+ VM_BUG_ON_FOLIO(folio_ref_zero_or_close_to_overflow(folio), folio);
+ folio_ref_inc(folio);
+}
static inline void get_page(struct page *page)
{
- page = compound_head(page);
- /*
- * Getting a normal page or the head of a compound page
- * requires to already have an elevated page->_refcount.
- */
- VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page);
- page_ref_inc(page);
+ folio_get(page_folio(page));
}
bool __must_check try_grab_page(struct page *page, unsigned int flags);
@@ -1228,9 +1250,28 @@ static inline __must_check bool try_get_page(struct page *page)
return true;
}
+/**
+ * folio_put - Decrement the reference count on a folio.
+ * @folio: The folio.
+ *
+ * If the folio's reference count reaches zero, the memory will be
+ * released back to the page allocator and may be used by another
+ * allocation immediately. Do not access the memory or the struct folio
+ * after calling folio_put() unless you can be sure that it wasn't the
+ * last reference.
+ *
+ * Context: May be called in process or interrupt context, but not in NMI
+ * context. May be called while holding a spinlock.
+ */
+static inline void folio_put(struct folio *folio)
+{
+ if (folio_put_testzero(folio))
+ __put_page(&folio->page);
+}
+
static inline void put_page(struct page *page)
{
- page = compound_head(page);
+ struct folio *folio = page_folio(page);
/*
* For devmap managed pages we need to catch refcount transition from
@@ -1238,13 +1279,12 @@ static inline void put_page(struct page *page)
* need to inform the device driver through callback. See
* include/linux/memremap.h and HMM for details.
*/
- if (page_is_devmap_managed(page)) {
- put_devmap_managed_page(page);
+ if (page_is_devmap_managed(&folio->page)) {
+ put_devmap_managed_page(&folio->page);
return;
}
- if (put_page_testzero(page))
- __put_page(page);
+ folio_put(folio);
}
/*
@@ -1379,6 +1419,11 @@ static inline int page_to_nid(const struct page *page)
}
#endif
+/* Return page_to_nid() of the page embedded at the start of @folio. */
+static inline int folio_nid(const struct folio *folio)
+{
+	const struct page *head = &folio->page;
+
+	return page_to_nid(head);
+}
+
#ifdef CONFIG_NUMA_BALANCING
static inline int cpu_pid_to_cpupid(int cpu, int pid)
{
@@ -1546,6 +1591,16 @@ static inline pg_data_t *page_pgdat(const struct page *page)
return NODE_DATA(page_to_nid(page));
}
+/* Return page_zone() of the page embedded at the start of @folio. */
+static inline struct zone *folio_zone(const struct folio *folio)
+{
+	const struct page *head = &folio->page;
+
+	return page_zone(head);
+}
+
+/* Return page_pgdat() of the page embedded at the start of @folio. */
+static inline pg_data_t *folio_pgdat(const struct folio *folio)
+{
+	const struct page *head = &folio->page;
+
+	return page_pgdat(head);
+}
+
#ifdef SECTION_IN_PAGE_FLAGS
static inline void set_page_section(struct page *page, unsigned long section)
{
@@ -1559,6 +1614,20 @@ static inline unsigned long page_to_section(const struct page *page)
}
#endif
+/**
+ * folio_pfn - Look up the Page Frame Number of a folio.
+ * @folio: The folio.
+ *
+ * A folio may be made up of several pages, whose Page Frame Numbers
+ * are consecutive.
+ *
+ * Return: The Page Frame Number of the first page in the folio.
+ */
+static inline unsigned long folio_pfn(struct folio *folio)
+{
+	struct page *head = &folio->page;
+
+	return page_to_pfn(head);
+}
+
/* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */
#ifdef CONFIG_MIGRATION
static inline bool is_pinnable_page(struct page *page)
@@ -1595,6 +1664,89 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
#endif
}
+/**
+ * folio_nr_pages - Count the pages that make up the folio.
+ * @folio: The folio.
+ *
+ * Return: A positive power of two.
+ */
+static inline long folio_nr_pages(struct folio *folio)
+{
+	struct page *head = &folio->page;
+
+	return compound_nr(head);
+}
+
+/**
+ * folio_next - Step to the folio that physically follows this one.
+ * @folio: The folio we're currently operating on.
+ *
+ * Use this to walk physically contiguous memory that may cover more
+ * than one folio (eg a &struct bio_vec).  It must not be used on memory
+ * that is only virtually contiguous, because those folios are almost
+ * certainly not adjacent to each other.  This is the folio equivalent
+ * to writing ``page++``.
+ *
+ * Context: We assume that the folios are refcounted and/or locked at a
+ * higher level and do not adjust the reference counts.
+ * Return: The next struct folio.
+ */
+static inline struct folio *folio_next(struct folio *folio)
+{
+	long nr = folio_nr_pages(folio);
+	struct page *next = folio_page(folio, nr);
+
+	/* The page just past this folio is taken as the start of the next. */
+	return (struct folio *)next;
+}
+
+/**
+ * folio_shift - The base-2 logarithm of the folio's size in bytes.
+ * @folio: The folio.
+ *
+ * The number of bytes a folio represents is always a power of two;
+ * this function reports which power.  See also folio_size() and
+ * folio_order().
+ *
+ * Context: The caller should have a reference on the folio to prevent
+ * it from being split.  It is not necessary for the folio to be locked.
+ * Return: The base-2 logarithm of the size of this folio.
+ */
+static inline unsigned int folio_shift(struct folio *folio)
+{
+	unsigned int order = folio_order(folio);
+
+	return PAGE_SHIFT + order;
+}
+
+/**
+ * folio_size - The size of a folio in bytes.
+ * @folio: The folio.
+ *
+ * Context: The caller should have a reference on the folio to prevent
+ * it from being split.  It is not necessary for the folio to be locked.
+ * Return: The number of bytes in this folio.
+ */
+static inline size_t folio_size(struct folio *folio)
+{
+	unsigned int order = folio_order(folio);
+
+	return PAGE_SIZE << order;
+}
+
+#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
+/*
+ * Fallback used when HAVE_ARCH_MAKE_PAGE_ACCESSIBLE is not defined:
+ * does nothing with @page and always returns 0.
+ */
+static inline int arch_make_page_accessible(struct page *page)
+{
+ return 0;
+}
+#endif
+
+#ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
+/*
+ * Fallback used when HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE is not defined:
+ * calls arch_make_page_accessible() on each page of @folio in order and
+ * stops at the first call that returns non-zero.
+ *
+ * Returns 0 if every call returned 0, otherwise the first non-zero
+ * value returned by arch_make_page_accessible().
+ */
+static inline int arch_make_folio_accessible(struct folio *folio)
+{
+	/* Start at 0 so the result is defined even if the loop never runs. */
+	int ret = 0;
+	long i, nr = folio_nr_pages(folio);
+
+	for (i = 0; i < nr; i++) {
+		ret = arch_make_page_accessible(folio_page(folio, i));
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+#endif
+
/*
* Some inline functions in vmstat.h depend on page_zone()
*/
@@ -1635,19 +1787,6 @@ void page_address_init(void);
extern void *page_rmapping(struct page *page);
extern struct anon_vma *page_anon_vma(struct page *page);
-extern struct address_space *page_mapping(struct page *page);
-
-extern struct address_space *__page_file_mapping(struct page *);
-
-static inline
-struct address_space *page_file_mapping(struct page *page)
-{
- if (unlikely(PageSwapCache(page)))
- return __page_file_mapping(page);
-
- return page->mapping;
-}
-
extern pgoff_t __page_file_index(struct page *page);
/*
@@ -1662,7 +1801,7 @@ static inline pgoff_t page_index(struct page *page)
}
bool page_mapped(struct page *page);
-struct address_space *page_mapping(struct page *page);
+bool folio_mapped(struct folio *folio);
/*
* Return true only if the page has been allocated with
@@ -1700,6 +1839,7 @@ extern void pagefault_out_of_memory(void);
#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
#define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1))
+#define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1))
/*
* Flags passed to show_mem() and show_free_areas() to suppress output in
@@ -1854,20 +1994,9 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
extern void do_invalidatepage(struct page *page, unsigned int offset,
unsigned int length);
-int redirty_page_for_writepage(struct writeback_control *wbc,
- struct page *page);
-void account_page_cleaned(struct page *page, struct address_space *mapping,
- struct bdi_writeback *wb);
-int set_page_dirty(struct page *page);
+bool folio_mark_dirty(struct folio *folio);
+bool set_page_dirty(struct page *page);
int set_page_dirty_lock(struct page *page);
-void __cancel_dirty_page(struct page *page);
-static inline void cancel_dirty_page(struct page *page)
-{
- /* Avoid atomic ops, locking, etc. when not actually needed. */
- if (PageDirty(page))
- __cancel_dirty_page(page);
-}
-int clear_page_dirty_for_io(struct page *page);
int get_cmdline(struct task_struct *task, char *buffer, int buflen);
@@ -2659,10 +2788,6 @@ extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
-/* mm/page-writeback.c */
-int __must_check write_one_page(struct page *page);
-void task_dirty_inc(struct task_struct *tsk);
-
extern unsigned long stack_guard_gap;
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 355ea1ee32bd..e2ec68b0515c 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -6,27 +6,33 @@
#include <linux/swap.h>
/**
- * page_is_file_lru - should the page be on a file LRU or anon LRU?
- * @page: the page to test
- *
- * Returns 1 if @page is a regular filesystem backed page cache page or a lazily
- * freed anonymous page (e.g. via MADV_FREE). Returns 0 if @page is a normal
- * anonymous page, a tmpfs page or otherwise ram or swap backed page. Used by
- * functions that manipulate the LRU lists, to sort a page onto the right LRU
- * list.
+ * folio_is_file_lru - Should the folio be on a file LRU or anon LRU?
+ * @folio: The folio to test.
*
* We would like to get this info without a page flag, but the state
- * needs to survive until the page is last deleted from the LRU, which
+ * needs to survive until the folio is last deleted from the LRU, which
* could be as far down as __page_cache_release.
+ *
+ * Return: An integer (not a boolean!) used to sort a folio onto the
+ * right LRU list and to account folios correctly.
+ * 1 if @folio is a regular filesystem backed page cache folio
+ * or a lazily freed anonymous folio (e.g. via MADV_FREE).
+ * 0 if @folio is a normal anonymous folio, a tmpfs folio or otherwise
+ * ram or swap backed folio.
*/
+static inline int folio_is_file_lru(struct folio *folio)
+{
+ return !folio_test_swapbacked(folio);
+}
+
static inline int page_is_file_lru(struct page *page)
{
- return !PageSwapBacked(page);
+ return folio_is_file_lru(page_folio(page));
}
static __always_inline void update_lru_size(struct lruvec *lruvec,
enum lru_list lru, enum zone_type zid,
- int nr_pages)
+ long nr_pages)
{
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@@ -39,69 +45,94 @@ static __always_inline void update_lru_size(struct lruvec *lruvec,
}
/**
- * __clear_page_lru_flags - clear page lru flags before releasing a page
- * @page: the page that was on lru and now has a zero reference
+ * __folio_clear_lru_flags - Clear page lru flags before releasing a page.
+ * @folio: The folio that was on lru and now has a zero reference.
*/
-static __always_inline void __clear_page_lru_flags(struct page *page)
+static __always_inline void __folio_clear_lru_flags(struct folio *folio)
{
- VM_BUG_ON_PAGE(!PageLRU(page), page);
+ VM_BUG_ON_FOLIO(!folio_test_lru(folio), folio);
- __ClearPageLRU(page);
+ __folio_clear_lru(folio);
/* this shouldn't happen, so leave the flags to bad_page() */
- if (PageActive(page) && PageUnevictable(page))
+ if (folio_test_active(folio) && folio_test_unevictable(folio))
return;
- __ClearPageActive(page);
- __ClearPageUnevictable(page);
+ __folio_clear_active(folio);
+ __folio_clear_unevictable(folio);
+}
+
+static __always_inline void __clear_page_lru_flags(struct page *page)
+{
+ __folio_clear_lru_flags(page_folio(page));
}
/**
- * page_lru - which LRU list should a page be on?
- * @page: the page to test
+ * folio_lru_list - Which LRU list should a folio be on?
+ * @folio: The folio to test.
*
- * Returns the LRU list a page should be on, as an index
+ * Return: The LRU list a folio should be on, as an index
* into the array of LRU lists.
*/
-static __always_inline enum lru_list page_lru(struct page *page)
+static __always_inline enum lru_list folio_lru_list(struct folio *folio)
{
enum lru_list lru;
- VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
+ VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio);
- if (PageUnevictable(page))
+ if (folio_test_unevictable(folio))
return LRU_UNEVICTABLE;
- lru = page_is_file_lru(page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
- if (PageActive(page))
+ lru = folio_is_file_lru(folio) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
+ if (folio_test_active(folio))
lru += LRU_ACTIVE;
return lru;
}
+static __always_inline
+void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio)
+{
+ enum lru_list lru = folio_lru_list(folio);
+
+ update_lru_size(lruvec, lru, folio_zonenum(folio),
+ folio_nr_pages(folio));
+ list_add(&folio->lru, &lruvec->lists[lru]);
+}
+
static __always_inline void add_page_to_lru_list(struct page *page,
struct lruvec *lruvec)
{
- enum lru_list lru = page_lru(page);
+ lruvec_add_folio(lruvec, page_folio(page));
+}
- update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
- list_add(&page->lru, &lruvec->lists[lru]);
+static __always_inline
+void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio)
+{
+ enum lru_list lru = folio_lru_list(folio);
+
+ update_lru_size(lruvec, lru, folio_zonenum(folio),
+ folio_nr_pages(folio));
+ list_add_tail(&folio->lru, &lruvec->lists[lru]);
}
static __always_inline void add_page_to_lru_list_tail(struct page *page,
struct lruvec *lruvec)
{
- enum lru_list lru = page_lru(page);
+ lruvec_add_folio_tail(lruvec, page_folio(page));
+}
- update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
- list_add_tail(&page->lru, &lruvec->lists[lru]);
+static __always_inline
+void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio)
+{
+ list_del(&folio->lru);
+ update_lru_size(lruvec, folio_lru_list(folio), folio_zonenum(folio),
+ -folio_nr_pages(folio));
}
static __always_inline void del_page_from_lru_list(struct page *page,
struct lruvec *lruvec)
{
- list_del(&page->lru);
- update_lru_size(lruvec, page_lru(page), page_zonenum(page),
- -thp_nr_pages(page));
+ lruvec_del_folio(lruvec, page_folio(page));
}
#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7f8ee09c711f..82dab23205c3 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -239,6 +239,72 @@ struct page {
#endif
} _struct_page_alignment;
+/**
+ * struct folio - Represents a contiguous set of bytes.
+ * @flags: Identical to the page flags.
+ * @lru: Least Recently Used list; tracks how recently this folio was used.
+ * @mapping: The file this folio belongs to, or refers to the anon_vma for
+ * anonymous memory.
+ * @index: Offset within the file, in units of pages. For anonymous memory,
+ * this is the index from the beginning of the mmap.
+ * @private: Filesystem per-folio data (see folio_attach_private()).
+ * Used for swp_entry_t if folio_test_swapcache().
+ * @_mapcount: Do not access this member directly. Use folio_mapcount() to
+ * find out how many times this folio is mapped by userspace.
+ * @_refcount: Do not access this member directly. Use folio_ref_count()
+ * to find how many references there are to this folio.
+ * @memcg_data: Memory Control Group data.
+ *
+ * A folio is a physically, virtually and logically contiguous set
+ * of bytes. It is a power-of-two in size, and it is aligned to that
+ * same power-of-two. It is at least as large as %PAGE_SIZE. If it is
+ * in the page cache, it is at a file offset which is a multiple of that
+ * power-of-two. It may be mapped into userspace at an address which is
+ * at an arbitrary page offset, but its kernel virtual address is aligned
+ * to its size.
+ */
+struct folio {
+ /* private: don't document the anon union */
+ union {
+ struct {
+ /* public: */
+ unsigned long flags;
+ struct list_head lru;
+ struct address_space *mapping;
+ pgoff_t index;
+ void *private;
+ atomic_t _mapcount;
+ atomic_t _refcount;
+#ifdef CONFIG_MEMCG
+ unsigned long memcg_data;
+#endif
+ /* private: the union with struct page is transitional */
+ };
+ struct page page;
+ };
+};
+
+/*
+ * Build-time layout checks: struct folio must be exactly the size of
+ * struct page, and FOLIO_MATCH(pg, fl) asserts that member @pg of
+ * struct page sits at the same offset as member @fl of struct folio.
+ * Note that page->compound_head is checked against folio->lru.
+ * The helper macro is undefined again once the checks are done.
+ */
+static_assert(sizeof(struct page) == sizeof(struct folio));
+#define FOLIO_MATCH(pg, fl) \
+ static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl))
+FOLIO_MATCH(flags, flags);
+FOLIO_MATCH(lru, lru);
+FOLIO_MATCH(compound_head, lru);
+FOLIO_MATCH(index, index);
+FOLIO_MATCH(private, private);
+FOLIO_MATCH(_mapcount, _mapcount);
+FOLIO_MATCH(_refcount, _refcount);
+#ifdef CONFIG_MEMCG
+FOLIO_MATCH(memcg_data, memcg_data);
+#endif
+#undef FOLIO_MATCH
+
+/*
+ * Return the address of the compound_mapcount member of the struct page
+ * that immediately follows the folio's first page.
+ */
+static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
+{
+	struct page *head = &folio->page;
+
+	return &head[1].compound_mapcount;
+}
+
static inline atomic_t *compound_mapcount_ptr(struct page *page)
{
return &page[1].compound_mapcount;
@@ -257,6 +323,12 @@ static inline atomic_t *compound_pincount_ptr(struct page *page)
#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK)
#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE)
+/*
+ * page_private can be used on tail pages. However, PagePrivate is only
+ * checked by the VM on the head page. So page_private on the tail pages
+ * should be used for data that's ancillary to the head page (eg attaching
+ * buffer heads to tail pages after attaching buffer heads to the head page)
+ */
#define page_private(page) ((page)->private)
static inline void set_page_private(struct page *page, unsigned long private)
@@ -264,6 +336,11 @@ static inline void set_page_private(struct page *page, unsigned long private)
page->private = private;
}
+/* Return the pointer currently stored in @folio's private member. */
+static inline void *folio_get_private(struct folio *folio)
+{
+	void *data = folio->private;
+
+	return data;
+}
+
struct page_frag_cache {
void * va;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0c0c9a0fdf57..52eae8c45b8d 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -15,7 +15,7 @@
#include <linux/mmc/card.h>
#include <linux/mmc/pm.h>
#include <linux/dma-direction.h>
-#include <linux/keyslot-manager.h>
+#include <linux/blk-crypto-profile.h>
struct mmc_ios {
unsigned int clock; /* clock rate */
@@ -492,7 +492,7 @@ struct mmc_host {
/* Inline encryption support */
#ifdef CONFIG_MMC_CRYPTO
- struct blk_keyslot_manager ksm;
+ struct blk_crypto_profile crypto_profile;
#endif
/* Host Software Queue support */
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 1935d4c72d10..d7285f8148a3 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -22,6 +22,13 @@ void dump_mm(const struct mm_struct *mm);
BUG(); \
} \
} while (0)
+/*
+ * If @cond is true, dump the page embedded in @folio and BUG().
+ * @folio is parenthesised in the expansion so that any pointer-valued
+ * expression (e.g. "a ? b : c") can be passed, not just a plain name.
+ */
+#define VM_BUG_ON_FOLIO(cond, folio)					\
+	do {								\
+		if (unlikely(cond)) {					\
+			dump_page(&(folio)->page, "VM_BUG_ON_FOLIO(" __stringify(cond)")");\
+			BUG();						\
+		}							\
+	} while (0)
#define VM_BUG_ON_VMA(cond, vma) \
do { \
if (unlikely(cond)) { \
@@ -47,6 +54,17 @@ void dump_mm(const struct mm_struct *mm);
} \
unlikely(__ret_warn_once); \
})
+/*
+ * The first time @cond is true at this call site, dump the page embedded
+ * in @folio and WARN; later hits are silent.  Evaluates to the truth
+ * value of @cond.  @folio is parenthesised in the expansion so that any
+ * pointer-valued expression can be passed, not just a plain name.
+ */
+#define VM_WARN_ON_ONCE_FOLIO(cond, folio)	({			\
+	static bool __section(".data.once") __warned;			\
+	int __ret_warn_once = !!(cond);					\
+									\
+	if (unlikely(__ret_warn_once && !__warned)) {			\
+		dump_page(&(folio)->page, "VM_WARN_ON_ONCE_FOLIO(" __stringify(cond)")");\
+		__warned = true;					\
+		WARN_ON(1);						\
+	}								\
+	unlikely(__ret_warn_once);					\
+})
#define VM_WARN_ON(cond) (void)WARN_ON(cond)
#define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
@@ -55,11 +73,13 @@ void dump_mm(const struct mm_struct *mm);
#else
#define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
+#define VM_BUG_ON_FOLIO(cond, folio) VM_BUG_ON(cond)
#define VM_BUG_ON_VMA(cond, vma) VM_BUG_ON(cond)
#define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond)
#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ON_ONCE_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
#endif
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 5d6a4158a9a6..12c4177f7703 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -22,6 +22,7 @@
* Overload PG_private_2 to give us PG_fscache - this is used to indicate that
* a page is currently backed by a local disk cache
*/
+#define folio_test_fscache(folio) folio_test_private_2(folio)
#define PageFsCache(page) PagePrivate2((page))
#define SetPageFsCache(page) SetPagePrivate2((page))
#define ClearPageFsCache(page) ClearPagePrivate2((page))
@@ -29,60 +30,80 @@
#define TestClearPageFsCache(page) TestClearPagePrivate2((page))
/**
- * set_page_fscache - Set PG_fscache on a page and take a ref
- * @page: The page.
+ * folio_start_fscache - Start an fscache write on a folio.
+ * @folio: The folio.
*
- * Set the PG_fscache (PG_private_2) flag on a page and take the reference
- * needed for the VM to handle its lifetime correctly. This sets the flag and
- * takes the reference unconditionally, so care must be taken not to set the
- * flag again if it's already set.
+ * Call this function before writing a folio to a local cache. Starting a
+ * second write before the first one finishes is not allowed.
*/
-static inline void set_page_fscache(struct page *page)
+static inline void folio_start_fscache(struct folio *folio)
{
- set_page_private_2(page);
+ VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio);
+ folio_get(folio);
+ folio_set_private_2(folio);
}
/**
- * end_page_fscache - Clear PG_fscache and release any waiters
- * @page: The page
- *
- * Clear the PG_fscache (PG_private_2) bit on a page and wake up any sleepers
- * waiting for this. The page ref held for PG_private_2 being set is released.
+ * folio_end_fscache - End an fscache write on a folio.
+ * @folio: The folio.
*
- * This is, for example, used when a netfs page is being written to a local
- * disk cache, thereby allowing writes to the cache for the same page to be
- * serialised.
+ * Call this function after the folio has been written to the local cache.
+ * This will wake any sleepers waiting on this folio.
*/
-static inline void end_page_fscache(struct page *page)
+static inline void folio_end_fscache(struct folio *folio)
{
- end_page_private_2(page);
+ folio_end_private_2(folio);
}
/**
- * wait_on_page_fscache - Wait for PG_fscache to be cleared on a page
- * @page: The page to wait on
+ * folio_wait_fscache - Wait for an fscache write on this folio to end.
+ * @folio: The folio.
*
- * Wait for PG_fscache (aka PG_private_2) to be cleared on a page.
+ * If this folio is currently being written to a local cache, wait for
+ * the write to finish. Another write may start after this one finishes,
+ * unless the caller holds the folio lock.
*/
-static inline void wait_on_page_fscache(struct page *page)
+static inline void folio_wait_fscache(struct folio *folio)
{
- wait_on_page_private_2(page);
+ folio_wait_private_2(folio);
}
/**
- * wait_on_page_fscache_killable - Wait for PG_fscache to be cleared on a page
- * @page: The page to wait on
+ * folio_wait_fscache_killable - Wait for an fscache write on this folio to end.
+ * @folio: The folio.
*
- * Wait for PG_fscache (aka PG_private_2) to be cleared on a page or until a
- * fatal signal is received by the calling task.
+ * If this folio is currently being written to a local cache, wait
+ * for the write to finish or for a fatal signal to be received.
+ * Another write may start after this one finishes, unless the caller
+ * holds the folio lock.
*
* Return:
* - 0 if successful.
* - -EINTR if a fatal signal was encountered.
*/
+static inline int folio_wait_fscache_killable(struct folio *folio)
+{
+ return folio_wait_private_2_killable(folio);
+}
+
+/* Page-based wrapper: run folio_start_fscache() on the folio holding @page. */
+static inline void set_page_fscache(struct page *page)
+{
+	struct folio *folio = page_folio(page);
+
+	folio_start_fscache(folio);
+}
+
+/* Page-based wrapper: run folio_end_private_2() on the folio holding @page. */
+static inline void end_page_fscache(struct page *page)
+{
+	struct folio *folio = page_folio(page);
+
+	folio_end_private_2(folio);
+}
+
+/* Page-based wrapper: run folio_wait_private_2() on the folio holding @page. */
+static inline void wait_on_page_fscache(struct page *page)
+{
+	struct folio *folio = page_folio(page);
+
+	folio_wait_private_2(folio);
+}
+
static inline int wait_on_page_fscache_killable(struct page *page)
{
- return wait_on_page_private_2_killable(page);
+ return folio_wait_private_2_killable(page_folio(page));
}
enum netfs_read_source {
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 2a38f2b477a5..cb909edb76c4 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -7,6 +7,7 @@
#define _NVME_FC_DRIVER_H 1
#include <linux/scatterlist.h>
+#include <linux/blk-mq.h>
/*
@@ -497,6 +498,8 @@ struct nvme_fc_port_template {
int (*xmt_ls_rsp)(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *rport,
struct nvmefc_ls_rsp *ls_rsp);
+ void (*map_queues)(struct nvme_fc_local_port *localport,
+ struct blk_mq_queue_map *map);
u32 max_hw_queues;
u16 max_sgl_segments;
@@ -779,6 +782,10 @@ struct nvmet_fc_target_port {
* LS received.
* Entrypoint is Mandatory.
*
+ * @map_queues: This function lets the driver expose the queue mapping
+ * to the block layer.
+ * Entrypoint is Optional.
+ *
* @fcp_op: Called to perform a data transfer or transmit a response.
* The nvmefc_tgt_fcp_req structure is the same LLDD-supplied
* exchange structure specified in the nvmet_fc_rcv_fcp_req() call
diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h
index 3ec8e50efa16..4dd7e6fe92fb 100644
--- a/include/linux/nvme-rdma.h
+++ b/include/linux/nvme-rdma.h
@@ -6,6 +6,8 @@
#ifndef _LINUX_NVME_RDMA_H
#define _LINUX_NVME_RDMA_H
+#define NVME_RDMA_MAX_QUEUE_SIZE 128
+
enum nvme_rdma_cm_fmt {
NVME_RDMA_CM_FMT_1_0 = 0x0,
};
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b7c4c4130b65..855dd9b3e84b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -27,8 +27,20 @@
#define NVME_NSID_ALL 0xffffffff
enum nvme_subsys_type {
- NVME_NQN_DISC = 1, /* Discovery type target subsystem */
- NVME_NQN_NVME = 2, /* NVME type target subsystem */
+ /* Referral to another discovery type target subsystem */
+ NVME_NQN_DISC = 1,
+
+ /* NVME type target subsystem */
+ NVME_NQN_NVME = 2,
+
+ /* Current discovery type target subsystem */
+ NVME_NQN_CURR = 3,
+};
+
+enum nvme_ctrl_type {
+ NVME_CTRL_IO = 1, /* I/O controller */
+ NVME_CTRL_DISC = 2, /* Discovery controller */
+ NVME_CTRL_ADMIN = 3, /* Administrative controller */
};
/* Address Family codes for Discovery Log Page entry ADRFAM field */
@@ -244,7 +256,9 @@ struct nvme_id_ctrl {
__le32 rtd3e;
__le32 oaes;
__le32 ctratt;
- __u8 rsvd100[28];
+ __u8 rsvd100[11];
+ __u8 cntrltype;
+ __u8 fguid[16];
__le16 crdt1;
__le16 crdt2;
__le16 crdt3;
@@ -312,6 +326,7 @@ struct nvme_id_ctrl {
};
enum {
+ NVME_CTRL_CMIC_MULTI_PORT = 1 << 0,
NVME_CTRL_CMIC_MULTI_CTRL = 1 << 1,
NVME_CTRL_CMIC_ANA = 1 << 3,
NVME_CTRL_ONCS_COMPARE = 1 << 0,
@@ -1303,6 +1318,12 @@ struct nvmf_common_command {
#define MAX_DISC_LOGS 255
+/* Discovery log page entry flags (EFLAGS): */
+enum {
+ NVME_DISC_EFLAGS_EPCSD = (1 << 1),
+ NVME_DISC_EFLAGS_DUPRETINFO = (1 << 0),
+};
+
/* Discovery log page entry */
struct nvmf_disc_rsp_page_entry {
__u8 trtype;
@@ -1312,7 +1333,8 @@ struct nvmf_disc_rsp_page_entry {
__le16 portid;
__le16 cntlid;
__le16 asqsz;
- __u8 resv8[22];
+ __le16 eflags;
+ __u8 resv10[20];
char trsvcid[NVMF_TRSVCID_SIZE];
__u8 resv64[192];
char subnqn[NVMF_NQN_FIELD_LEN];
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index a558d67ee86f..d8623d6e1141 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -143,6 +143,8 @@ enum pageflags {
#endif
__NR_PAGEFLAGS,
+ PG_readahead = PG_reclaim,
+
/* Filesystems */
PG_checked = PG_owner_priv_1,
@@ -171,6 +173,15 @@ enum pageflags {
/* Compound pages. Stored in first tail page's flags */
PG_double_map = PG_workingset,
+#ifdef CONFIG_MEMORY_FAILURE
+ /*
+ * Compound pages. Stored in first tail page's flags.
+ * Indicates that at least one subpage is hwpoisoned in the
+ * THP.
+ */
+ PG_has_hwpoisoned = PG_mappedtodisk,
+#endif
+
/* non-lru isolated movable page */
PG_isolated = PG_reclaim,
@@ -193,6 +204,34 @@ static inline unsigned long _compound_head(const struct page *page)
#define compound_head(page) ((typeof(page))_compound_head(page))
+/**
+ * page_folio - Converts from page to folio.
+ * @p: The page.
+ *
+ * Every page is part of a folio. This function cannot be called on a
+ * NULL pointer.
+ *
+ * The result keeps the constness of @p: a const struct page pointer
+ * yields a const struct folio pointer.
+ *
+ * Context: No reference, nor lock is required on @p. If the caller
+ * does not hold a reference, this call may race with a folio split, so
+ * it should re-check the folio still contains this page after gaining
+ * a reference on the folio.
+ * Return: The folio which contains this page.
+ */
+#define page_folio(p) (_Generic((p), \
+ const struct page *: (const struct folio *)_compound_head(p), \
+ struct page *: (struct folio *)_compound_head(p)))
+
+/**
+ * folio_page - Return a page from a folio.
+ * @folio: The folio.
+ * @n: The page number to return.
+ *
+ * @n is relative to the start of the folio. This function does not
+ * check that the page number lies within @folio; the caller is presumed
+ * to have a reference to the page.
+ *
+ * Return: The result of nth_page() applied to the folio's first page
+ * and @n.
+ */
+#define folio_page(folio, n) nth_page(&(folio)->page, n)
+
static __always_inline int PageTail(struct page *page)
{
return READ_ONCE(page->compound_head) & 1;
@@ -217,6 +256,15 @@ static inline void page_init_poison(struct page *page, size_t size)
}
#endif
+/*
+ * folio_flags - Return a pointer to the flags word of page @n of @folio.
+ * @folio: The folio.
+ * @n: Index of the page, counted from the folio's first page.
+ *
+ * The VM_BUG_ON_PGFLAGS() checks assert that @folio does not point at a
+ * tail page, and that a non-zero @n is only requested when PG_head is
+ * set on the first page.
+ */
+static unsigned long *folio_flags(struct folio *folio, unsigned n)
+{
+ struct page *page = &folio->page;
+
+ VM_BUG_ON_PGFLAGS(PageTail(page), page);
+ VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page);
+ return &page[n].flags;
+}
+
/*
* Page flags policies wrt compound pages
*
@@ -261,36 +309,64 @@ static inline void page_init_poison(struct page *page, size_t size)
VM_BUG_ON_PGFLAGS(!PageHead(page), page); \
PF_POISONED_CHECK(&page[1]); })
+/*
+ * Which page is the flag stored in.
+ *
+ * Each FOLIO_<policy> value is the page index handed to folio_flags()
+ * by the folio_*() accessors generated below (via FOLIO_##policy):
+ * 0 selects the folio's first page, 1 selects the page after it.
+ */
+#define FOLIO_PF_ANY 0
+#define FOLIO_PF_HEAD 0
+#define FOLIO_PF_ONLY_HEAD 0
+#define FOLIO_PF_NO_TAIL 0
+#define FOLIO_PF_NO_COMPOUND 0
+#define FOLIO_PF_SECOND 1
+
/*
* Macros to create function definitions for page flags
*/
#define TESTPAGEFLAG(uname, lname, policy) \
+static __always_inline bool folio_test_##lname(struct folio *folio) \
+{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline int Page##uname(struct page *page) \
- { return test_bit(PG_##lname, &policy(page, 0)->flags); }
+{ return test_bit(PG_##lname, &policy(page, 0)->flags); }
#define SETPAGEFLAG(uname, lname, policy) \
+static __always_inline \
+void folio_set_##lname(struct folio *folio) \
+{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline void SetPage##uname(struct page *page) \
- { set_bit(PG_##lname, &policy(page, 1)->flags); }
+{ set_bit(PG_##lname, &policy(page, 1)->flags); }
#define CLEARPAGEFLAG(uname, lname, policy) \
+static __always_inline \
+void folio_clear_##lname(struct folio *folio) \
+{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline void ClearPage##uname(struct page *page) \
- { clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{ clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define __SETPAGEFLAG(uname, lname, policy) \
+static __always_inline \
+void __folio_set_##lname(struct folio *folio) \
+{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline void __SetPage##uname(struct page *page) \
- { __set_bit(PG_##lname, &policy(page, 1)->flags); }
+{ __set_bit(PG_##lname, &policy(page, 1)->flags); }
#define __CLEARPAGEFLAG(uname, lname, policy) \
+static __always_inline \
+void __folio_clear_##lname(struct folio *folio) \
+{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline void __ClearPage##uname(struct page *page) \
- { __clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{ __clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define TESTSETFLAG(uname, lname, policy) \
+static __always_inline \
+bool folio_test_set_##lname(struct folio *folio) \
+{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline int TestSetPage##uname(struct page *page) \
- { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
+{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
#define TESTCLEARFLAG(uname, lname, policy) \
+static __always_inline \
+bool folio_test_clear_##lname(struct folio *folio) \
+{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
static __always_inline int TestClearPage##uname(struct page *page) \
- { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define PAGEFLAG(uname, lname, policy) \
TESTPAGEFLAG(uname, lname, policy) \
@@ -306,29 +382,37 @@ static __always_inline int TestClearPage##uname(struct page *page) \
TESTSETFLAG(uname, lname, policy) \
TESTCLEARFLAG(uname, lname, policy)
-#define TESTPAGEFLAG_FALSE(uname) \
+#define TESTPAGEFLAG_FALSE(uname, lname) \
+static inline bool folio_test_##lname(const struct folio *folio) { return 0; } \
static inline int Page##uname(const struct page *page) { return 0; }
-#define SETPAGEFLAG_NOOP(uname) \
+#define SETPAGEFLAG_NOOP(uname, lname) \
+static inline void folio_set_##lname(struct folio *folio) { } \
static inline void SetPage##uname(struct page *page) { }
-#define CLEARPAGEFLAG_NOOP(uname) \
+#define CLEARPAGEFLAG_NOOP(uname, lname) \
+static inline void folio_clear_##lname(struct folio *folio) { } \
static inline void ClearPage##uname(struct page *page) { }
-#define __CLEARPAGEFLAG_NOOP(uname) \
+#define __CLEARPAGEFLAG_NOOP(uname, lname) \
+static inline void __folio_clear_##lname(struct folio *folio) { } \
static inline void __ClearPage##uname(struct page *page) { }
-#define TESTSETFLAG_FALSE(uname) \
+#define TESTSETFLAG_FALSE(uname, lname) \
+static inline bool folio_test_set_##lname(struct folio *folio) \
+{ return 0; } \
static inline int TestSetPage##uname(struct page *page) { return 0; }
-#define TESTCLEARFLAG_FALSE(uname) \
+#define TESTCLEARFLAG_FALSE(uname, lname) \
+static inline bool folio_test_clear_##lname(struct folio *folio) \
+{ return 0; } \
static inline int TestClearPage##uname(struct page *page) { return 0; }
-#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \
- SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname)
+#define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \
+ SETPAGEFLAG_NOOP(uname, lname) CLEARPAGEFLAG_NOOP(uname, lname)
-#define TESTSCFLAG_FALSE(uname) \
- TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
+#define TESTSCFLAG_FALSE(uname, lname) \
+ TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname)
__PAGEFLAG(Locked, locked, PF_NO_TAIL)
PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
@@ -384,8 +468,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL)
/* PG_readahead is only used for reads; PG_reclaim is only for writes */
PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL)
TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL)
-PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND)
- TESTCLEARFLAG(Readahead, reclaim, PF_NO_COMPOUND)
+PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND)
+ TESTCLEARFLAG(Readahead, readahead, PF_NO_COMPOUND)
#ifdef CONFIG_HIGHMEM
/*
@@ -394,22 +478,25 @@ PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND)
*/
#define PageHighMem(__p) is_highmem_idx(page_zonenum(__p))
#else
-PAGEFLAG_FALSE(HighMem)
+PAGEFLAG_FALSE(HighMem, highmem)
#endif
#ifdef CONFIG_SWAP
-static __always_inline int PageSwapCache(struct page *page)
+static __always_inline bool folio_test_swapcache(struct folio *folio)
{
-#ifdef CONFIG_THP_SWAP
- page = compound_head(page);
-#endif
- return PageSwapBacked(page) && test_bit(PG_swapcache, &page->flags);
+ return folio_test_swapbacked(folio) &&
+ test_bit(PG_swapcache, folio_flags(folio, 0));
+}
+static __always_inline bool PageSwapCache(struct page *page)
+{
+ return folio_test_swapcache(page_folio(page));
}
+
SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL)
CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL)
#else
-PAGEFLAG_FALSE(SwapCache)
+PAGEFLAG_FALSE(SwapCache, swapcache)
#endif
PAGEFLAG(Unevictable, unevictable, PF_HEAD)
@@ -421,14 +508,14 @@ PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL)
__CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL)
TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL)
#else
-PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked)
- TESTSCFLAG_FALSE(Mlocked)
+PAGEFLAG_FALSE(Mlocked, mlocked) __CLEARPAGEFLAG_NOOP(Mlocked, mlocked)
+ TESTSCFLAG_FALSE(Mlocked, mlocked)
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND)
#else
-PAGEFLAG_FALSE(Uncached)
+PAGEFLAG_FALSE(Uncached, uncached)
#endif
#ifdef CONFIG_MEMORY_FAILURE
@@ -437,7 +524,7 @@ TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
#define __PG_HWPOISON (1UL << PG_hwpoison)
extern bool take_page_off_buddy(struct page *page);
#else
-PAGEFLAG_FALSE(HWPoison)
+PAGEFLAG_FALSE(HWPoison, hwpoison)
#define __PG_HWPOISON 0
#endif
@@ -451,7 +538,7 @@ PAGEFLAG(Idle, idle, PF_ANY)
#ifdef CONFIG_KASAN_HW_TAGS
PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD)
#else
-PAGEFLAG_FALSE(SkipKASanPoison)
+PAGEFLAG_FALSE(SkipKASanPoison, skip_kasan_poison)
#endif
/*
@@ -489,10 +576,14 @@ static __always_inline int PageMappingFlags(struct page *page)
return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0;
}
-static __always_inline int PageAnon(struct page *page)
+static __always_inline bool folio_test_anon(struct folio *folio)
+{
+ return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0;
+}
+
+static __always_inline bool PageAnon(struct page *page)
{
- page = compound_head(page);
- return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
+ return folio_test_anon(page_folio(page));
}
static __always_inline int __PageMovable(struct page *page)
@@ -508,30 +599,32 @@ static __always_inline int __PageMovable(struct page *page)
* is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any
* anon_vma, but to that page's node of the stable tree.
*/
-static __always_inline int PageKsm(struct page *page)
+static __always_inline bool folio_test_ksm(struct folio *folio)
{
- page = compound_head(page);
- return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
+ return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) ==
PAGE_MAPPING_KSM;
}
+
+static __always_inline bool PageKsm(struct page *page)
+{
+ return folio_test_ksm(page_folio(page));
+}
#else
-TESTPAGEFLAG_FALSE(Ksm)
+TESTPAGEFLAG_FALSE(Ksm, ksm)
#endif
u64 stable_page_flags(struct page *page);
-static inline int PageUptodate(struct page *page)
+static inline bool folio_test_uptodate(struct folio *folio)
{
- int ret;
- page = compound_head(page);
- ret = test_bit(PG_uptodate, &(page)->flags);
+ bool ret = test_bit(PG_uptodate, folio_flags(folio, 0));
/*
- * Must ensure that the data we read out of the page is loaded
- * _after_ we've loaded page->flags to check for PageUptodate.
- * We can skip the barrier if the page is not uptodate, because
+ * Must ensure that the data we read out of the folio is loaded
+ * _after_ we've loaded folio->flags to check the uptodate bit.
+ * We can skip the barrier if the folio is not uptodate, because
* we wouldn't be reading anything from it.
*
- * See SetPageUptodate() for the other side of the story.
+ * See folio_mark_uptodate() for the other side of the story.
*/
if (ret)
smp_rmb();
@@ -539,47 +632,71 @@ static inline int PageUptodate(struct page *page)
return ret;
}
-static __always_inline void __SetPageUptodate(struct page *page)
+static inline int PageUptodate(struct page *page)
+{
+ return folio_test_uptodate(page_folio(page));
+}
+
+static __always_inline void __folio_mark_uptodate(struct folio *folio)
{
- VM_BUG_ON_PAGE(PageTail(page), page);
smp_wmb();
- __set_bit(PG_uptodate, &page->flags);
+ __set_bit(PG_uptodate, folio_flags(folio, 0));
}
-static __always_inline void SetPageUptodate(struct page *page)
+static __always_inline void folio_mark_uptodate(struct folio *folio)
{
- VM_BUG_ON_PAGE(PageTail(page), page);
/*
* Memory barrier must be issued before setting the PG_uptodate bit,
- * so that all previous stores issued in order to bring the page
- * uptodate are actually visible before PageUptodate becomes true.
+ * so that all previous stores issued in order to bring the folio
+ * uptodate are actually visible before folio_test_uptodate becomes true.
*/
smp_wmb();
- set_bit(PG_uptodate, &page->flags);
+ set_bit(PG_uptodate, folio_flags(folio, 0));
+}
+
+static __always_inline void __SetPageUptodate(struct page *page)
+{
+ __folio_mark_uptodate((struct folio *)page);
+}
+
+static __always_inline void SetPageUptodate(struct page *page)
+{
+ folio_mark_uptodate((struct folio *)page);
}
CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL)
-int test_clear_page_writeback(struct page *page);
-int __test_set_page_writeback(struct page *page, bool keep_write);
+bool __folio_start_writeback(struct folio *folio, bool keep_write);
+bool set_page_writeback(struct page *page);
-#define test_set_page_writeback(page) \
- __test_set_page_writeback(page, false)
-#define test_set_page_writeback_keepwrite(page) \
- __test_set_page_writeback(page, true)
+#define folio_start_writeback(folio) \
+ __folio_start_writeback(folio, false)
+#define folio_start_writeback_keepwrite(folio) \
+ __folio_start_writeback(folio, true)
-static inline void set_page_writeback(struct page *page)
+static inline void set_page_writeback_keepwrite(struct page *page)
{
- test_set_page_writeback(page);
+ folio_start_writeback_keepwrite(page_folio(page));
}
-static inline void set_page_writeback_keepwrite(struct page *page)
+static inline bool test_set_page_writeback(struct page *page)
{
- test_set_page_writeback_keepwrite(page);
+ return set_page_writeback(page);
}
__PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY)
+/* Whether there are one or multiple pages in a folio */
+static inline bool folio_test_single(struct folio *folio)
+{
+ return !folio_test_head(folio);
+}
+
+static inline bool folio_test_multi(struct folio *folio)
+{
+ return folio_test_head(folio);
+}
+
static __always_inline void set_compound_head(struct page *page, struct page *head)
{
WRITE_ONCE(page->compound_head, (unsigned long)head + 1);
@@ -603,12 +720,15 @@ static inline void ClearPageCompound(struct page *page)
#ifdef CONFIG_HUGETLB_PAGE
int PageHuge(struct page *page);
int PageHeadHuge(struct page *page);
+static inline bool folio_test_hugetlb(struct folio *folio)
+{
+ return PageHeadHuge(&folio->page);
+}
#else
-TESTPAGEFLAG_FALSE(Huge)
-TESTPAGEFLAG_FALSE(HeadHuge)
+TESTPAGEFLAG_FALSE(Huge, hugetlb)
+TESTPAGEFLAG_FALSE(HeadHuge, headhuge)
#endif
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* PageHuge() only returns true for hugetlbfs pages, but not for
@@ -624,6 +744,11 @@ static inline int PageTransHuge(struct page *page)
return PageHead(page);
}
+static inline bool folio_test_transhuge(struct folio *folio)
+{
+ return folio_test_head(folio);
+}
+
/*
* PageTransCompound returns true for both transparent huge pages
* and hugetlbfs pages, so it should only be called when it's known
@@ -660,12 +785,26 @@ static inline int PageTransTail(struct page *page)
PAGEFLAG(DoubleMap, double_map, PF_SECOND)
TESTSCFLAG(DoubleMap, double_map, PF_SECOND)
#else
-TESTPAGEFLAG_FALSE(TransHuge)
-TESTPAGEFLAG_FALSE(TransCompound)
-TESTPAGEFLAG_FALSE(TransCompoundMap)
-TESTPAGEFLAG_FALSE(TransTail)
-PAGEFLAG_FALSE(DoubleMap)
- TESTSCFLAG_FALSE(DoubleMap)
+TESTPAGEFLAG_FALSE(TransHuge, transhuge)
+TESTPAGEFLAG_FALSE(TransCompound, transcompound)
+TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap)
+TESTPAGEFLAG_FALSE(TransTail, transtail)
+PAGEFLAG_FALSE(DoubleMap, double_map)
+ TESTSCFLAG_FALSE(DoubleMap, double_map)
+#endif
+
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+/*
+ * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the
+ * compound page.
+ *
+ * This flag is set by the hwpoison handler and cleared when the THP is split
+ * or the page is freed.
+ */
+PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
+ TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
+#else
+/*
+ * The *_FALSE/*_NOOP generators take both the Page##uname suffix and the
+ * folio_*_##lname suffix, so the stubs must pass both names.
+ */
+PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
+ TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
#endif
/*
@@ -849,6 +988,11 @@ static inline int page_has_private(struct page *page)
return !!(page->flags & PAGE_FLAGS_PRIVATE);
}
+static inline bool folio_has_private(struct folio *folio)
+{
+ return page_has_private(&folio->page);
+}
+
#undef PF_ANY
#undef PF_HEAD
#undef PF_ONLY_HEAD
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
index d8a6aecf99cb..83abf95e9fa7 100644
--- a/include/linux/page_idle.h
+++ b/include/linux/page_idle.h
@@ -8,46 +8,16 @@
#ifdef CONFIG_PAGE_IDLE_FLAG
-#ifdef CONFIG_64BIT
-static inline bool page_is_young(struct page *page)
-{
- return PageYoung(page);
-}
-
-static inline void set_page_young(struct page *page)
-{
- SetPageYoung(page);
-}
-
-static inline bool test_and_clear_page_young(struct page *page)
-{
- return TestClearPageYoung(page);
-}
-
-static inline bool page_is_idle(struct page *page)
-{
- return PageIdle(page);
-}
-
-static inline void set_page_idle(struct page *page)
-{
- SetPageIdle(page);
-}
-
-static inline void clear_page_idle(struct page *page)
-{
- ClearPageIdle(page);
-}
-#else /* !CONFIG_64BIT */
+#ifndef CONFIG_64BIT
/*
* If there is not enough space to store Idle and Young bits in page flags, use
* page ext flags instead.
*/
extern struct page_ext_operations page_idle_ops;
-static inline bool page_is_young(struct page *page)
+static inline bool folio_test_young(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return false;
@@ -55,9 +25,9 @@ static inline bool page_is_young(struct page *page)
return test_bit(PAGE_EXT_YOUNG, &page_ext->flags);
}
-static inline void set_page_young(struct page *page)
+static inline void folio_set_young(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return;
@@ -65,9 +35,9 @@ static inline void set_page_young(struct page *page)
set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
}
-static inline bool test_and_clear_page_young(struct page *page)
+static inline bool folio_test_clear_young(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return false;
@@ -75,9 +45,9 @@ static inline bool test_and_clear_page_young(struct page *page)
return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
}
-static inline bool page_is_idle(struct page *page)
+static inline bool folio_test_idle(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return false;
@@ -85,9 +55,9 @@ static inline bool page_is_idle(struct page *page)
return test_bit(PAGE_EXT_IDLE, &page_ext->flags);
}
-static inline void set_page_idle(struct page *page)
+static inline void folio_set_idle(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return;
@@ -95,46 +65,75 @@ static inline void set_page_idle(struct page *page)
set_bit(PAGE_EXT_IDLE, &page_ext->flags);
}
-static inline void clear_page_idle(struct page *page)
+static inline void folio_clear_idle(struct folio *folio)
{
- struct page_ext *page_ext = lookup_page_ext(page);
+ struct page_ext *page_ext = lookup_page_ext(&folio->page);
if (unlikely(!page_ext))
return;
clear_bit(PAGE_EXT_IDLE, &page_ext->flags);
}
-#endif /* CONFIG_64BIT */
+#endif /* !CONFIG_64BIT */
#else /* !CONFIG_PAGE_IDLE_FLAG */
-static inline bool page_is_young(struct page *page)
+static inline bool folio_test_young(struct folio *folio)
{
return false;
}
-static inline void set_page_young(struct page *page)
+static inline void folio_set_young(struct folio *folio)
{
}
-static inline bool test_and_clear_page_young(struct page *page)
+static inline bool folio_test_clear_young(struct folio *folio)
{
return false;
}
-static inline bool page_is_idle(struct page *page)
+static inline bool folio_test_idle(struct folio *folio)
{
return false;
}
-static inline void set_page_idle(struct page *page)
+static inline void folio_set_idle(struct folio *folio)
{
}
-static inline void clear_page_idle(struct page *page)
+static inline void folio_clear_idle(struct folio *folio)
{
}
#endif /* CONFIG_PAGE_IDLE_FLAG */
+static inline bool page_is_young(struct page *page)
+{
+ return folio_test_young(page_folio(page));
+}
+
+static inline void set_page_young(struct page *page)
+{
+ folio_set_young(page_folio(page));
+}
+
+static inline bool test_and_clear_page_young(struct page *page)
+{
+ return folio_test_clear_young(page_folio(page));
+}
+
+static inline bool page_is_idle(struct page *page)
+{
+ return folio_test_idle(page_folio(page));
+}
+
+static inline void set_page_idle(struct page *page)
+{
+ folio_set_idle(page_folio(page));
+}
+
+static inline void clear_page_idle(struct page *page)
+{
+ folio_clear_idle(page_folio(page));
+}
#endif /* _LINUX_MM_PAGE_IDLE_H */
diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
index 719bfe5108c5..43c638c51c1f 100644
--- a/include/linux/page_owner.h
+++ b/include/linux/page_owner.h
@@ -12,7 +12,7 @@ extern void __reset_page_owner(struct page *page, unsigned int order);
extern void __set_page_owner(struct page *page,
unsigned int order, gfp_t gfp_mask);
extern void __split_page_owner(struct page *page, unsigned int nr);
-extern void __copy_page_owner(struct page *oldpage, struct page *newpage);
+extern void __folio_copy_owner(struct folio *newfolio, struct folio *old);
extern void __set_page_owner_migrate_reason(struct page *page, int reason);
extern void __dump_page_owner(const struct page *page);
extern void pagetypeinfo_showmixedcount_print(struct seq_file *m,
@@ -36,10 +36,10 @@ static inline void split_page_owner(struct page *page, unsigned int nr)
if (static_branch_unlikely(&page_owner_inited))
__split_page_owner(page, nr);
}
-static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
+static inline void folio_copy_owner(struct folio *newfolio, struct folio *old)
{
if (static_branch_unlikely(&page_owner_inited))
- __copy_page_owner(oldpage, newpage);
+ __folio_copy_owner(newfolio, old);
}
static inline void set_page_owner_migrate_reason(struct page *page, int reason)
{
@@ -63,7 +63,7 @@ static inline void split_page_owner(struct page *page,
unsigned int order)
{
}
-static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
+static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio)
{
}
static inline void set_page_owner_migrate_reason(struct page *page, int reason)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 7ad46f45df39..2e677e6ad09f 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -67,9 +67,31 @@ static inline int page_ref_count(const struct page *page)
return atomic_read(&page->_refcount);
}
+/**
+ * folio_ref_count - The reference count on this folio.
+ * @folio: The folio.
+ *
+ * The refcount is usually incremented by calls to folio_get() and
+ * decremented by calls to folio_put(). Some typical users of the
+ * folio refcount:
+ *
+ * - Each reference from a page table
+ * - The page cache
+ * - Filesystem private data
+ * - The LRU list
+ * - Pipes
+ * - Direct IO which references this page in the process address space
+ *
+ * Return: The number of references to this folio.
+ */
+static inline int folio_ref_count(const struct folio *folio)
+{
+ return page_ref_count(&folio->page);
+}
+
static inline int page_count(const struct page *page)
{
- return atomic_read(&compound_head(page)->_refcount);
+ return folio_ref_count(page_folio(page));
}
static inline void set_page_count(struct page *page, int v)
@@ -79,6 +101,11 @@ static inline void set_page_count(struct page *page, int v)
__page_ref_set(page, v);
}
+static inline void folio_set_count(struct folio *folio, int v)
+{
+ set_page_count(&folio->page, v);
+}
+
/*
* Setup the page count before being freed into the page allocator for
* the first time (boot or memory hotplug)
@@ -95,6 +122,11 @@ static inline void page_ref_add(struct page *page, int nr)
__page_ref_mod(page, nr);
}
+static inline void folio_ref_add(struct folio *folio, int nr)
+{
+ page_ref_add(&folio->page, nr);
+}
+
static inline void page_ref_sub(struct page *page, int nr)
{
atomic_sub(nr, &page->_refcount);
@@ -102,6 +134,11 @@ static inline void page_ref_sub(struct page *page, int nr)
__page_ref_mod(page, -nr);
}
+static inline void folio_ref_sub(struct folio *folio, int nr)
+{
+ page_ref_sub(&folio->page, nr);
+}
+
static inline int page_ref_sub_return(struct page *page, int nr)
{
int ret = atomic_sub_return(nr, &page->_refcount);
@@ -111,6 +148,11 @@ static inline int page_ref_sub_return(struct page *page, int nr)
return ret;
}
+static inline int folio_ref_sub_return(struct folio *folio, int nr)
+{
+ return page_ref_sub_return(&folio->page, nr);
+}
+
static inline void page_ref_inc(struct page *page)
{
atomic_inc(&page->_refcount);
@@ -118,6 +160,11 @@ static inline void page_ref_inc(struct page *page)
__page_ref_mod(page, 1);
}
+static inline void folio_ref_inc(struct folio *folio)
+{
+ page_ref_inc(&folio->page);
+}
+
static inline void page_ref_dec(struct page *page)
{
atomic_dec(&page->_refcount);
@@ -125,6 +172,11 @@ static inline void page_ref_dec(struct page *page)
__page_ref_mod(page, -1);
}
+static inline void folio_ref_dec(struct folio *folio)
+{
+ page_ref_dec(&folio->page);
+}
+
static inline int page_ref_sub_and_test(struct page *page, int nr)
{
int ret = atomic_sub_and_test(nr, &page->_refcount);
@@ -134,6 +186,11 @@ static inline int page_ref_sub_and_test(struct page *page, int nr)
return ret;
}
+static inline int folio_ref_sub_and_test(struct folio *folio, int nr)
+{
+ return page_ref_sub_and_test(&folio->page, nr);
+}
+
static inline int page_ref_inc_return(struct page *page)
{
int ret = atomic_inc_return(&page->_refcount);
@@ -143,6 +200,11 @@ static inline int page_ref_inc_return(struct page *page)
return ret;
}
+static inline int folio_ref_inc_return(struct folio *folio)
+{
+ return page_ref_inc_return(&folio->page);
+}
+
static inline int page_ref_dec_and_test(struct page *page)
{
int ret = atomic_dec_and_test(&page->_refcount);
@@ -152,6 +214,11 @@ static inline int page_ref_dec_and_test(struct page *page)
return ret;
}
+static inline int folio_ref_dec_and_test(struct folio *folio)
+{
+ return page_ref_dec_and_test(&folio->page);
+}
+
static inline int page_ref_dec_return(struct page *page)
{
int ret = atomic_dec_return(&page->_refcount);
@@ -161,15 +228,91 @@ static inline int page_ref_dec_return(struct page *page)
return ret;
}
-static inline int page_ref_add_unless(struct page *page, int nr, int u)
+static inline int folio_ref_dec_return(struct folio *folio)
+{
+ return page_ref_dec_return(&folio->page);
+}
+
+static inline bool page_ref_add_unless(struct page *page, int nr, int u)
{
- int ret = atomic_add_unless(&page->_refcount, nr, u);
+ bool ret = atomic_add_unless(&page->_refcount, nr, u);
if (page_ref_tracepoint_active(page_ref_mod_unless))
__page_ref_mod_unless(page, nr, ret);
return ret;
}
+static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u)
+{
+ return page_ref_add_unless(&folio->page, nr, u);
+}
+
+/**
+ * folio_try_get - Attempt to increase the refcount on a folio.
+ * @folio: The folio.
+ *
+ * If you do not already have a reference to a folio, you can attempt to
+ * get one using this function. It may fail if, for example, the folio
+ * has been freed since you found a pointer to it, or it is frozen for
+ * the purposes of splitting or migration.
+ *
+ * Return: True if the reference count was successfully incremented.
+ */
+static inline bool folio_try_get(struct folio *folio)
+{
+ return folio_ref_add_unless(folio, 1, 0);
+}
+
+static inline bool folio_ref_try_add_rcu(struct folio *folio, int count)
+{
+#ifdef CONFIG_TINY_RCU
+ /*
+ * The caller guarantees the folio will not be freed from interrupt
+ * context, so (on !SMP) we only need preemption to be disabled
+ * and TINY_RCU does that for us.
+ */
+# ifdef CONFIG_PREEMPT_COUNT
+ VM_BUG_ON(!in_atomic() && !irqs_disabled());
+# endif
+ VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio);
+ folio_ref_add(folio, count);
+#else
+ if (unlikely(!folio_ref_add_unless(folio, count, 0))) {
+ /* Either the folio has been freed, or will be freed. */
+ return false;
+ }
+#endif
+ return true;
+}
+
+/**
+ * folio_try_get_rcu - Attempt to increase the refcount on a folio.
+ * @folio: The folio.
+ *
+ * This is a version of folio_try_get() optimised for non-SMP kernels.
+ * If you are still holding the rcu_read_lock() after looking up the
+ * page and know that the page cannot have its refcount decreased to
+ * zero in interrupt context, you can use this instead of folio_try_get().
+ *
+ * Example users include get_user_pages_fast() (as pages are not unmapped
+ * from interrupt context) and the page cache lookups (as pages are not
+ * truncated from interrupt context). We also know that pages are not
+ * frozen in interrupt context for the purposes of splitting or migration.
+ *
+ * You can also use this function if you're holding a lock that prevents
+ * pages being frozen & removed; eg the i_pages lock for the page cache
+ * or the mmap_sem or page table lock for page tables. In this case,
+ * it will always succeed, and you could have used a plain folio_get(),
+ * but it's sometimes more convenient to have a common function called
+ * from both locked and RCU-protected contexts.
+ *
+ * Return: True if the reference count was successfully incremented.
+ */
+static inline bool folio_try_get_rcu(struct folio *folio)
+{
+ return folio_ref_try_add_rcu(folio, 1);
+}
+
static inline int page_ref_freeze(struct page *page, int count)
{
int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count);
@@ -179,6 +322,11 @@ static inline int page_ref_freeze(struct page *page, int count)
return ret;
}
+static inline int folio_ref_freeze(struct folio *folio, int count)
+{
+ return page_ref_freeze(&folio->page, count);
+}
+
static inline void page_ref_unfreeze(struct page *page, int count)
{
VM_BUG_ON_PAGE(page_count(page) != 0, page);
@@ -189,4 +337,8 @@ static inline void page_ref_unfreeze(struct page *page, int count)
__page_ref_unfreeze(page, count);
}
+static inline void folio_ref_unfreeze(struct folio *folio, int count)
+{
+ page_ref_unfreeze(&folio->page, count);
+}
#endif
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 62db6b0176b9..013cdc90f5fd 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -162,149 +162,119 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping)
void release_pages(struct page **pages, int nr);
-/*
- * For file cache pages, return the address_space, otherwise return NULL
+struct address_space *page_mapping(struct page *);
+struct address_space *folio_mapping(struct folio *);
+struct address_space *swapcache_mapping(struct folio *);
+
+/**
+ * folio_file_mapping - Find the mapping this folio belongs to.
+ * @folio: The folio.
+ *
+ * For folios which are in the page cache, return the mapping that this
+ * folio belongs to. Folios in the swap cache return the mapping of the
+ * swap file or swap device where the data is stored. This is different
+ * from the mapping returned by folio_mapping(). The only reason to
+ * use it is if, like NFS, you return 0 from ->swap_activate().
+ *
+ * Do not call this for folios which aren't in the page cache or swap cache.
*/
-static inline struct address_space *page_mapping_file(struct page *page)
+static inline struct address_space *folio_file_mapping(struct folio *folio)
{
- if (unlikely(PageSwapCache(page)))
- return NULL;
- return page_mapping(page);
+ if (unlikely(folio_test_swapcache(folio)))
+ return swapcache_mapping(folio);
+
+ return folio->mapping;
+}
+
+static inline struct address_space *page_file_mapping(struct page *page)
+{
+ return folio_file_mapping(page_folio(page));
}
/*
- * speculatively take a reference to a page.
- * If the page is free (_refcount == 0), then _refcount is untouched, and 0
- * is returned. Otherwise, _refcount is incremented by 1 and 1 is returned.
- *
- * This function must be called inside the same rcu_read_lock() section as has
- * been used to lookup the page in the pagecache radix-tree (or page table):
- * this allows allocators to use a synchronize_rcu() to stabilize _refcount.
- *
- * Unless an RCU grace period has passed, the count of all pages coming out
- * of the allocator must be considered unstable. page_count may return higher
- * than expected, and put_page must be able to do the right thing when the
- * page has been finished with, no matter what it is subsequently allocated
- * for (because put_page is what is used here to drop an invalid speculative
- * reference).
- *
- * This is the interesting part of the lockless pagecache (and lockless
- * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page)
- * has the following pattern:
- * 1. find page in radix tree
- * 2. conditionally increment refcount
- * 3. check the page is still in pagecache (if no, goto 1)
- *
- * Remove-side that cares about stability of _refcount (eg. reclaim) has the
- * following (with the i_pages lock held):
- * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
- * B. remove page from pagecache
- * C. free the page
- *
- * There are 2 critical interleavings that matter:
- * - 2 runs before A: in this case, A sees elevated refcount and bails out
- * - A runs before 2: in this case, 2 sees zero refcount and retries;
- * subsequently, B will complete and 1 will find no page, causing the
- * lookup to return NULL.
- *
- * It is possible that between 1 and 2, the page is removed then the exact same
- * page is inserted into the same position in pagecache. That's OK: the
- * old find_get_page using a lock could equally have run before or after
- * such a re-insertion, depending on order that locks are granted.
- *
- * Lookups racing against pagecache insertion isn't a big problem: either 1
- * will find the page or it will not. Likewise, the old find_get_page could run
- * either before the insertion or afterwards, depending on timing.
+ * For file cache pages, return the address_space, otherwise return NULL
*/
-static inline int __page_cache_add_speculative(struct page *page, int count)
+static inline struct address_space *page_mapping_file(struct page *page)
{
-#ifdef CONFIG_TINY_RCU
-# ifdef CONFIG_PREEMPT_COUNT
- VM_BUG_ON(!in_atomic() && !irqs_disabled());
-# endif
- /*
- * Preempt must be disabled here - we rely on rcu_read_lock doing
- * this for us.
- *
- * Pagecache won't be truncated from interrupt context, so if we have
- * found a page in the radix tree here, we have pinned its refcount by
- * disabling preempt, and hence no need for the "speculative get" that
- * SMP requires.
- */
- VM_BUG_ON_PAGE(page_count(page) == 0, page);
- page_ref_add(page, count);
+ struct folio *folio = page_folio(page);
-#else
- if (unlikely(!page_ref_add_unless(page, count, 0))) {
- /*
- * Either the page has been freed, or will be freed.
- * In either case, retry here and the caller should
- * do the right thing (see comments above).
- */
- return 0;
- }
-#endif
- VM_BUG_ON_PAGE(PageTail(page), page);
-
- return 1;
+ if (unlikely(folio_test_swapcache(folio)))
+ return NULL;
+ return folio_mapping(folio);
}
-static inline int page_cache_get_speculative(struct page *page)
+static inline bool page_cache_add_speculative(struct page *page, int count)
{
- return __page_cache_add_speculative(page, 1);
+ VM_BUG_ON_PAGE(PageTail(page), page);
+ return folio_ref_try_add_rcu((struct folio *)page, count);
}
-static inline int page_cache_add_speculative(struct page *page, int count)
+static inline bool page_cache_get_speculative(struct page *page)
{
- return __page_cache_add_speculative(page, count);
+ return page_cache_add_speculative(page, 1);
}
/**
- * attach_page_private - Attach private data to a page.
- * @page: Page to attach data to.
- * @data: Data to attach to page.
+ * folio_attach_private - Attach private data to a folio.
+ * @folio: Folio to attach data to.
+ * @data: Data to attach to folio.
*
- * Attaching private data to a page increments the page's reference count.
- * The data must be detached before the page will be freed.
+ * Attaching private data to a folio increments the folio's reference count.
+ * The data must be detached before the folio will be freed.
*/
-static inline void attach_page_private(struct page *page, void *data)
+static inline void folio_attach_private(struct folio *folio, void *data)
{
- get_page(page);
- set_page_private(page, (unsigned long)data);
- SetPagePrivate(page);
+ folio_get(folio);
+ folio->private = data;
+ folio_set_private(folio);
}
/**
- * detach_page_private - Detach private data from a page.
- * @page: Page to detach data from.
+ * folio_detach_private - Detach private data from a folio.
+ * @folio: Folio to detach data from.
*
- * Removes the data that was previously attached to the page and decrements
+ * Removes the data that was previously attached to the folio and decrements
* the refcount on the page.
*
- * Return: Data that was attached to the page.
+ * Return: Data that was attached to the folio.
*/
-static inline void *detach_page_private(struct page *page)
+static inline void *folio_detach_private(struct folio *folio)
{
- void *data = (void *)page_private(page);
+ void *data = folio_get_private(folio);
- if (!PagePrivate(page))
+ if (!folio_test_private(folio))
return NULL;
- ClearPagePrivate(page);
- set_page_private(page, 0);
- put_page(page);
+ folio_clear_private(folio);
+ folio->private = NULL;
+ folio_put(folio);
return data;
}
+static inline void attach_page_private(struct page *page, void *data)
+{
+ folio_attach_private(page_folio(page), data);
+}
+
+static inline void *detach_page_private(struct page *page)
+{
+ return folio_detach_private(page_folio(page));
+}
+
#ifdef CONFIG_NUMA
-extern struct page *__page_cache_alloc(gfp_t gfp);
+struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
#else
-static inline struct page *__page_cache_alloc(gfp_t gfp)
+static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
{
- return alloc_pages(gfp, 0);
+ return folio_alloc(gfp, order);
}
#endif
+static inline struct page *__page_cache_alloc(gfp_t gfp)
+{
+ return &filemap_alloc_folio(gfp, 0)->page;
+}
+
static inline struct page *page_cache_alloc(struct address_space *x)
{
return __page_cache_alloc(mapping_gfp_mask(x));
@@ -331,9 +301,28 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
#define FGP_FOR_MMAP 0x00000040
#define FGP_HEAD 0x00000080
#define FGP_ENTRY 0x00000100
+#define FGP_STABLE 0x00000200
-struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
- int fgp_flags, gfp_t cache_gfp_mask);
+struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
+ int fgp_flags, gfp_t gfp);
+struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
+ int fgp_flags, gfp_t gfp);
+
+/**
+ * filemap_get_folio - Find and get a folio.
+ * @mapping: The address_space to search.
+ * @index: The page index.
+ *
+ * Looks up the page cache entry at @mapping & @index. If a folio is
+ * present, it is returned with an increased refcount.
+ *
+ * Otherwise, %NULL is returned.
+ */
+static inline struct folio *filemap_get_folio(struct address_space *mapping,
+ pgoff_t index)
+{
+ return __filemap_get_folio(mapping, index, 0, 0);
+}
/**
* find_get_page - find and get a page reference
@@ -377,25 +366,6 @@ static inline struct page *find_lock_page(struct address_space *mapping,
}
/**
- * find_lock_head - Locate, pin and lock a pagecache page.
- * @mapping: The address_space to search.
- * @index: The page index.
- *
- * Looks up the page cache entry at @mapping & @index. If there is a
- * page cache page, its head page is returned locked and with an increased
- * refcount.
- *
- * Context: May sleep.
- * Return: A struct page which is !PageTail, or %NULL if there is no page
- * in the cache for this index.
- */
-static inline struct page *find_lock_head(struct address_space *mapping,
- pgoff_t index)
-{
- return pagecache_get_page(mapping, index, FGP_LOCK | FGP_HEAD, 0);
-}
-
-/**
* find_or_create_page - locate or add a pagecache page
* @mapping: the page's address_space
* @index: the page's index into the mapping
@@ -452,6 +422,73 @@ static inline bool thp_contains(struct page *head, pgoff_t index)
return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL));
}
+#define swapcache_index(folio) __page_file_index(&(folio)->page)
+
+/**
+ * folio_index - File index of a folio.
+ * @folio: The folio.
+ *
+ * For a folio which is either in the page cache or the swap cache,
+ * return its index within the address_space it belongs to. If you know
+ * the folio is definitely in the page cache, you can look at the folio's
+ * index directly.
+ *
+ * Return: The index (offset in units of pages) of a folio in its file.
+ */
+static inline pgoff_t folio_index(struct folio *folio)
+{
+ if (unlikely(folio_test_swapcache(folio)))
+ return swapcache_index(folio);
+ return folio->index;
+}
+
+/**
+ * folio_next_index - Get the index of the next folio.
+ * @folio: The current folio.
+ *
+ * Return: The index of the folio which follows this folio in the file.
+ */
+static inline pgoff_t folio_next_index(struct folio *folio)
+{
+ return folio->index + folio_nr_pages(folio);
+}
+
+/**
+ * folio_file_page - The page for a particular index.
+ * @folio: The folio which contains this index.
+ * @index: The index we want to look up.
+ *
+ * Sometimes after looking up a folio in the page cache, we need to
+ * obtain the specific page for an index (eg a page fault).
+ *
+ * Return: The page containing the file data for this index.
+ */
+static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
+{
+ /* HugeTLBfs indexes the page cache in units of hpage_size */
+ if (folio_test_hugetlb(folio))
+ return &folio->page;
+ return folio_page(folio, index & (folio_nr_pages(folio) - 1));
+}
+
+/**
+ * folio_contains - Does this folio contain this index?
+ * @folio: The folio.
+ * @index: The page index within the file.
+ *
+ * Context: The caller should have the folio locked in order to prevent
+ * (eg) shmem from moving the folio between the page cache and swap cache
+ * and changing its index in the middle of the operation.
+ * Return: true or false.
+ */
+static inline bool folio_contains(struct folio *folio, pgoff_t index)
+{
+ /* HugeTLBfs indexes the page cache in units of hpage_size */
+ if (folio_test_hugetlb(folio))
+ return folio->index == index;
+ return index - folio_index(folio) < folio_nr_pages(folio);
+}
+
/*
* Given the page we found in the page cache, return the page corresponding
* to this index in the file
@@ -560,6 +597,27 @@ static inline loff_t page_file_offset(struct page *page)
return ((loff_t)page_index(page)) << PAGE_SHIFT;
}
+/**
+ * folio_pos - Returns the byte position of this folio in its file.
+ * @folio: The folio.
+ */
+static inline loff_t folio_pos(struct folio *folio)
+{
+ return page_offset(&folio->page);
+}
+
+/**
+ * folio_file_pos - Returns the byte position of this folio in its file.
+ * @folio: The folio.
+ *
+ * This differs from folio_pos() for folios which belong to a swap file.
+ * NFS is the only filesystem today which needs to use folio_file_pos().
+ */
+static inline loff_t folio_file_pos(struct folio *folio)
+{
+ return page_file_offset(&folio->page);
+}
+
extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
unsigned long address);
@@ -575,13 +633,13 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
}
struct wait_page_key {
- struct page *page;
+ struct folio *folio;
int bit_nr;
int page_match;
};
struct wait_page_queue {
- struct page *page;
+ struct folio *folio;
int bit_nr;
wait_queue_entry_t wait;
};
@@ -589,7 +647,7 @@ struct wait_page_queue {
static inline bool wake_page_match(struct wait_page_queue *wait_page,
struct wait_page_key *key)
{
- if (wait_page->page != key->page)
+ if (wait_page->folio != key->folio)
return false;
key->page_match = 1;
@@ -599,20 +657,31 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page,
return true;
}
-extern void __lock_page(struct page *page);
-extern int __lock_page_killable(struct page *page);
-extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
-extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
+void __folio_lock(struct folio *folio);
+int __folio_lock_killable(struct folio *folio);
+bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
unsigned int flags);
-extern void unlock_page(struct page *page);
+void unlock_page(struct page *page);
+void folio_unlock(struct folio *folio);
+
+static inline bool folio_trylock(struct folio *folio)
+{
+ return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0)));
+}
/*
* Return true if the page was successfully locked
*/
static inline int trylock_page(struct page *page)
{
- page = compound_head(page);
- return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
+ return folio_trylock(page_folio(page));
+}
+
+static inline void folio_lock(struct folio *folio)
+{
+ might_sleep();
+ if (!folio_trylock(folio))
+ __folio_lock(folio);
}
/*
@@ -620,38 +689,30 @@ static inline int trylock_page(struct page *page)
*/
static inline void lock_page(struct page *page)
{
+ struct folio *folio;
might_sleep();
- if (!trylock_page(page))
- __lock_page(page);
+
+ folio = page_folio(page);
+ if (!folio_trylock(folio))
+ __folio_lock(folio);
}
-/*
- * lock_page_killable is like lock_page but can be interrupted by fatal
- * signals. It returns 0 if it locked the page and -EINTR if it was
- * killed while waiting.
- */
-static inline int lock_page_killable(struct page *page)
+static inline int folio_lock_killable(struct folio *folio)
{
might_sleep();
- if (!trylock_page(page))
- return __lock_page_killable(page);
+ if (!folio_trylock(folio))
+ return __folio_lock_killable(folio);
return 0;
}
/*
- * lock_page_async - Lock the page, unless this would block. If the page
- * is already locked, then queue a callback when the page becomes unlocked.
- * This callback can then retry the operation.
- *
- * Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page
- * was already locked and the callback defined in 'wait' was queued.
+ * lock_page_killable is like lock_page but can be interrupted by fatal
+ * signals. It returns 0 if it locked the page and -EINTR if it was
+ * killed while waiting.
*/
-static inline int lock_page_async(struct page *page,
- struct wait_page_queue *wait)
+static inline int lock_page_killable(struct page *page)
{
- if (!trylock_page(page))
- return __lock_page_async(page, wait);
- return 0;
+ return folio_lock_killable(page_folio(page));
}
/*
@@ -659,78 +720,108 @@ static inline int lock_page_async(struct page *page,
* caller indicated that it can handle a retry.
*
* Return value and mmap_lock implications depend on flags; see
- * __lock_page_or_retry().
+ * __folio_lock_or_retry().
*/
-static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
+static inline bool lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags)
{
+ struct folio *folio;
might_sleep();
- return trylock_page(page) || __lock_page_or_retry(page, mm, flags);
+
+ folio = page_folio(page);
+ return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags);
}
/*
- * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc.,
+ * This is exported only for folio_wait_locked/folio_wait_writeback, etc.,
* and should not be used directly.
*/
-extern void wait_on_page_bit(struct page *page, int bit_nr);
-extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
+void folio_wait_bit(struct folio *folio, int bit_nr);
+int folio_wait_bit_killable(struct folio *folio, int bit_nr);
/*
- * Wait for a page to be unlocked.
+ * Wait for a folio to be unlocked.
*
- * This must be called with the caller "holding" the page,
- * ie with increased "page->count" so that the page won't
+ * This must be called with the caller "holding" the folio,
+ * ie with an elevated refcount so that the folio won't
* go away during the wait..
*/
+static inline void folio_wait_locked(struct folio *folio)
+{
+ if (folio_test_locked(folio))
+ folio_wait_bit(folio, PG_locked);
+}
+
+static inline int folio_wait_locked_killable(struct folio *folio)
+{
+ if (!folio_test_locked(folio))
+ return 0;
+ return folio_wait_bit_killable(folio, PG_locked);
+}
+
static inline void wait_on_page_locked(struct page *page)
{
- if (PageLocked(page))
- wait_on_page_bit(compound_head(page), PG_locked);
+ folio_wait_locked(page_folio(page));
}
static inline int wait_on_page_locked_killable(struct page *page)
{
- if (!PageLocked(page))
- return 0;
- return wait_on_page_bit_killable(compound_head(page), PG_locked);
+ return folio_wait_locked_killable(page_folio(page));
}
int put_and_wait_on_page_locked(struct page *page, int state);
void wait_on_page_writeback(struct page *page);
-int wait_on_page_writeback_killable(struct page *page);
-extern void end_page_writeback(struct page *page);
+void folio_wait_writeback(struct folio *folio);
+int folio_wait_writeback_killable(struct folio *folio);
+void end_page_writeback(struct page *page);
+void folio_end_writeback(struct folio *folio);
void wait_for_stable_page(struct page *page);
+void folio_wait_stable(struct folio *folio);
+void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn);
+static inline void __set_page_dirty(struct page *page,
+ struct address_space *mapping, int warn)
+{
+ __folio_mark_dirty(page_folio(page), mapping, warn);
+}
+void folio_account_cleaned(struct folio *folio, struct address_space *mapping,
+ struct bdi_writeback *wb);
+/* Page-based wrapper: forwards to folio_account_cleaned() on the page's folio. */
+static inline void account_page_cleaned(struct page *page,
+ struct address_space *mapping, struct bdi_writeback *wb)
+{
+ /* Plain call: a void function must not return an expression (C11 6.8.6.4). */
+ folio_account_cleaned(page_folio(page), mapping, wb);
+}
+void __folio_cancel_dirty(struct folio *folio);
+static inline void folio_cancel_dirty(struct folio *folio)
+{
+ /* Avoid atomic ops, locking, etc. when not actually needed. */
+ if (folio_test_dirty(folio))
+ __folio_cancel_dirty(folio);
+}
+static inline void cancel_dirty_page(struct page *page)
+{
+ folio_cancel_dirty(page_folio(page));
+}
+bool folio_clear_dirty_for_io(struct folio *folio);
+bool clear_page_dirty_for_io(struct page *page);
+int __must_check folio_write_one(struct folio *folio);
+static inline int __must_check write_one_page(struct page *page)
+{
+ return folio_write_one(page_folio(page));
+}
-void __set_page_dirty(struct page *, struct address_space *, int warn);
int __set_page_dirty_nobuffers(struct page *page);
int __set_page_dirty_no_writeback(struct page *page);
void page_endio(struct page *page, bool is_write, int err);
-/**
- * set_page_private_2 - Set PG_private_2 on a page and take a ref
- * @page: The page.
- *
- * Set the PG_private_2 flag on a page and take the reference needed for the VM
- * to handle its lifetime correctly. This sets the flag and takes the
- * reference unconditionally, so care must be taken not to set the flag again
- * if it's already set.
- */
-static inline void set_page_private_2(struct page *page)
-{
- page = compound_head(page);
- get_page(page);
- SetPagePrivate2(page);
-}
-
-void end_page_private_2(struct page *page);
-void wait_on_page_private_2(struct page *page);
-int wait_on_page_private_2_killable(struct page *page);
+void folio_end_private_2(struct folio *folio);
+void folio_wait_private_2(struct folio *folio);
+int folio_wait_private_2_killable(struct folio *folio);
/*
* Add an arbitrary waiter to a page's wait queue
*/
-extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
+void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter);
/*
* Fault everything in given userspace address range in.
@@ -790,9 +881,11 @@ static inline int fault_in_pages_readable(const char __user *uaddr, size_t size)
}
int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
- pgoff_t index, gfp_t gfp_mask);
+ pgoff_t index, gfp_t gfp);
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
- pgoff_t index, gfp_t gfp_mask);
+ pgoff_t index, gfp_t gfp);
+int filemap_add_folio(struct address_space *mapping, struct folio *folio,
+ pgoff_t index, gfp_t gfp);
extern void delete_from_page_cache(struct page *page);
extern void __delete_from_page_cache(struct page *page, void *shadow);
void replace_page_cache_page(struct page *old, struct page *new);
@@ -817,6 +910,10 @@ static inline int add_to_page_cache(struct page *page,
return error;
}
+/* Must be non-static for BPF error injection */
+int __filemap_add_folio(struct address_space *mapping, struct folio *folio,
+ pgoff_t index, gfp_t gfp, void **shadowp);
+
/**
* struct readahead_control - Describes a readahead request.
*
@@ -906,33 +1003,57 @@ void page_cache_async_readahead(struct address_space *mapping,
page_cache_async_ra(&ractl, page, req_count);
}
+static inline struct folio *__readahead_folio(struct readahead_control *ractl)
+{
+ struct folio *folio;
+
+ BUG_ON(ractl->_batch_count > ractl->_nr_pages);
+ ractl->_nr_pages -= ractl->_batch_count;
+ ractl->_index += ractl->_batch_count;
+
+ if (!ractl->_nr_pages) {
+ ractl->_batch_count = 0;
+ return NULL;
+ }
+
+ folio = xa_load(&ractl->mapping->i_pages, ractl->_index);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ ractl->_batch_count = folio_nr_pages(folio);
+
+ return folio;
+}
+
/**
* readahead_page - Get the next page to read.
- * @rac: The current readahead request.
+ * @ractl: The current readahead request.
*
* Context: The page is locked and has an elevated refcount. The caller
* should decreases the refcount once the page has been submitted for I/O
* and unlock the page once all I/O to that page has completed.
* Return: A pointer to the next page, or %NULL if we are done.
*/
-static inline struct page *readahead_page(struct readahead_control *rac)
+static inline struct page *readahead_page(struct readahead_control *ractl)
{
- struct page *page;
-
- BUG_ON(rac->_batch_count > rac->_nr_pages);
- rac->_nr_pages -= rac->_batch_count;
- rac->_index += rac->_batch_count;
+ struct folio *folio = __readahead_folio(ractl);
- if (!rac->_nr_pages) {
- rac->_batch_count = 0;
- return NULL;
- }
+ return &folio->page;
+}
- page = xa_load(&rac->mapping->i_pages, rac->_index);
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- rac->_batch_count = thp_nr_pages(page);
+/**
+ * readahead_folio - Get the next folio to read.
+ * @ractl: The current readahead request.
+ *
+ * Context: The folio is locked. The caller should unlock the folio once
+ * all I/O to that folio has completed.
+ * Return: A pointer to the next folio, or %NULL if we are done.
+ */
+static inline struct folio *readahead_folio(struct readahead_control *ractl)
+{
+ struct folio *folio = __readahead_folio(ractl);
- return page;
+ if (folio)
+ folio_put(folio);
+ return folio;
}
static inline unsigned int __readahead_batch(struct readahead_control *rac,
@@ -1040,6 +1161,34 @@ static inline unsigned long dir_pages(struct inode *inode)
}
/**
+ * folio_mkwrite_check_truncate - check if folio was truncated
+ * @folio: the folio to check
+ * @inode: the inode to check the folio against
+ *
+ * Return: the number of bytes in the folio up to EOF,
+ * or -EFAULT if the folio was truncated.
+ */
+static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio,
+ struct inode *inode)
+{
+ loff_t size = i_size_read(inode);
+ pgoff_t index = size >> PAGE_SHIFT;
+ size_t offset = offset_in_folio(folio, size);
+
+ if (!folio->mapping)
+ return -EFAULT;
+
+ /* folio is wholly inside EOF */
+ if (folio_next_index(folio) - 1 < index)
+ return folio_size(folio);
+ /* folio is wholly past EOF */
+ if (folio->index > index || !offset)
+ return -EFAULT;
+ /* folio is partially inside EOF */
+ return offset;
+}
+
+/**
* page_mkwrite_check_truncate - check if page was truncated
* @page: the page to check
* @inode: the inode to check the page against
@@ -1068,19 +1217,25 @@ static inline int page_mkwrite_check_truncate(struct page *page,
}
/**
- * i_blocks_per_page - How many blocks fit in this page.
+ * i_blocks_per_folio - How many blocks fit in this folio.
* @inode: The inode which contains the blocks.
- * @page: The page (head page if the page is a THP).
+ * @folio: The folio.
*
- * If the block size is larger than the size of this page, return zero.
+ * If the block size is larger than the size of this folio, return zero.
*
- * Context: The caller should hold a refcount on the page to prevent it
+ * Context: The caller should hold a refcount on the folio to prevent it
* from being split.
- * Return: The number of filesystem blocks covered by this page.
+ * Return: The number of filesystem blocks covered by this folio.
*/
static inline
+unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio)
+{
+ return folio_size(folio) >> inode->i_blkbits;
+}
+
+static inline
unsigned int i_blocks_per_page(struct inode *inode, struct page *page)
{
- return thp_size(page) >> inode->i_blkbits;
+ return i_blocks_per_folio(inode, page_folio(page));
}
#endif /* _LINUX_PAGEMAP_H */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index c976cc6de257..e704b1a4c06c 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -235,7 +235,7 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
*
* returns the number of cleaned PTEs.
*/
-int page_mkclean(struct page *);
+int folio_mkclean(struct folio *);
/*
* called in munlock()/munmap() path to check for other vmas holding
@@ -295,12 +295,14 @@ static inline void try_to_unmap(struct page *page, enum ttu_flags flags)
{
}
-static inline int page_mkclean(struct page *page)
+static inline int folio_mkclean(struct folio *folio)
{
return 0;
}
-
-
#endif /* CONFIG_MMU */
+static inline int page_mkclean(struct page *page)
+{
+ return folio_mkclean(page_folio(page));
+}
#endif /* _LINUX_RMAP_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c1a927ddec64..e0454e60fe8f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1160,10 +1160,8 @@ struct task_struct {
/* Stacked block device info: */
struct bio_list *bio_list;
-#ifdef CONFIG_BLOCK
/* Stack plugging: */
struct blk_plug *plug;
-#endif
/* VM state: */
struct reclaim_state *reclaim_state;
diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h
index 21c3771e6a56..988528b5da43 100644
--- a/include/linux/secretmem.h
+++ b/include/linux/secretmem.h
@@ -23,7 +23,7 @@ static inline bool page_is_secretmem(struct page *page)
mapping = (struct address_space *)
((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);
- if (mapping != page->mapping)
+ if (!mapping || mapping != page->mapping)
return false;
return mapping->a_ops == &secretmem_aops;
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 14ab0c0bc924..1ce9a9eb223b 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -128,6 +128,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
int len, int flags);
+bool sk_msg_is_readable(struct sock *sk);
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
{
diff --git a/include/linux/swap.h b/include/linux/swap.h
index ba52f3a3478e..cdf0957a88a4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -320,11 +320,17 @@ struct vma_swap_readahead {
#endif
};
+static inline swp_entry_t folio_swap_entry(struct folio *folio)
+{
+ swp_entry_t entry = { .val = page_private(&folio->page) };
+ return entry;
+}
+
/* linux/mm/workingset.c */
void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg);
-void workingset_refault(struct page *page, void *shadow);
-void workingset_activation(struct page *page);
+void workingset_refault(struct folio *folio, void *shadow);
+void workingset_activation(struct folio *folio);
/* Only track the nodes of mappings with shadow entries */
void workingset_update_node(struct xa_node *node);
@@ -344,9 +350,11 @@ extern unsigned long nr_free_buffer_pages(void);
/* linux/mm/swap.c */
extern void lru_note_cost(struct lruvec *lruvec, bool file,
unsigned int nr_pages);
-extern void lru_note_cost_page(struct page *);
+extern void lru_note_cost_folio(struct folio *);
+extern void folio_add_lru(struct folio *);
extern void lru_cache_add(struct page *);
-extern void mark_page_accessed(struct page *);
+void mark_page_accessed(struct page *);
+void folio_mark_accessed(struct folio *);
extern atomic_t lru_disable_count;
@@ -365,7 +373,6 @@ extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_cpu_zone(struct zone *zone);
extern void lru_add_drain_all(void);
-extern void rotate_reclaimable_page(struct page *page);
extern void deactivate_file_page(struct page *page);
extern void deactivate_page(struct page *page);
extern void mark_page_lazyfree(struct page *page);
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index aa11fe323c56..12d827734686 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -269,6 +269,7 @@ enum tpm2_cc_attrs {
#define TPM_VID_INTEL 0x8086
#define TPM_VID_WINBOND 0x1050
#define TPM_VID_STM 0x104A
+#define TPM_VID_ATML 0x1114
enum tpm_chip_flags {
TPM_CHIP_FLAG_TPM2 = BIT(1),
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index a9f9c5714e65..fe95f0922526 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -16,23 +16,8 @@
* When function tracing occurs, the following steps are made:
* If arch does not support a ftrace feature:
* call internal function (uses INTERNAL bits) which calls...
- * If callback is registered to the "global" list, the list
- * function is called and recursion checks the GLOBAL bits.
- * then this function calls...
* The function callback, which can use the FTRACE bits to
* check for recursion.
- *
- * Now if the arch does not support a feature, and it calls
- * the global list function which calls the ftrace callback
- * all three of these steps will do a recursion protection.
- * There's no reason to do one if the previous caller already
- * did. The recursion that we are protecting against will
- * go through the same steps again.
- *
- * To prevent the multiple recursion checks, if a recursion
- * bit is set that is higher than the MAX bit of the current
- * check, then we know that the check was made by the previous
- * caller, and we can skip the current check.
*/
enum {
/* Function recursion bits */
@@ -40,12 +25,14 @@ enum {
TRACE_FTRACE_NMI_BIT,
TRACE_FTRACE_IRQ_BIT,
TRACE_FTRACE_SIRQ_BIT,
+ TRACE_FTRACE_TRANSITION_BIT,
- /* INTERNAL_BITs must be greater than FTRACE_BITs */
+ /* Internal use recursion bits */
TRACE_INTERNAL_BIT,
TRACE_INTERNAL_NMI_BIT,
TRACE_INTERNAL_IRQ_BIT,
TRACE_INTERNAL_SIRQ_BIT,
+ TRACE_INTERNAL_TRANSITION_BIT,
TRACE_BRANCH_BIT,
/*
@@ -86,12 +73,6 @@ enum {
*/
TRACE_GRAPH_NOTRACE_BIT,
- /*
- * When transitioning between context, the preempt_count() may
- * not be correct. Allow for a single recursion to cover this case.
- */
- TRACE_TRANSITION_BIT,
-
/* Used to prevent recursion recording from recursing. */
TRACE_RECORD_RECURSION_BIT,
};
@@ -113,12 +94,10 @@ enum {
#define TRACE_CONTEXT_BITS 4
#define TRACE_FTRACE_START TRACE_FTRACE_BIT
-#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
#define TRACE_LIST_START TRACE_INTERNAL_BIT
-#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
-#define TRACE_CONTEXT_MASK TRACE_LIST_MAX
+#define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
/*
* Used for setting context
@@ -132,6 +111,7 @@ enum {
TRACE_CTX_IRQ,
TRACE_CTX_SOFTIRQ,
TRACE_CTX_NORMAL,
+ TRACE_CTX_TRANSITION,
};
static __always_inline int trace_get_context_bit(void)
@@ -160,45 +140,34 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip);
#endif
static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip,
- int start, int max)
+ int start)
{
unsigned int val = READ_ONCE(current->trace_recursion);
int bit;
- /* A previous recursion check was made */
- if ((val & TRACE_CONTEXT_MASK) > max)
- return 0;
-
bit = trace_get_context_bit() + start;
if (unlikely(val & (1 << bit))) {
/*
* It could be that preempt_count has not been updated during
* a switch between contexts. Allow for a single recursion.
*/
- bit = TRACE_TRANSITION_BIT;
+ bit = TRACE_CTX_TRANSITION + start;
if (val & (1 << bit)) {
do_ftrace_record_recursion(ip, pip);
return -1;
}
- } else {
- /* Normal check passed, clear the transition to allow it again */
- val &= ~(1 << TRACE_TRANSITION_BIT);
}
val |= 1 << bit;
current->trace_recursion = val;
barrier();
- return bit + 1;
+ return bit;
}
static __always_inline void trace_clear_recursion(int bit)
{
- if (!bit)
- return;
-
barrier();
- bit--;
trace_recursion_clear(bit);
}
@@ -214,7 +183,7 @@ static __always_inline void trace_clear_recursion(int bit)
static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
unsigned long parent_ip)
{
- return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX);
+ return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START);
}
/**
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index eb70cabe6e7f..33a4240e6a6f 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -127,6 +127,8 @@ static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type t
long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
+long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type);
+void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type);
bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
static inline void set_rlimit_ucount_max(struct user_namespace *ns,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index d6a6cf53b127..bfe38869498d 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -415,6 +415,78 @@ static inline void drain_zonestat(struct zone *zone,
struct per_cpu_zonestat *pzstats) { }
#endif /* CONFIG_SMP */
+static inline void __zone_stat_mod_folio(struct folio *folio,
+ enum zone_stat_item item, long nr)
+{
+ __mod_zone_page_state(folio_zone(folio), item, nr);
+}
+
+static inline void __zone_stat_add_folio(struct folio *folio,
+ enum zone_stat_item item)
+{
+ __mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio));
+}
+
+static inline void __zone_stat_sub_folio(struct folio *folio,
+ enum zone_stat_item item)
+{
+ __mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio));
+}
+
+static inline void zone_stat_mod_folio(struct folio *folio,
+ enum zone_stat_item item, long nr)
+{
+ mod_zone_page_state(folio_zone(folio), item, nr);
+}
+
+static inline void zone_stat_add_folio(struct folio *folio,
+ enum zone_stat_item item)
+{
+ mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio));
+}
+
+static inline void zone_stat_sub_folio(struct folio *folio,
+ enum zone_stat_item item)
+{
+ mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio));
+}
+
+static inline void __node_stat_mod_folio(struct folio *folio,
+ enum node_stat_item item, long nr)
+{
+ __mod_node_page_state(folio_pgdat(folio), item, nr);
+}
+
+static inline void __node_stat_add_folio(struct folio *folio,
+ enum node_stat_item item)
+{
+ __mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio));
+}
+
+static inline void __node_stat_sub_folio(struct folio *folio,
+ enum node_stat_item item)
+{
+ __mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio));
+}
+
+static inline void node_stat_mod_folio(struct folio *folio,
+ enum node_stat_item item, long nr)
+{
+ mod_node_page_state(folio_pgdat(folio), item, nr);
+}
+
+static inline void node_stat_add_folio(struct folio *folio,
+ enum node_stat_item item)
+{
+ mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio));
+}
+
+static inline void node_stat_sub_folio(struct folio *folio,
+ enum node_stat_item item)
+{
+ mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio));
+}
+
static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
int migratetype)
{
@@ -525,12 +597,6 @@ static inline void mod_lruvec_page_state(struct page *page,
#endif /* CONFIG_MEMCG */
-static inline void inc_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx)
-{
- mod_lruvec_state(lruvec, idx, 1);
-}
-
static inline void __inc_lruvec_page_state(struct page *page,
enum node_stat_item idx)
{
@@ -543,6 +609,24 @@ static inline void __dec_lruvec_page_state(struct page *page,
__mod_lruvec_page_state(page, idx, -1);
}
+static inline void __lruvec_stat_mod_folio(struct folio *folio,
+ enum node_stat_item idx, int val)
+{
+ __mod_lruvec_page_state(&folio->page, idx, val);
+}
+
+static inline void __lruvec_stat_add_folio(struct folio *folio,
+ enum node_stat_item idx)
+{
+ __lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio));
+}
+
+static inline void __lruvec_stat_sub_folio(struct folio *folio,
+ enum node_stat_item idx)
+{
+ __lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
+}
+
static inline void inc_lruvec_page_state(struct page *page,
enum node_stat_item idx)
{
@@ -555,4 +639,21 @@ static inline void dec_lruvec_page_state(struct page *page,
mod_lruvec_page_state(page, idx, -1);
}
+static inline void lruvec_stat_mod_folio(struct folio *folio,
+ enum node_stat_item idx, int val)
+{
+ mod_lruvec_page_state(&folio->page, idx, val);
+}
+
+static inline void lruvec_stat_add_folio(struct folio *folio,
+ enum node_stat_item idx)
+{
+ lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio));
+}
+
+static inline void lruvec_stat_sub_folio(struct folio *folio,
+ enum node_stat_item idx)
+{
+ lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
+}
#endif /* _LINUX_VMSTAT_H */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 8eb165760752..3bfd487d1dd2 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -389,7 +389,14 @@ void writeback_set_ratelimit(void);
void tag_pages_for_writeback(struct address_space *mapping,
pgoff_t start, pgoff_t end);
-void account_page_redirty(struct page *page);
+bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio);
+void folio_account_redirty(struct folio *folio);
+static inline void account_page_redirty(struct page *page)
+{
+ folio_account_redirty(page_folio(page));
+}
+bool folio_redirty_for_writepage(struct writeback_control *, struct folio *);
+bool redirty_page_for_writepage(struct writeback_control *, struct page *);
void sb_mark_inode_writeback(struct inode *inode);
void sb_clear_inode_writeback(struct inode *inode);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 62dd8422e0dc..27336fc70467 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5376,7 +5376,6 @@ static inline void wiphy_unlock(struct wiphy *wiphy)
* netdev and may otherwise be used by driver read-only, will be update
* by cfg80211 on change_interface
* @mgmt_registrations: list of registrations for management frames
- * @mgmt_registrations_lock: lock for the list
* @mgmt_registrations_need_update: mgmt registrations were updated,
* need to propagate the update to the driver
* @mtx: mutex used to lock data in this struct, may be used by drivers
@@ -5423,7 +5422,6 @@ struct wireless_dev {
u32 identifier;
struct list_head mgmt_registrations;
- spinlock_t mgmt_registrations_lock;
u8 mgmt_registrations_need_update:1;
struct mutex mtx;
diff --git a/include/net/mctp.h b/include/net/mctp.h
index a824d47c3c6d..ffd2c23bd76d 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -54,7 +54,7 @@ struct mctp_sock {
struct sock sk;
/* bind() params */
- int bind_net;
+ unsigned int bind_net;
mctp_eid_t bind_addr;
__u8 bind_type;
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 6026bbefbffd..3214848402ec 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -69,6 +69,10 @@ struct mptcp_out_options {
struct {
u64 sndr_key;
u64 rcvr_key;
+ u64 data_seq;
+ u32 subflow_seq;
+ u16 data_len;
+ __sum16 csum;
};
struct {
struct mptcp_addr_info addr;
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 2eb6d7c2c931..f37c7a558d6d 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -384,11 +384,11 @@ sctp_vtag_verify(const struct sctp_chunk *chunk,
* Verification Tag value does not match the receiver's own
* tag value, the receiver shall silently discard the packet...
*/
- if (ntohl(chunk->sctp_hdr->vtag) == asoc->c.my_vtag)
- return 1;
+ if (ntohl(chunk->sctp_hdr->vtag) != asoc->c.my_vtag)
+ return 0;
chunk->transport->encap_port = SCTP_INPUT_CB(chunk->skb)->encap_port;
- return 0;
+ return 1;
}
/* Check VTAG of the packet matches the sender's own tag and the T bit is
diff --git a/include/net/sock.h b/include/net/sock.h
index ea6fbc88c8f9..463f390d90b3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1208,7 +1208,7 @@ struct proto {
#endif
bool (*stream_memory_free)(const struct sock *sk, int wake);
- bool (*stream_memory_read)(const struct sock *sk);
+ bool (*sock_is_readable)(struct sock *sk);
/* Memory pressure */
void (*enter_memory_pressure)(struct sock *sk);
void (*leave_memory_pressure)(struct sock *sk);
@@ -2820,4 +2820,10 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs);
int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len);
+static inline bool sk_is_readable(struct sock *sk)
+{
+ if (sk->sk_prot->sock_is_readable)
+ return sk->sk_prot->sock_is_readable(sk);
+ return false;
+}
#endif /* _SOCK_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3166dc15d7d6..60c384569e9c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1576,6 +1576,7 @@ struct tcp_md5sig_key {
u8 keylen;
u8 family; /* AF_INET or AF_INET6 */
u8 prefixlen;
+ u8 flags;
union tcp_md5_addr addr;
int l3index; /* set if key added with L3 scope */
u8 key[TCP_MD5SIG_MAXKEYLEN];
@@ -1621,10 +1622,10 @@ struct tcp_md5sig_pool {
int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
const struct sock *sk, const struct sk_buff *skb);
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, int l3index,
+ int family, u8 prefixlen, int l3index, u8 flags,
const u8 *newkey, u8 newkeylen, gfp_t gfp);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, int l3index);
+ int family, u8 prefixlen, int l3index, u8 flags);
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk);
diff --git a/include/net/tls.h b/include/net/tls.h
index be4b3e1cac46..1fffb206f09f 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -358,6 +358,7 @@ int tls_sk_query(struct sock *sk, int optname, char __user *optval,
int __user *optlen);
int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
unsigned int optlen);
+void tls_err_abort(struct sock *sk, int err);
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx);
@@ -375,7 +376,7 @@ void tls_sw_release_resources_rx(struct sock *sk);
void tls_sw_free_ctx_rx(struct tls_context *tls_ctx);
int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len);
-bool tls_sw_stream_read(const struct sock *sk);
+bool tls_sw_sock_is_readable(struct sock *sk);
ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
@@ -466,12 +467,6 @@ static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
#endif
}
-static inline void tls_err_abort(struct sock *sk, int err)
-{
- sk->sk_err = err;
- sk_error_report(sk);
-}
-
static inline bool tls_bigint_increment(unsigned char *seq, int len)
{
int i;
@@ -512,7 +507,7 @@ static inline void tls_advance_record_sn(struct sock *sk,
struct cipher_context *ctx)
{
if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size))
- tls_err_abort(sk, EBADMSG);
+ tls_err_abort(sk, -EBADMSG);
if (prot->version != TLS_1_3_VERSION &&
prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305)
diff --git a/include/net/udp.h b/include/net/udp.h
index 360df454356c..909ecf447e0f 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -494,8 +494,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
* CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial
* packets in udp_gro_complete_segment. As does UDP GSO, verified by
* udp_send_skb. But when those packets are looped in dev_loopback_xmit
- * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this
- * specific case, where PARTIAL is both correct and required.
+ * their ip_summed CHECKSUM_NONE is changed to CHECKSUM_UNNECESSARY.
+ * Reset in this specific case, where PARTIAL is both correct and
+ * required.
*/
if (skb->pkt_type == PACKET_LOOPBACK)
skb->ip_summed = CHECKSUM_PARTIAL;
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index eaf04c9a1dfc..31078063afac 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -396,4 +396,7 @@ static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd)
extern void scsi_build_sense(struct scsi_cmnd *scmd, int desc,
u8 key, u8 asc, u8 ascq);
+struct request *scsi_alloc_request(struct request_queue *q,
+ unsigned int op, blk_mq_req_flags_t flags);
+
#endif /* _SCSI_SCSI_CMND_H */
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 0dd30de00e5b..7346f0164cf4 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -6,6 +6,7 @@
#define _TRACE_IO_URING_H
#include <linux/tracepoint.h>
+#include <uapi/linux/io_uring.h>
struct io_wq_work;
@@ -497,6 +498,66 @@ TRACE_EVENT(io_uring_task_run,
(unsigned long long) __entry->user_data)
);
+/*
+ * io_uring_req_failed - called when an sqe is errored during submission
+ *
+ * @sqe: pointer to the io_uring_sqe that failed
+ * @error: error it failed with
+ *
+ * Allows easier diagnosing of malformed requests in production systems.
+ */
+TRACE_EVENT(io_uring_req_failed,
+
+ TP_PROTO(const struct io_uring_sqe *sqe, int error),
+
+ TP_ARGS(sqe, error),
+
+ TP_STRUCT__entry (
+ __field( u8, opcode )
+ __field( u8, flags )
+ __field( u8, ioprio )
+ __field( u64, off )
+ __field( u64, addr )
+ __field( u32, len )
+ __field( u32, op_flags )
+ __field( u64, user_data )
+ __field( u16, buf_index )
+ __field( u16, personality )
+ __field( u32, file_index )
+ __field( u64, pad1 )
+ __field( u64, pad2 )
+ __field( int, error )
+ ),
+
+ TP_fast_assign(
+ __entry->opcode = sqe->opcode;
+ __entry->flags = sqe->flags;
+ __entry->ioprio = sqe->ioprio;
+ __entry->off = sqe->off;
+ __entry->addr = sqe->addr;
+ __entry->len = sqe->len;
+ __entry->op_flags = sqe->rw_flags;
+ __entry->user_data = sqe->user_data;
+ __entry->buf_index = sqe->buf_index;
+ __entry->personality = sqe->personality;
+ __entry->file_index = sqe->file_index;
+ __entry->pad1 = sqe->__pad2[0];
+ __entry->pad2 = sqe->__pad2[1];
+ __entry->error = error;
+ ),
+
+ TP_printk("op %d, flags=0x%x, prio=%d, off=%llu, addr=%llu, "
+ "len=%u, rw_flags=0x%x, user_data=0x%llx, buf_index=%d, "
+ "personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d",
+ __entry->opcode, __entry->flags, __entry->ioprio,
+ (unsigned long long)__entry->off,
+ (unsigned long long) __entry->addr, __entry->len,
+ __entry->op_flags, (unsigned long long) __entry->user_data,
+ __entry->buf_index, __entry->personality, __entry->file_index,
+ (unsigned long long) __entry->pad1,
+ (unsigned long long) __entry->pad2, __entry->error)
+);
+
#endif /* _TRACE_IO_URING_H */
/* This part must be outside protection */
diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h
index 1d28431e85bd..171524d3526d 100644
--- a/include/trace/events/pagemap.h
+++ b/include/trace/events/pagemap.h
@@ -16,38 +16,38 @@
#define PAGEMAP_MAPPEDDISK 0x0020u
#define PAGEMAP_BUFFERS 0x0040u
-#define trace_pagemap_flags(page) ( \
- (PageAnon(page) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \
- (page_mapped(page) ? PAGEMAP_MAPPED : 0) | \
- (PageSwapCache(page) ? PAGEMAP_SWAPCACHE : 0) | \
- (PageSwapBacked(page) ? PAGEMAP_SWAPBACKED : 0) | \
- (PageMappedToDisk(page) ? PAGEMAP_MAPPEDDISK : 0) | \
- (page_has_private(page) ? PAGEMAP_BUFFERS : 0) \
+#define trace_pagemap_flags(folio) ( \
+ (folio_test_anon(folio) ? PAGEMAP_ANONYMOUS : PAGEMAP_FILE) | \
+ (folio_mapped(folio) ? PAGEMAP_MAPPED : 0) | \
+ (folio_test_swapcache(folio) ? PAGEMAP_SWAPCACHE : 0) | \
+ (folio_test_swapbacked(folio) ? PAGEMAP_SWAPBACKED : 0) | \
+ (folio_test_mappedtodisk(folio) ? PAGEMAP_MAPPEDDISK : 0) | \
+ (folio_test_private(folio) ? PAGEMAP_BUFFERS : 0) \
)
TRACE_EVENT(mm_lru_insertion,
- TP_PROTO(struct page *page),
+ TP_PROTO(struct folio *folio),
- TP_ARGS(page),
+ TP_ARGS(folio),
TP_STRUCT__entry(
- __field(struct page *, page )
+ __field(struct folio *, folio )
__field(unsigned long, pfn )
__field(enum lru_list, lru )
__field(unsigned long, flags )
),
TP_fast_assign(
- __entry->page = page;
- __entry->pfn = page_to_pfn(page);
- __entry->lru = page_lru(page);
- __entry->flags = trace_pagemap_flags(page);
+ __entry->folio = folio;
+ __entry->pfn = folio_pfn(folio);
+ __entry->lru = folio_lru_list(folio);
+ __entry->flags = trace_pagemap_flags(folio);
),
/* Flag format is based on page-types.c formatting for pagemap */
- TP_printk("page=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s",
- __entry->page,
+ TP_printk("folio=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s",
+ __entry->folio,
__entry->pfn,
__entry->lru,
__entry->flags & PAGEMAP_MAPPED ? "M" : " ",
@@ -60,23 +60,21 @@ TRACE_EVENT(mm_lru_insertion,
TRACE_EVENT(mm_lru_activate,
- TP_PROTO(struct page *page),
+ TP_PROTO(struct folio *folio),
- TP_ARGS(page),
+ TP_ARGS(folio),
TP_STRUCT__entry(
- __field(struct page *, page )
+ __field(struct folio *, folio )
__field(unsigned long, pfn )
),
TP_fast_assign(
- __entry->page = page;
- __entry->pfn = page_to_pfn(page);
+ __entry->folio = folio;
+ __entry->pfn = folio_pfn(folio);
),
- /* Flag format is based on page-types.c formatting for pagemap */
- TP_printk("page=%p pfn=0x%lx", __entry->page, __entry->pfn)
-
+ TP_printk("folio=%p pfn=0x%lx", __entry->folio, __entry->pfn)
);
#endif /* _TRACE_PAGEMAP_H */
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 840d1ba84cf5..7dccb66474f7 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -52,11 +52,11 @@ WB_WORK_REASON
struct wb_writeback_work;
-DECLARE_EVENT_CLASS(writeback_page_template,
+DECLARE_EVENT_CLASS(writeback_folio_template,
- TP_PROTO(struct page *page, struct address_space *mapping),
+ TP_PROTO(struct folio *folio, struct address_space *mapping),
- TP_ARGS(page, mapping),
+ TP_ARGS(folio, mapping),
TP_STRUCT__entry (
__array(char, name, 32)
@@ -69,7 +69,7 @@ DECLARE_EVENT_CLASS(writeback_page_template,
bdi_dev_name(mapping ? inode_to_bdi(mapping->host) :
NULL), 32);
__entry->ino = mapping ? mapping->host->i_ino : 0;
- __entry->index = page->index;
+ __entry->index = folio->index;
),
TP_printk("bdi %s: ino=%lu index=%lu",
@@ -79,18 +79,18 @@ DECLARE_EVENT_CLASS(writeback_page_template,
)
);
-DEFINE_EVENT(writeback_page_template, writeback_dirty_page,
+DEFINE_EVENT(writeback_folio_template, writeback_dirty_folio,
- TP_PROTO(struct page *page, struct address_space *mapping),
+ TP_PROTO(struct folio *folio, struct address_space *mapping),
- TP_ARGS(page, mapping)
+ TP_ARGS(folio, mapping)
);
-DEFINE_EVENT(writeback_page_template, wait_on_page_writeback,
+DEFINE_EVENT(writeback_folio_template, folio_wait_writeback,
- TP_PROTO(struct page *page, struct address_space *mapping),
+ TP_PROTO(struct folio *folio, struct address_space *mapping),
- TP_ARGS(page, mapping)
+ TP_ARGS(folio, mapping)
);
DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
@@ -236,9 +236,9 @@ TRACE_EVENT(inode_switch_wbs,
TRACE_EVENT(track_foreign_dirty,
- TP_PROTO(struct page *page, struct bdi_writeback *wb),
+ TP_PROTO(struct folio *folio, struct bdi_writeback *wb),
- TP_ARGS(page, wb),
+ TP_ARGS(folio, wb),
TP_STRUCT__entry(
__array(char, name, 32)
@@ -250,7 +250,7 @@ TRACE_EVENT(track_foreign_dirty,
),
TP_fast_assign(
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping = folio_mapping(folio);
struct inode *inode = mapping ? mapping->host : NULL;
strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
@@ -258,7 +258,7 @@ TRACE_EVENT(track_foreign_dirty,
__entry->ino = inode ? inode->i_ino : 0;
__entry->memcg_id = wb->memcg_css->id;
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
- __entry->page_cgroup_ino = cgroup_ino(page_memcg(page)->css.cgroup);
+ __entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup);
),
TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu",
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 9dc0bf0c5a6e..ecd0f5bdfc1d 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -181,6 +181,10 @@ struct f_owner_ex {
blocking */
#define LOCK_UN 8 /* remove lock */
+/*
+ * LOCK_MAND support has been removed from the kernel. We leave the symbols
+ * here to not break legacy builds, but these should not be used in new code.
+ */
#define LOCK_MAND 32 /* This is a mandatory flock ... */
#define LOCK_READ 64 /* which allows concurrent read operations */
#define LOCK_WRITE 128 /* which allows concurrent write operations */
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
deleted file mode 100644
index cf7399f03b71..000000000000
--- a/include/uapi/linux/bcache.h
+++ /dev/null
@@ -1,445 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _LINUX_BCACHE_H
-#define _LINUX_BCACHE_H
-
-/*
- * Bcache on disk data structures
- */
-
-#include <linux/types.h>
-
-#define BITMASK(name, type, field, offset, size) \
-static inline __u64 name(const type *k) \
-{ return (k->field >> offset) & ~(~0ULL << size); } \
- \
-static inline void SET_##name(type *k, __u64 v) \
-{ \
- k->field &= ~(~(~0ULL << size) << offset); \
- k->field |= (v & ~(~0ULL << size)) << offset; \
-}
-
-/* Btree keys - all units are in sectors */
-
-struct bkey {
- __u64 high;
- __u64 low;
- __u64 ptr[];
-};
-
-#define KEY_FIELD(name, field, offset, size) \
- BITMASK(name, struct bkey, field, offset, size)
-
-#define PTR_FIELD(name, offset, size) \
-static inline __u64 name(const struct bkey *k, unsigned int i) \
-{ return (k->ptr[i] >> offset) & ~(~0ULL << size); } \
- \
-static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v) \
-{ \
- k->ptr[i] &= ~(~(~0ULL << size) << offset); \
- k->ptr[i] |= (v & ~(~0ULL << size)) << offset; \
-}
-
-#define KEY_SIZE_BITS 16
-#define KEY_MAX_U64S 8
-
-KEY_FIELD(KEY_PTRS, high, 60, 3)
-KEY_FIELD(HEADER_SIZE, high, 58, 2)
-KEY_FIELD(KEY_CSUM, high, 56, 2)
-KEY_FIELD(KEY_PINNED, high, 55, 1)
-KEY_FIELD(KEY_DIRTY, high, 36, 1)
-
-KEY_FIELD(KEY_SIZE, high, 20, KEY_SIZE_BITS)
-KEY_FIELD(KEY_INODE, high, 0, 20)
-
-/* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */
-
-static inline __u64 KEY_OFFSET(const struct bkey *k)
-{
- return k->low;
-}
-
-static inline void SET_KEY_OFFSET(struct bkey *k, __u64 v)
-{
- k->low = v;
-}
-
-/*
- * The high bit being set is a relic from when we used it to do binary
- * searches - it told you where a key started. It's not used anymore,
- * and can probably be safely dropped.
- */
-#define KEY(inode, offset, size) \
-((struct bkey) { \
- .high = (1ULL << 63) | ((__u64) (size) << 20) | (inode), \
- .low = (offset) \
-})
-
-#define ZERO_KEY KEY(0, 0, 0)
-
-#define MAX_KEY_INODE (~(~0 << 20))
-#define MAX_KEY_OFFSET (~0ULL >> 1)
-#define MAX_KEY KEY(MAX_KEY_INODE, MAX_KEY_OFFSET, 0)
-
-#define KEY_START(k) (KEY_OFFSET(k) - KEY_SIZE(k))
-#define START_KEY(k) KEY(KEY_INODE(k), KEY_START(k), 0)
-
-#define PTR_DEV_BITS 12
-
-PTR_FIELD(PTR_DEV, 51, PTR_DEV_BITS)
-PTR_FIELD(PTR_OFFSET, 8, 43)
-PTR_FIELD(PTR_GEN, 0, 8)
-
-#define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1)
-
-#define MAKE_PTR(gen, offset, dev) \
- ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen)
-
-/* Bkey utility code */
-
-static inline unsigned long bkey_u64s(const struct bkey *k)
-{
- return (sizeof(struct bkey) / sizeof(__u64)) + KEY_PTRS(k);
-}
-
-static inline unsigned long bkey_bytes(const struct bkey *k)
-{
- return bkey_u64s(k) * sizeof(__u64);
-}
-
-#define bkey_copy(_dest, _src) memcpy(_dest, _src, bkey_bytes(_src))
-
-static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src)
-{
- SET_KEY_INODE(dest, KEY_INODE(src));
- SET_KEY_OFFSET(dest, KEY_OFFSET(src));
-}
-
-static inline struct bkey *bkey_next(const struct bkey *k)
-{
- __u64 *d = (void *) k;
-
- return (struct bkey *) (d + bkey_u64s(k));
-}
-
-static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys)
-{
- __u64 *d = (void *) k;
-
- return (struct bkey *) (d + nr_keys);
-}
-/* Enough for a key with 6 pointers */
-#define BKEY_PAD 8
-
-#define BKEY_PADDED(key) \
- union { struct bkey key; __u64 key ## _pad[BKEY_PAD]; }
-
-/* Superblock */
-
-/* Version 0: Cache device
- * Version 1: Backing device
- * Version 2: Seed pointer into btree node checksum
- * Version 3: Cache device with new UUID format
- * Version 4: Backing device with data offset
- */
-#define BCACHE_SB_VERSION_CDEV 0
-#define BCACHE_SB_VERSION_BDEV 1
-#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3
-#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4
-#define BCACHE_SB_VERSION_CDEV_WITH_FEATURES 5
-#define BCACHE_SB_VERSION_BDEV_WITH_FEATURES 6
-#define BCACHE_SB_MAX_VERSION 6
-
-#define SB_SECTOR 8
-#define SB_OFFSET (SB_SECTOR << SECTOR_SHIFT)
-#define SB_SIZE 4096
-#define SB_LABEL_SIZE 32
-#define SB_JOURNAL_BUCKETS 256U
-/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */
-#define MAX_CACHES_PER_SET 8
-
-#define BDEV_DATA_START_DEFAULT 16 /* sectors */
-
-struct cache_sb_disk {
- __le64 csum;
- __le64 offset; /* sector where this sb was written */
- __le64 version;
-
- __u8 magic[16];
-
- __u8 uuid[16];
- union {
- __u8 set_uuid[16];
- __le64 set_magic;
- };
- __u8 label[SB_LABEL_SIZE];
-
- __le64 flags;
- __le64 seq;
-
- __le64 feature_compat;
- __le64 feature_incompat;
- __le64 feature_ro_compat;
-
- __le64 pad[5];
-
- union {
- struct {
- /* Cache devices */
- __le64 nbuckets; /* device size */
-
- __le16 block_size; /* sectors */
- __le16 bucket_size; /* sectors */
-
- __le16 nr_in_set;
- __le16 nr_this_dev;
- };
- struct {
- /* Backing devices */
- __le64 data_offset;
-
- /*
- * block_size from the cache device section is still used by
- * backing devices, so don't add anything here until we fix
- * things to not need it for backing devices anymore
- */
- };
- };
-
- __le32 last_mount; /* time overflow in y2106 */
-
- __le16 first_bucket;
- union {
- __le16 njournal_buckets;
- __le16 keys;
- };
- __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */
- __le16 obso_bucket_size_hi; /* obsoleted */
-};
-
-/*
- * This is for in-memory bcache super block.
- * NOTE: cache_sb is NOT exactly mapping to cache_sb_disk, the member
- * size, ordering and even whole struct size may be different
- * from cache_sb_disk.
- */
-struct cache_sb {
- __u64 offset; /* sector where this sb was written */
- __u64 version;
-
- __u8 magic[16];
-
- __u8 uuid[16];
- union {
- __u8 set_uuid[16];
- __u64 set_magic;
- };
- __u8 label[SB_LABEL_SIZE];
-
- __u64 flags;
- __u64 seq;
-
- __u64 feature_compat;
- __u64 feature_incompat;
- __u64 feature_ro_compat;
-
- union {
- struct {
- /* Cache devices */
- __u64 nbuckets; /* device size */
-
- __u16 block_size; /* sectors */
- __u16 nr_in_set;
- __u16 nr_this_dev;
- __u32 bucket_size; /* sectors */
- };
- struct {
- /* Backing devices */
- __u64 data_offset;
-
- /*
- * block_size from the cache device section is still used by
- * backing devices, so don't add anything here until we fix
- * things to not need it for backing devices anymore
- */
- };
- };
-
- __u32 last_mount; /* time overflow in y2106 */
-
- __u16 first_bucket;
- union {
- __u16 njournal_buckets;
- __u16 keys;
- };
- __u64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */
-};
-
-static inline _Bool SB_IS_BDEV(const struct cache_sb *sb)
-{
- return sb->version == BCACHE_SB_VERSION_BDEV
- || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET
- || sb->version == BCACHE_SB_VERSION_BDEV_WITH_FEATURES;
-}
-
-BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1);
-BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1);
-BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
-#define CACHE_REPLACEMENT_LRU 0U
-#define CACHE_REPLACEMENT_FIFO 1U
-#define CACHE_REPLACEMENT_RANDOM 2U
-
-BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4);
-#define CACHE_MODE_WRITETHROUGH 0U
-#define CACHE_MODE_WRITEBACK 1U
-#define CACHE_MODE_WRITEAROUND 2U
-#define CACHE_MODE_NONE 3U
-BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2);
-#define BDEV_STATE_NONE 0U
-#define BDEV_STATE_CLEAN 1U
-#define BDEV_STATE_DIRTY 2U
-#define BDEV_STATE_STALE 3U
-
-/*
- * Magic numbers
- *
- * The various other data structures have their own magic numbers, which are
- * xored with the first part of the cache set's UUID
- */
-
-#define JSET_MAGIC 0x245235c1a3625032ULL
-#define PSET_MAGIC 0x6750e15f87337f91ULL
-#define BSET_MAGIC 0x90135c78b99e07f5ULL
-
-static inline __u64 jset_magic(struct cache_sb *sb)
-{
- return sb->set_magic ^ JSET_MAGIC;
-}
-
-static inline __u64 pset_magic(struct cache_sb *sb)
-{
- return sb->set_magic ^ PSET_MAGIC;
-}
-
-static inline __u64 bset_magic(struct cache_sb *sb)
-{
- return sb->set_magic ^ BSET_MAGIC;
-}
-
-/*
- * Journal
- *
- * On disk format for a journal entry:
- * seq is monotonically increasing; every journal entry has its own unique
- * sequence number.
- *
- * last_seq is the oldest journal entry that still has keys the btree hasn't
- * flushed to disk yet.
- *
- * version is for on disk format changes.
- */
-
-#define BCACHE_JSET_VERSION_UUIDv1 1
-#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */
-#define BCACHE_JSET_VERSION 1
-
-struct jset {
- __u64 csum;
- __u64 magic;
- __u64 seq;
- __u32 version;
- __u32 keys;
-
- __u64 last_seq;
-
- BKEY_PADDED(uuid_bucket);
- BKEY_PADDED(btree_root);
- __u16 btree_level;
- __u16 pad[3];
-
- __u64 prio_bucket[MAX_CACHES_PER_SET];
-
- union {
- struct bkey start[0];
- __u64 d[0];
- };
-};
-
-/* Bucket prios/gens */
-
-struct prio_set {
- __u64 csum;
- __u64 magic;
- __u64 seq;
- __u32 version;
- __u32 pad;
-
- __u64 next_bucket;
-
- struct bucket_disk {
- __u16 prio;
- __u8 gen;
- } __attribute((packed)) data[];
-};
-
-/* UUIDS - per backing device/flash only volume metadata */
-
-struct uuid_entry {
- union {
- struct {
- __u8 uuid[16];
- __u8 label[32];
- __u32 first_reg; /* time overflow in y2106 */
- __u32 last_reg;
- __u32 invalidated;
-
- __u32 flags;
- /* Size of flash only volumes */
- __u64 sectors;
- };
-
- __u8 pad[128];
- };
-};
-
-BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1);
-
-/* Btree nodes */
-
-/* Version 1: Seed pointer into btree node checksum
- */
-#define BCACHE_BSET_CSUM 1
-#define BCACHE_BSET_VERSION 1
-
-/*
- * Btree nodes
- *
- * On disk a btree node is a list/log of these; within each set the keys are
- * sorted
- */
-struct bset {
- __u64 csum;
- __u64 magic;
- __u64 seq;
- __u32 version;
- __u32 keys;
-
- union {
- struct bkey start[0];
- __u64 d[0];
- };
-};
-
-/* OBSOLETE */
-
-/* UUIDS - per backing device/flash only volume metadata */
-
-struct uuid_entry_v0 {
- __u8 uuid[16];
- __u8 label[32];
- __u32 first_reg;
- __u32 last_reg;
- __u32 invalidated;
- __u32 pad;
-};
-
-#endif /* _LINUX_BCACHE_H */
diff --git a/include/uapi/linux/cdrom.h b/include/uapi/linux/cdrom.h
index 6c34f6e2f1f7..804ff8d98f71 100644
--- a/include/uapi/linux/cdrom.h
+++ b/include/uapi/linux/cdrom.h
@@ -147,6 +147,8 @@
#define CDROM_NEXT_WRITABLE 0x5394 /* get next writable block */
#define CDROM_LAST_WRITTEN 0x5395 /* get last block written on disc */
+#define CDROM_TIMED_MEDIA_CHANGE 0x5396 /* get the timestamp of the last media change */
+
/*******************************************************
* CDROM IOCTL structures
*******************************************************/
@@ -295,6 +297,23 @@ struct cdrom_generic_command
};
};
+/* This struct is used by CDROM_TIMED_MEDIA_CHANGE */
+struct cdrom_timed_media_change_info {
+ __s64 last_media_change; /* Timestamp of the last detected media
+ * change in ms. May be set by caller,
+ * updated upon successful return of
+ * ioctl.
+ */
+ __u64 media_flags; /* Flags returned by ioctl to indicate
+ * media status.
+ */
+};
+#define MEDIA_CHANGED_FLAG 0x1 /* Last detected media change was more
+ * recent than last_media_change set by
+ * caller.
+ */
+/* other bits of media_flags available for future use */
+
/*
* A CD-ROM physical sector size is 2048, 2052, 2056, 2324, 2332, 2336,
* 2340, or 2352 bytes long.
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index b270a07b285e..c45b5e9a9387 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -158,6 +158,7 @@ enum {
#define IORING_TIMEOUT_BOOTTIME (1U << 2)
#define IORING_TIMEOUT_REALTIME (1U << 3)
#define IORING_LINK_TIMEOUT_UPDATE (1U << 4)
+#define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5)
#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
/*
diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
index 52b54d13f385..6acd4ccafbf7 100644
--- a/include/uapi/linux/mctp.h
+++ b/include/uapi/linux/mctp.h
@@ -10,6 +10,7 @@
#define __UAPI_MCTP_H
#include <linux/types.h>
+#include <linux/socket.h>
typedef __u8 mctp_eid_t;
@@ -18,11 +19,13 @@ struct mctp_addr {
};
struct sockaddr_mctp {
- unsigned short int smctp_family;
- int smctp_network;
+ __kernel_sa_family_t smctp_family;
+ __u16 __smctp_pad0;
+ unsigned int smctp_network;
struct mctp_addr smctp_addr;
__u8 smctp_type;
__u8 smctp_tag;
+ __u8 __smctp_pad1;
};
#define MCTP_NET_ANY 0x0