68 files changed, 1854 insertions, 274 deletions
diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node index 3e90e1f3bf0a..f7ce68fbd4b9 100644 --- a/Documentation/ABI/stable/sysfs-devices-node +++ b/Documentation/ABI/stable/sysfs-devices-node @@ -90,4 +90,89 @@ Date: December 2009 Contact: Lee Schermerhorn <lee.schermerhorn@hp.com> Description: The node's huge page size control/query attributes. - See Documentation/admin-guide/mm/hugetlbpage.rst
\ No newline at end of file + See Documentation/admin-guide/mm/hugetlbpage.rst + +What: /sys/devices/system/node/nodeX/accessY/ +Date: December 2018 +Contact: Keith Busch <keith.busch@intel.com> +Description: + The node's relationship to other nodes for access class "Y". + +What: /sys/devices/system/node/nodeX/accessY/initiators/ +Date: December 2018 +Contact: Keith Busch <keith.busch@intel.com> +Description: + The directory containing symlinks to memory initiator + nodes that have class "Y" access to this target node's + memory. CPUs and other memory initiators in nodes not in + the list accessing this node's memory may have different + performance. + +What: /sys/devices/system/node/nodeX/accessY/targets/ +Date: December 2018 +Contact: Keith Busch <keith.busch@intel.com> +Description: + The directory containing symlinks to memory targets that + this initiator node has class "Y" access to. + +What: /sys/devices/system/node/nodeX/accessY/initiators/read_bandwidth +Date: December 2018 +Contact: Keith Busch <keith.busch@intel.com> +Description: + This node's read bandwidth in MB/s when accessed from + nodes found in this access class's linked initiators. + +What: /sys/devices/system/node/nodeX/accessY/initiators/read_latency +Date: December 2018 +Contact: Keith Busch <keith.busch@intel.com> +Description: + This node's read latency in nanoseconds when accessed + from nodes found in this access class's linked initiators. + +What: /sys/devices/system/node/nodeX/accessY/initiators/write_bandwidth +Date: December 2018 +Contact: Keith Busch <keith.busch@intel.com> +Description: + This node's write bandwidth in MB/s when accessed from + nodes found in this access class's linked initiators. + +What: /sys/devices/system/node/nodeX/accessY/initiators/write_latency +Date: December 2018 +Contact: Keith Busch <keith.busch@intel.com> +Description: + This node's write latency in nanoseconds when accessed + from nodes found in this access class's linked initiators. + +What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/ +Date: December 2018 +Contact: Keith Busch <keith.busch@intel.com> +Description: + The directory containing attributes for the memory-side cache + level 'Y'. + +What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/indexing +Date: December 2018 +Contact: Keith Busch <keith.busch@intel.com> +Description: + The cache's associativity indexing: 0 for direct-mapped, + non-zero if indexed. + +What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/line_size +Date: December 2018 +Contact: Keith Busch <keith.busch@intel.com> +Description: + The number of bytes accessed from the next cache level on a + cache miss. + +What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/size +Date: December 2018 +Contact: Keith Busch <keith.busch@intel.com> +Description: + The size of this memory-side cache in bytes. + +What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/write_policy +Date: December 2018 +Contact: Keith Busch <keith.busch@intel.com> +Description: + The cache write policy: 0 for write-back, 1 for write-through, + any other value for other or unknown policies. diff --git a/Documentation/admin-guide/mm/numaperf.rst b/Documentation/admin-guide/mm/numaperf.rst new file mode 100644 index 000000000000..b79f70c04397 --- /dev/null +++ b/Documentation/admin-guide/mm/numaperf.rst @@ -0,0 +1,169 @@ +.. _numaperf: + +============= +NUMA Locality +============= + +Some platforms may have multiple types of memory attached to a compute +node.
These disparate memory ranges may share some characteristics, such +as CPU cache coherence, but may have different performance. For example, +different media types and buses affect bandwidth and latency. + +A system supports such heterogeneous memory by grouping each memory type +under different domains, or "nodes", based on locality and performance +characteristics. Some memory may share the same node as a CPU, and others +are provided as memory only nodes. While memory only nodes do not provide +CPUs, they may still be local to one or more compute nodes relative to +other nodes. The following diagram shows one such example of two compute +nodes with local memory and a memory only node for each compute node: + + +------------------+ +------------------+ + | Compute Node 0 +-----+ Compute Node 1 | + | Local Node0 Mem | | Local Node1 Mem | + +--------+---------+ +--------+---------+ + | | + +--------+---------+ +--------+---------+ + | Slower Node2 Mem | | Slower Node3 Mem | + +------------------+ +------------------+ + +A "memory initiator" is a node containing one or more devices such as +CPUs or separate memory I/O devices that can initiate memory requests. +A "memory target" is a node containing one or more physical address +ranges accessible from one or more memory initiators. + +When multiple memory initiators exist, they may not all have the same +performance when accessing a given memory target. Each initiator-target +pair may be organized into different ranked access classes to represent +this relationship. The highest performing initiator to a given target +is considered to be one of that target's local initiators, and given +the highest access class, 0. Any given target may have one or more +local initiators, and any given initiator may have multiple local +memory targets. + +To aid applications matching memory targets with their initiators, the +kernel provides symlinks that relate the two to each other. The following +example lists the relationship for the access class "0" memory initiators +and targets:: + + # symlinks -v /sys/devices/system/node/nodeX/access0/targets/ + relative: /sys/devices/system/node/nodeX/access0/targets/nodeY -> ../../nodeY + + # symlinks -v /sys/devices/system/node/nodeY/access0/initiators/ + relative: /sys/devices/system/node/nodeY/access0/initiators/nodeX -> ../../nodeX + +A memory initiator may have multiple memory targets in the same access +class. A target's initiators in a given class all share the same relative +performance when accessing that target. The targets within an initiator's +access class, though, do not necessarily perform the same as each other. + +================ +NUMA Performance +================ + +Applications may wish to consider which node they want their memory to +be allocated from based on the node's performance characteristics. If +the system provides these attributes, the kernel exports them under the +node sysfs hierarchy by appending the attributes directory under the +memory node's access class 0 initiators as follows:: + + /sys/devices/system/node/nodeY/access0/initiators/ + +These attributes apply only when accessed from nodes that are linked +under this access class's initiators.
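For illustration, the short userspace sketch below (not part of this patch; the node number and the program itself are hypothetical) lists which initiator nodes are linked under a target node's access class 0, i.e. the nodes for which the attributes described below apply::

    #include <dirent.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            /* Assumed target node, for illustration only. */
            const char *path = "/sys/devices/system/node/node1/access0/initiators";
            struct dirent *de;
            DIR *dir = opendir(path);

            if (!dir) {
                    perror(path);
                    return 1;
            }
            while ((de = readdir(dir)) != NULL) {
                    /* Each linked initiator appears as a symlink named "nodeX". */
                    if (strncmp(de->d_name, "node", 4) == 0)
                            printf("initiator: %s\n", de->d_name);
            }
            closedir(dir);
            return 0;
    }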
+ +The performance characteristics the kernel provides for the local initiators +are exported as follows:: + + # tree -P "read*|write*" /sys/devices/system/node/nodeY/access0/initiators/ + /sys/devices/system/node/nodeY/access0/initiators/ + |-- read_bandwidth + |-- read_latency + |-- write_bandwidth + `-- write_latency + +The bandwidth attributes are provided in MiB/second. + +The latency attributes are provided in nanoseconds. + +The values reported here correspond to the rated latency and bandwidth +for the platform. + +========== +NUMA Cache +========== + +System memory may be constructed in a hierarchy of elements with various +performance characteristics in order to provide a large address space of +slower performing memory cached by smaller, higher performing memory. The +system physical addresses memory initiators are aware of are provided +by the last memory level in the hierarchy. The system meanwhile uses +higher performing memory to transparently cache access to progressively +slower levels. + +The term "far memory" is used to denote the last level memory in the +hierarchy. Each increasing cache level provides higher performing +initiator access, and the term "near memory" represents the fastest +cache provided by the system. + +This numbering is different from CPU caches where the cache level (e.g. +L1, L2, L3) uses the CPU-side view where each increased level is lower +performing. In contrast, the memory cache level is relative to the last +level memory, so the higher numbered cache level corresponds to memory +nearer to the CPU, and further from far memory. + +The memory-side caches are not directly addressable by software. When +software accesses a system address, the system will return it from the +near memory cache if it is present. If it is not present, the system +accesses the next level of memory until there is either a hit in that +cache level, or it reaches far memory. + +An application does not need to know about caching attributes in order +to use the system. Software may optionally query the memory cache +attributes in order to maximize the performance out of such a setup. +If the system provides a way for the kernel to discover this information, +for example with ACPI HMAT (Heterogeneous Memory Attribute Table), +the kernel will append these attributes to the NUMA node memory target. + +When the kernel first registers a memory cache with a node, the kernel +will create the following directory:: + + /sys/devices/system/node/nodeX/memory_side_cache/ + +If that directory is not present, the system either does not provide +a memory-side cache, or that information is not accessible to the kernel. + +The attributes for each level of cache are provided under its cache +level index:: + + /sys/devices/system/node/nodeX/memory_side_cache/indexA/ + /sys/devices/system/node/nodeX/memory_side_cache/indexB/ + /sys/devices/system/node/nodeX/memory_side_cache/indexC/ + +Each cache level's directory provides its attributes. For example, the +following shows a single cache level and the attributes available for +software to query:: + + # tree /sys/devices/system/node/node0/memory_side_cache/ + /sys/devices/system/node/node0/memory_side_cache/ + |-- index1 + | |-- indexing + | |-- line_size + | |-- size + | `-- write_policy + +The "indexing" will be 0 if it is a direct-mapped cache, and non-zero +for any other index-based, multi-way associativity. + +The "line_size" is the number of bytes accessed from the next cache +level on a miss.
+ +The "size" is the number of bytes provided by this cache level. + +The "write_policy" will be 0 for write-back, and non-zero for +write-through caching. + +======== +See Also +======== +.. [1] https://www.uefi.org/sites/default/files/resources/ACPI_6_2.pdf + Section 5.2.27 diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt index 4f45f71149cb..4a0a9c3f4af6 100644 --- a/Documentation/filesystems/debugfs.txt +++ b/Documentation/filesystems/debugfs.txt @@ -31,10 +31,10 @@ This call, if successful, will make a directory called name underneath the indicated parent directory. If parent is NULL, the directory will be created in the debugfs root. On success, the return value is a struct dentry pointer which can be used to create files in the directory (and to -clean it up at the end). A NULL return value indicates that something went -wrong. If ERR_PTR(-ENODEV) is returned, that is an indication that the -kernel has been built without debugfs support and none of the functions -described below will work. +clean it up at the end). An ERR_PTR(-ERROR) return value indicates that +something went wrong. If ERR_PTR(-ENODEV) is returned, that is an +indication that the kernel has been built without debugfs support and none +of the functions described below will work. The most general way to create a file within a debugfs directory is with: @@ -48,8 +48,9 @@ should hold the file, data will be stored in the i_private field of the resulting inode structure, and fops is a set of file operations which implement the file's behavior. At a minimum, the read() and/or write() operations should be provided; others can be included as needed. Again, -the return value will be a dentry pointer to the created file, NULL for -error, or ERR_PTR(-ENODEV) if debugfs support is missing. +the return value will be a dentry pointer to the created file, +ERR_PTR(-ERROR) on error, or ERR_PTR(-ENODEV) if debugfs support is +missing. Create a file with an initial size, the following function can be used instead: @@ -214,7 +215,8 @@ can be removed with: void debugfs_remove(struct dentry *dentry); -The dentry value can be NULL, in which case nothing will be removed. +The dentry value can be NULL or an error value, in which case nothing will +be removed. 
Once upon a time, debugfs users were required to remember the dentry pointer for every debugfs file they created so that all files could be diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index eac1d0cc595c..7ff800045434 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -45,7 +45,7 @@ static inline int get_cpu_for_acpi_id(u32 uid) return -EINVAL; } -static int __init acpi_parse_gicc_pxm(struct acpi_subtable_header *header, +static int __init acpi_parse_gicc_pxm(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_srat_gicc_affinity *pa; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 824de7038967..bb4b3f07761a 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -586,7 +586,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) } static int __init -acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, +acpi_parse_gic_cpu_interface(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_generic_interrupt *processor; @@ -595,7 +595,7 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, if (BAD_MADT_GICC_ENTRY(processor, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); acpi_map_gic_cpu_interface(processor); diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 41eb281709da..1435e7a1a8cd 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -177,7 +177,7 @@ struct acpi_table_madt *acpi_madt __initdata; static u8 has_8259; static int __init -acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, +acpi_parse_lapic_addr_ovr(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic_override *lapic; @@ -195,7 +195,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, } static int __init -acpi_parse_lsapic(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_lsapic(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_local_sapic *lsapic; @@ -216,7 +216,7 @@ acpi_parse_lsapic(struct acpi_subtable_header * header, const unsigned long end) } static int __init -acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic_nmi *lacpi_nmi; @@ -230,7 +230,7 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e } static int __init -acpi_parse_iosapic(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_iosapic(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_io_sapic *iosapic; @@ -245,7 +245,7 @@ acpi_parse_iosapic(struct acpi_subtable_header * header, const unsigned long end static unsigned int __initdata acpi_madt_rev; static int __init -acpi_parse_plat_int_src(struct acpi_subtable_header * header, +acpi_parse_plat_int_src(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_interrupt_source *plintsrc; @@ -329,7 +329,7 @@ unsigned int get_cpei_target_cpu(void) } static int __init -acpi_parse_int_src_ovr(struct acpi_subtable_header * header, +acpi_parse_int_src_ovr(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_interrupt_override *p; @@ -350,7 +350,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, } static int __init 
-acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_nmi_src(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_nmi_source *nmi_src; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8dcbf6890714..9fc92e4539d8 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -197,7 +197,7 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) } static int __init -acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) +acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_local_x2apic *processor = NULL; #ifdef CONFIG_X86_X2APIC @@ -210,7 +210,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) if (BAD_MADT_ENTRY(processor, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); #ifdef CONFIG_X86_X2APIC apic_id = processor->local_apic_id; @@ -242,7 +242,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) } static int __init -acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic *processor = NULL; @@ -251,7 +251,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) if (BAD_MADT_ENTRY(processor, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); /* Ignore invalid ID */ if (processor->id == 0xff) @@ -272,7 +272,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) } static int __init -acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) +acpi_parse_sapic(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_local_sapic *processor = NULL; @@ -281,7 +281,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) if (BAD_MADT_ENTRY(processor, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */ processor->processor_id, /* ACPI ID */ @@ -291,7 +291,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) } static int __init -acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, +acpi_parse_lapic_addr_ovr(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic_override *lapic_addr_ovr = NULL; @@ -301,7 +301,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, if (BAD_MADT_ENTRY(lapic_addr_ovr, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); acpi_lapic_addr = lapic_addr_ovr->address; @@ -309,7 +309,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, } static int __init -acpi_parse_x2apic_nmi(struct acpi_subtable_header *header, +acpi_parse_x2apic_nmi(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_local_x2apic_nmi *x2apic_nmi = NULL; @@ -319,7 +319,7 @@ acpi_parse_x2apic_nmi(struct acpi_subtable_header *header, if (BAD_MADT_ENTRY(x2apic_nmi, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); if (x2apic_nmi->lint != 1) printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); @@ 
-328,7 +328,7 @@ acpi_parse_x2apic_nmi(struct acpi_subtable_header *header, } static int __init -acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic_nmi *lapic_nmi = NULL; @@ -337,7 +337,7 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e if (BAD_MADT_ENTRY(lapic_nmi, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); if (lapic_nmi->lint != 1) printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); @@ -449,7 +449,7 @@ static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity, } static int __init -acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_ioapic(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_io_apic *ioapic = NULL; struct ioapic_domain_cfg cfg = { @@ -462,7 +462,7 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) if (BAD_MADT_ENTRY(ioapic, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); /* Statically assign IRQ numbers for IOAPICs hosting legacy IRQs */ if (ioapic->global_irq_base < nr_legacy_irqs()) @@ -508,7 +508,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, } static int __init -acpi_parse_int_src_ovr(struct acpi_subtable_header * header, +acpi_parse_int_src_ovr(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_interrupt_override *intsrc = NULL; @@ -518,7 +518,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, if (BAD_MADT_ENTRY(intsrc, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { acpi_sci_ioapic_setup(intsrc->source_irq, @@ -550,7 +550,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, } static int __init -acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_nmi_src(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_nmi_source *nmi_src = NULL; @@ -559,7 +559,7 @@ acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end if (BAD_MADT_ENTRY(nmi_src, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); /* TBD: Support nimsrc entries? 
*/ diff --git a/block/blk-integrity.c b/block/blk-integrity.c index d1ab089e0919..85864c71e858 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -365,6 +365,7 @@ static struct attribute *integrity_attrs[] = { &integrity_device_entry.attr, NULL, }; +ATTRIBUTE_GROUPS(integrity); static const struct sysfs_ops integrity_ops = { .show = &integrity_attr_show, @@ -372,7 +373,7 @@ static const struct sysfs_ops integrity_ops = { }; static struct kobj_type integrity_ktype = { - .default_attrs = integrity_attrs, + .default_groups = integrity_groups, .sysfs_ops = &integrity_ops, }; diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 3f9c3f4ac44c..5315e538b3b1 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -173,10 +173,6 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) return ret; } -static struct attribute *default_ctx_attrs[] = { - NULL, -}; - static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = { .attr = {.name = "nr_tags", .mode = 0444 }, .show = blk_mq_hw_sysfs_nr_tags_show, @@ -196,6 +192,7 @@ static struct attribute *default_hw_ctx_attrs[] = { &blk_mq_hw_sysfs_cpus.attr, NULL, }; +ATTRIBUTE_GROUPS(default_hw_ctx); static const struct sysfs_ops blk_mq_sysfs_ops = { .show = blk_mq_sysfs_show, @@ -214,13 +211,12 @@ static struct kobj_type blk_mq_ktype = { static struct kobj_type blk_mq_ctx_ktype = { .sysfs_ops = &blk_mq_sysfs_ops, - .default_attrs = default_ctx_attrs, .release = blk_mq_ctx_sysfs_release, }; static struct kobj_type blk_mq_hw_ktype = { .sysfs_ops = &blk_mq_hw_sysfs_ops, - .default_attrs = default_hw_ctx_attrs, + .default_groups = default_hw_ctx_groups, .release = blk_mq_hw_sysfs_release, }; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 422327089e0f..7a95a1eb27e1 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -769,6 +769,7 @@ static struct attribute *default_attrs[] = { #endif NULL, }; +ATTRIBUTE_GROUPS(default); #define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr) @@ -890,7 +891,7 @@ static const struct sysfs_ops queue_sysfs_ops = { struct kobj_type blk_queue_ktype = { .sysfs_ops = &queue_sysfs_ops, - .default_attrs = default_attrs, + .default_groups = default_groups, .release = blk_release_queue, }; diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 4e015c77e48e..283ee94224c6 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -475,6 +475,7 @@ config ACPI_REDUCED_HARDWARE_ONLY If you are unsure what to do, do not enable this option. 
source "drivers/acpi/nfit/Kconfig" +source "drivers/acpi/hmat/Kconfig" source "drivers/acpi/apei/Kconfig" source "drivers/acpi/dptf/Kconfig" diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index bb857421c2e8..5d361e4e3405 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -80,6 +80,7 @@ obj-$(CONFIG_ACPI_PROCESSOR) += processor.o obj-$(CONFIG_ACPI) += container.o obj-$(CONFIG_ACPI_THERMAL) += thermal.o obj-$(CONFIG_ACPI_NFIT) += nfit/ +obj-$(CONFIG_ACPI_HMAT) += hmat/ obj-$(CONFIG_ACPI) += acpi_memhotplug.o obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o obj-$(CONFIG_ACPI_BATTERY) += battery.o diff --git a/drivers/acpi/hmat/Kconfig b/drivers/acpi/hmat/Kconfig new file mode 100644 index 000000000000..95a29964dbea --- /dev/null +++ b/drivers/acpi/hmat/Kconfig @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 +config ACPI_HMAT + bool "ACPI Heterogeneous Memory Attribute Table Support" + depends on ACPI_NUMA + select HMEM_REPORTING + help + If set, this option has the kernel parse and report the + platform's ACPI HMAT (Heterogeneous Memory Attributes Table), + register memory initiators with their targets, and export + performance attributes through the node's sysfs device if + provided. diff --git a/drivers/acpi/hmat/Makefile b/drivers/acpi/hmat/Makefile new file mode 100644 index 000000000000..e909051d3d00 --- /dev/null +++ b/drivers/acpi/hmat/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_ACPI_HMAT) := hmat.o diff --git a/drivers/acpi/hmat/hmat.c b/drivers/acpi/hmat/hmat.c new file mode 100644 index 000000000000..96b7d39a97c6 --- /dev/null +++ b/drivers/acpi/hmat/hmat.c @@ -0,0 +1,666 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019, Intel Corporation. + * + * Heterogeneous Memory Attributes Table (HMAT) representation + * + * This program parses and reports the platform's HMAT tables, and registers + * the applicable attributes with the node's interfaces. + */ + +#include <linux/acpi.h> +#include <linux/bitops.h> +#include <linux/device.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/list_sort.h> +#include <linux/node.h> +#include <linux/sysfs.h> + +static __initdata u8 hmat_revision; + +static __initdata LIST_HEAD(targets); +static __initdata LIST_HEAD(initiators); +static __initdata LIST_HEAD(localities); + +/* + * The defined enum order is used to prioritize attributes to break ties when + * selecting the best performing node. 
+ */ +enum locality_types { + WRITE_LATENCY, + READ_LATENCY, + WRITE_BANDWIDTH, + READ_BANDWIDTH, +}; + +static struct memory_locality *localities_types[4]; + +struct memory_target { + struct list_head node; + unsigned int memory_pxm; + unsigned int processor_pxm; + struct node_hmem_attrs hmem_attrs; +}; + +struct memory_initiator { + struct list_head node; + unsigned int processor_pxm; +}; + +struct memory_locality { + struct list_head node; + struct acpi_hmat_locality *hmat_loc; +}; + +static __init struct memory_initiator *find_mem_initiator(unsigned int cpu_pxm) +{ + struct memory_initiator *initiator; + + list_for_each_entry(initiator, &initiators, node) + if (initiator->processor_pxm == cpu_pxm) + return initiator; + return NULL; +} + +static __init struct memory_target *find_mem_target(unsigned int mem_pxm) +{ + struct memory_target *target; + + list_for_each_entry(target, &targets, node) + if (target->memory_pxm == mem_pxm) + return target; + return NULL; +} + +static __init void alloc_memory_initiator(unsigned int cpu_pxm) +{ + struct memory_initiator *initiator; + + if (pxm_to_node(cpu_pxm) == NUMA_NO_NODE) + return; + + initiator = find_mem_initiator(cpu_pxm); + if (initiator) + return; + + initiator = kzalloc(sizeof(*initiator), GFP_KERNEL); + if (!initiator) + return; + + initiator->processor_pxm = cpu_pxm; + list_add_tail(&initiator->node, &initiators); +} + +static __init void alloc_memory_target(unsigned int mem_pxm) +{ + struct memory_target *target; + + if (pxm_to_node(mem_pxm) == NUMA_NO_NODE) + return; + + target = find_mem_target(mem_pxm); + if (target) + return; + + target = kzalloc(sizeof(*target), GFP_KERNEL); + if (!target) + return; + + target->memory_pxm = mem_pxm; + target->processor_pxm = PXM_INVAL; + list_add_tail(&target->node, &targets); +} + +static __init const char *hmat_data_type(u8 type) +{ + switch (type) { + case ACPI_HMAT_ACCESS_LATENCY: + return "Access Latency"; + case ACPI_HMAT_READ_LATENCY: + return "Read Latency"; + case ACPI_HMAT_WRITE_LATENCY: + return "Write Latency"; + case ACPI_HMAT_ACCESS_BANDWIDTH: + return "Access Bandwidth"; + case ACPI_HMAT_READ_BANDWIDTH: + return "Read Bandwidth"; + case ACPI_HMAT_WRITE_BANDWIDTH: + return "Write Bandwidth"; + default: + return "Reserved"; + } +} + +static __init const char *hmat_data_type_suffix(u8 type) +{ + switch (type) { + case ACPI_HMAT_ACCESS_LATENCY: + case ACPI_HMAT_READ_LATENCY: + case ACPI_HMAT_WRITE_LATENCY: + return " nsec"; + case ACPI_HMAT_ACCESS_BANDWIDTH: + case ACPI_HMAT_READ_BANDWIDTH: + case ACPI_HMAT_WRITE_BANDWIDTH: + return " MB/s"; + default: + return ""; + } +} + +static __init u32 hmat_normalize(u16 entry, u64 base, u8 type) +{ + u32 value; + + /* + * Check for invalid and overflow values + */ + if (entry == 0xffff || !entry) + return 0; + else if (base > (UINT_MAX / (entry))) + return 0; + + /* + * Divide by the base unit for version 1, convert latency from + * picosenonds to nanoseconds if revision 2. 
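+ * For example, a revision 2 read latency entry of 5 with an entry base + * unit of 1000 represents 5000 picoseconds and is reported as 5 nanoseconds.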
+ */ + value = entry * base; + if (hmat_revision == 1) { + if (value < 10) + return 0; + value = DIV_ROUND_UP(value, 10); + } else if (hmat_revision == 2) { + switch (type) { + case ACPI_HMAT_ACCESS_LATENCY: + case ACPI_HMAT_READ_LATENCY: + case ACPI_HMAT_WRITE_LATENCY: + value = DIV_ROUND_UP(value, 1000); + break; + default: + break; + } + } + return value; +} + +static __init void hmat_update_target_access(struct memory_target *target, + u8 type, u32 value) +{ + switch (type) { + case ACPI_HMAT_ACCESS_LATENCY: + target->hmem_attrs.read_latency = value; + target->hmem_attrs.write_latency = value; + break; + case ACPI_HMAT_READ_LATENCY: + target->hmem_attrs.read_latency = value; + break; + case ACPI_HMAT_WRITE_LATENCY: + target->hmem_attrs.write_latency = value; + break; + case ACPI_HMAT_ACCESS_BANDWIDTH: + target->hmem_attrs.read_bandwidth = value; + target->hmem_attrs.write_bandwidth = value; + break; + case ACPI_HMAT_READ_BANDWIDTH: + target->hmem_attrs.read_bandwidth = value; + break; + case ACPI_HMAT_WRITE_BANDWIDTH: + target->hmem_attrs.write_bandwidth = value; + break; + default: + break; + } +} + +static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc) +{ + struct memory_locality *loc; + + loc = kzalloc(sizeof(*loc), GFP_KERNEL); + if (!loc) { + pr_notice_once("Failed to allocate HMAT locality\n"); + return; + } + + loc->hmat_loc = hmat_loc; + list_add_tail(&loc->node, &localities); + + switch (hmat_loc->data_type) { + case ACPI_HMAT_ACCESS_LATENCY: + localities_types[READ_LATENCY] = loc; + localities_types[WRITE_LATENCY] = loc; + break; + case ACPI_HMAT_READ_LATENCY: + localities_types[READ_LATENCY] = loc; + break; + case ACPI_HMAT_WRITE_LATENCY: + localities_types[WRITE_LATENCY] = loc; + break; + case ACPI_HMAT_ACCESS_BANDWIDTH: + localities_types[READ_BANDWIDTH] = loc; + localities_types[WRITE_BANDWIDTH] = loc; + break; + case ACPI_HMAT_READ_BANDWIDTH: + localities_types[READ_BANDWIDTH] = loc; + break; + case ACPI_HMAT_WRITE_BANDWIDTH: + localities_types[WRITE_BANDWIDTH] = loc; + break; + default: + break; + } +} + +static __init int hmat_parse_locality(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_hmat_locality *hmat_loc = (void *)header; + struct memory_target *target; + unsigned int init, targ, total_size, ipds, tpds; + u32 *inits, *targs, value; + u16 *entries; + u8 type, mem_hier; + + if (hmat_loc->header.length < sizeof(*hmat_loc)) { + pr_notice("HMAT: Unexpected locality header length: %d\n", + hmat_loc->header.length); + return -EINVAL; + } + + type = hmat_loc->data_type; + mem_hier = hmat_loc->flags & ACPI_HMAT_MEMORY_HIERARCHY; + ipds = hmat_loc->number_of_initiator_Pds; + tpds = hmat_loc->number_of_target_Pds; + total_size = sizeof(*hmat_loc) + sizeof(*entries) * ipds * tpds + + sizeof(*inits) * ipds + sizeof(*targs) * tpds; + if (hmat_loc->header.length < total_size) { + pr_notice("HMAT: Unexpected locality header length:%d, minimum required:%d\n", + hmat_loc->header.length, total_size); + return -EINVAL; + } + + pr_info("HMAT: Locality: Flags:%02x Type:%s Initiator Domains:%d Target Domains:%d Base:%lld\n", + hmat_loc->flags, hmat_data_type(type), ipds, tpds, + hmat_loc->entry_base_unit); + + inits = (u32 *)(hmat_loc + 1); + targs = inits + ipds; + entries = (u16 *)(targs + tpds); + for (init = 0; init < ipds; init++) { + alloc_memory_initiator(inits[init]); + for (targ = 0; targ < tpds; targ++) { + value = hmat_normalize(entries[init * tpds + targ], + hmat_loc->entry_base_unit, + type); + pr_info(" 
Initiator-Target[%d-%d]:%d%s\n", + inits[init], targs[targ], value, + hmat_data_type_suffix(type)); + + if (mem_hier == ACPI_HMAT_MEMORY) { + target = find_mem_target(targs[targ]); + if (target && target->processor_pxm == inits[init]) + hmat_update_target_access(target, type, value); + } + } + } + + if (mem_hier == ACPI_HMAT_MEMORY) + hmat_add_locality(hmat_loc); + + return 0; +} + +static __init int hmat_parse_cache(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_hmat_cache *cache = (void *)header; + struct node_cache_attrs cache_attrs; + u32 attrs; + + if (cache->header.length < sizeof(*cache)) { + pr_notice("HMAT: Unexpected cache header length: %d\n", + cache->header.length); + return -EINVAL; + } + + attrs = cache->cache_attributes; + pr_info("HMAT: Cache: Domain:%d Size:%llu Attrs:%08x SMBIOS Handles:%d\n", + cache->memory_PD, cache->cache_size, attrs, + cache->number_of_SMBIOShandles); + + cache_attrs.size = cache->cache_size; + cache_attrs.level = (attrs & ACPI_HMAT_CACHE_LEVEL) >> 4; + cache_attrs.line_size = (attrs & ACPI_HMAT_CACHE_LINE_SIZE) >> 16; + + switch ((attrs & ACPI_HMAT_CACHE_ASSOCIATIVITY) >> 8) { + case ACPI_HMAT_CA_DIRECT_MAPPED: + cache_attrs.indexing = NODE_CACHE_DIRECT_MAP; + break; + case ACPI_HMAT_CA_COMPLEX_CACHE_INDEXING: + cache_attrs.indexing = NODE_CACHE_INDEXED; + break; + case ACPI_HMAT_CA_NONE: + default: + cache_attrs.indexing = NODE_CACHE_OTHER; + break; + } + + switch ((attrs & ACPI_HMAT_WRITE_POLICY) >> 12) { + case ACPI_HMAT_CP_WB: + cache_attrs.write_policy = NODE_CACHE_WRITE_BACK; + break; + case ACPI_HMAT_CP_WT: + cache_attrs.write_policy = NODE_CACHE_WRITE_THROUGH; + break; + case ACPI_HMAT_CP_NONE: + default: + cache_attrs.write_policy = NODE_CACHE_WRITE_OTHER; + break; + } + + node_add_cache(pxm_to_node(cache->memory_PD), &cache_attrs); + return 0; +} + +static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_hmat_proximity_domain *p = (void *)header; + struct memory_target *target = NULL; + + if (p->header.length != sizeof(*p)) { + pr_notice("HMAT: Unexpected address range header length: %d\n", + p->header.length); + return -EINVAL; + } + + if (hmat_revision == 1) + pr_info("HMAT: Memory (%#llx length %#llx) Flags:%04x Processor Domain:%d Memory Domain:%d\n", + p->reserved3, p->reserved4, p->flags, p->processor_PD, + p->memory_PD); + else + pr_info("HMAT: Memory Flags:%04x Processor Domain:%d Memory Domain:%d\n", + p->flags, p->processor_PD, p->memory_PD); + + if (p->flags & ACPI_HMAT_MEMORY_PD_VALID) { + target = find_mem_target(p->memory_PD); + if (!target) { + pr_debug("HMAT: Memory Domain missing from SRAT\n"); + return -EINVAL; + } + } + if (target && p->flags & ACPI_HMAT_PROCESSOR_PD_VALID) { + int p_node = pxm_to_node(p->processor_PD); + + if (p_node == NUMA_NO_NODE) { + pr_debug("HMAT: Invalid Processor Domain\n"); + return -EINVAL; + } + target->processor_pxm = p_node; + } + + return 0; +} + +static int __init hmat_parse_subtable(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_hmat_structure *hdr = (void *)header; + + if (!hdr) + return -EINVAL; + + switch (hdr->type) { + case ACPI_HMAT_TYPE_PROXIMITY: + return hmat_parse_proximity_domain(header, end); + case ACPI_HMAT_TYPE_LOCALITY: + return hmat_parse_locality(header, end); + case ACPI_HMAT_TYPE_CACHE: + return hmat_parse_cache(header, end); + default: + return -EINVAL; + } +} + +static __init int srat_parse_mem_affinity(union 
acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_srat_mem_affinity *ma = (void *)header; + + if (!ma) + return -EINVAL; + if (!(ma->flags & ACPI_SRAT_MEM_ENABLED)) + return 0; + alloc_memory_target(ma->proximity_domain); + return 0; +} + +static __init u32 hmat_initiator_perf(struct memory_target *target, + struct memory_initiator *initiator, + struct acpi_hmat_locality *hmat_loc) +{ + unsigned int ipds, tpds, i, idx = 0, tdx = 0; + u32 *inits, *targs; + u16 *entries; + + ipds = hmat_loc->number_of_initiator_Pds; + tpds = hmat_loc->number_of_target_Pds; + inits = (u32 *)(hmat_loc + 1); + targs = inits + ipds; + entries = (u16 *)(targs + tpds); + + for (i = 0; i < ipds; i++) { + if (inits[i] == initiator->processor_pxm) { + idx = i; + break; + } + } + + if (i == ipds) + return 0; + + for (i = 0; i < tpds; i++) { + if (targs[i] == target->memory_pxm) { + tdx = i; + break; + } + } + if (i == tpds) + return 0; + + return hmat_normalize(entries[idx * tpds + tdx], + hmat_loc->entry_base_unit, + hmat_loc->data_type); +} + +static __init bool hmat_update_best(u8 type, u32 value, u32 *best) +{ + bool updated = false; + + if (!value) + return false; + + switch (type) { + case ACPI_HMAT_ACCESS_LATENCY: + case ACPI_HMAT_READ_LATENCY: + case ACPI_HMAT_WRITE_LATENCY: + if (!*best || *best > value) { + *best = value; + updated = true; + } + break; + case ACPI_HMAT_ACCESS_BANDWIDTH: + case ACPI_HMAT_READ_BANDWIDTH: + case ACPI_HMAT_WRITE_BANDWIDTH: + if (!*best || *best < value) { + *best = value; + updated = true; + } + break; + } + + return updated; +} + +static int initiator_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct memory_initiator *ia; + struct memory_initiator *ib; + unsigned long *p_nodes = priv; + + ia = list_entry(a, struct memory_initiator, node); + ib = list_entry(b, struct memory_initiator, node); + + set_bit(ia->processor_pxm, p_nodes); + set_bit(ib->processor_pxm, p_nodes); + + return ia->processor_pxm - ib->processor_pxm; +} + +static __init void hmat_register_target_initiators(struct memory_target *target) +{ + static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); + struct memory_initiator *initiator; + unsigned int mem_nid, cpu_nid; + struct memory_locality *loc = NULL; + u32 best = 0; + int i; + + mem_nid = pxm_to_node(target->memory_pxm); + /* + * If the Address Range Structure provides a local processor pxm, link + * only that one. Otherwise, find the best performance attributes and + * register all initiators that match. + */ + if (target->processor_pxm != PXM_INVAL) { + cpu_nid = pxm_to_node(target->processor_pxm); + register_memory_node_under_compute_node(mem_nid, cpu_nid, 0); + return; + } + + if (list_empty(&localities)) + return; + + /* + * We need the initiator list sorted so we can use bitmap_clear for + * previously set initiators when we find a better memory accessor. + * We'll also use the sorting to prime the candidate nodes with known + * initiators. 
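+ * Because the list is sorted by PXM, when a strictly better value is + * found, a single bitmap_clear() of all lower-numbered bits drops every + * candidate visited so far.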
+ */ + bitmap_zero(p_nodes, MAX_NUMNODES); + list_sort(p_nodes, &initiators, initiator_cmp); + for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { + loc = localities_types[i]; + if (!loc) + continue; + + best = 0; + list_for_each_entry(initiator, &initiators, node) { + u32 value; + + if (!test_bit(initiator->processor_pxm, p_nodes)) + continue; + + value = hmat_initiator_perf(target, initiator, loc->hmat_loc); + if (hmat_update_best(loc->hmat_loc->data_type, value, &best)) + bitmap_clear(p_nodes, 0, initiator->processor_pxm); + if (value != best) + clear_bit(initiator->processor_pxm, p_nodes); + } + if (best) + hmat_update_target_access(target, loc->hmat_loc->data_type, best); + } + + for_each_set_bit(i, p_nodes, MAX_NUMNODES) { + cpu_nid = pxm_to_node(i); + register_memory_node_under_compute_node(mem_nid, cpu_nid, 0); + } +} + +static __init void hmat_register_target_perf(struct memory_target *target) +{ + unsigned mem_nid = pxm_to_node(target->memory_pxm); + node_set_perf_attrs(mem_nid, &target->hmem_attrs, 0); +} + +static __init void hmat_register_targets(void) +{ + struct memory_target *target; + + list_for_each_entry(target, &targets, node) { + hmat_register_target_initiators(target); + hmat_register_target_perf(target); + } +} + +static __init void hmat_free_structures(void) +{ + struct memory_target *target, *tnext; + struct memory_locality *loc, *lnext; + struct memory_initiator *initiator, *inext; + + list_for_each_entry_safe(target, tnext, &targets, node) { + list_del(&target->node); + kfree(target); + } + + list_for_each_entry_safe(initiator, inext, &initiators, node) { + list_del(&initiator->node); + kfree(initiator); + } + + list_for_each_entry_safe(loc, lnext, &localities, node) { + list_del(&loc->node); + kfree(loc); + } +} + +static __init int hmat_init(void) +{ + struct acpi_table_header *tbl; + enum acpi_hmat_type i; + acpi_status status; + + if (srat_disabled()) + return 0; + + status = acpi_get_table(ACPI_SIG_SRAT, 0, &tbl); + if (ACPI_FAILURE(status)) + return 0; + + if (acpi_table_parse_entries(ACPI_SIG_SRAT, + sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_MEMORY_AFFINITY, + srat_parse_mem_affinity, 0) < 0) + goto out_put; + acpi_put_table(tbl); + + status = acpi_get_table(ACPI_SIG_HMAT, 0, &tbl); + if (ACPI_FAILURE(status)) + goto out_put; + + hmat_revision = tbl->revision; + switch (hmat_revision) { + case 1: + case 2: + break; + default: + pr_notice("Ignoring HMAT: Unknown revision:%d\n", hmat_revision); + goto out_put; + } + + for (i = ACPI_HMAT_TYPE_PROXIMITY; i < ACPI_HMAT_TYPE_RESERVED; i++) { + if (acpi_table_parse_entries(ACPI_SIG_HMAT, + sizeof(struct acpi_table_hmat), i, + hmat_parse_subtable, 0) < 0) { + pr_notice("Ignoring HMAT: Invalid table"); + goto out_put; + } + } + hmat_register_targets(); +out_put: + hmat_free_structures(); + acpi_put_table(tbl); + return 0; +} +subsys_initcall(hmat_init); diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 867f6e3f2b4f..30995834ad70 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -339,7 +339,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) } static int __init -acpi_parse_x2apic_affinity(struct acpi_subtable_header *header, +acpi_parse_x2apic_affinity(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_srat_x2apic_cpu_affinity *processor_affinity; @@ -348,7 +348,7 @@ acpi_parse_x2apic_affinity(struct acpi_subtable_header *header, if (!processor_affinity) return -EINVAL; - acpi_table_print_srat_entry(header); + 
acpi_table_print_srat_entry(&header->common); /* let architecture-dependent part to do it */ acpi_numa_x2apic_affinity_init(processor_affinity); @@ -357,7 +357,7 @@ acpi_parse_x2apic_affinity(struct acpi_subtable_header *header, } static int __init -acpi_parse_processor_affinity(struct acpi_subtable_header *header, +acpi_parse_processor_affinity(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_srat_cpu_affinity *processor_affinity; @@ -366,7 +366,7 @@ acpi_parse_processor_affinity(struct acpi_subtable_header *header, if (!processor_affinity) return -EINVAL; - acpi_table_print_srat_entry(header); + acpi_table_print_srat_entry(&header->common); /* let architecture-dependent part to do it */ acpi_numa_processor_affinity_init(processor_affinity); @@ -375,7 +375,7 @@ acpi_parse_processor_affinity(struct acpi_subtable_header *header, } static int __init -acpi_parse_gicc_affinity(struct acpi_subtable_header *header, +acpi_parse_gicc_affinity(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_srat_gicc_affinity *processor_affinity; @@ -384,7 +384,7 @@ acpi_parse_gicc_affinity(struct acpi_subtable_header *header, if (!processor_affinity) return -EINVAL; - acpi_table_print_srat_entry(header); + acpi_table_print_srat_entry(&header->common); /* let architecture-dependent part to do it */ acpi_numa_gicc_affinity_init(processor_affinity); @@ -395,7 +395,7 @@ acpi_parse_gicc_affinity(struct acpi_subtable_header *header, static int __initdata parsed_numa_memblks; static int __init -acpi_parse_memory_affinity(struct acpi_subtable_header * header, +acpi_parse_memory_affinity(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_srat_mem_affinity *memory_affinity; @@ -404,7 +404,7 @@ acpi_parse_memory_affinity(struct acpi_subtable_header * header, if (!memory_affinity) return -EINVAL; - acpi_table_print_srat_entry(header); + acpi_table_print_srat_entry(&header->common); /* let architecture-dependent part to do it */ if (!acpi_numa_memory_affinity_init(memory_affinity)) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index b845dc3e0ba9..566270d0e91a 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2238,10 +2238,10 @@ static struct acpi_probe_entry *ape; static int acpi_probe_count; static DEFINE_MUTEX(acpi_probe_mutex); -static int __init acpi_match_madt(struct acpi_subtable_header *header, +static int __init acpi_match_madt(union acpi_subtable_headers *header, const unsigned long end) { - if (!ape->subtable_valid || ape->subtable_valid(header, ape)) + if (!ape->subtable_valid || ape->subtable_valid(&header->common, ape)) if (!ape->probe_subtbl(header, end)) acpi_probe_count++; diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index d7bf936b1646..3b5d04fd5e3e 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -49,6 +49,16 @@ static struct acpi_table_desc initial_tables[ACPI_MAX_TABLES] __initdata; static int acpi_apic_instance __initdata; +enum acpi_subtable_type { + ACPI_SUBTABLE_COMMON, + ACPI_SUBTABLE_HMAT, +}; + +struct acpi_subtable_entry { + union acpi_subtable_headers *hdr; + enum acpi_subtable_type type; +}; + /* * Disable table checksum verification for the early stage due to the size * limitation of the current x86 early mapping implementation. 
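Throughout this series, subtable handlers are converted from taking a struct acpi_subtable_header pointer to the new union acpi_subtable_headers pointer. A minimal sketch of the resulting handler shape (hypothetical function name; the MADT local APIC entry is only an example)::

    #include <linux/acpi.h>
    #include <linux/init.h>

    static int __init example_parse_lapic(union acpi_subtable_headers *header,
                                          const unsigned long end)
    {
            struct acpi_madt_local_apic *lapic =
                            (struct acpi_madt_local_apic *)header;

            if (BAD_MADT_ENTRY(lapic, end))
                    return -EINVAL;

            /* Generic helpers still take the common ACPI subtable header view. */
            acpi_table_print_madt_entry(&header->common);
            return 0;
    }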
@@ -217,6 +227,50 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header) } } +static unsigned long __init +acpi_get_entry_type(struct acpi_subtable_entry *entry) +{ + switch (entry->type) { + case ACPI_SUBTABLE_COMMON: + return entry->hdr->common.type; + case ACPI_SUBTABLE_HMAT: + return entry->hdr->hmat.type; + } + return 0; +} + +static unsigned long __init +acpi_get_entry_length(struct acpi_subtable_entry *entry) +{ + switch (entry->type) { + case ACPI_SUBTABLE_COMMON: + return entry->hdr->common.length; + case ACPI_SUBTABLE_HMAT: + return entry->hdr->hmat.length; + } + return 0; +} + +static unsigned long __init +acpi_get_subtable_header_length(struct acpi_subtable_entry *entry) +{ + switch (entry->type) { + case ACPI_SUBTABLE_COMMON: + return sizeof(entry->hdr->common); + case ACPI_SUBTABLE_HMAT: + return sizeof(entry->hdr->hmat); + } + return 0; +} + +static enum acpi_subtable_type __init +acpi_get_subtable_type(char *id) +{ + if (strncmp(id, ACPI_SIG_HMAT, 4) == 0) + return ACPI_SUBTABLE_HMAT; + return ACPI_SUBTABLE_COMMON; +} + /** * acpi_parse_entries_array - for each proc_num find a suitable subtable * @@ -245,8 +299,8 @@ static int __init acpi_parse_entries_array(char *id, unsigned long table_size, struct acpi_subtable_proc *proc, int proc_num, unsigned int max_entries) { - struct acpi_subtable_header *entry; - unsigned long table_end; + struct acpi_subtable_entry entry; + unsigned long table_end, subtable_len, entry_len; int count = 0; int errs = 0; int i; @@ -269,19 +323,20 @@ static int __init acpi_parse_entries_array(char *id, unsigned long table_size, /* Parse all entries looking for a match. */ - entry = (struct acpi_subtable_header *) + entry.type = acpi_get_subtable_type(id); + entry.hdr = (union acpi_subtable_headers *) ((unsigned long)table_header + table_size); + subtable_len = acpi_get_subtable_header_length(&entry); - while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) < - table_end) { + while (((unsigned long)entry.hdr) + subtable_len < table_end) { if (max_entries && count >= max_entries) break; for (i = 0; i < proc_num; i++) { - if (entry->type != proc[i].id) + if (acpi_get_entry_type(&entry) != proc[i].id) continue; if (!proc[i].handler || - (!errs && proc[i].handler(entry, table_end))) { + (!errs && proc[i].handler(entry.hdr, table_end))) { errs++; continue; } @@ -296,13 +351,14 @@ static int __init acpi_parse_entries_array(char *id, unsigned long table_size, * If entry->length is 0, break from this loop to avoid * infinite loop. */ - if (entry->length == 0) { + entry_len = acpi_get_entry_length(&entry); + if (entry_len == 0) { pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, proc->id); return -EINVAL; } - entry = (struct acpi_subtable_header *) - ((unsigned long)entry + entry->length); + entry.hdr = (union acpi_subtable_headers *) + ((unsigned long)entry.hdr + entry_len); } if (max_entries && count > max_entries) { diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 03f067da12ee..dc404492381d 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -3,7 +3,6 @@ menu "Generic Driver Options" config UEVENT_HELPER bool "Support for uevent helper" - default y help The uevent helper program is forked by the kernel for every uevent. @@ -149,6 +148,14 @@ config DEBUG_TEST_DRIVER_REMOVE unusable. You should say N here unless you are explicitly looking to test this functionality. 
+config HMEM_REPORTING + bool + default n + depends on NUMA + help + Enable reporting for heterogenous memory access attributes under + their non-uniform memory nodes. + source "drivers/base/test/Kconfig" config SYS_HYPERVISOR diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index edfcf8d982e4..1739d7e1952a 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -7,7 +7,6 @@ */ #include <linux/acpi.h> -#include <linux/arch_topology.h> #include <linux/cpu.h> #include <linux/cpufreq.h> #include <linux/device.h> @@ -31,7 +30,6 @@ void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, per_cpu(freq_scale, i) = scale; } -static DEFINE_MUTEX(cpu_scale_mutex); DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity) @@ -51,37 +49,7 @@ static ssize_t cpu_capacity_show(struct device *dev, static void update_topology_flags_workfn(struct work_struct *work); static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn); -static ssize_t cpu_capacity_store(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct cpu *cpu = container_of(dev, struct cpu, dev); - int this_cpu = cpu->dev.id; - int i; - unsigned long new_capacity; - ssize_t ret; - - if (!count) - return 0; - - ret = kstrtoul(buf, 0, &new_capacity); - if (ret) - return ret; - if (new_capacity > SCHED_CAPACITY_SCALE) - return -EINVAL; - - mutex_lock(&cpu_scale_mutex); - for_each_cpu(i, &cpu_topology[this_cpu].core_sibling) - topology_set_cpu_scale(i, new_capacity); - mutex_unlock(&cpu_scale_mutex); - - schedule_work(&update_topology_flags_work); - - return count; -} - -static DEVICE_ATTR_RW(cpu_capacity); +static DEVICE_ATTR_RO(cpu_capacity); static int register_cpu_capacity_sysctl(void) { @@ -141,7 +109,6 @@ void topology_normalize_cpu_scale(void) return; pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale); - mutex_lock(&cpu_scale_mutex); for_each_possible_cpu(cpu) { pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n", cpu, raw_capacity[cpu]); @@ -151,7 +118,6 @@ void topology_normalize_cpu_scale(void) pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", cpu, topology_get_cpu_scale(NULL, cpu)); } - mutex_unlock(&cpu_scale_mutex); } bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu) diff --git a/drivers/base/core.c b/drivers/base/core.c index 4aeaa0c92bda..fd7511e04e62 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1999,6 +1999,11 @@ static int device_private_init(struct device *dev) * NOTE: _Never_ directly free @dev after calling this function, even * if it returned an error! Always use put_device() to give up your * reference instead. + * + * Rule of thumb is: if device_add() succeeds, you should call + * device_del() when you want to get rid of it. If device_add() has + * *not* succeeded, use *only* put_device() to drop the reference + * count. 
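+ *
+ * For example (a minimal sketch)::
+ *
+ *	err = device_add(dev);
+ *	if (err) {
+ *		put_device(dev);
+ *		return err;
+ *	}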
*/ int device_add(struct device *dev) { diff --git a/drivers/base/dd.c b/drivers/base/dd.c index a823f469e53f..0df9b4461766 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -490,7 +490,7 @@ re_probe: if (dev->bus->dma_configure) { ret = dev->bus->dma_configure(dev); if (ret) - goto dma_failed; + goto probe_failed; } if (driver_sysfs_add(dev)) { @@ -546,14 +546,13 @@ re_probe: goto done; probe_failed: - arch_teardown_dma_ops(dev); -dma_failed: if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DRIVER_NOT_BOUND, dev); pinctrl_bind_failed: device_links_no_driver(dev); devres_release_all(dev); + arch_teardown_dma_ops(dev); driver_sysfs_remove(dev); dev->driver = NULL; dev_set_drvdata(dev, NULL); diff --git a/drivers/base/firmware_loader/Kconfig b/drivers/base/firmware_loader/Kconfig index eb15d976a9ea..38f2da6f5c2b 100644 --- a/drivers/base/firmware_loader/Kconfig +++ b/drivers/base/firmware_loader/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 menu "Firmware loader" config FW_LOADER diff --git a/drivers/base/firmware_loader/builtin/.gitignore b/drivers/base/firmware_loader/builtin/.gitignore index 9c8bdb9fdcc3..166f76b43049 100644 --- a/drivers/base/firmware_loader/builtin/.gitignore +++ b/drivers/base/firmware_loader/builtin/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 *.gen.S diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c index b5c865fe263b..f962488546b6 100644 --- a/drivers/base/firmware_loader/fallback.c +++ b/drivers/base/firmware_loader/fallback.c @@ -674,8 +674,8 @@ static bool fw_run_sysfs_fallback(enum fw_opt opt_flags) * * This function is called if direct lookup for the firmware failed, it enables * a fallback mechanism through userspace by exposing a sysfs loading - * interface. Userspace is in charge of loading the firmware through the syfs - * loading interface. This syfs fallback mechanism may be disabled completely + * interface. Userspace is in charge of loading the firmware through the sysfs + * loading interface. This sysfs fallback mechanism may be disabled completely * on a system by setting the proc sysctl value ignore_sysfs_fallback to true. * If this false we check if the internal API caller set the @FW_OPT_NOFALLBACK * flag, if so it would also disable the fallback mechanism. A system may want @@ -693,7 +693,7 @@ int firmware_fallback_sysfs(struct firmware *fw, const char *name, return ret; if (!(opt_flags & FW_OPT_NO_WARN)) - dev_warn(device, "Falling back to syfs fallback for: %s\n", + dev_warn(device, "Falling back to sysfs fallback for: %s\n", name); else dev_dbg(device, "Falling back to sysfs fallback for: %s\n", diff --git a/drivers/base/node.c b/drivers/base/node.c index 86d6cd92ce3d..8598fcbd2a17 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -17,6 +17,7 @@ #include <linux/nodemask.h> #include <linux/cpu.h> #include <linux/device.h> +#include <linux/pm_runtime.h> #include <linux/swap.h> #include <linux/slab.h> @@ -59,6 +60,302 @@ static inline ssize_t node_read_cpulist(struct device *dev, static DEVICE_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL); static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL); +/** + * struct node_access_nodes - Access class device to hold user visible + * relationships to other nodes. 
+ * @dev: Device for this memory access class + * @list_node: List element in the node's access list + * @access: The access class rank + */ +struct node_access_nodes { + struct device dev; + struct list_head list_node; + unsigned access; +#ifdef CONFIG_HMEM_REPORTING + struct node_hmem_attrs hmem_attrs; +#endif +}; +#define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev) + +static struct attribute *node_init_access_node_attrs[] = { + NULL, +}; + +static struct attribute *node_targ_access_node_attrs[] = { + NULL, +}; + +static const struct attribute_group initiators = { + .name = "initiators", + .attrs = node_init_access_node_attrs, +}; + +static const struct attribute_group targets = { + .name = "targets", + .attrs = node_targ_access_node_attrs, +}; + +static const struct attribute_group *node_access_node_groups[] = { + &initiators, + &targets, + NULL, +}; + +static void node_remove_accesses(struct node *node) +{ + struct node_access_nodes *c, *cnext; + + list_for_each_entry_safe(c, cnext, &node->access_list, list_node) { + list_del(&c->list_node); + device_unregister(&c->dev); + } +} + +static void node_access_release(struct device *dev) +{ + kfree(to_access_nodes(dev)); +} + +static struct node_access_nodes *node_init_node_access(struct node *node, + unsigned access) +{ + struct node_access_nodes *access_node; + struct device *dev; + + list_for_each_entry(access_node, &node->access_list, list_node) + if (access_node->access == access) + return access_node; + + access_node = kzalloc(sizeof(*access_node), GFP_KERNEL); + if (!access_node) + return NULL; + + access_node->access = access; + dev = &access_node->dev; + dev->parent = &node->dev; + dev->release = node_access_release; + dev->groups = node_access_node_groups; + if (dev_set_name(dev, "access%u", access)) + goto free; + + if (device_register(dev)) + goto free_name; + + pm_runtime_no_callbacks(dev); + list_add_tail(&access_node->list_node, &node->access_list); + return access_node; +free_name: + kfree_const(dev->kobj.name); +free: + kfree(access_node); + return NULL; +} + +#ifdef CONFIG_HMEM_REPORTING +#define ACCESS_ATTR(name) \ +static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + return sprintf(buf, "%u\n", to_access_nodes(dev)->hmem_attrs.name); \ +} \ +static DEVICE_ATTR_RO(name); + +ACCESS_ATTR(read_bandwidth) +ACCESS_ATTR(read_latency) +ACCESS_ATTR(write_bandwidth) +ACCESS_ATTR(write_latency) + +static struct attribute *access_attrs[] = { + &dev_attr_read_bandwidth.attr, + &dev_attr_read_latency.attr, + &dev_attr_write_bandwidth.attr, + &dev_attr_write_latency.attr, + NULL, +}; + +/** + * node_set_perf_attrs - Set the performance values for given access class + * @nid: Node identifier to be set + * @hmem_attrs: Heterogeneous memory performance attributes + * @access: The access class the for the given attributes + */ +void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, + unsigned access) +{ + struct node_access_nodes *c; + struct node *node; + int i; + + if (WARN_ON_ONCE(!node_online(nid))) + return; + + node = node_devices[nid]; + c = node_init_node_access(node, access); + if (!c) + return; + + c->hmem_attrs = *hmem_attrs; + for (i = 0; access_attrs[i] != NULL; i++) { + if (sysfs_add_file_to_group(&c->dev.kobj, access_attrs[i], + "initiators")) { + pr_info("failed to add performance attribute to node %d\n", + nid); + break; + } + } +} + +/** + * struct node_cache_info - Internal tracking for memory node caches + * @dev: 
Device represeting the cache level + * @node: List element for tracking in the node + * @cache_attrs:Attributes for this cache level + */ +struct node_cache_info { + struct device dev; + struct list_head node; + struct node_cache_attrs cache_attrs; +}; +#define to_cache_info(device) container_of(device, struct node_cache_info, dev) + +#define CACHE_ATTR(name, fmt) \ +static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + return sprintf(buf, fmt "\n", to_cache_info(dev)->cache_attrs.name);\ +} \ +DEVICE_ATTR_RO(name); + +CACHE_ATTR(size, "%llu") +CACHE_ATTR(line_size, "%u") +CACHE_ATTR(indexing, "%u") +CACHE_ATTR(write_policy, "%u") + +static struct attribute *cache_attrs[] = { + &dev_attr_indexing.attr, + &dev_attr_size.attr, + &dev_attr_line_size.attr, + &dev_attr_write_policy.attr, + NULL, +}; +ATTRIBUTE_GROUPS(cache); + +static void node_cache_release(struct device *dev) +{ + kfree(dev); +} + +static void node_cacheinfo_release(struct device *dev) +{ + struct node_cache_info *info = to_cache_info(dev); + kfree(info); +} + +static void node_init_cache_dev(struct node *node) +{ + struct device *dev; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return; + + dev->parent = &node->dev; + dev->release = node_cache_release; + if (dev_set_name(dev, "memory_side_cache")) + goto free_dev; + + if (device_register(dev)) + goto free_name; + + pm_runtime_no_callbacks(dev); + node->cache_dev = dev; + return; +free_name: + kfree_const(dev->kobj.name); +free_dev: + kfree(dev); +} + +/** + * node_add_cache() - add cache attribute to a memory node + * @nid: Node identifier that has new cache attributes + * @cache_attrs: Attributes for the cache being added + */ +void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) +{ + struct node_cache_info *info; + struct device *dev; + struct node *node; + + if (!node_online(nid) || !node_devices[nid]) + return; + + node = node_devices[nid]; + list_for_each_entry(info, &node->cache_attrs, node) { + if (info->cache_attrs.level == cache_attrs->level) { + dev_warn(&node->dev, + "attempt to add duplicate cache level:%d\n", + cache_attrs->level); + return; + } + } + + if (!node->cache_dev) + node_init_cache_dev(node); + if (!node->cache_dev) + return; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return; + + dev = &info->dev; + dev->parent = node->cache_dev; + dev->release = node_cacheinfo_release; + dev->groups = cache_groups; + if (dev_set_name(dev, "index%d", cache_attrs->level)) + goto free_cache; + + info->cache_attrs = *cache_attrs; + if (device_register(dev)) { + dev_warn(&node->dev, "failed to add cache level:%d\n", + cache_attrs->level); + goto free_name; + } + pm_runtime_no_callbacks(dev); + list_add_tail(&info->node, &node->cache_attrs); + return; +free_name: + kfree_const(dev->kobj.name); +free_cache: + kfree(info); +} + +static void node_remove_caches(struct node *node) +{ + struct node_cache_info *info, *next; + + if (!node->cache_dev) + return; + + list_for_each_entry_safe(info, next, &node->cache_attrs, node) { + list_del(&info->node); + device_unregister(&info->dev); + } + device_unregister(node->cache_dev); +} + +static void node_init_caches(unsigned int nid) +{ + INIT_LIST_HEAD(&node_devices[nid]->cache_attrs); +} +#else +static void node_init_caches(unsigned int nid) { } +static void node_remove_caches(struct node *node) { } +#endif + #define K(x) ((x) << (PAGE_SHIFT - 10)) static ssize_t node_read_meminfo(struct device *dev, struct device_attribute 
*attr, char *buf) @@ -340,7 +637,8 @@ static int register_node(struct node *node, int num) void unregister_node(struct node *node) { hugetlb_unregister_node(node); /* no-op, if memoryless node */ - + node_remove_accesses(node); + node_remove_caches(node); device_unregister(&node->dev); } @@ -372,6 +670,56 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid) kobject_name(&node_devices[nid]->dev.kobj)); } +/** + * register_memory_node_under_compute_node - link memory node to its compute + * node for a given access class. + * @mem_node: Memory node number + * @cpu_node: Cpu node number + * @access: Access class to register + * + * Description: + * For use with platforms that may have separate memory and compute nodes. + * This function will export node relationships linking which memory + * initiator nodes can access memory targets at a given ranked access + * class. + */ +int register_memory_node_under_compute_node(unsigned int mem_nid, + unsigned int cpu_nid, + unsigned access) +{ + struct node *init_node, *targ_node; + struct node_access_nodes *initiator, *target; + int ret; + + if (!node_online(cpu_nid) || !node_online(mem_nid)) + return -ENODEV; + + init_node = node_devices[cpu_nid]; + targ_node = node_devices[mem_nid]; + initiator = node_init_node_access(init_node, access); + target = node_init_node_access(targ_node, access); + if (!initiator || !target) + return -ENOMEM; + + ret = sysfs_add_link_to_group(&initiator->dev.kobj, "targets", + &targ_node->dev.kobj, + dev_name(&targ_node->dev)); + if (ret) + return ret; + + ret = sysfs_add_link_to_group(&target->dev.kobj, "initiators", + &init_node->dev.kobj, + dev_name(&init_node->dev)); + if (ret) + goto err; + + return 0; + err: + sysfs_remove_link_from_group(&initiator->dev.kobj, "targets", + dev_name(&targ_node->dev)); + return ret; +} + int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) { struct device *obj; @@ -580,8 +928,10 @@ int __register_one_node(int nid) register_cpu_under_node(cpu, nid); } + INIT_LIST_HEAD(&node_devices[nid]->access_list); /* initialize work queue for memory hot plug */ init_node_hugetlb_work(nid); + node_init_caches(nid); return error; } diff --git a/drivers/base/platform.c b/drivers/base/platform.c index dab0a5abc391..4d1729853d1a 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(platform_get_resource); * device * * @pdev: platform device to use both for memory resource lookup as well as - * resource managemend + * resource management * @index: resource index */ #ifdef CONFIG_HAS_IOMEM @@ -438,10 +438,12 @@ int platform_device_add(struct platform_device *pdev) p = &ioport_resource; } - if (p && insert_resource(p, r)) { - dev_err(&pdev->dev, "failed to claim resource %d: %pR\n", i, r); - ret = -EBUSY; - goto failed; + if (p) { + ret = insert_resource(p, r); + if (ret) { + dev_err(&pdev->dev, "failed to claim resource %d: %pR\n", i, r); + goto failed; + } } } diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 365ad751ce0f..59d19dd64928 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/clock_ops.c - Generic clock manipulation PM callbacks * * Copyright (c) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp. - * - * This file is released under the GPLv2. 
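The node.c additions above give arch or platform code three entry points: register_memory_node_under_compute_node() to link an initiator node to a target node for a ranked access class, node_set_perf_attrs() to publish bandwidth/latency under accessY/initiators/, and node_add_cache() to describe a memory-side cache under memory_side_cache/indexY/. Below is a sketch of how a hypothetical firmware-table parser might report one target node; report_node_locality() and every numeric value are invented for illustration.

#include <linux/node.h>

static void report_node_locality(unsigned int mem_nid, unsigned int cpu_nid)
{
	struct node_hmem_attrs hmem = {
		.read_bandwidth  = 15000,	/* MB/s */
		.write_bandwidth = 10000,	/* MB/s */
		.read_latency    = 120,		/* ns */
		.write_latency   = 180,		/* ns */
	};
	struct node_cache_attrs cache = {
		.indexing     = NODE_CACHE_DIRECT_MAP,
		.write_policy = NODE_CACHE_WRITE_BACK,
		.size         = 16ULL << 30,	/* 16 GiB */
		.line_size    = 64,
		.level        = 1,
	};

	/* Symlink the initiator and target for access class 0 */
	if (register_memory_node_under_compute_node(mem_nid, cpu_nid, 0))
		return;

	/* Publish performance attributes for class 0 and the side cache */
	node_set_perf_attrs(mem_nid, &hmem, 0);
	node_add_cache(mem_nid, &cache);
}

Both nodes must already be online, and node_set_perf_attrs()/node_add_cache() compile to empty stubs when CONFIG_HMEM_REPORTING is not set.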
*/ #include <linux/kernel.h> diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index 22aedb28aad7..8db98a1f83dc 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/common.c - Common device power management code. * * Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp. - * - * This file is released under the GPLv2. */ - #include <linux/kernel.h> #include <linux/device.h> #include <linux/export.h> diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 3d899e8abd58..7a6aa2318915 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/domain.c - Common code related to device power domains. * * Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp. - * - * This file is released under the GPLv2. */ - #define pr_fmt(fmt) "PM: " fmt #include <linux/delay.h> diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 7912bc957244..3838045c9277 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/domain_governor.c - Governors for device PM domains. * * Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp. - * - * This file is released under the GPLv2. */ - #include <linux/kernel.h> #include <linux/pm_domain.h> #include <linux/pm_qos.h> diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c index b2ed606265a8..4fa525668cb7 100644 --- a/drivers/base/power/generic_ops.c +++ b/drivers/base/power/generic_ops.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/generic_ops.c - Generic PM callbacks for subsystems * * Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. - * - * This file is released under the GPLv2. */ - #include <linux/pm.h> #include <linux/pm_runtime.h> #include <linux/export.h> diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 10528a7747bf..dcfc0a36c8f7 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/main.c - Where the driver meets power management. * * Copyright (c) 2003 Patrick Mochel * Copyright (c) 2003 Open Source Development Lab * - * This file is released under the GPLv2 - * - * * The driver model core calls device_pm_add() when a device is registered. * This will initialize the embedded device_pm_info object in the device * and add it to the list of power-controlled devices. sysfs entries for diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index f80e402ef778..6c91f8df1d59 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Devices PM QoS constraints management * * Copyright (C) 2011 Texas Instruments, Inc. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * * This module exposes the interface to kernel space for specifying * per-device PM QoS dependencies. 
It provides infrastructure for registration * of: diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 977db40378b0..952a1e7057c7 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/runtime.c - Helper functions for device runtime PM * * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. * Copyright (C) 2010 Alan Stern <stern@rowland.harvard.edu> - * - * This file is released under the GPLv2. */ - #include <linux/sched/mm.h> #include <linux/ktime.h> #include <linux/hrtimer.h> diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 1226e441ddfe..1b9c281cbe41 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -1,7 +1,5 @@ -/* - * drivers/base/power/sysfs.c - sysfs entries for device PM - */ - +// SPDX-License-Identifier: GPL-2.0 +/* sysfs entries for device PM */ #include <linux/device.h> #include <linux/string.h> #include <linux/export.h> diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 2bd9d2c744ca..977d27bd1a22 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/trace.c * @@ -6,7 +7,6 @@ * Trace facility for suspend/resume problems, when none of the * devices may be working. */ - #define pr_fmt(fmt) "PM: " fmt #include <linux/pm-trace.h> diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index b8fa5c0f2d13..5ce77d1ef9fc 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -1,16 +1,5 @@ -/* - * wakeirq.c - Device wakeirq helper functions - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - +// SPDX-License-Identifier: GPL-2.0 +/* Device wakeirq helper functions */ #include <linux/device.h> #include <linux/interrupt.h> #include <linux/irq.h> diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 23c243a4c675..5b2b6a05a4f3 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/wakeup.c - System wakeup events framework * * Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. - * - * This file is released under the GPLv2. 
*/ - #define pr_fmt(fmt) "PM: " fmt #include <linux/device.h> diff --git a/drivers/base/test/Makefile b/drivers/base/test/Makefile index 90477c5fd9f9..0f1f7277a013 100644 --- a/drivers/base/test/Makefile +++ b/drivers/base/test/Makefile @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_TEST_ASYNC_DRIVER_PROBE) += test_async_driver_probe.o diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index f5fe0100f9ff..de14e06fd9ec 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -446,7 +446,7 @@ static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) } static int __init -acpi_parse_madt_msi(struct acpi_subtable_header *header, +acpi_parse_madt_msi(union acpi_subtable_headers *header, const unsigned long end) { int ret; diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c index 8d6d009d1d58..c81d5b81da56 100644 --- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c @@ -159,7 +159,7 @@ static int __init its_pci_of_msi_init(void) #ifdef CONFIG_ACPI static int __init -its_pci_msi_parse_madt(struct acpi_subtable_header *header, +its_pci_msi_parse_madt(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_generic_translator *its_entry; diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c b/drivers/irqchip/irq-gic-v3-its-platform-msi.c index 7b8e87b493fe..9cdcda5bb3bd 100644 --- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c @@ -117,7 +117,7 @@ static int __init its_pmsi_init_one(struct fwnode_handle *fwnode, #ifdef CONFIG_ACPI static int __init -its_pmsi_parse_madt(struct acpi_subtable_header *header, +its_pmsi_parse_madt(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_generic_translator *its_entry; diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 7577755bdcf4..128ac893d7e4 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3830,13 +3830,13 @@ static int __init acpi_get_its_numa_node(u32 its_id) return NUMA_NO_NODE; } -static int __init gic_acpi_match_srat_its(struct acpi_subtable_header *header, +static int __init gic_acpi_match_srat_its(union acpi_subtable_headers *header, const unsigned long end) { return 0; } -static int __init gic_acpi_parse_srat_its(struct acpi_subtable_header *header, +static int __init gic_acpi_parse_srat_its(union acpi_subtable_headers *header, const unsigned long end) { int node; @@ -3903,7 +3903,7 @@ static int __init acpi_get_its_numa_node(u32 its_id) { return NUMA_NO_NODE; } static void __init acpi_its_srat_maps_free(void) { } #endif -static int __init gic_acpi_parse_madt_its(struct acpi_subtable_header *header, +static int __init gic_acpi_parse_madt_its(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_generic_translator *its_entry; diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 15e55d327505..f44cd89cfc40 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1593,7 +1593,7 @@ gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base) } static int __init -gic_acpi_parse_madt_redist(struct acpi_subtable_header *header, +gic_acpi_parse_madt_redist(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_generic_redistributor *redist = @@ -1611,7 +1611,7 @@ gic_acpi_parse_madt_redist(struct 
acpi_subtable_header *header, } static int __init -gic_acpi_parse_madt_gicc(struct acpi_subtable_header *header, +gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_generic_interrupt *gicc = @@ -1653,14 +1653,14 @@ static int __init gic_acpi_collect_gicr_base(void) return -ENODEV; } -static int __init gic_acpi_match_gicr(struct acpi_subtable_header *header, +static int __init gic_acpi_match_gicr(union acpi_subtable_headers *header, const unsigned long end) { /* Subtable presence means that redist exists, that's it */ return 0; } -static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header, +static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_generic_interrupt *gicc = @@ -1726,7 +1726,7 @@ static bool __init acpi_validate_gic_table(struct acpi_subtable_header *header, return true; } -static int __init gic_acpi_parse_virt_madt_gicc(struct acpi_subtable_header *header, +static int __init gic_acpi_parse_virt_madt_gicc(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_generic_interrupt *gicc = diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index fd3110c171ba..c6dbe5018972 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1495,7 +1495,7 @@ static struct } acpi_data __initdata; static int __init -gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header, +gic_acpi_parse_madt_cpu(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_generic_interrupt *processor; @@ -1527,7 +1527,7 @@ gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header, } /* The things you have to do to just *count* something... */ -static int __init acpi_dummy_func(struct acpi_subtable_header *header, +static int __init acpi_dummy_func(union acpi_subtable_headers *header, const unsigned long end) { return 0; diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index 256f18b67e8a..08a0a3517138 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -382,7 +382,7 @@ static const struct mbox_chan_ops pcc_chan_ops = { * * This gets called for each entry in the PCC table. */ -static int parse_pcc_subspace(struct acpi_subtable_header *header, +static int parse_pcc_subspace(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_pcct_subspace *ss = (struct acpi_pcct_subspace *) header; diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 4fce1da7db23..ddd708b09fa1 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -394,12 +394,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n"); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * - * If debugfs is not enabled in the kernel, the value -%ENODEV will be - * returned. It is not wise to check for this value, but rather, check for - * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling - * code. + * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will + * be returned. 
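The debugfs kerneldoc updates above and below replace the old "%NULL on error" wording with %ERR_PTR(-ERROR). An illustrative sketch of a caller written against the updated convention follows; my_dir, my_counter and the function names are hypothetical.

#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/kernel.h>

static struct dentry *my_dir;
static u8 my_counter;

static void my_debugfs_init(void)
{
	struct dentry *d;

	my_dir = debugfs_create_dir("my_driver", NULL);

	d = debugfs_create_u8("counter", 0444, my_dir, &my_counter);
	if (IS_ERR(d))	/* errors surface as ERR_PTR(), per the new text */
		pr_warn("counter file not created: %ld\n", PTR_ERR(d));
}

static void my_debugfs_exit(void)
{
	/* debugfs_remove_recursive() tolerates NULL and ERR_PTR() inputs */
	debugfs_remove_recursive(my_dir);
}

Most callers can simply ignore the return value; the IS_ERR() check above only shows where an error would appear if a caller does want to log it.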
*/ struct dentry *debugfs_create_u8(const char *name, umode_t mode, struct dentry *parent, u8 *value) @@ -440,12 +439,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n"); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * - * If debugfs is not enabled in the kernel, the value -%ENODEV will be - * returned. It is not wise to check for this value, but rather, check for - * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling - * code. + * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will + * be returned. */ struct dentry *debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, u16 *value) @@ -486,12 +484,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n"); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * - * If debugfs is not enabled in the kernel, the value -%ENODEV will be - * returned. It is not wise to check for this value, but rather, check for - * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling - * code. + * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will + * be returned. */ struct dentry *debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent, u32 *value) @@ -533,12 +530,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * - * If debugfs is not enabled in the kernel, the value -%ENODEV will be - * returned. It is not wise to check for this value, but rather, check for - * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling - * code. + * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will + * be returned. */ struct dentry *debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent, u64 *value) @@ -582,12 +578,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong_wo, NULL, debugfs_ulong_set, "%llu\n"); * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * - * If debugfs is not enabled in the kernel, the value -%ENODEV will be - * returned. 
It is not wise to check for this value, but rather, check for - * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling - * code. + * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will + * be returned. */ struct dentry *debugfs_create_ulong(const char *name, umode_t mode, struct dentry *parent, unsigned long *value) @@ -850,12 +845,11 @@ static const struct file_operations fops_bool_wo = { * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * - * If debugfs is not enabled in the kernel, the value -%ENODEV will be - * returned. It is not wise to check for this value, but rather, check for - * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling - * code. + * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will + * be returned. */ struct dentry *debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent, bool *value) @@ -904,12 +898,11 @@ static const struct file_operations fops_blob = { * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * - * If debugfs is not enabled in the kernel, the value -%ENODEV will be - * returned. It is not wise to check for this value, but rather, check for - * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling - * code. + * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will + * be returned. */ struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, @@ -1005,8 +998,9 @@ static const struct file_operations u32_array_fops = { * Writing is not supported. Seek within the file is also not supported. * Once array is created its size can not be changed. * - * The function returns a pointer to dentry on success. If debugfs is not - * enabled in the kernel, the value -%ENODEV will be returned. + * The function returns a pointer to dentry on success. If an error occurs, + * %ERR_PTR(-ERROR) or NULL will be returned. If debugfs is not enabled in + * the kernel, the value %ERR_PTR(-ENODEV) will be returned. */ struct dentry *debugfs_create_u32_array(const char *name, umode_t mode, struct dentry *parent, @@ -1102,12 +1096,11 @@ static const struct file_operations fops_regset32 = { * This function will return a pointer to a dentry if it succeeds. This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, - * you are responsible here.) If an error occurs, %NULL will be returned. + * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be + * returned. * - * If debugfs is not enabled in the kernel, the value -%ENODEV will be - * returned. It is not wise to check for this value, but rather, check for - * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling - * code. 
+ * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will + * be returned. */ struct dentry *debugfs_create_regset32(const char *name, umode_t mode, struct dentry *parent, diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index b84d635567d3..1e7a74b8e064 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -650,11 +650,10 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, kn->id.generation = gen; /* - * set ino first. This barrier is paired with atomic_inc_not_zero in + * set ino first. This RELEASE is paired with atomic_inc_not_zero in * kernfs_find_and_get_node_by_ino */ - smp_mb__before_atomic(); - atomic_set(&kn->count, 1); + atomic_set_release(&kn->count, 1); atomic_set(&kn->active, KN_DEACTIVATED_BIAS); RB_CLEAR_NODE(&kn->rb); diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index d14037ddf108..22c039ebc6c5 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -1395,7 +1395,7 @@ struct acpi_table_hmat { /* Values for HMAT structure types */ enum acpi_hmat_type { - ACPI_HMAT_TYPE_ADDRESS_RANGE = 0, /* Memory subsystem address range */ + ACPI_HMAT_TYPE_PROXIMITY = 0, /* Memory proximity domain attributes */ ACPI_HMAT_TYPE_LOCALITY = 1, /* System locality latency and bandwidth information */ ACPI_HMAT_TYPE_CACHE = 2, /* Memory side cache information */ ACPI_HMAT_TYPE_RESERVED = 3 /* 3 and greater are reserved */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ca55ae00f8c9..e22c237be46a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -141,10 +141,14 @@ enum acpi_address_range_id { /* Table Handlers */ +union acpi_subtable_headers { + struct acpi_subtable_header common; + struct acpi_hmat_structure hmat; +}; typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table); -typedef int (*acpi_tbl_entry_handler)(struct acpi_subtable_header *header, +typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header, const unsigned long end); /* Debugger support */ diff --git a/include/linux/device.h b/include/linux/device.h index 4e6987e11f68..4457e560bc2b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -976,18 +976,14 @@ struct dev_links_info { * a higher-level representation of the device. */ struct device { + struct kobject kobj; struct device *parent; struct device_private *p; - struct kobject kobj; const char *init_name; /* initial name of the device */ const struct device_type *type; - struct mutex mutex; /* mutex to synchronize calls to - * its driver. - */ - struct bus_type *bus; /* type of bus device is on */ struct device_driver *driver; /* which driver has allocated this device */ @@ -995,6 +991,10 @@ struct device { core doesn't touch it */ void *driver_data; /* Driver data, set and get with dev_set_drvdata/dev_get_drvdata */ + struct mutex mutex; /* mutex to synchronize calls to + * its driver. 
+ */ + struct dev_links_info links; struct dev_pm_info power; struct dev_pm_domain *pm_domain; @@ -1009,9 +1009,6 @@ struct device { struct list_head msi_list; #endif -#ifdef CONFIG_NUMA - int numa_node; /* NUMA node this device is close to */ -#endif const struct dma_map_ops *dma_ops; u64 *dma_mask; /* dma mask (if dma'able device) */ u64 coherent_dma_mask;/* Like dma_mask, but for @@ -1040,6 +1037,9 @@ struct device { struct device_node *of_node; /* associated device tree node */ struct fwnode_handle *fwnode; /* firmware device node */ +#ifdef CONFIG_NUMA + int numa_node; /* NUMA node this device is close to */ +#endif dev_t devt; /* dev_t, creates the sysfs "dev" */ u32 id; /* device instance */ diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index c8893f663470..e446ab97ee0c 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -64,7 +64,7 @@ enum kernfs_root_flag { KERNFS_ROOT_CREATE_DEACTIVATED = 0x0001, /* - * For regular flies, if the opener has CAP_DAC_OVERRIDE, open(2) + * For regular files, if the opener has CAP_DAC_OVERRIDE, open(2) * succeeds regardless of the RW permissions. sysfs had an extra * layer of enforcement where open(2) fails with -EACCES regardless * of CAP_DAC_OVERRIDE if the permission doesn't have the diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 1ab0d624fb36..e2ca0a292e21 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -139,7 +139,8 @@ static inline bool kobject_has_children(struct kobject *kobj) struct kobj_type { void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; - struct attribute **default_attrs; + struct attribute **default_attrs; /* use default_groups instead */ + const struct attribute_group **default_groups; const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); const void *(*namespace)(struct kobject *kobj); void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid); diff --git a/include/linux/node.h b/include/linux/node.h index 257bb3d6d014..1a557c589ecb 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -17,14 +17,81 @@ #include <linux/device.h> #include <linux/cpumask.h> +#include <linux/list.h> #include <linux/workqueue.h> +/** + * struct node_hmem_attrs - heterogeneous memory performance attributes + * + * @read_bandwidth: Read bandwidth in MB/s + * @write_bandwidth: Write bandwidth in MB/s + * @read_latency: Read latency in nanoseconds + * @write_latency: Write latency in nanoseconds + */ +struct node_hmem_attrs { + unsigned int read_bandwidth; + unsigned int write_bandwidth; + unsigned int read_latency; + unsigned int write_latency; +}; + +enum cache_indexing { + NODE_CACHE_DIRECT_MAP, + NODE_CACHE_INDEXED, + NODE_CACHE_OTHER, +}; + +enum cache_write_policy { + NODE_CACHE_WRITE_BACK, + NODE_CACHE_WRITE_THROUGH, + NODE_CACHE_WRITE_OTHER, +}; + +/** + * struct node_cache_attrs - system memory caching attributes + * + * @indexing: The ways memory blocks may be placed in cache + * @write_policy: Write back or write through policy + * @size: Total size of cache in bytes + * @line_size: Number of bytes fetched on a cache miss + * @level: The cache hierarchy level + */ +struct node_cache_attrs { + enum cache_indexing indexing; + enum cache_write_policy write_policy; + u64 size; + u16 line_size; + u8 level; +}; + +#ifdef CONFIG_HMEM_REPORTING +void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs); +void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, + unsigned 
access); +#else +static inline void node_add_cache(unsigned int nid, + struct node_cache_attrs *cache_attrs) +{ +} + +static inline void node_set_perf_attrs(unsigned int nid, + struct node_hmem_attrs *hmem_attrs, + unsigned access) +{ +} +#endif + struct node { struct device dev; + struct list_head access_list; #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS) struct work_struct node_work; #endif +#ifdef CONFIG_HMEM_REPORTING + struct list_head cache_attrs; + struct device *cache_dev; +#endif }; struct memory_block; @@ -75,6 +142,10 @@ extern int register_mem_sect_under_node(struct memory_block *mem_blk, extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, unsigned long phys_index); +extern int register_memory_node_under_compute_node(unsigned int mem_nid, + unsigned int cpu_nid, + unsigned access); + #ifdef CONFIG_HUGETLBFS extern void register_hugetlbfs_with_node(node_registration_func_t doregister, node_registration_func_t unregister); diff --git a/init/Kconfig b/init/Kconfig index be8f97e37a76..82b84e5ee30d 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -562,7 +562,6 @@ config BUILD_BIN2C config IKCONFIG tristate "Kernel .config support" - select BUILD_BIN2C ---help--- This option enables the complete Linux kernel ".config" file contents to be saved in the kernel. It provides documentation @@ -580,6 +579,16 @@ config IKCONFIG_PROC This option enables access to the kernel configuration file through /proc/config.gz. +config IKHEADERS_PROC + tristate "Enable kernel header artifacts through /proc/kheaders.tar.xz" + depends on PROC_FS + help + This option enables access to the kernel header and other artifacts that + are generated during the build process. These can be used to build eBPF + tracing programs, or similar programs. If you build the headers as a + module, a module called kheaders.ko is built which can be loaded on-demand + to get access to the headers. + config LOG_BUF_SHIFT int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" range 12 25 diff --git a/kernel/.gitignore b/kernel/.gitignore index 6e699100872f..34d1e77ee9df 100644 --- a/kernel/.gitignore +++ b/kernel/.gitignore @@ -1,5 +1,6 @@ # # Generated files # +kheaders.md5 timeconst.h hz.bc diff --git a/kernel/Makefile b/kernel/Makefile index 62471e75a2b0..298437bb2c6a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -71,6 +71,7 @@ obj-$(CONFIG_UTS_NS) += utsname.o obj-$(CONFIG_USER_NS) += user_namespace.o obj-$(CONFIG_PID_NS) += pid_namespace.o obj-$(CONFIG_IKCONFIG) += configs.o +obj-$(CONFIG_IKHEADERS_PROC) += kheaders.o obj-$(CONFIG_SMP) += stop_machine.o obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o obj-$(CONFIG_AUDIT) += audit.o auditfilter.o @@ -122,3 +123,12 @@ $(obj)/configs.o: $(obj)/config_data.gz targets += config_data.gz $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE $(call if_changed,gzip) + +$(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz + +quiet_cmd_genikh = CHK $(obj)/kheaders_data.tar.xz +cmd_genikh = $(srctree)/kernel/gen_ikh_data.sh $@ +$(obj)/kheaders_data.tar.xz: FORCE + $(call cmd,genikh) + +clean-files := kheaders_data.tar.xz kheaders.md5 diff --git a/kernel/gen_ikh_data.sh b/kernel/gen_ikh_data.sh new file mode 100755 index 000000000000..591a94f7b387 --- /dev/null +++ b/kernel/gen_ikh_data.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This script generates an archive consisting of kernel headers +# for CONFIG_IKHEADERS_PROC. +set -e +spath="$(dirname "$(readlink -f "$0")")" +kroot="$spath/.." 
+outdir="$(pwd)" +tarfile=$1 +cpio_dir=$outdir/$tarfile.tmp + +# Script filename relative to the kernel source root +# We add it to the archive because it is small and any changes +# to this script will also cause a rebuild of the archive. +sfile="$(realpath --relative-to $kroot "$(readlink -f "$0")")" + +src_file_list=" +include/ +arch/$SRCARCH/include/ +$sfile +" + +obj_file_list=" +include/ +arch/$SRCARCH/include/ +" + +# Support incremental builds by skipping archive generation +# if timestamps of files being archived are not changed. + +# This block is useful for debugging the incremental builds. +# Uncomment it for debugging. +# iter=1 +# if [ ! -f /tmp/iter ]; then echo 1 > /tmp/iter; +# else; iter=$(($(cat /tmp/iter) + 1)); fi +# find $src_file_list -type f | xargs ls -lR > /tmp/src-ls-$iter +# find $obj_file_list -type f | xargs ls -lR > /tmp/obj-ls-$iter + +# include/generated/compile.h is ignored because it is touched even when none +# of the source files changed. This causes pointless regeneration, so let us +# ignore them for md5 calculation. +pushd $kroot > /dev/null +src_files_md5="$(find $src_file_list -type f | + grep -v "include/generated/compile.h" | + xargs ls -lR | md5sum | cut -d ' ' -f1)" +popd > /dev/null +obj_files_md5="$(find $obj_file_list -type f | + grep -v "include/generated/compile.h" | + xargs ls -lR | md5sum | cut -d ' ' -f1)" + +if [ -f $tarfile ]; then tarfile_md5="$(md5sum $tarfile | cut -d ' ' -f1)"; fi +if [ -f kernel/kheaders.md5 ] && + [ "$(cat kernel/kheaders.md5|head -1)" == "$src_files_md5" ] && + [ "$(cat kernel/kheaders.md5|head -2|tail -1)" == "$obj_files_md5" ] && + [ "$(cat kernel/kheaders.md5|tail -1)" == "$tarfile_md5" ]; then + exit +fi + +if [ "${quiet}" != "silent_" ]; then + echo " GEN $tarfile" +fi + +rm -rf $cpio_dir +mkdir $cpio_dir + +pushd $kroot > /dev/null +for f in $src_file_list; + do find "$f" ! -name "*.cmd" ! -name ".*"; +done | cpio --quiet -pd $cpio_dir +popd > /dev/null + +# The second CPIO can complain if files already exist which can +# happen with out of tree builds. Just silence CPIO for now. +for f in $obj_file_list; + do find "$f" ! -name "*.cmd" ! -name ".*"; +done | cpio --quiet -pd $cpio_dir >/dev/null 2>&1 + +# Remove comments except SDPX lines +find $cpio_dir -type f -print0 | + xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;' + +tar -Jcf $tarfile -C $cpio_dir/ . > /dev/null + +echo "$src_files_md5" > kernel/kheaders.md5 +echo "$obj_files_md5" >> kernel/kheaders.md5 +echo "$(md5sum $tarfile | cut -d ' ' -f1)" >> kernel/kheaders.md5 + +rm -rf $cpio_dir diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 9f8a709337cf..c52b737ab8e3 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -275,11 +275,12 @@ static struct attribute *irq_attrs[] = { &actions_attr.attr, NULL }; +ATTRIBUTE_GROUPS(irq); static struct kobj_type irq_kobj_type = { .release = irq_kobj_release, .sysfs_ops = &kobj_sysfs_ops, - .default_attrs = irq_attrs, + .default_groups = irq_groups, }; static void irq_sysfs_add(int irq, struct irq_desc *desc) diff --git a/kernel/kheaders.c b/kernel/kheaders.c new file mode 100644 index 000000000000..70ae6052920d --- /dev/null +++ b/kernel/kheaders.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Provide kernel headers useful to build tracing programs + * such as for running eBPF tracing tools. 
+ * + * (Borrowed code from kernel/configs.c) + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <linux/uaccess.h> + +/* + * Define kernel_headers_data and kernel_headers_data_end, within which the + * compressed kernel headers are stored. The file is first compressed with xz. + */ + +asm ( +" .pushsection .rodata, \"a\" \n" +" .global kernel_headers_data \n" +"kernel_headers_data: \n" +" .incbin \"kernel/kheaders_data.tar.xz\" \n" +" .global kernel_headers_data_end \n" +"kernel_headers_data_end: \n" +" .popsection \n" +); + +extern char kernel_headers_data; +extern char kernel_headers_data_end; + +static ssize_t +ikheaders_read_current(struct file *file, char __user *buf, + size_t len, loff_t *offset) +{ + return simple_read_from_buffer(buf, len, offset, + &kernel_headers_data, + &kernel_headers_data_end - + &kernel_headers_data); +} + +static const struct file_operations ikheaders_file_ops = { + .read = ikheaders_read_current, + .llseek = default_llseek, +}; + +static int __init ikheaders_init(void) +{ + struct proc_dir_entry *entry; + + /* create the current headers file */ + entry = proc_create("kheaders.tar.xz", S_IRUGO, NULL, + &ikheaders_file_ops); + if (!entry) + return -ENOMEM; + + proc_set_size(entry, + &kernel_headers_data_end - + &kernel_headers_data); + return 0; +} + +static void __exit ikheaders_cleanup(void) +{ + remove_proc_entry("kheaders.tar.xz", NULL); +} + +module_init(ikheaders_init); +module_exit(ikheaders_cleanup); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Joel Fernandes"); +MODULE_DESCRIPTION("Echo the kernel header artifacts used to build the kernel"); diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index f12c0eabd843..f6fbaff10e71 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -419,6 +419,7 @@ static struct attribute *klp_patch_attrs[] = { &force_kobj_attr.attr, NULL }; +ATTRIBUTE_GROUPS(klp_patch); static void klp_free_object_dynamic(struct klp_object *obj) { @@ -549,7 +550,7 @@ static void klp_kobj_release_patch(struct kobject *kobj) static struct kobj_type klp_ktype_patch = { .release = klp_kobj_release_patch, .sysfs_ops = &kobj_sysfs_ops, - .default_attrs = klp_patch_attrs, + .default_groups = klp_patch_groups, }; static void klp_kobj_release_object(struct kobject *kobj) diff --git a/kernel/padata.c b/kernel/padata.c index 3e2633ae3bca..2d2fddbb7a4c 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -957,6 +957,7 @@ static struct attribute *padata_default_attrs[] = { ¶llel_cpumask_attr.attr, NULL, }; +ATTRIBUTE_GROUPS(padata_default); static ssize_t padata_sysfs_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ -995,7 +996,7 @@ static const struct sysfs_ops padata_sysfs_ops = { static struct kobj_type padata_attr_type = { .sysfs_ops = &padata_sysfs_ops, - .default_attrs = padata_default_attrs, + .default_groups = padata_default_groups, .release = padata_sysfs_release, }; diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 5403479073b0..962cf343f798 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -600,13 +600,14 @@ rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us); -static struct attribute *sugov_attributes[] = { +static struct attribute *sugov_attrs[] = { &rate_limit_us.attr, NULL }; +ATTRIBUTE_GROUPS(sugov); static struct kobj_type sugov_tunables_ktype = { 
- .default_attrs = sugov_attributes, + .default_groups = sugov_groups, .sysfs_ops = &governor_sysfs_ops, }; diff --git a/lib/kobject.c b/lib/kobject.c index aa89edcd2b63..f2ccdbac8ed9 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -18,7 +18,7 @@ #include <linux/random.h> /** - * kobject_namespace - return @kobj's namespace tag + * kobject_namespace() - Return @kobj's namespace tag. * @kobj: kobject in question * * Returns namespace tag of @kobj if its parent has namespace ops enabled @@ -36,7 +36,7 @@ const void *kobject_namespace(struct kobject *kobj) } /** - * kobject_get_ownership - get sysfs ownership data for @kobj + * kobject_get_ownership() - Get sysfs ownership data for @kobj. * @kobj: kobject in question * @uid: kernel user ID for sysfs objects * @gid: kernel group ID for sysfs objects @@ -82,6 +82,7 @@ static int populate_dir(struct kobject *kobj) static int create_dir(struct kobject *kobj) { + const struct kobj_type *ktype = get_ktype(kobj); const struct kobj_ns_type_operations *ops; int error; @@ -95,6 +96,14 @@ static int create_dir(struct kobject *kobj) return error; } + if (ktype) { + error = sysfs_create_groups(kobj, ktype->default_groups); + if (error) { + sysfs_remove_dir(kobj); + return error; + } + } + /* * @kobj->sd may be deleted by an ancestor going away. Hold an * extra reference so that it stays until @kobj is gone. @@ -153,12 +162,11 @@ static void fill_kobj_path(struct kobject *kobj, char *path, int length) } /** - * kobject_get_path - generate and return the path associated with a given kobj and kset pair. - * + * kobject_get_path() - Allocate memory and fill in the path for @kobj. * @kobj: kobject in question, with which to build the path * @gfp_mask: the allocation type used to allocate the path * - * The result must be freed by the caller with kfree(). + * Return: The newly allocated memory, caller must free with kfree(). */ char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask) { @@ -265,7 +273,7 @@ static int kobject_add_internal(struct kobject *kobj) } /** - * kobject_set_name_vargs - Set the name of an kobject + * kobject_set_name_vargs() - Set the name of a kobject. * @kobj: struct kobject to set the name of * @fmt: format string used to build the name * @vargs: vargs to format the string. @@ -305,7 +313,7 @@ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, } /** - * kobject_set_name - Set the name of a kobject + * kobject_set_name() - Set the name of a kobject. * @kobj: struct kobject to set the name of * @fmt: format string used to build the name * @@ -327,7 +335,7 @@ int kobject_set_name(struct kobject *kobj, const char *fmt, ...) EXPORT_SYMBOL(kobject_set_name); /** - * kobject_init - initialize a kobject structure + * kobject_init() - Initialize a kobject structure. * @kobj: pointer to the kobject to initialize * @ktype: pointer to the ktype for this kobject. * @@ -383,7 +391,7 @@ static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, } /** - * kobject_add - the main kobject add function + * kobject_add() - The main kobject add function. * @kobj: the kobject to add * @parent: pointer to the parent of the kobject. * @fmt: format to name the kobject with. @@ -397,15 +405,23 @@ static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, * is assigned to the kobject, then the kobject will be located in the * root of the sysfs tree. * - * If this function returns an error, kobject_put() must be called to - * properly clean up the memory associated with the object. 
- * Under no instance should the kobject that is passed to this function - * be directly freed with a call to kfree(), that can leak memory. - * * Note, no "add" uevent will be created with this call, the caller should set * up all of the necessary sysfs files for the object and then call * kobject_uevent() with the UEVENT_ADD parameter to ensure that * userspace is properly notified of this kobject's creation. + * + * Return: If this function returns an error, kobject_put() must be + * called to properly clean up the memory associated with the + * object. Under no instance should the kobject that is passed + * to this function be directly freed with a call to kfree(), + * that can leak memory. + * + * If this function returns success, kobject_put() must also be called + * in order to properly clean up the memory associated with the object. + * + * In short, once this function is called, kobject_put() MUST be called + * when the use of the object is finished in order to properly free + * everything. */ int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) @@ -431,15 +447,19 @@ int kobject_add(struct kobject *kobj, struct kobject *parent, EXPORT_SYMBOL(kobject_add); /** - * kobject_init_and_add - initialize a kobject structure and add it to the kobject hierarchy + * kobject_init_and_add() - Initialize a kobject structure and add it to + * the kobject hierarchy. * @kobj: pointer to the kobject to initialize * @ktype: pointer to the ktype for this kobject. * @parent: pointer to the parent of this kobject. * @fmt: the name of the kobject. * - * This function combines the call to kobject_init() and - * kobject_add(). The same type of error handling after a call to - * kobject_add() and kobject lifetime rules are the same here. + * This function combines the call to kobject_init() and kobject_add(). + * + * If this function returns an error, kobject_put() must be called to + * properly clean up the memory associated with the object. This is the + * same type of error handling after a call to kobject_add() and kobject + * lifetime rules are the same here. */ int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...) @@ -458,7 +478,7 @@ int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, EXPORT_SYMBOL_GPL(kobject_init_and_add); /** - * kobject_rename - change the name of an object + * kobject_rename() - Change the name of an object. * @kobj: object in question. * @new_name: object's new name * @@ -525,7 +545,7 @@ out: EXPORT_SYMBOL_GPL(kobject_rename); /** - * kobject_move - move object to another parent + * kobject_move() - Move object to another parent. * @kobj: object in question. * @new_parent: object's new parent (can be NULL) */ @@ -578,17 +598,26 @@ out: EXPORT_SYMBOL_GPL(kobject_move); /** - * kobject_del - unlink kobject from hierarchy. + * kobject_del() - Unlink kobject from hierarchy. * @kobj: object. + * + * This is the function that should be called to delete an object + * successfully added via kobject_add(). */ void kobject_del(struct kobject *kobj) { struct kernfs_node *sd; + const struct kobj_type *ktype; if (!kobj) return; sd = kobj->sd; + ktype = get_ktype(kobj); + + if (ktype) + sysfs_remove_groups(kobj, ktype->default_groups); + sysfs_remove_dir(kobj); sysfs_put(sd); @@ -600,7 +629,7 @@ void kobject_del(struct kobject *kobj) EXPORT_SYMBOL(kobject_del); /** - * kobject_get - increment refcount for object. + * kobject_get() - Increment refcount for object. 
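The expanded Return: section for kobject_add() above, together with the note that kobject_put() is required even after success, amounts to the following pattern. Illustrative sketch, not from the patch; my_obj_create()/my_obj_destroy() are hypothetical, and my_ktype is assumed to provide a release() callback that frees the object.

#include <linux/kobject.h>
#include <linux/slab.h>

static struct kobject *my_obj_create(struct kobject *parent,
				     struct kobj_type *my_ktype)
{
	struct kobject *kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);

	if (!kobj)
		return NULL;

	if (kobject_init_and_add(kobj, my_ktype, parent, "my_obj")) {
		/* error: never kfree() here, my_ktype->release() frees it */
		kobject_put(kobj);
		return NULL;
	}
	return kobj;
}

static void my_obj_destroy(struct kobject *kobj)
{
	kobject_del(kobj);	/* unlink what kobject_add() added */
	kobject_put(kobj);	/* drop the initial reference */
}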
* @kobj: object. */ struct kobject *kobject_get(struct kobject *kobj) @@ -693,7 +722,7 @@ static void kobject_release(struct kref *kref) } /** - * kobject_put - decrement refcount for object. + * kobject_put() - Decrement refcount for object. * @kobj: object. * * Decrement the refcount, and if 0, call kobject_cleanup(). @@ -722,7 +751,7 @@ static struct kobj_type dynamic_kobj_ktype = { }; /** - * kobject_create - create a struct kobject dynamically + * kobject_create() - Create a struct kobject dynamically. * * This function creates a kobject structure dynamically and sets it up * to be a "dynamic" kobject with a default release function set up. @@ -745,8 +774,8 @@ struct kobject *kobject_create(void) } /** - * kobject_create_and_add - create a struct kobject dynamically and register it with sysfs - * + * kobject_create_and_add() - Create a struct kobject dynamically and + * register it with sysfs. * @name: the name for the kobject * @parent: the parent kobject of this kobject, if any. * @@ -777,7 +806,7 @@ struct kobject *kobject_create_and_add(const char *name, struct kobject *parent) EXPORT_SYMBOL_GPL(kobject_create_and_add); /** - * kset_init - initialize a kset for use + * kset_init() - Initialize a kset for use. * @k: kset */ void kset_init(struct kset *k) @@ -819,7 +848,7 @@ const struct sysfs_ops kobj_sysfs_ops = { EXPORT_SYMBOL_GPL(kobj_sysfs_ops); /** - * kset_register - initialize and add a kset. + * kset_register() - Initialize and add a kset. * @k: kset. */ int kset_register(struct kset *k) @@ -839,7 +868,7 @@ int kset_register(struct kset *k) EXPORT_SYMBOL(kset_register); /** - * kset_unregister - remove a kset. + * kset_unregister() - Remove a kset. * @k: kset. */ void kset_unregister(struct kset *k) @@ -852,7 +881,7 @@ void kset_unregister(struct kset *k) EXPORT_SYMBOL(kset_unregister); /** - * kset_find_obj - search for object in kset. + * kset_find_obj() - Search for object in kset. * @kset: kset we're looking in. * @name: object's name. * @@ -900,7 +929,7 @@ static struct kobj_type kset_ktype = { }; /** - * kset_create - create a struct kset dynamically + * kset_create() - Create a struct kset dynamically. * * @name: the name for the kset * @uevent_ops: a struct kset_uevent_ops for the kset @@ -944,7 +973,7 @@ static struct kset *kset_create(const char *name, } /** - * kset_create_and_add - create a struct kset dynamically and add it to sysfs + * kset_create_and_add() - Create a struct kset dynamically and add it to sysfs. * * @name: the name for the kset * @uevent_ops: a struct kset_uevent_ops for the kset diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index f05802687ba4..7998affa45d4 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -466,6 +466,13 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, int i = 0; int retval = 0; + /* + * Mark "remove" event done regardless of result, for some subsystems + * do not want to re-trigger "remove" event via automatic cleanup. 
+ */ + if (action == KOBJ_REMOVE) + kobj->state_remove_uevent_sent = 1; + pr_debug("kobject: '%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); @@ -567,10 +574,6 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, kobj->state_add_uevent_sent = 1; break; - case KOBJ_REMOVE: - kobj->state_remove_uevent_sent = 1; - break; - case KOBJ_UNBIND: zap_modalias_env(env); break; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 8f8b7b6c2945..530e5b04b97d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -863,6 +863,7 @@ static struct attribute *rx_queue_default_attrs[] __ro_after_init = { #endif NULL }; +ATTRIBUTE_GROUPS(rx_queue_default); static void rx_queue_release(struct kobject *kobj) { @@ -911,7 +912,7 @@ static void rx_queue_get_ownership(struct kobject *kobj, static struct kobj_type rx_queue_ktype __ro_after_init = { .sysfs_ops = &rx_queue_sysfs_ops, .release = rx_queue_release, - .default_attrs = rx_queue_default_attrs, + .default_groups = rx_queue_default_groups, .namespace = rx_queue_namespace, .get_ownership = rx_queue_get_ownership, }; @@ -1416,6 +1417,7 @@ static struct attribute *netdev_queue_default_attrs[] __ro_after_init = { #endif NULL }; +ATTRIBUTE_GROUPS(netdev_queue_default); static void netdev_queue_release(struct kobject *kobj) { @@ -1448,7 +1450,7 @@ static void netdev_queue_get_ownership(struct kobject *kobj, static struct kobj_type netdev_queue_ktype __ro_after_init = { .sysfs_ops = &netdev_queue_sysfs_ops, .release = netdev_queue_release, - .default_attrs = netdev_queue_default_attrs, + .default_groups = netdev_queue_default_groups, .namespace = netdev_queue_namespace, .get_ownership = netdev_queue_get_ownership, }; diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c index 401328fd687d..c8010f126808 100644 --- a/samples/kobject/kset-example.c +++ b/samples/kobject/kset-example.c @@ -178,6 +178,7 @@ static struct attribute *foo_default_attrs[] = { &bar_attribute.attr, NULL, /* need to NULL terminate the list of attributes */ }; +ATTRIBUTE_GROUPS(foo_default); /* * Our own ktype for our kobjects. Here we specify our sysfs ops, the @@ -187,7 +188,7 @@ static struct attribute *foo_default_attrs[] = { static struct kobj_type foo_ktype = { .sysfs_ops = &foo_sysfs_ops, .release = foo_release, - .default_attrs = foo_default_attrs, + .default_groups = foo_default_groups, }; static struct kset *example_kset; |
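The ktype conversions spread through this series (irqdesc, livepatch, padata, schedutil, the net-sysfs queue ktypes and the kset-example sample above) all follow the same two-line recipe: wrap the existing attribute array with ATTRIBUTE_GROUPS() and switch the ktype from .default_attrs to .default_groups. For reference, ATTRIBUTE_GROUPS(foo_default) roughly expands to the following (see include/linux/sysfs.h); foo_default_attrs is the NULL-terminated array already defined in the sample:

static const struct attribute_group foo_default_group = {
	.attrs = foo_default_attrs,
};

static const struct attribute_group *foo_default_groups[] = {
	&foo_default_group,
	NULL,
};

With the lib/kobject.c changes above, create_dir() creates these default groups when the kobject is added and kobject_del() removes them again, which is what lets the individual ktypes stop using default_attrs.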