diff options
543 files changed, 19199 insertions, 5116 deletions
diff --git a/Documentation/ABI/stable/sysfs-devices-system-cpu b/Documentation/ABI/stable/sysfs-devices-system-cpu index 3965ce504484..902392d7eddf 100644 --- a/Documentation/ABI/stable/sysfs-devices-system-cpu +++ b/Documentation/ABI/stable/sysfs-devices-system-cpu @@ -86,6 +86,10 @@ What: /sys/devices/system/cpu/cpuX/topology/die_cpus Description: internal kernel map of CPUs within the same die. Values: hexadecimal bitmask. +What: /sys/devices/system/cpu/cpuX/topology/ppin +Description: per-socket protected processor inventory number +Values: hexadecimal. + What: /sys/devices/system/cpu/cpuX/topology/die_cpus_list Description: human-readable list of CPUs within the same die. The format is like 0-3, 8-11, 14,17. diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 61f5676a7429..2ad01cad7f1c 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -73,6 +73,7 @@ What: /sys/devices/system/cpu/cpuX/topology/core_id /sys/devices/system/cpu/cpuX/topology/physical_package_id /sys/devices/system/cpu/cpuX/topology/thread_siblings /sys/devices/system/cpu/cpuX/topology/thread_siblings_list + /sys/devices/system/cpu/cpuX/topology/ppin Date: December 2008 Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> Description: CPU topology files that describe a logical CPU's relationship @@ -103,6 +104,11 @@ Description: CPU topology files that describe a logical CPU's relationship thread_siblings_list: human-readable list of cpuX's hardware threads within the same core as cpuX + ppin: human-readable Protected Processor Identification + Number of the socket the cpu# belongs to. There should be + one per physical_package_id. File is readable only to + admin. + See Documentation/admin-guide/cputopology.rst for more information. @@ -662,6 +668,7 @@ Description: Preferred MTE tag checking mode ================ ============================================== "sync" Prefer synchronous mode + "asymm" Prefer asymmetric mode "async" Prefer asynchronous mode ================ ============================================== diff --git a/Documentation/Makefile b/Documentation/Makefile index 9f4bd42cef18..64d44c1ecad3 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -26,7 +26,7 @@ SPHINX_CONF = conf.py PAPER = BUILDDIR = $(obj)/output PDFLATEX = xelatex -LATEXOPTS = -interaction=batchmode +LATEXOPTS = -interaction=batchmode -no-shell-escape ifeq ($(KBUILD_VERBOSE),0) SPHINXOPTS += "-q" diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst index 3e11926a4df9..54fe63745ed8 100644 --- a/Documentation/admin-guide/blockdev/zram.rst +++ b/Documentation/admin-guide/blockdev/zram.rst @@ -315,8 +315,8 @@ To use the feature, admin should set up backing device via:: echo /dev/sda5 > /sys/block/zramX/backing_dev -before disksize setting. It supports only partition at this moment. -If admin wants to use incompressible page writeback, they could do via:: +before disksize setting. It supports only partitions at this moment. +If admin wants to use incompressible page writeback, they could do it via:: echo huge > /sys/block/zramX/writeback @@ -341,9 +341,9 @@ Admin can request writeback of those idle pages at right timing via:: echo idle > /sys/block/zramX/writeback -With the command, zram writeback idle pages from memory to the storage. +With the command, zram will writeback idle pages from memory to the storage. -If admin want to write a specific page in zram device to backing device, +If an admin wants to write a specific page in zram device to the backing device, they could write a page index into the interface. echo "page_index=1251" > /sys/block/zramX/writeback @@ -354,7 +354,7 @@ to guarantee storage health for entire product life. To overcome the concern, zram supports "writeback_limit" feature. The "writeback_limit_enable"'s default value is 0 so that it doesn't limit -any writeback. IOW, if admin wants to apply writeback budget, he should +any writeback. IOW, if admin wants to apply writeback budget, they should enable writeback_limit_enable via:: $ echo 1 > /sys/block/zramX/writeback_limit_enable @@ -365,7 +365,7 @@ until admin sets the budget via /sys/block/zramX/writeback_limit. (If admin doesn't enable writeback_limit_enable, writeback_limit's value assigned via /sys/block/zramX/writeback_limit is meaningless.) -If admin want to limit writeback as per-day 400M, he could do it +If admin wants to limit writeback as per-day 400M, they could do it like below:: $ MB_SHIFT=20 @@ -375,16 +375,16 @@ like below:: $ echo 1 > /sys/block/zram0/writeback_limit_enable If admins want to allow further write again once the budget is exhausted, -he could do it like below:: +they could do it like below:: $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \ /sys/block/zram0/writeback_limit -If admin wants to see remaining writeback budget since last set:: +If an admin wants to see the remaining writeback budget since last set:: $ cat /sys/block/zramX/writeback_limit -If admin want to disable writeback limit, he could do:: +If an admin wants to disable writeback limit, they could do:: $ echo 0 > /sys/block/zramX/writeback_limit_enable @@ -393,7 +393,7 @@ system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of writeback happened until you reset the zram to allocate extra writeback budget in next setting is user's job. -If admin wants to measure writeback count in a certain period, he could +If admin wants to measure writeback count in a certain period, they could know it via /sys/block/zram0/bd_stat's 3rd column. memory tracking diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index 1bedab498104..5bfafcbb9562 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -35,6 +35,7 @@ problems and bugs in particular. :maxdepth: 1 reporting-issues + reporting-regressions security-bugs bug-hunting bug-bisect diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst index 9b14b0c2c9c4..609a3201fd4e 100644 --- a/Documentation/admin-guide/iostats.rst +++ b/Documentation/admin-guide/iostats.rst @@ -76,7 +76,7 @@ Field 3 -- # of sectors read (unsigned long) Field 4 -- # of milliseconds spent reading (unsigned int) This is the total number of milliseconds spent by all reads (as - measured from __make_request() to end_that_request_last()). + measured from blk_mq_alloc_request() to __blk_mq_end_request()). Field 5 -- # of writes completed (unsigned long) This is the total number of writes completed successfully. @@ -89,7 +89,7 @@ Field 7 -- # of sectors written (unsigned long) Field 8 -- # of milliseconds spent writing (unsigned int) This is the total number of milliseconds spent by all writes (as - measured from __make_request() to end_that_request_last()). + measured from blk_mq_alloc_request() to __blk_mq_end_request()). Field 9 -- # of I/Os currently in progress (unsigned int) The only field that should go to zero. Incremented as requests are @@ -120,7 +120,7 @@ Field 14 -- # of sectors discarded (unsigned long) Field 15 -- # of milliseconds spent discarding (unsigned int) This is the total number of milliseconds spent by all discards (as - measured from __make_request() to end_that_request_last()). + measured from blk_mq_alloc_request() to __blk_mq_end_request()). Field 16 -- # of flush requests completed This is the total number of flush requests completed successfully. diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst index 3861a25faae1..8419019b6a88 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -494,6 +494,14 @@ architecture which is used to lookup the page-tables for the Virtual addresses in the higher VA range (refer to ARMv8 ARM document for more details). +MODULES_VADDR|MODULES_END|VMALLOC_START|VMALLOC_END|VMEMMAP_START|VMEMMAP_END +----------------------------------------------------------------------------- + +Used to get the correct ranges: + MODULES_VADDR ~ MODULES_END-1 : Kernel module space. + VMALLOC_START ~ VMALLOC_END-1 : vmalloc() / ioremap() space. + VMEMMAP_START ~ VMEMMAP_END-1 : vmemmap region, used for struct page array. + arm === diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7123524a86b8..c2d1f8b5e8f3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2827,6 +2827,9 @@ For details see: Documentation/admin-guide/hw-vuln/mds.rst + mem=nn[KMG] [HEXAGON] Set the memory size. + Must be specified, otherwise memory size will be 0. + mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory Amount of memory to be used in cases as follows: @@ -2834,6 +2837,13 @@ 2 when the kernel is not able to see the whole system memory; 3 memory that lies after 'mem=' boundary is excluded from the hypervisor, then assigned to KVM guests. + 4 to limit the memory available for kdump kernel. + + [ARC,MICROBLAZE] - the limit applies only to low memory, + high memory is not affected. + + [ARM64] - only limits memory covered by the linear + mapping. The NOMAP regions are not affected. [X86] Work as limiting max address. Use together with memmap= to avoid physical address space collisions. @@ -2844,6 +2854,14 @@ in above case 3, memory may need be hot added after boot if system memory of hypervisor is not sufficient. + mem=nn[KMG]@ss[KMG] + [ARM,MIPS] - override the memory layout reported by + firmware. + Define a memory region of size nn[KMG] starting at + ss[KMG]. + Multiple different regions can be specified with + multiple mem= parameters on the command line. + mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel memory. @@ -4504,6 +4522,8 @@ (the least-favored priority). Otherwise, when RCU_BOOST is not set, valid values are 0-99 and the default is zero (non-realtime operation). + When RCU_NOCB_CPU is set, also adjust the + priority of NOCB callback kthreads. rcutree.rcu_nocb_gp_stride= [KNL] Set the number of NOCB callback kthreads in diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst index 5a8f2529a033..69b23f087c05 100644 --- a/Documentation/admin-guide/perf/index.rst +++ b/Documentation/admin-guide/perf/index.rst @@ -8,6 +8,7 @@ Performance monitor support :maxdepth: 1 hisi-pmu + hisi-pcie-pmu imx-ddr qcom_l2_pmu qcom_l3_pmu diff --git a/Documentation/admin-guide/reporting-issues.rst b/Documentation/admin-guide/reporting-issues.rst index d7ac13f789cc..ec62151fe672 100644 --- a/Documentation/admin-guide/reporting-issues.rst +++ b/Documentation/admin-guide/reporting-issues.rst @@ -1,14 +1,5 @@ .. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0) -.. - If you want to distribute this text under CC-BY-4.0 only, please use 'The - Linux kernel developers' for author attribution and link this as source: - https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst -.. - Note: Only the content of this RST file as found in the Linux kernel sources - is available under CC-BY-4.0, as versions of this text that were processed - (for example by the kernel's build system) might contain content taken from - files which use a more restrictive license. - +.. See the bottom of this file for additional redistribution information. Reporting issues ++++++++++++++++ @@ -395,22 +386,16 @@ fixed as soon as possible, hence there are 'issues of high priority' that get handled slightly differently in the reporting process. Three type of cases qualify: regressions, security issues, and really severe problems. -You deal with a 'regression' if something that worked with an older version of -the Linux kernel does not work with a newer one or somehow works worse with it. -It thus is a regression when a WiFi driver that did a fine job with Linux 5.7 -somehow misbehaves with 5.8 or doesn't work at all. It's also a regression if -an application shows erratic behavior with a newer kernel, which might happen -due to incompatible changes in the interface between the kernel and the -userland (like procfs and sysfs). Significantly reduced performance or -increased power consumption also qualify as regression. But keep in mind: the -new kernel needs to be built with a configuration that is similar to the one -from the old kernel (see below how to achieve that). That's because the kernel -developers sometimes can not avoid incompatibilities when implementing new -features; but to avoid regressions such features have to be enabled explicitly -during build time configuration. +You deal with a regression if some application or practical use case running +fine with one Linux kernel works worse or not at all with a newer version +compiled using a similar configuration. The document +Documentation/admin-guide/reporting-regressions.rst explains this in more +detail. It also provides a good deal of other information about regressions you +might want to be aware of; it for example explains how to add your issue to the +list of tracked regressions, to ensure it won't fall through the cracks. What qualifies as security issue is left to your judgment. Consider reading -'Documentation/admin-guide/security-bugs.rst' before proceeding, as it +Documentation/admin-guide/security-bugs.rst before proceeding, as it provides additional details how to best handle security issues. An issue is a 'really severe problem' when something totally unacceptably bad @@ -517,7 +502,7 @@ line starting with 'CPU:'. It should end with 'Not tainted' if the kernel was not tainted when it noticed the problem; it was tainted if you see 'Tainted:' followed by a few spaces and some letters. -If your kernel is tainted, study 'Documentation/admin-guide/tainted-kernels.rst' +If your kernel is tainted, study Documentation/admin-guide/tainted-kernels.rst to find out why. Try to eliminate the reason. Often it's caused by one these three things: @@ -1043,7 +1028,7 @@ down the culprit, as maintainers often won't have the time or setup at hand to reproduce it themselves. To find the change there is a process called 'bisection' which the document -'Documentation/admin-guide/bug-bisect.rst' describes in detail. That process +Documentation/admin-guide/bug-bisect.rst describes in detail. That process will often require you to build about ten to twenty kernel images, trying to reproduce the issue with each of them before building the next. Yes, that takes some time, but don't worry, it works a lot quicker than most people assume. @@ -1073,10 +1058,11 @@ When dealing with regressions make sure the issue you face is really caused by the kernel and not by something else, as outlined above already. In the whole process keep in mind: an issue only qualifies as regression if the -older and the newer kernel got built with a similar configuration. The best way -to archive this: copy the configuration file (``.config``) from the old working -kernel freshly to each newer kernel version you try. Afterwards run ``make -olddefconfig`` to adjust it for the needs of the new version. +older and the newer kernel got built with a similar configuration. This can be +achieved by using ``make olddefconfig``, as explained in more detail by +Documentation/admin-guide/reporting-regressions.rst; that document also +provides a good deal of other information about regressions you might want to be +aware of. Write and send the report @@ -1283,7 +1269,7 @@ them when sending the report by mail. If you filed it in a bug tracker, forward the report's text to these addresses; but on top of it put a small note where you mention that you filed it with a link to the ticket. -See 'Documentation/admin-guide/security-bugs.rst' for more information. +See Documentation/admin-guide/security-bugs.rst for more information. Duties after the report went out @@ -1571,7 +1557,7 @@ Once your report is out your might get asked to do a proper one, as it allows to pinpoint the exact change that causes the issue (which then can easily get reverted to fix the issue quickly). Hence consider to do a proper bisection right away if time permits. See the section 'Special care for regressions' and -the document 'Documentation/admin-guide/bug-bisect.rst' for details how to +the document Documentation/admin-guide/bug-bisect.rst for details how to perform one. In case of a successful bisection add the author of the culprit to the recipients; also CC everyone in the signed-off-by chain, which you find at the end of its commit message. @@ -1594,7 +1580,7 @@ Some fixes are too complex Even small and seemingly obvious code-changes sometimes introduce new and totally unexpected problems. The maintainers of the stable and longterm kernels are very aware of that and thus only apply changes to these kernels that are -within rules outlined in 'Documentation/process/stable-kernel-rules.rst'. +within rules outlined in Documentation/process/stable-kernel-rules.rst. Complex or risky changes for example do not qualify and thus only get applied to mainline. Other fixes are easy to get backported to the newest stable and @@ -1756,10 +1742,23 @@ art will lay some groundwork to improve the situation over time. .. - This text is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If you - spot a typo or small mistake, feel free to let him know directly and he'll - fix it. You are free to do the same in a mostly informal way if you want - to contribute changes to the text, but for copyright reasons please CC + end-of-content +.. + This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If + you spot a typo or small mistake, feel free to let him know directly and + he'll fix it. You are free to do the same in a mostly informal way if you + want to contribute changes to the text, but for copyright reasons please CC linux-doc@vger.kernel.org and "sign-off" your contribution as Documentation/process/submitting-patches.rst outlines in the section "Sign your work - the Developer's Certificate of Origin". +.. + This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top + of the file. If you want to distribute this text under CC-BY-4.0 only, + please use "The Linux kernel developers" for author attribution and link + this as source: + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst +.. + Note: Only the content of this RST file as found in the Linux kernel sources + is available under CC-BY-4.0, as versions of this text that were processed + (for example by the kernel's build system) might contain content taken from + files which use a more restrictive license. diff --git a/Documentation/admin-guide/reporting-regressions.rst b/Documentation/admin-guide/reporting-regressions.rst new file mode 100644 index 000000000000..d8adccdae23f --- /dev/null +++ b/Documentation/admin-guide/reporting-regressions.rst @@ -0,0 +1,451 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0) +.. [see the bottom of this file for redistribution information] + +Reporting regressions ++++++++++++++++++++++ + +"*We don't cause regressions*" is the first rule of Linux kernel development; +Linux founder and lead developer Linus Torvalds established it himself and +ensures it's obeyed. + +This document describes what the rule means for users and how the Linux kernel's +development model ensures to address all reported regressions; aspects relevant +for kernel developers are left to Documentation/process/handling-regressions.rst. + + +The important bits (aka "TL;DR") +================================ + +#. It's a regression if something running fine with one Linux kernel works worse + or not at all with a newer version. Note, the newer kernel has to be compiled + using a similar configuration; the detailed explanations below describes this + and other fine print in more detail. + +#. Report your issue as outlined in Documentation/admin-guide/reporting-issues.rst, + it already covers all aspects important for regressions and repeated + below for convenience. Two of them are important: start your report's subject + with "[REGRESSION]" and CC or forward it to `the regression mailing list + <https://lore.kernel.org/regressions/>`_ (regressions@lists.linux.dev). + +#. Optional, but recommended: when sending or forwarding your report, make the + Linux kernel regression tracking bot "regzbot" track the issue by specifying + when the regression started like this:: + + #regzbot introduced v5.13..v5.14-rc1 + + +All the details on Linux kernel regressions relevant for users +============================================================== + + +The important basics +-------------------- + + +What is a "regression" and what is the "no regressions rule"? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It's a regression if some application or practical use case running fine with +one Linux kernel works worse or not at all with a newer version compiled using a +similar configuration. The "no regressions rule" forbids this to take place; if +it happens by accident, developers that caused it are expected to quickly fix +the issue. + +It thus is a regression when a WiFi driver from Linux 5.13 works fine, but with +5.14 doesn't work at all, works significantly slower, or misbehaves somehow. +It's also a regression if a perfectly working application suddenly shows erratic +behavior with a newer kernel version; such issues can be caused by changes in +procfs, sysfs, or one of the many other interfaces Linux provides to userland +software. But keep in mind, as mentioned earlier: 5.14 in this example needs to +be built from a configuration similar to the one from 5.13. This can be achieved +using ``make olddefconfig``, as explained in more detail below. + +Note the "practical use case" in the first sentence of this section: developers +despite the "no regressions" rule are free to change any aspect of the kernel +and even APIs or ABIs to userland, as long as no existing application or use +case breaks. + +Also be aware the "no regressions" rule covers only interfaces the kernel +provides to the userland. It thus does not apply to kernel-internal interfaces +like the module API, which some externally developed drivers use to hook into +the kernel. + +How do I report a regression? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Just report the issue as outlined in +Documentation/admin-guide/reporting-issues.rst, it already describes the +important points. The following aspects outlined there are especially relevant +for regressions: + + * When checking for existing reports to join, also search the `archives of the + Linux regressions mailing list <https://lore.kernel.org/regressions/>`_ and + `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_. + + * Start your report's subject with "[REGRESSION]". + + * In your report, clearly mention the last kernel version that worked fine and + the first broken one. Ideally try to find the exact change causing the + regression using a bisection, as explained below in more detail. + + * Remember to let the Linux regressions mailing list + (regressions@lists.linux.dev) know about your report: + + * If you report the regression by mail, CC the regressions list. + + * If you report your regression to some bug tracker, forward the submitted + report by mail to the regressions list while CCing the maintainer and the + mailing list for the subsystem in question. + + If it's a regression within a stable or longterm series (e.g. + v5.15.3..v5.15.5), remember to CC the `Linux stable mailing list + <https://lore.kernel.org/stable/>`_ (stable@vger.kernel.org). + + In case you performed a successful bisection, add everyone to the CC the + culprit's commit message mentions in lines starting with "Signed-off-by:". + +When CCing for forwarding your report to the list, consider directly telling the +aforementioned Linux kernel regression tracking bot about your report. To do +that, include a paragraph like this in your mail:: + + #regzbot introduced: v5.13..v5.14-rc1 + +Regzbot will then consider your mail a report for a regression introduced in the +specified version range. In above case Linux v5.13 still worked fine and Linux +v5.14-rc1 was the first version where you encountered the issue. If you +performed a bisection to find the commit that caused the regression, specify the +culprit's commit-id instead:: + + #regzbot introduced: 1f2e3d4c5d + +Placing such a "regzbot command" is in your interest, as it will ensure the +report won't fall through the cracks unnoticed. If you omit this, the Linux +kernel's regressions tracker will take care of telling regzbot about your +regression, as long as you send a copy to the regressions mailing lists. But the +regression tracker is just one human which sometimes has to rest or occasionally +might even enjoy some time away from computers (as crazy as that might sound). +Relying on this person thus will result in an unnecessary delay before the +regressions becomes mentioned `on the list of tracked and unresolved Linux +kernel regressions <https://linux-regtracking.leemhuis.info/regzbot/>`_ and the +weekly regression reports sent by regzbot. Such delays can result in Linus +Torvalds being unaware of important regressions when deciding between "continue +development or call this finished and release the final?". + +Are really all regressions fixed? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Nearly all of them are, as long as the change causing the regression (the +"culprit commit") is reliably identified. Some regressions can be fixed without +this, but often it's required. + +Who needs to find the root cause of a regression? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Developers of the affected code area should try to locate the culprit on their +own. But for them that's often impossible to do with reasonable effort, as quite +a lot of issues only occur in a particular environment outside the developer's +reach -- for example, a specific hardware platform, firmware, Linux distro, +system's configuration, or application. That's why in the end it's often up to +the reporter to locate the culprit commit; sometimes users might even need to +run additional tests afterwards to pinpoint the exact root cause. Developers +should offer advice and reasonably help where they can, to make this process +relatively easy and achievable for typical users. + +How can I find the culprit? +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Perform a bisection, as roughly outlined in +Documentation/admin-guide/reporting-issues.rst and described in more detail by +Documentation/admin-guide/bug-bisect.rst. It might sound like a lot of work, but +in many cases finds the culprit relatively quickly. If it's hard or +time-consuming to reliably reproduce the issue, consider teaming up with other +affected users to narrow down the search range together. + +Who can I ask for advice when it comes to regressions? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Send a mail to the regressions mailing list (regressions@lists.linux.dev) while +CCing the Linux kernel's regression tracker (regressions@leemhuis.info); if the +issue might better be dealt with in private, feel free to omit the list. + + +Additional details about regressions +------------------------------------ + + +What is the goal of the "no regressions rule"? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Users should feel safe when updating kernel versions and not have to worry +something might break. This is in the interest of the kernel developers to make +updating attractive: they don't want users to stay on stable or longterm Linux +series that are either abandoned or more than one and a half years old. That's +in everybody's interest, as `those series might have known bugs, security +issues, or other problematic aspects already fixed in later versions +<http://www.kroah.com/log/blog/2018/08/24/what-stable-kernel-should-i-use/>`_. +Additionally, the kernel developers want to make it simple and appealing for +users to test the latest pre-release or regular release. That's also in +everybody's interest, as it's a lot easier to track down and fix problems, if +they are reported shortly after being introduced. + +Is the "no regressions" rule really adhered in practice? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It's taken really seriously, as can be seen by many mailing list posts from +Linux creator and lead developer Linus Torvalds, some of which are quoted in +Documentation/process/handling-regressions.rst. + +Exceptions to this rule are extremely rare; in the past developers almost always +turned out to be wrong when they assumed a particular situation was warranting +an exception. + +Who ensures the "no regressions" is actually followed? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The subsystem maintainers should take care of that, which are watched and +supported by the tree maintainers -- e.g. Linus Torvalds for mainline and +Greg Kroah-Hartman et al. for various stable/longterm series. + +All of them are helped by people trying to ensure no regression report falls +through the cracks. One of them is Thorsten Leemhuis, who's currently acting as +the Linux kernel's "regressions tracker"; to facilitate this work he relies on +regzbot, the Linux kernel regression tracking bot. That's why you want to bring +your report on the radar of these people by CCing or forwarding each report to +the regressions mailing list, ideally with a "regzbot command" in your mail to +get it tracked immediately. + +How quickly are regressions normally fixed? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Developers should fix any reported regression as quickly as possible, to provide +affected users with a solution in a timely manner and prevent more users from +running into the issue; nevertheless developers need to take enough time and +care to ensure regression fixes do not cause additional damage. + +The answer thus depends on various factors like the impact of a regression, its +age, or the Linux series in which it occurs. In the end though, most regressions +should be fixed within two weeks. + +Is it a regression, if the issue can be avoided by updating some software? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Almost always: yes. If a developer tells you otherwise, ask the regression +tracker for advice as outlined above. + +Is it a regression, if a newer kernel works slower or consumes more energy? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Yes, but the difference has to be significant. A five percent slow-down in a +micro-benchmark thus is unlikely to qualify as regression, unless it also +influences the results of a broad benchmark by more than one percent. If in +doubt, ask for advice. + +Is it a regression, if an external kernel module breaks when updating Linux? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +No, as the "no regression" rule is about interfaces and services the Linux +kernel provides to the userland. It thus does not cover building or running +externally developed kernel modules, as they run in kernel-space and hook into +the kernel using internal interfaces occasionally changed. + +How are regressions handled that are caused by security fixes? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In extremely rare situations security issues can't be fixed without causing +regressions; those fixes are given way, as they are the lesser evil in the end. +Luckily this middling almost always can be avoided, as key developers for the +affected area and often Linus Torvalds himself try very hard to fix security +issues without causing regressions. + +If you nevertheless face such a case, check the mailing list archives if people +tried their best to avoid the regression. If not, report it; if in doubt, ask +for advice as outlined above. + +What happens if fixing a regression is impossible without causing another? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sadly these things happen, but luckily not very often; if they occur, expert +developers of the affected code area should look into the issue to find a fix +that avoids regressions or at least their impact. If you run into such a +situation, do what was outlined already for regressions caused by security +fixes: check earlier discussions if people already tried their best and ask for +advice if in doubt. + +A quick note while at it: these situations could be avoided, if people would +regularly give mainline pre-releases (say v5.15-rc1 or -rc3) from each +development cycle a test run. This is best explained by imagining a change +integrated between Linux v5.14 and v5.15-rc1 which causes a regression, but at +the same time is a hard requirement for some other improvement applied for +5.15-rc1. All these changes often can simply be reverted and the regression thus +solved, if someone finds and reports it before 5.15 is released. A few days or +weeks later this solution can become impossible, as some software might have +started to rely on aspects introduced by one of the follow-up changes: reverting +all changes would then cause a regression for users of said software and thus is +out of the question. + +Is it a regression, if some feature I relied on was removed months ago? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It is, but often it's hard to fix such regressions due to the aspects outlined +in the previous section. It hence needs to be dealt with on a case-by-case +basis. This is another reason why it's in everybody's interest to regularly test +mainline pre-releases. + +Does the "no regression" rule apply if I seem to be the only affected person? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It does, but only for practical usage: the Linux developers want to be free to +remove support for hardware only to be found in attics and museums anymore. + +Note, sometimes regressions can't be avoided to make progress -- and the latter +is needed to prevent Linux from stagnation. Hence, if only very few users seem +to be affected by a regression, it for the greater good might be in their and +everyone else's interest to lettings things pass. Especially if there is an +easy way to circumvent the regression somehow, for example by updating some +software or using a kernel parameter created just for this purpose. + +Does the regression rule apply for code in the staging tree as well? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Not according to the `help text for the configuration option covering all +staging code <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/staging/Kconfig>`_, +which since its early days states:: + + Please note that these drivers are under heavy development, may or + may not work, and may contain userspace interfaces that most likely + will be changed in the near future. + +The staging developers nevertheless often adhere to the "no regressions" rule, +but sometimes bend it to make progress. That's for example why some users had to +deal with (often negligible) regressions when a WiFi driver from the staging +tree was replaced by a totally different one written from scratch. + +Why do later versions have to be "compiled with a similar configuration"? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Because the Linux kernel developers sometimes integrate changes known to cause +regressions, but make them optional and disable them in the kernel's default +configuration. This trick allows progress, as the "no regressions" rule +otherwise would lead to stagnation. + +Consider for example a new security feature blocking access to some kernel +interfaces often abused by malware, which at the same time are required to run a +few rarely used applications. The outlined approach makes both camps happy: +people using these applications can leave the new security feature off, while +everyone else can enable it without running into trouble. + +How to create a configuration similar to the one of an older kernel? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Start your machine with a known-good kernel and configure the newer Linux +version with ``make olddefconfig``. This makes the kernel's build scripts pick +up the configuration file (the ".config" file) from the running kernel as base +for the new one you are about to compile; afterwards they set all new +configuration options to their default value, which should disable new features +that might cause regressions. + +Can I report a regression I found with pre-compiled vanilla kernels? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You need to ensure the newer kernel was compiled with a similar configuration +file as the older one (see above), as those that built them might have enabled +some known-to-be incompatible feature for the newer kernel. If in doubt, report +the matter to the kernel's provider and ask for advice. + + +More about regression tracking with "regzbot" +--------------------------------------------- + +What is regression tracking and why should I care about it? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Rules like "no regressions" need someone to ensure they are followed, otherwise +they are broken either accidentally or on purpose. History has shown this to be +true for Linux kernel development as well. That's why Thorsten Leemhuis, the +Linux Kernel's regression tracker, and some people try to ensure all regression +are fixed by keeping an eye on them until they are resolved. Neither of them are +paid for this, that's why the work is done on a best effort basis. + +Why and how are Linux kernel regressions tracked using a bot? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Tracking regressions completely manually has proven to be quite hard due to the +distributed and loosely structured nature of Linux kernel development process. +That's why the Linux kernel's regression tracker developed regzbot to facilitate +the work, with the long term goal to automate regression tracking as much as +possible for everyone involved. + +Regzbot works by watching for replies to reports of tracked regressions. +Additionally, it's looking out for posted or committed patches referencing such +reports with "Link:" tags; replies to such patch postings are tracked as well. +Combined this data provides good insights into the current state of the fixing +process. + +How to see which regressions regzbot tracks currently? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Check out `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_. + +What kind of issues are supposed to be tracked by regzbot? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The bot is meant to track regressions, hence please don't involve regzbot for +regular issues. But it's okay for the Linux kernel's regression tracker if you +involve regzbot to track severe issues, like reports about hangs, corrupted +data, or internal errors (Panic, Oops, BUG(), warning, ...). + +How to change aspects of a tracked regression? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By using a 'regzbot command' in a direct or indirect reply to the mail with the +report. The easiest way to do that: find the report in your "Sent" folder or the +mailing list archive and reply to it using your mailer's "Reply-all" function. +In that mail, use one of the following commands in a stand-alone paragraph (IOW: +use blank lines to separate one or multiple of these commands from the rest of +the mail's text). + + * Update when the regression started to happen, for example after performing a + bisection:: + + #regzbot introduced: 1f2e3d4c5d + + * Set or update the title:: + + #regzbot title: foo + + * Monitor a discussion or bugzilla.kernel.org ticket where additions aspects of + the issue or a fix are discussed::: + + #regzbot monitor: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/ + #regzbot monitor: https://bugzilla.kernel.org/show_bug.cgi?id=123456789 + + * Point to a place with further details of interest, like a mailing list post + or a ticket in a bug tracker that are slightly related, but about a different + topic:: + + #regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=123456789 + + * Mark a regression as invalid:: + + #regzbot invalid: wasn't a regression, problem has always existed + +Regzbot supports a few other commands primarily used by developers or people +tracking regressions. They and more details about the aforementioned regzbot +commands can be found in the `getting started guide +<https://gitlab.com/knurd42/regzbot/-/blob/main/docs/getting_started.md>`_ and +the `reference documentation <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/reference.md>`_ +for regzbot. + +.. + end-of-content +.. + This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top + of the file. If you want to distribute this text under CC-BY-4.0 only, + please use "The Linux kernel developers" for author attribution and link + this as source: + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-regressions.rst +.. + Note: Only the content of this RST file as found in the Linux kernel sources + is available under CC-BY-4.0, as versions of this text that were processed + (for example by the kernel's build system) might contain content taken from + files which use a more restrictive license. diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index 52d060caf8bb..29884b261aa9 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -10,9 +10,9 @@ This document is based on the ARM booting document by Russell King and is relevant to all public releases of the AArch64 Linux kernel. The AArch64 exception model is made up of a number of exception levels -(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure -counterpart. EL2 is the hypervisor level and exists only in non-secure -mode. EL3 is the highest priority level and exists only in secure mode. +(EL0 - EL3), with EL0, EL1 and EL2 having a secure and a non-secure +counterpart. EL2 is the hypervisor level, EL3 is the highest priority +level and exists only in secure mode. Both are architecturally optional. For the purposes of this document, we will use the term `boot loader` simply to define all software that executes on the CPU(s) before control @@ -167,8 +167,8 @@ Before jumping into the kernel, the following conditions must be met: All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError, IRQ and FIQ). - The CPU must be in either EL2 (RECOMMENDED in order to have access to - the virtualisation extensions) or non-secure EL1. + The CPU must be in non-secure state, either in EL2 (RECOMMENDED in order + to have access to the virtualisation extensions), or in EL1. - Caches, MMUs diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index b72ff17d600a..a8f30963e550 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -259,6 +259,11 @@ HWCAP2_RPRES Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001. +HWCAP2_MTE3 + + Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described + by Documentation/arm64/memory-tagging-extension.rst. + 4. Unused AT_HWCAP bits ----------------------- diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst index 7b99c8f428eb..dd27f78d7608 100644 --- a/Documentation/arm64/memory-tagging-extension.rst +++ b/Documentation/arm64/memory-tagging-extension.rst @@ -76,6 +76,9 @@ configurable behaviours: with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting address is unknown). +- *Asymmetric* - Reads are handled as for synchronous mode while writes + are handled as for asynchronous mode. + The user can select the above modes, per thread, using the ``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where ``flags`` contains any number of the following values in the ``PR_MTE_TCF_MASK`` @@ -91,8 +94,9 @@ mode is specified, the program will run in that mode. If multiple modes are specified, the mode is selected as described in the "Per-CPU preferred tag checking modes" section below. -The current tag check fault mode can be read using the -``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call. +The current tag check fault configuration can be read using the +``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call. If +multiple modes were requested then all will be reported. Tag checking can also be disabled for a user thread by setting the ``PSTATE.TCO`` bit with ``MSR TCO, #1``. @@ -139,18 +143,25 @@ tag checking mode as the CPU's preferred tag checking mode. The preferred tag checking mode for each CPU is controlled by ``/sys/devices/system/cpu/cpu<N>/mte_tcf_preferred``, to which a -privileged user may write the value ``async`` or ``sync``. The default -preferred mode for each CPU is ``async``. +privileged user may write the value ``async``, ``sync`` or ``asymm``. The +default preferred mode for each CPU is ``async``. To allow a program to potentially run in the CPU's preferred tag checking mode, the user program may set multiple tag check fault mode bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL, -flags, 0, 0, 0)`` system call. If the CPU's preferred tag checking -mode is in the task's set of provided tag checking modes (this will -always be the case at present because the kernel only supports two -tag checking modes, but future kernels may support more modes), that -mode will be selected. Otherwise, one of the modes in the task's mode -set will be selected in a currently unspecified manner. +flags, 0, 0, 0)`` system call. If both synchronous and asynchronous +modes are requested then asymmetric mode may also be selected by the +kernel. If the CPU's preferred tag checking mode is in the task's set +of provided tag checking modes, that mode will be selected. Otherwise, +one of the modes in the task's mode will be selected by the kernel +from the task's mode set using the preference order: + + 1. Asynchronous + 2. Asymmetric + 3. Synchronous + +Note that there is no way for userspace to request multiple modes and +also disable asymmetric mode. Initial process state --------------------- @@ -213,6 +224,29 @@ address ABI control and MTE configuration of a process as per the Documentation/arm64/tagged-address-abi.rst and above. The corresponding ``regset`` is 1 element of 8 bytes (``sizeof(long))``). +Core dump support +----------------- + +The allocation tags for user memory mapped with ``PROT_MTE`` are dumped +in the core file as additional ``PT_ARM_MEMTAG_MTE`` segments. The +program header for such segment is defined as: + +:``p_type``: ``PT_ARM_MEMTAG_MTE`` +:``p_flags``: 0 +:``p_offset``: segment file offset +:``p_vaddr``: segment virtual address, same as the corresponding + ``PT_LOAD`` segment +:``p_paddr``: 0 +:``p_filesz``: segment size in file, calculated as ``p_mem_sz / 32`` + (two 4-bit tags cover 32 bytes of memory) +:``p_memsz``: segment size in memory, same as the corresponding + ``PT_LOAD`` segment +:``p_align``: 0 + +The tags are stored in the core file at ``p_offset`` as two 4-bit tags +in a byte. With the tag granule of 16 bytes, a 4K page requires 128 +bytes in the core file. + Example of correct usage ======================== diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index ea281dd75517..466cb9e89047 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -136,7 +136,7 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 | +----------------+-----------------+-----------------+-----------------------------+ -| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | +| Cavium | ThunderX GICv3 | #23154,38545 | CAVIUM_ERRATUM_23154 | +----------------+-----------------+-----------------+-----------------------------+ | Cavium | ThunderX GICv3 | #38539 | N/A | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/Documentation/asm-annotations.rst b/Documentation/asm-annotations.rst index f4bf0f6395fb..a64f2ca469d4 100644 --- a/Documentation/asm-annotations.rst +++ b/Documentation/asm-annotations.rst @@ -130,14 +130,13 @@ denoting a range of code via ``SYM_*_START/END`` annotations. In fact, this kind of annotation corresponds to the now deprecated ``ENTRY`` and ``ENDPROC`` macros. -* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` serve for those - who decided to have two or more names for one function. The typical use is:: +* ``SYM_FUNC_ALIAS``, ``SYM_FUNC_ALIAS_LOCAL``, and ``SYM_FUNC_ALIAS_WEAK`` can + be used to define multiple names for a function. The typical use is:: - SYM_FUNC_START_ALIAS(__memset) - SYM_FUNC_START(memset) + SYM_FUNC_START(__memset) ... asm insns ... - SYM_FUNC_END(memset) - SYM_FUNC_END_ALIAS(__memset) + SYN_FUNC_END(__memset) + SYM_FUNC_ALIAS(memset, __memset) In this example, one can call ``__memset`` or ``memset`` with the same result, except the debug information for the instructions is generated to diff --git a/Documentation/cdrom/packet-writing.rst b/Documentation/cdrom/packet-writing.rst index c5c957195a5a..43db58c50d29 100644 --- a/Documentation/cdrom/packet-writing.rst +++ b/Documentation/cdrom/packet-writing.rst @@ -11,7 +11,7 @@ Getting started quick - Compile and install kernel and modules, reboot. - You need the udftools package (pktsetup, mkudffs, cdrwtool). - Download from http://sourceforge.net/projects/linux-udf/ + Download from https://github.com/pali/udftools - Grab a new CD-RW disc and format it (assuming CD-RW is hdc, substitute as appropriate):: @@ -102,7 +102,7 @@ Using the pktcdvd sysfs interface Since Linux 2.6.20, the pktcdvd module has a sysfs interface and can be controlled by it. For example the "pktcdvd" tool uses -this interface. (see http://tom.ist-im-web.de/download/pktcdvd ) +this interface. (see http://tom.ist-im-web.de/linux/software/pktcdvd ) "pktcdvd" works similar to "pktsetup", e.g.:: diff --git a/Documentation/conf.py b/Documentation/conf.py index f07f2e9b9f2c..072ee31a301d 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -409,135 +409,25 @@ latex_elements = { # Additional stuff for the LaTeX preamble. 'preamble': ''' - % Prevent column squeezing of tabulary. - \\setlength{\\tymin}{20em} % Use some font with UTF-8 support with XeLaTeX \\usepackage{fontspec} \\setsansfont{DejaVu Sans} \\setromanfont{DejaVu Serif} \\setmonofont{DejaVu Sans Mono} - % Adjust \\headheight for fancyhdr - \\addtolength{\\headheight}{1.6pt} - \\addtolength{\\topmargin}{-1.6pt} - ''', + ''', } -# Translations have Asian (CJK) characters which are only displayed if -# xeCJK is used - -latex_elements['preamble'] += ''' - \\IfFontExistsTF{Noto Sans CJK SC}{ - % This is needed for translations - \\usepackage{xeCJK} - \\IfFontExistsTF{Noto Serif CJK SC}{ - \\setCJKmainfont{Noto Serif CJK SC}[AutoFakeSlant] - }{ - \\setCJKmainfont{Noto Sans CJK SC}[AutoFakeSlant] - } - \\setCJKsansfont{Noto Sans CJK SC}[AutoFakeSlant] - \\setCJKmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant] - % CJK Language-specific font choices - \\IfFontExistsTF{Noto Serif CJK SC}{ - \\newCJKfontfamily[SCmain]\\scmain{Noto Serif CJK SC}[AutoFakeSlant] - \\newCJKfontfamily[SCserif]\\scserif{Noto Serif CJK SC}[AutoFakeSlant] - }{ - \\newCJKfontfamily[SCmain]\\scmain{Noto Sans CJK SC}[AutoFakeSlant] - \\newCJKfontfamily[SCserif]\\scserif{Noto Sans CJK SC}[AutoFakeSlant] - } - \\newCJKfontfamily[SCsans]\\scsans{Noto Sans CJK SC}[AutoFakeSlant] - \\newCJKfontfamily[SCmono]\\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant] - \\IfFontExistsTF{Noto Serif CJK TC}{ - \\newCJKfontfamily[TCmain]\\tcmain{Noto Serif CJK TC}[AutoFakeSlant] - \\newCJKfontfamily[TCserif]\\tcserif{Noto Serif CJK TC}[AutoFakeSlant] - }{ - \\newCJKfontfamily[TCmain]\\tcmain{Noto Sans CJK TC}[AutoFakeSlant] - \\newCJKfontfamily[TCserif]\\tcserif{Noto Sans CJK TC}[AutoFakeSlant] - } - \\newCJKfontfamily[TCsans]\\tcsans{Noto Sans CJK TC}[AutoFakeSlant] - \\newCJKfontfamily[TCmono]\\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant] - \\IfFontExistsTF{Noto Serif CJK KR}{ - \\newCJKfontfamily[KRmain]\\krmain{Noto Serif CJK KR}[AutoFakeSlant] - \\newCJKfontfamily[KRserif]\\krserif{Noto Serif CJK KR}[AutoFakeSlant] - }{ - \\newCJKfontfamily[KRmain]\\krmain{Noto Sans CJK KR}[AutoFakeSlant] - \\newCJKfontfamily[KRserif]\\krserif{Noto Sans CJK KR}[AutoFakeSlant] - } - \\newCJKfontfamily[KRsans]\\krsans{Noto Sans CJK KR}[AutoFakeSlant] - \\newCJKfontfamily[KRmono]\\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant] - \\IfFontExistsTF{Noto Serif CJK JP}{ - \\newCJKfontfamily[JPmain]\\jpmain{Noto Serif CJK JP}[AutoFakeSlant] - \\newCJKfontfamily[JPserif]\\jpserif{Noto Serif CJK JP}[AutoFakeSlant] - }{ - \\newCJKfontfamily[JPmain]\\jpmain{Noto Sans CJK JP}[AutoFakeSlant] - \\newCJKfontfamily[JPserif]\\jpserif{Noto Sans CJK JP}[AutoFakeSlant] - } - \\newCJKfontfamily[JPsans]\\jpsans{Noto Sans CJK JP}[AutoFakeSlant] - \\newCJKfontfamily[JPmono]\\jpmono{Noto Sans Mono CJK JP}[AutoFakeSlant] - % Dummy commands for Sphinx < 2.3 (no 'extrapackages' support) - \\providecommand{\\onehalfspacing}{} - \\providecommand{\\singlespacing}{} - % Define custom macros to on/off CJK - \\newcommand{\\kerneldocCJKon}{\\makexeCJKactive\\onehalfspacing} - \\newcommand{\\kerneldocCJKoff}{\\makexeCJKinactive\\singlespacing} - \\newcommand{\\kerneldocBeginSC}{% - \\begingroup% - \\scmain% - } - \\newcommand{\\kerneldocEndSC}{\\endgroup} - \\newcommand{\\kerneldocBeginTC}{% - \\begingroup% - \\tcmain% - \\renewcommand{\\CJKrmdefault}{TCserif}% - \\renewcommand{\\CJKsfdefault}{TCsans}% - \\renewcommand{\\CJKttdefault}{TCmono}% - } - \\newcommand{\\kerneldocEndTC}{\\endgroup} - \\newcommand{\\kerneldocBeginKR}{% - \\begingroup% - \\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}% - \\xeCJKDeclareCharClass{HalfRight}{`â€,`’}% - \\krmain% - \\renewcommand{\\CJKrmdefault}{KRserif}% - \\renewcommand{\\CJKsfdefault}{KRsans}% - \\renewcommand{\\CJKttdefault}{KRmono}% - \\xeCJKsetup{CJKspace = true} % For inter-phrase space - } - \\newcommand{\\kerneldocEndKR}{\\endgroup} - \\newcommand{\\kerneldocBeginJP}{% - \\begingroup% - \\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}% - \\xeCJKDeclareCharClass{HalfRight}{`â€,`’}% - \\jpmain% - \\renewcommand{\\CJKrmdefault}{JPserif}% - \\renewcommand{\\CJKsfdefault}{JPsans}% - \\renewcommand{\\CJKttdefault}{JPmono}% - } - \\newcommand{\\kerneldocEndJP}{\\endgroup} - % Single spacing in literal blocks - \\fvset{baselinestretch=1} - % To customize \\sphinxtableofcontents - \\usepackage{etoolbox} - % Inactivate CJK after tableofcontents - \\apptocmd{\\sphinxtableofcontents}{\\kerneldocCJKoff}{}{} - }{ % No CJK font found - % Custom macros to on/off CJK (Dummy) - \\newcommand{\\kerneldocCJKon}{} - \\newcommand{\\kerneldocCJKoff}{} - \\newcommand{\\kerneldocBeginSC}{} - \\newcommand{\\kerneldocEndSC}{} - \\newcommand{\\kerneldocBeginTC}{} - \\newcommand{\\kerneldocEndTC}{} - \\newcommand{\\kerneldocBeginKR}{} - \\newcommand{\\kerneldocEndKR}{} - \\newcommand{\\kerneldocBeginJP}{} - \\newcommand{\\kerneldocEndJP}{} - } -''' - # Fix reference escape troubles with Sphinx 1.4.x if major == 1: latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n' + +# Load kerneldoc specific LaTeX settings +latex_elements['preamble'] += ''' + % Load kerneldoc specific LaTeX settings + \\input{kerneldoc-preamble.sty} +''' + # With Sphinx 1.6, it is possible to change the Bg color directly # by using: # \definecolor{sphinxnoteBgColor}{RGB}{204,255,255} @@ -599,6 +489,11 @@ for fn in os.listdir('.'): # If false, no module index is generated. #latex_domain_indices = True +# Additional LaTeX stuff to be copied to build directory +latex_additional_files = [ + 'sphinx/kerneldoc-preamble.sty', +] + # -- Options for manual page output --------------------------------------- diff --git a/Documentation/core-api/entry.rst b/Documentation/core-api/entry.rst new file mode 100644 index 000000000000..e12f22ab33c7 --- /dev/null +++ b/Documentation/core-api/entry.rst @@ -0,0 +1,279 @@ +Entry/exit handling for exceptions, interrupts, syscalls and KVM +================================================================ + +All transitions between execution domains require state updates which are +subject to strict ordering constraints. State updates are required for the +following: + + * Lockdep + * RCU / Context tracking + * Preemption counter + * Tracing + * Time accounting + +The update order depends on the transition type and is explained below in +the transition type sections: `Syscalls`_, `KVM`_, `Interrupts and regular +exceptions`_, `NMI and NMI-like exceptions`_. + +Non-instrumentable code - noinstr +--------------------------------- + +Most instrumentation facilities depend on RCU, so intrumentation is prohibited +for entry code before RCU starts watching and exit code after RCU stops +watching. In addition, many architectures must save and restore register state, +which means that (for example) a breakpoint in the breakpoint entry code would +overwrite the debug registers of the initial breakpoint. + +Such code must be marked with the 'noinstr' attribute, placing that code into a +special section inaccessible to instrumentation and debug facilities. Some +functions are partially instrumentable, which is handled by marking them +noinstr and using instrumentation_begin() and instrumentation_end() to flag the +instrumentable ranges of code: + +.. code-block:: c + + noinstr void entry(void) + { + handle_entry(); // <-- must be 'noinstr' or '__always_inline' + ... + + instrumentation_begin(); + handle_context(); // <-- instrumentable code + instrumentation_end(); + + ... + handle_exit(); // <-- must be 'noinstr' or '__always_inline' + } + +This allows verification of the 'noinstr' restrictions via objtool on +supported architectures. + +Invoking non-instrumentable functions from instrumentable context has no +restrictions and is useful to protect e.g. state switching which would +cause malfunction if instrumented. + +All non-instrumentable entry/exit code sections before and after the RCU +state transitions must run with interrupts disabled. + +Syscalls +-------- + +Syscall-entry code starts in assembly code and calls out into low-level C code +after establishing low-level architecture-specific state and stack frames. This +low-level C code must not be instrumented. A typical syscall handling function +invoked from low-level assembly code looks like this: + +.. code-block:: c + + noinstr void syscall(struct pt_regs *regs, int nr) + { + arch_syscall_enter(regs); + nr = syscall_enter_from_user_mode(regs, nr); + + instrumentation_begin(); + if (!invoke_syscall(regs, nr) && nr != -1) + result_reg(regs) = __sys_ni_syscall(regs); + instrumentation_end(); + + syscall_exit_to_user_mode(regs); + } + +syscall_enter_from_user_mode() first invokes enter_from_user_mode() which +establishes state in the following order: + + * Lockdep + * RCU / Context tracking + * Tracing + +and then invokes the various entry work functions like ptrace, seccomp, audit, +syscall tracing, etc. After all that is done, the instrumentable invoke_syscall +function can be invoked. The instrumentable code section then ends, after which +syscall_exit_to_user_mode() is invoked. + +syscall_exit_to_user_mode() handles all work which needs to be done before +returning to user space like tracing, audit, signals, task work etc. After +that it invokes exit_to_user_mode() which again handles the state +transition in the reverse order: + + * Tracing + * RCU / Context tracking + * Lockdep + +syscall_enter_from_user_mode() and syscall_exit_to_user_mode() are also +available as fine grained subfunctions in cases where the architecture code +has to do extra work between the various steps. In such cases it has to +ensure that enter_from_user_mode() is called first on entry and +exit_to_user_mode() is called last on exit. + +Do not nest syscalls. Nested systcalls will cause RCU and/or context tracking +to print a warning. + +KVM +--- + +Entering or exiting guest mode is very similar to syscalls. From the host +kernel point of view the CPU goes off into user space when entering the +guest and returns to the kernel on exit. + +kvm_guest_enter_irqoff() is a KVM-specific variant of exit_to_user_mode() +and kvm_guest_exit_irqoff() is the KVM variant of enter_from_user_mode(). +The state operations have the same ordering. + +Task work handling is done separately for guest at the boundary of the +vcpu_run() loop via xfer_to_guest_mode_handle_work() which is a subset of +the work handled on return to user space. + +Do not nest KVM entry/exit transitions because doing so is nonsensical. + +Interrupts and regular exceptions +--------------------------------- + +Interrupts entry and exit handling is slightly more complex than syscalls +and KVM transitions. + +If an interrupt is raised while the CPU executes in user space, the entry +and exit handling is exactly the same as for syscalls. + +If the interrupt is raised while the CPU executes in kernel space the entry and +exit handling is slightly different. RCU state is only updated when the +interrupt is raised in the context of the CPU's idle task. Otherwise, RCU will +already be watching. Lockdep and tracing have to be updated unconditionally. + +irqentry_enter() and irqentry_exit() provide the implementation for this. + +The architecture-specific part looks similar to syscall handling: + +.. code-block:: c + + noinstr void interrupt(struct pt_regs *regs, int nr) + { + arch_interrupt_enter(regs); + state = irqentry_enter(regs); + + instrumentation_begin(); + + irq_enter_rcu(); + invoke_irq_handler(regs, nr); + irq_exit_rcu(); + + instrumentation_end(); + + irqentry_exit(regs, state); + } + +Note that the invocation of the actual interrupt handler is within a +irq_enter_rcu() and irq_exit_rcu() pair. + +irq_enter_rcu() updates the preemption count which makes in_hardirq() +return true, handles NOHZ tick state and interrupt time accounting. This +means that up to the point where irq_enter_rcu() is invoked in_hardirq() +returns false. + +irq_exit_rcu() handles interrupt time accounting, undoes the preemption +count update and eventually handles soft interrupts and NOHZ tick state. + +In theory, the preemption count could be updated in irqentry_enter(). In +practice, deferring this update to irq_enter_rcu() allows the preemption-count +code to be traced, while also maintaining symmetry with irq_exit_rcu() and +irqentry_exit(), which are described in the next paragraph. The only downside +is that the early entry code up to irq_enter_rcu() must be aware that the +preemption count has not yet been updated with the HARDIRQ_OFFSET state. + +Note that irq_exit_rcu() must remove HARDIRQ_OFFSET from the preemption count +before it handles soft interrupts, whose handlers must run in BH context rather +than irq-disabled context. In addition, irqentry_exit() might schedule, which +also requires that HARDIRQ_OFFSET has been removed from the preemption count. + +Even though interrupt handlers are expected to run with local interrupts +disabled, interrupt nesting is common from an entry/exit perspective. For +example, softirq handling happens within an irqentry_{enter,exit}() block with +local interrupts enabled. Also, although uncommon, nothing prevents an +interrupt handler from re-enabling interrupts. + +Interrupt entry/exit code doesn't strictly need to handle reentrancy, since it +runs with local interrupts disabled. But NMIs can happen anytime, and a lot of +the entry code is shared between the two. + +NMI and NMI-like exceptions +--------------------------- + +NMIs and NMI-like exceptions (machine checks, double faults, debug +interrupts, etc.) can hit any context and must be extra careful with +the state. + +State changes for debug exceptions and machine-check exceptions depend on +whether these exceptions happened in user-space (breakpoints or watchpoints) or +in kernel mode (code patching). From user-space, they are treated like +interrupts, while from kernel mode they are treated like NMIs. + +NMIs and other NMI-like exceptions handle state transitions without +distinguishing between user-mode and kernel-mode origin. + +The state update on entry is handled in irqentry_nmi_enter() which updates +state in the following order: + + * Preemption counter + * Lockdep + * RCU / Context tracking + * Tracing + +The exit counterpart irqentry_nmi_exit() does the reverse operation in the +reverse order. + +Note that the update of the preemption counter has to be the first +operation on enter and the last operation on exit. The reason is that both +lockdep and RCU rely on in_nmi() returning true in this case. The +preemption count modification in the NMI entry/exit case must not be +traced. + +Architecture-specific code looks like this: + +.. code-block:: c + + noinstr void nmi(struct pt_regs *regs) + { + arch_nmi_enter(regs); + state = irqentry_nmi_enter(regs); + + instrumentation_begin(); + nmi_handler(regs); + instrumentation_end(); + + irqentry_nmi_exit(regs); + } + +and for e.g. a debug exception it can look like this: + +.. code-block:: c + + noinstr void debug(struct pt_regs *regs) + { + arch_nmi_enter(regs); + + debug_regs = save_debug_regs(); + + if (user_mode(regs)) { + state = irqentry_enter(regs); + + instrumentation_begin(); + user_mode_debug_handler(regs, debug_regs); + instrumentation_end(); + + irqentry_exit(regs, state); + } else { + state = irqentry_nmi_enter(regs); + + instrumentation_begin(); + kernel_mode_debug_handler(regs, debug_regs); + instrumentation_end(); + + irqentry_nmi_exit(regs, state); + } + } + +There is no combined irqentry_nmi_if_kernel() function available as the +above cannot be handled in an exception-agnostic way. + +NMIs can happen in any context. For example, an NMI-like exception triggered +while handling an NMI. So NMI entry code has to be reentrant and state updates +need to handle nesting. diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index 5de2c7a4b1b3..972d46a5ddf6 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -44,6 +44,14 @@ Library functionality that is used throughout the kernel. timekeeping errseq +Low level entry and exit +======================== + +.. toctree:: + :maxdepth: 1 + + entry + Concurrency primitives ====================== diff --git a/Documentation/dev-tools/ktap.rst b/Documentation/dev-tools/ktap.rst index 878530cb9c27..5ee735c6687f 100644 --- a/Documentation/dev-tools/ktap.rst +++ b/Documentation/dev-tools/ktap.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0 -======================================== -The Kernel Test Anything Protocol (KTAP) -======================================== +=================================================== +The Kernel Test Anything Protocol (KTAP), version 1 +=================================================== TAP, or the Test Anything Protocol is a format for specifying test results used by a number of projects. It's website and specification are found at this `link @@ -68,7 +68,7 @@ Test case result lines Test case result lines indicate the final status of a test. They are required and must have the format: -.. code-block:: +.. code-block:: none <result> <number> [<description>][ # [<directive>] [<diagnostic data>]] @@ -117,32 +117,32 @@ separator. Example result lines include: -.. code-block:: +.. code-block:: none ok 1 test_case_name The test "test_case_name" passed. -.. code-block:: +.. code-block:: none not ok 1 test_case_name The test "test_case_name" failed. -.. code-block:: +.. code-block:: none ok 1 test # SKIP necessary dependency unavailable The test "test" was SKIPPED with the diagnostic message "necessary dependency unavailable". -.. code-block:: +.. code-block:: none not ok 1 test # TIMEOUT 30 seconds The test "test" timed out, with diagnostic data "30 seconds". -.. code-block:: +.. code-block:: none ok 5 check return code # rcode=0 @@ -174,6 +174,13 @@ There may be lines within KTAP output that do not follow the format of one of the four formats for lines described above. This is allowed, however, they will not influence the status of the tests. +This is an important difference from TAP. Kernel tests may print messages +to the system console or a log file. Both of these destinations may contain +messages either from unrelated kernel or userspace activity, or kernel +messages from non-test code that is invoked by the test. The kernel code +invoked by the test likely is not aware that a test is in progress and +thus can not print the message as a diagnostic message. + Nested tests ------------ @@ -186,13 +193,16 @@ starting with another KTAP version line and test plan, and end with the overall result. If one of the subtests fail, for example, the parent test should also fail. -Additionally, all result lines in a subtest should be indented. One level of +Additionally, all lines in a subtest should be indented. One level of indentation is two spaces: " ". The indentation should begin at the version line and should end before the parent test's result line. +"Unknown lines" are not considered to be lines in a subtest and thus are +allowed to be either indented or not indented. + An example of a test with two nested subtests: -.. code-block:: +.. code-block:: none KTAP version 1 1..1 @@ -205,7 +215,7 @@ An example of a test with two nested subtests: An example format with multiple levels of nested testing: -.. code-block:: +.. code-block:: none KTAP version 1 1..2 @@ -224,10 +234,15 @@ An example format with multiple levels of nested testing: Major differences between TAP and KTAP -------------------------------------- -Note the major differences between the TAP and KTAP specification: -- yaml and json are not recommended in diagnostic messages -- TODO directive not recognized -- KTAP allows for an arbitrary number of tests to be nested +================================================== ========= =============== +Feature TAP KTAP +================================================== ========= =============== +yaml and json in diagnosic message ok not recommended +TODO directive ok not recognized +allows an arbitrary number of tests to be nested no yes +"Unknown lines" are in category of "Anything else" yes no +"Unknown lines" are incorrect allowed +================================================== ========= =============== The TAP14 specification does permit nested tests, but instead of using another nested version line, uses a line of the form @@ -235,7 +250,7 @@ nested version line, uses a line of the form Example KTAP output -------------------- -.. code-block:: +.. code-block:: none KTAP version 1 1..1 diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml index 981bac451698..7a04b8aaaec3 100644 --- a/Documentation/devicetree/bindings/arm/pmu.yaml +++ b/Documentation/devicetree/bindings/arm/pmu.yaml @@ -20,6 +20,8 @@ properties: items: - enum: - apm,potenza-pmu + - apple,firestorm-pmu + - apple,icestorm-pmu - arm,armv8-pmuv3 # Only for s/w models - arm,arm1136-pmu - arm,arm1176-pmu diff --git a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt index 23b18b92c558..bde63f8f090e 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt @@ -18,6 +18,7 @@ Required properties: "amlogic,meson-g12a-gpio-intc" for G12A SoCs (S905D2, S905X2, S905Y2) "amlogic,meson-sm1-gpio-intc" for SM1 SoCs (S905D3, S905X3, S905Y3) "amlogic,meson-a1-gpio-intc" for A1 SoCs (A113L) + "amlogic,meson-s4-gpio-intc" for S4 SoCs (S802X2, S905Y4, S805X2G, S905W2) - reg : Specifies base physical address and size of the registers. - interrupt-controller : Identifies the node as an interrupt controller. - #interrupt-cells : Specifies the number of cells needed to encode an diff --git a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml index 97359024709a..85c85b694217 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml @@ -56,6 +56,8 @@ properties: - 1: virtual HV timer - 2: physical guest timer - 3: virtual guest timer + - 4: 'efficient' CPU PMU + - 5: 'performance' CPU PMU The 3rd cell contains the interrupt flags. This is normally IRQ_TYPE_LEVEL_HIGH (4). @@ -68,6 +70,35 @@ properties: power-domains: maxItems: 1 + affinities: + type: object + additionalProperties: false + description: + FIQ affinity can be expressed as a single "affinities" node, + containing a set of sub-nodes, one per FIQ with a non-default + affinity. + patternProperties: + "^.+-affinity$": + type: object + additionalProperties: false + properties: + apple,fiq-index: + description: + The interrupt number specified as a FIQ, and for which + the affinity is not the default. + $ref: /schemas/types.yaml#/definitions/uint32 + maximum: 5 + + cpus: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: + Should be a list of phandles to CPU nodes (as described in + Documentation/devicetree/bindings/arm/cpus.yaml). + + required: + - fiq-index + - cpus + required: - compatible - '#interrupt-cells' diff --git a/Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml b/Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml new file mode 100644 index 000000000000..47a78a167aba --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/apple,aic2.yaml @@ -0,0 +1,98 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interrupt-controller/apple,aic2.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Apple Interrupt Controller 2 + +maintainers: + - Hector Martin <marcan@marcan.st> + +description: | + The Apple Interrupt Controller 2 is a simple interrupt controller present on + Apple ARM SoC platforms starting with t600x (M1 Pro and Max). + + It provides the following features: + + - Level-triggered hardware IRQs wired to SoC blocks + - Single mask bit per IRQ + - Automatic masking on event delivery (auto-ack) + - Software triggering (ORed with hw line) + - Automatic prioritization (single event/ack register per CPU, lower IRQs = + higher priority) + - Automatic masking on ack + - Support for multiple dies + + This device also represents the FIQ interrupt sources on platforms using AIC, + which do not go through a discrete interrupt controller. It also handles + FIQ-based Fast IPIs. + +properties: + compatible: + items: + - const: apple,t6000-aic + - const: apple,aic2 + + interrupt-controller: true + + '#interrupt-cells': + const: 4 + description: | + The 1st cell contains the interrupt type: + - 0: Hardware IRQ + - 1: FIQ + + The 2nd cell contains the die ID. + + The next cell contains the interrupt number. + - HW IRQs: interrupt number + - FIQs: + - 0: physical HV timer + - 1: virtual HV timer + - 2: physical guest timer + - 3: virtual guest timer + + The last cell contains the interrupt flags. This is normally + IRQ_TYPE_LEVEL_HIGH (4). + + reg: + items: + - description: Address and size of the main AIC2 registers. + - description: Address and size of the AIC2 Event register. + + reg-names: + items: + - const: core + - const: event + + power-domains: + maxItems: 1 + +required: + - compatible + - '#interrupt-cells' + - interrupt-controller + - reg + - reg-names + +additionalProperties: false + +allOf: + - $ref: /schemas/interrupt-controller.yaml# + +examples: + - | + soc { + #address-cells = <2>; + #size-cells = <2>; + + aic: interrupt-controller@28e100000 { + compatible = "apple,t6000-aic", "apple,aic2"; + #interrupt-cells = <4>; + interrupt-controller; + reg = <0x2 0x8e100000 0x0 0xc000>, + <0x2 0x8e10c000 0x0 0x4>; + reg-names = "core", "event"; + }; + }; diff --git a/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml new file mode 100644 index 000000000000..509d20c091af --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interrupt-controller/qcom,mpm.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcom MPM Interrupt Controller + +maintainers: + - Shawn Guo <shawn.guo@linaro.org> + +description: + Qualcomm Technologies Inc. SoCs based on the RPM architecture have a + MSM Power Manager (MPM) that is in always-on domain. In addition to managing + resources during sleep, the hardware also has an interrupt controller that + monitors the interrupts when the system is asleep, wakes up the APSS when + one of these interrupts occur and replays it to GIC interrupt controller + after GIC becomes operational. + +allOf: + - $ref: /schemas/interrupt-controller.yaml# + +properties: + compatible: + items: + - const: qcom,mpm + + reg: + maxItems: 1 + description: + Specifies the base address and size of vMPM registers in RPM MSG RAM. + + interrupts: + maxItems: 1 + description: + Specify the IRQ used by RPM to wakeup APSS. + + mboxes: + maxItems: 1 + description: + Specify the mailbox used to notify RPM for writing vMPM registers. + + interrupt-controller: true + + '#interrupt-cells': + const: 2 + description: + The first cell is the MPM pin number for the interrupt, and the second + is the trigger type. + + qcom,mpm-pin-count: + description: + Specify the total MPM pin count that a SoC supports. + $ref: /schemas/types.yaml#/definitions/uint32 + + qcom,mpm-pin-map: + description: + A set of MPM pin numbers and the corresponding GIC SPIs. + $ref: /schemas/types.yaml#/definitions/uint32-matrix + items: + items: + - description: MPM pin number + - description: GIC SPI number for the MPM pin + +required: + - compatible + - reg + - interrupts + - mboxes + - interrupt-controller + - '#interrupt-cells' + - qcom,mpm-pin-count + - qcom,mpm-pin-map + +additionalProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/arm-gic.h> + mpm: interrupt-controller@45f01b8 { + compatible = "qcom,mpm"; + interrupts = <GIC_SPI 197 IRQ_TYPE_EDGE_RISING>; + reg = <0x45f01b8 0x1000>; + mboxes = <&apcs_glb 1>; + interrupt-controller; + #interrupt-cells = <2>; + interrupt-parent = <&intc>; + qcom,mpm-pin-count = <96>; + qcom,mpm-pin-map = <2 275>, + <5 296>, + <12 422>, + <24 79>, + <86 183>, + <90 260>, + <91 260>; + }; diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml index d19c881b4abc..e44daa09b137 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml @@ -20,6 +20,7 @@ properties: - items: - enum: - st,stm32mp1-exti + - st,stm32mp13-exti - const: syscon "#interrupt-cells": diff --git a/Documentation/devicetree/bindings/perf/marvell-cn10k-ddr.yaml b/Documentation/devicetree/bindings/perf/marvell-cn10k-ddr.yaml new file mode 100644 index 000000000000..a18dd0a8c43a --- /dev/null +++ b/Documentation/devicetree/bindings/perf/marvell-cn10k-ddr.yaml @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/perf/marvell-cn10k-ddr.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell CN10K DDR performance monitor + +maintainers: + - Bharat Bhushan <bbhushan2@marvell.com> + +properties: + compatible: + items: + - enum: + - marvell,cn10k-ddr-pmu + + reg: + maxItems: 1 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + bus { + #address-cells = <2>; + #size-cells = <2>; + + pmu@87e1c0000000 { + compatible = "marvell,cn10k-ddr-pmu"; + reg = <0x87e1 0xc0000000 0x0 0x10000>; + }; + }; diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml b/Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml new file mode 100644 index 000000000000..b78209cd0f28 --- /dev/null +++ b/Documentation/devicetree/bindings/timer/nvidia,tegra-timer.yaml @@ -0,0 +1,150 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/timer/nvidia,tegra-timer.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: NVIDIA Tegra timer + +maintainers: + - Stephen Warren <swarren@nvidia.com> + +allOf: + - if: + properties: + compatible: + contains: + const: nvidia,tegra210-timer + then: + properties: + interrupts: + # Either a single combined interrupt or up to 14 individual interrupts + minItems: 1 + maxItems: 14 + description: > + A list of 14 interrupts; one per each timer channels 0 through 13 + + - if: + properties: + compatible: + oneOf: + - items: + - enum: + - nvidia,tegra114-timer + - nvidia,tegra124-timer + - nvidia,tegra132-timer + - const: nvidia,tegra30-timer + - items: + - const: nvidia,tegra30-timer + - const: nvidia,tegra20-timer + then: + properties: + interrupts: + # Either a single combined interrupt or up to 6 individual interrupts + minItems: 1 + maxItems: 6 + description: > + A list of 6 interrupts; one per each of timer channels 1 through 5, + and one for the shared interrupt for the remaining channels. + + - if: + properties: + compatible: + const: nvidia,tegra20-timer + then: + properties: + interrupts: + # Either a single combined interrupt or up to 4 individual interrupts + minItems: 1 + maxItems: 4 + description: | + A list of 4 interrupts; one per timer channel. + +properties: + compatible: + oneOf: + - const: nvidia,tegra210-timer + description: > + The Tegra210 timer provides fourteen 29-bit timer counters and one 32-bit + timestamp counter. The TMRs run at either a fixed 1 MHz clock rate derived + from the oscillator clock (TMR0-TMR9) or directly at the oscillator clock + (TMR10-TMR13). Each TMR can be programmed to generate one-shot, periodic, + or watchdog interrupts. + - items: + - enum: + - nvidia,tegra114-timer + - nvidia,tegra124-timer + - nvidia,tegra132-timer + - const: nvidia,tegra30-timer + - items: + - const: nvidia,tegra30-timer + - const: nvidia,tegra20-timer + description: > + The Tegra30 timer provides ten 29-bit timer channels, a single 32-bit free + running counter, and 5 watchdog modules. The first two channels may also + trigger a legacy watchdog reset. + - const: nvidia,tegra20-timer + description: > + The Tegra20 timer provides four 29-bit timer channels and a single 32-bit free + running counter. The first two channels may also trigger a watchdog reset. + + reg: + maxItems: 1 + + interrupts: true + + clocks: + maxItems: 1 + + clock-names: + items: + - const: timer + + +required: + - compatible + - reg + - interrupts + - clocks + +additionalProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/irq.h> + timer@60005000 { + compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer"; + reg = <0x60005000 0x400>; + interrupts = <0 0 IRQ_TYPE_LEVEL_HIGH>, + <0 1 IRQ_TYPE_LEVEL_HIGH>, + <0 41 IRQ_TYPE_LEVEL_HIGH>, + <0 42 IRQ_TYPE_LEVEL_HIGH>, + <0 121 IRQ_TYPE_LEVEL_HIGH>, + <0 122 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&tegra_car 214>; + }; + - | + #include <dt-bindings/clock/tegra210-car.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/interrupt-controller/irq.h> + + timer@60005000 { + compatible = "nvidia,tegra210-timer"; + reg = <0x60005000 0x400>; + interrupts = <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 178 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 179 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&tegra_car TEGRA210_CLK_TIMER>; + clock-names = "timer"; + }; diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt deleted file mode 100644 index 4a864bd10d3d..000000000000 --- a/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt +++ /dev/null @@ -1,24 +0,0 @@ -NVIDIA Tegra20 timer - -The Tegra20 timer provides four 29-bit timer channels and a single 32-bit free -running counter. The first two channels may also trigger a watchdog reset. - -Required properties: - -- compatible : should be "nvidia,tegra20-timer". -- reg : Specifies base physical address and size of the registers. -- interrupts : A list of 4 interrupts; one per timer channel. -- clocks : Must contain one entry, for the module clock. - See ../clocks/clock-bindings.txt for details. - -Example: - -timer { - compatible = "nvidia,tegra20-timer"; - reg = <0x60005000 0x60>; - interrupts = <0 0 0x04 - 0 1 0x04 - 0 41 0x04 - 0 42 0x04>; - clocks = <&tegra_car 132>; -}; diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt deleted file mode 100644 index 032cda96fe0d..000000000000 --- a/Documentation/devicetree/bindings/timer/nvidia,tegra210-timer.txt +++ /dev/null @@ -1,36 +0,0 @@ -NVIDIA Tegra210 timer - -The Tegra210 timer provides fourteen 29-bit timer counters and one 32-bit -timestamp counter. The TMRs run at either a fixed 1 MHz clock rate derived -from the oscillator clock (TMR0-TMR9) or directly at the oscillator clock -(TMR10-TMR13). Each TMR can be programmed to generate one-shot, periodic, -or watchdog interrupts. - -Required properties: -- compatible : "nvidia,tegra210-timer". -- reg : Specifies base physical address and size of the registers. -- interrupts : A list of 14 interrupts; one per each timer channels 0 through - 13. -- clocks : Must contain one entry, for the module clock. - See ../clocks/clock-bindings.txt for details. - -timer@60005000 { - compatible = "nvidia,tegra210-timer"; - reg = <0x0 0x60005000 0x0 0x400>; - interrupts = <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 178 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 179 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&tegra_car TEGRA210_CLK_TIMER>; - clock-names = "timer"; -}; diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt deleted file mode 100644 index 1761f53ee36f..000000000000 --- a/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt +++ /dev/null @@ -1,28 +0,0 @@ -NVIDIA Tegra30 timer - -The Tegra30 timer provides ten 29-bit timer channels, a single 32-bit free -running counter, and 5 watchdog modules. The first two channels may also -trigger a legacy watchdog reset. - -Required properties: - -- compatible : For Tegra30, must contain "nvidia,tegra30-timer". Otherwise, - must contain '"nvidia,<chip>-timer", "nvidia,tegra30-timer"' where - <chip> is tegra124 or tegra132. -- reg : Specifies base physical address and size of the registers. -- interrupts : A list of 6 interrupts; one per each of timer channels 1 - through 5, and one for the shared interrupt for the remaining channels. -- clocks : Must contain one entry, for the module clock. - See ../clocks/clock-bindings.txt for details. - -timer { - compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer"; - reg = <0x60005000 0x400>; - interrupts = <0 0 0x04 - 0 1 0x04 - 0 41 0x04 - 0 42 0x04 - 0 121 0x04 - 0 122 0x04>; - clocks = <&tegra_car 214>; -}; diff --git a/Documentation/driver-api/serial/driver.rst b/Documentation/driver-api/serial/driver.rst index 31bd4e16fb1f..06ec04ba086f 100644 --- a/Documentation/driver-api/serial/driver.rst +++ b/Documentation/driver-api/serial/driver.rst @@ -311,7 +311,7 @@ hardware. This call must not sleep set_ldisc(port,termios) - Notifier for discipline change. See Documentation/driver-api/serial/tty.rst. + Notifier for discipline change. See Documentation/tty/tty_ldisc.rst. Locking: caller holds tty_port->mutex diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst index 4fd7b70fcde1..bfa75ea1b66a 100644 --- a/Documentation/locking/locktypes.rst +++ b/Documentation/locking/locktypes.rst @@ -247,7 +247,7 @@ based on rt_mutex which changes the semantics: Non-PREEMPT_RT kernels disable preemption to get this effect. PREEMPT_RT kernels use a per-CPU lock for serialization which keeps - preemption disabled. The lock disables softirq handlers and also + preemption enabled. The lock disables softirq handlers and also prevents reentrancy due to task preemption. PREEMPT_RT kernels preserve all other spinlock_t semantics: diff --git a/Documentation/process/applying-patches.rst b/Documentation/process/applying-patches.rst index c2121c1e55d7..c269f5e1a0a3 100644 --- a/Documentation/process/applying-patches.rst +++ b/Documentation/process/applying-patches.rst @@ -249,6 +249,10 @@ The 5.x.y (-stable) and 5.x patches live at https://www.kernel.org/pub/linux/kernel/v5.x/ +The 5.x.y incremental patches live at + + https://www.kernel.org/pub/linux/kernel/v5.x/incr/ + The -rc patches are not stored on the webserver but are generated on demand from git tags such as @@ -308,12 +312,11 @@ versions. If no 5.x.y kernel is available, then the highest numbered 5.x kernel is the current stable kernel. -.. note:: +The -stable team provides normal as well as incremental patches. Below is +how to apply these patches. - The -stable team usually do make incremental patches available as well - as patches against the latest mainline release, but I only cover the - non-incremental ones below. The incremental ones can be found at - https://www.kernel.org/pub/linux/kernel/v5.x/incr/ +Normal patches +~~~~~~~~~~~~~~ These patches are not incremental, meaning that for example the 5.7.3 patch does not apply on top of the 5.7.2 kernel source, but rather on top @@ -331,6 +334,21 @@ Here's a small example:: $ cd .. $ mv linux-5.7.2 linux-5.7.3 # rename the kernel source dir +Incremental patches +~~~~~~~~~~~~~~~~~~~ + +Incremental patches are different: instead of being applied on top +of base 5.x kernel, they are applied on top of previous stable kernel +(5.x.y-1). + +Here's the example to apply these:: + + $ cd ~/linux-5.7.2 # change to the kernel source dir + $ patch -p1 < ../patch-5.7.2-3 # apply the new 5.7.3 patch + $ cd .. + $ mv linux-5.7.2 linux-5.7.3 # rename the kernel source dir + + The -rc kernels =============== diff --git a/Documentation/process/handling-regressions.rst b/Documentation/process/handling-regressions.rst new file mode 100644 index 000000000000..abb741b1aeee --- /dev/null +++ b/Documentation/process/handling-regressions.rst @@ -0,0 +1,746 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0) +.. See the bottom of this file for additional redistribution information. + +Handling regressions +++++++++++++++++++++ + +*We don't cause regressions* -- this document describes what this "first rule of +Linux kernel development" means in practice for developers. It complements +Documentation/admin-guide/reporting-regressions.rst, which covers the topic from a +user's point of view; if you never read that text, go and at least skim over it +before continuing here. + +The important bits (aka "The TL;DR") +==================================== + +#. Ensure subscribers of the `regression mailing list <https://lore.kernel.org/regressions/>`_ + (regressions@lists.linux.dev) quickly become aware of any new regression + report: + + * When receiving a mailed report that did not CC the list, bring it into the + loop by immediately sending at least a brief "Reply-all" with the list + CCed. + + * Forward or bounce any reports submitted in bug trackers to the list. + +#. Make the Linux kernel regression tracking bot "regzbot" track the issue (this + is optional, but recommended): + + * For mailed reports, check if the reporter included a line like ``#regzbot + introduced v5.13..v5.14-rc1``. If not, send a reply (with the regressions + list in CC) containing a paragraph like the following, which tells regzbot + when the issue started to happen:: + + #regzbot ^introduced 1f2e3d4c5b6a + + * When forwarding reports from a bug tracker to the regressions list (see + above), include a paragraph like the following:: + + #regzbot introduced: v5.13..v5.14-rc1 + #regzbot from: Some N. Ice Human <some.human@example.com> + #regzbot monitor: http://some.bugtracker.example.com/ticket?id=123456789 + +#. When submitting fixes for regressions, add "Link:" tags to the patch + description pointing to all places where the issue was reported, as + mandated by Documentation/process/submitting-patches.rst and + :ref:`Documentation/process/5.Posting.rst <development_posting>`. + +#. Try to fix regressions quickly once the culprit has been identified; fixes + for most regressions should be merged within two weeks, but some need to be + resolved within two or three days. + + +All the details on Linux kernel regressions relevant for developers +=================================================================== + + +The important basics in more detail +----------------------------------- + + +What to do when receiving regression reports +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Ensure the Linux kernel's regression tracker and others subscribers of the +`regression mailing list <https://lore.kernel.org/regressions/>`_ +(regressions@lists.linux.dev) become aware of any newly reported regression: + + * When you receive a report by mail that did not CC the list, immediately bring + it into the loop by sending at least a brief "Reply-all" with the list CCed; + try to ensure it gets CCed again in case you reply to a reply that omitted + the list. + + * If a report submitted in a bug tracker hits your Inbox, forward or bounce it + to the list. Consider checking the list archives beforehand, if the reporter + already forwarded the report as instructed by + Documentation/admin-guide/reporting-issues.rst. + +When doing either, consider making the Linux kernel regression tracking bot +"regzbot" immediately start tracking the issue: + + * For mailed reports, check if the reporter included a "regzbot command" like + ``#regzbot introduced 1f2e3d4c5b6a``. If not, send a reply (with the + regressions list in CC) with a paragraph like the following::: + + #regzbot ^introduced: v5.13..v5.14-rc1 + + This tells regzbot the version range in which the issue started to happen; + you can specify a range using commit-ids as well or state a single commit-id + in case the reporter bisected the culprit. + + Note the caret (^) before the "introduced": it tells regzbot to treat the + parent mail (the one you reply to) as the initial report for the regression + you want to see tracked; that's important, as regzbot will later look out + for patches with "Link:" tags pointing to the report in the archives on + lore.kernel.org. + + * When forwarding a regressions reported to a bug tracker, include a paragraph + with these regzbot commands:: + + #regzbot introduced: 1f2e3d4c5b6a + #regzbot from: Some N. Ice Human <some.human@example.com> + #regzbot monitor: http://some.bugtracker.example.com/ticket?id=123456789 + + Regzbot will then automatically associate patches with the report that + contain "Link:" tags pointing to your mail or the mentioned ticket. + +What's important when fixing regressions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You don't need to do anything special when submitting fixes for regression, just +remember to do what Documentation/process/submitting-patches.rst, +:ref:`Documentation/process/5.Posting.rst <development_posting>`, and +Documentation/process/stable-kernel-rules.rst already explain in more detail: + + * Point to all places where the issue was reported using "Link:" tags:: + + Link: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/ + Link: https://bugzilla.kernel.org/show_bug.cgi?id=1234567890 + + * Add a "Fixes:" tag to specify the commit causing the regression. + + * If the culprit was merged in an earlier development cycle, explicitly mark + the fix for backporting using the ``Cc: stable@vger.kernel.org`` tag. + +All this is expected from you and important when it comes to regression, as +these tags are of great value for everyone (you included) that might be looking +into the issue weeks, months, or years later. These tags are also crucial for +tools and scripts used by other kernel developers or Linux distributions; one of +these tools is regzbot, which heavily relies on the "Link:" tags to associate +reports for regression with changes resolving them. + +Prioritize work on fixing regressions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You should fix any reported regression as quickly as possible, to provide +affected users with a solution in a timely manner and prevent more users from +running into the issue; nevertheless developers need to take enough time and +care to ensure regression fixes do not cause additional damage. + +In the end though, developers should give their best to prevent users from +running into situations where a regression leaves them only three options: "run +a kernel with a regression that seriously impacts usage", "continue running an +outdated and thus potentially insecure kernel version for more than two weeks +after a regression's culprit was identified", and "downgrade to a still +supported kernel series that lack required features". + +How to realize this depends a lot on the situation. Here are a few rules of +thumb for you, in order or importance: + + * Prioritize work on handling regression reports and fixing regression over all + other Linux kernel work, unless the latter concerns acute security issues or + bugs causing data loss or damage. + + * Always consider reverting the culprit commits and reapplying them later + together with necessary fixes, as this might be the least dangerous and + quickest way to fix a regression. + + * Developers should handle regressions in all supported kernel series, but are + free to delegate the work to the stable team, if the issue probably at no + point in time occurred with mainline. + + * Try to resolve any regressions introduced in the current development before + its end. If you fear a fix might be too risky to apply only days before a new + mainline release, let Linus decide: submit the fix separately to him as soon + as possible with the explanation of the situation. He then can make a call + and postpone the release if necessary, for example if multiple such changes + show up in his inbox. + + * Address regressions in stable, longterm, or proper mainline releases with + more urgency than regressions in mainline pre-releases. That changes after + the release of the fifth pre-release, aka "-rc5": mainline then becomes as + important, to ensure all the improvements and fixes are ideally tested + together for at least one week before Linus releases a new mainline version. + + * Fix regressions within two or three days, if they are critical for some + reason -- for example, if the issue is likely to affect many users of the + kernel series in question on all or certain architectures. Note, this + includes mainline, as issues like compile errors otherwise might prevent many + testers or continuous integration systems from testing the series. + + * Aim to fix regressions within one week after the culprit was identified, if + the issue was introduced in either: + + * a recent stable/longterm release + + * the development cycle of the latest proper mainline release + + In the latter case (say Linux v5.14), try to address regressions even + quicker, if the stable series for the predecessor (v5.13) will be abandoned + soon or already was stamped "End-of-Life" (EOL) -- this usually happens about + three to four weeks after a new mainline release. + + * Try to fix all other regressions within two weeks after the culprit was + found. Two or three additional weeks are acceptable for performance + regressions and other issues which are annoying, but don't prevent anyone + from running Linux (unless it's an issue in the current development cycle, + as those should ideally be addressed before the release). A few weeks in + total are acceptable if a regression can only be fixed with a risky change + and at the same time is affecting only a few users; as much time is + also okay if the regression is already present in the second newest longterm + kernel series. + +Note: The aforementioned time frames for resolving regressions are meant to +include getting the fix tested, reviewed, and merged into mainline, ideally with +the fix being in linux-next at least briefly. This leads to delays you need to +account for. + +Subsystem maintainers are expected to assist in reaching those periods by doing +timely reviews and quick handling of accepted patches. They thus might have to +send git-pull requests earlier or more often than usual; depending on the fix, +it might even be acceptable to skip testing in linux-next. Especially fixes for +regressions in stable and longterm kernels need to be handled quickly, as fixes +need to be merged in mainline before they can be backported to older series. + + +More aspects regarding regressions developers should be aware of +---------------------------------------------------------------- + + +How to deal with changes where a risk of regression is known +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Evaluate how big the risk of regressions is, for example by performing a code +search in Linux distributions and Git forges. Also consider asking other +developers or projects likely to be affected to evaluate or even test the +proposed change; if problems surface, maybe some solution acceptable for all +can be found. + +If the risk of regressions in the end seems to be relatively small, go ahead +with the change, but let all involved parties know about the risk. Hence, make +sure your patch description makes this aspect obvious. Once the change is +merged, tell the Linux kernel's regression tracker and the regressions mailing +list about the risk, so everyone has the change on the radar in case reports +trickle in. Depending on the risk, you also might want to ask the subsystem +maintainer to mention the issue in his mainline pull request. + +What else is there to known about regressions? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Check out Documentation/admin-guide/reporting-regressions.rst, it covers a lot +of other aspects you want might want to be aware of: + + * the purpose of the "no regressions rule" + + * what issues actually qualify as regression + + * who's in charge for finding the root cause of a regression + + * how to handle tricky situations, e.g. when a regression is caused by a + security fix or when fixing a regression might cause another one + +Whom to ask for advice when it comes to regressions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Send a mail to the regressions mailing list (regressions@lists.linux.dev) while +CCing the Linux kernel's regression tracker (regressions@leemhuis.info); if the +issue might better be dealt with in private, feel free to omit the list. + + +More about regression tracking and regzbot +------------------------------------------ + + +Why the Linux kernel has a regression tracker, and why is regzbot used? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Rules like "no regressions" need someone to ensure they are followed, otherwise +they are broken either accidentally or on purpose. History has shown this to be +true for the Linux kernel as well. That's why Thorsten Leemhuis volunteered to +keep an eye on things as the Linux kernel's regression tracker, who's +occasionally helped by other people. Neither of them are paid to do this, +that's why regression tracking is done on a best effort basis. + +Earlier attempts to manually track regressions have shown it's an exhausting and +frustrating work, which is why they were abandoned after a while. To prevent +this from happening again, Thorsten developed regzbot to facilitate the work, +with the long term goal to automate regression tracking as much as possible for +everyone involved. + +How does regression tracking work with regzbot? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The bot watches for replies to reports of tracked regressions. Additionally, +it's looking out for posted or committed patches referencing such reports +with "Link:" tags; replies to such patch postings are tracked as well. +Combined this data provides good insights into the current state of the fixing +process. + +Regzbot tries to do its job with as little overhead as possible for both +reporters and developers. In fact, only reporters are burdened with an extra +duty: they need to tell regzbot about the regression report using the ``#regzbot +introduced`` command outlined above; if they don't do that, someone else can +take care of that using ``#regzbot ^introduced``. + +For developers there normally is no extra work involved, they just need to make +sure to do something that was expected long before regzbot came to light: add +"Link:" tags to the patch description pointing to all reports about the issue +fixed. + +Do I have to use regzbot? +~~~~~~~~~~~~~~~~~~~~~~~~~ + +It's in the interest of everyone if you do, as kernel maintainers like Linus +Torvalds partly rely on regzbot's tracking in their work -- for example when +deciding to release a new version or extend the development phase. For this they +need to be aware of all unfixed regression; to do that, Linus is known to look +into the weekly reports sent by regzbot. + +Do I have to tell regzbot about every regression I stumble upon? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Ideally yes: we are all humans and easily forget problems when something more +important unexpectedly comes up -- for example a bigger problem in the Linux +kernel or something in real life that's keeping us away from keyboards for a +while. Hence, it's best to tell regzbot about every regression, except when you +immediately write a fix and commit it to a tree regularly merged to the affected +kernel series. + +How to see which regressions regzbot tracks currently? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Check `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_ +for the latest info; alternatively, `search for the latest regression report +<https://lore.kernel.org/lkml/?q=%22Linux+regressions+report%22+f%3Aregzbot>`_, +which regzbot normally sends out once a week on Sunday evening (UTC), which is a +few hours before Linus usually publishes new (pre-)releases. + +What places is regzbot monitoring? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Regzbot is watching the most important Linux mailing lists as well as the git +repositories of linux-next, mainline, and stable/longterm. + +What kind of issues are supposed to be tracked by regzbot? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The bot is meant to track regressions, hence please don't involve regzbot for +regular issues. But it's okay for the Linux kernel's regression tracker if you +use regzbot to track severe issues, like reports about hangs, corrupted data, +or internal errors (Panic, Oops, BUG(), warning, ...). + +Can I add regressions found by CI systems to regzbot's tracking? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Feel free to do so, if the particular regression likely has impact on practical +use cases and thus might be noticed by users; hence, please don't involve +regzbot for theoretical regressions unlikely to show themselves in real world +usage. + +How to interact with regzbot? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By using a 'regzbot command' in a direct or indirect reply to the mail with the +regression report. These commands need to be in their own paragraph (IOW: they +need to be separated from the rest of the mail using blank lines). + +One such command is ``#regzbot introduced <version or commit>``, which makes +regzbot consider your mail as a regressions report added to the tracking, as +already described above; ``#regzbot ^introduced <version or commit>`` is another +such command, which makes regzbot consider the parent mail as a report for a +regression which it starts to track. + +Once one of those two commands has been utilized, other regzbot commands can be +used in direct or indirect replies to the report. You can write them below one +of the `introduced` commands or in replies to the mail that used one of them +or itself is a reply to that mail: + + * Set or update the title:: + + #regzbot title: foo + + * Monitor a discussion or bugzilla.kernel.org ticket where additions aspects of + the issue or a fix are discussed -- for example the posting of a patch fixing + the regression:: + + #regzbot monitor: https://lore.kernel.org/all/30th.anniversary.repost@klaava.Helsinki.FI/ + + Monitoring only works for lore.kernel.org and bugzilla.kernel.org; regzbot + will consider all messages in that thread or ticket as related to the fixing + process. + + * Point to a place with further details of interest, like a mailing list post + or a ticket in a bug tracker that are slightly related, but about a different + topic:: + + #regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=123456789 + + * Mark a regression as fixed by a commit that is heading upstream or already + landed:: + + #regzbot fixed-by: 1f2e3d4c5d + + * Mark a regression as a duplicate of another one already tracked by regzbot:: + + #regzbot dup-of: https://lore.kernel.org/all/30th.anniversary.repost@klaava.Helsinki.FI/ + + * Mark a regression as invalid:: + + #regzbot invalid: wasn't a regression, problem has always existed + +Is there more to tell about regzbot and its commands? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +More detailed and up-to-date information about the Linux +kernel's regression tracking bot can be found on its +`project page <https://gitlab.com/knurd42/regzbot>`_, which among others +contains a `getting started guide <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/getting_started.md>`_ +and `reference documentation <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/reference.md>`_ +which both cover more details than the above section. + +Quotes from Linus about regression +---------------------------------- + +Find below a few real life examples of how Linus Torvalds expects regressions to +be handled: + + * From `2017-10-26 (1/2) + <https://lore.kernel.org/lkml/CA+55aFwiiQYJ+YoLKCXjN_beDVfu38mg=Ggg5LFOcqHE8Qi7Zw@mail.gmail.com/>`_:: + + If you break existing user space setups THAT IS A REGRESSION. + + It's not ok to say "but we'll fix the user space setup". + + Really. NOT OK. + + [...] + + The first rule is: + + - we don't cause regressions + + and the corollary is that when regressions *do* occur, we admit to + them and fix them, instead of blaming user space. + + The fact that you have apparently been denying the regression now for + three weeks means that I will revert, and I will stop pulling apparmor + requests until the people involved understand how kernel development + is done. + + * From `2017-10-26 (2/2) + <https://lore.kernel.org/lkml/CA+55aFxW7NMAMvYhkvz1UPbUTUJewRt6Yb51QAx5RtrWOwjebg@mail.gmail.com/>`_:: + + People should basically always feel like they can update their kernel + and simply not have to worry about it. + + I refuse to introduce "you can only update the kernel if you also + update that other program" kind of limitations. If the kernel used to + work for you, the rule is that it continues to work for you. + + There have been exceptions, but they are few and far between, and they + generally have some major and fundamental reasons for having happened, + that were basically entirely unavoidable, and people _tried_hard_ to + avoid them. Maybe we can't practically support the hardware any more + after it is decades old and nobody uses it with modern kernels any + more. Maybe there's a serious security issue with how we did things, + and people actually depended on that fundamentally broken model. Maybe + there was some fundamental other breakage that just _had_ to have a + flag day for very core and fundamental reasons. + + And notice that this is very much about *breaking* peoples environments. + + Behavioral changes happen, and maybe we don't even support some + feature any more. There's a number of fields in /proc/<pid>/stat that + are printed out as zeroes, simply because they don't even *exist* in + the kernel any more, or because showing them was a mistake (typically + an information leak). But the numbers got replaced by zeroes, so that + the code that used to parse the fields still works. The user might not + see everything they used to see, and so behavior is clearly different, + but things still _work_, even if they might no longer show sensitive + (or no longer relevant) information. + + But if something actually breaks, then the change must get fixed or + reverted. And it gets fixed in the *kernel*. Not by saying "well, fix + your user space then". It was a kernel change that exposed the + problem, it needs to be the kernel that corrects for it, because we + have a "upgrade in place" model. We don't have a "upgrade with new + user space". + + And I seriously will refuse to take code from people who do not + understand and honor this very simple rule. + + This rule is also not going to change. + + And yes, I realize that the kernel is "special" in this respect. I'm + proud of it. + + I have seen, and can point to, lots of projects that go "We need to + break that use case in order to make progress" or "you relied on + undocumented behavior, it sucks to be you" or "there's a better way to + do what you want to do, and you have to change to that new better + way", and I simply don't think that's acceptable outside of very early + alpha releases that have experimental users that know what they signed + up for. The kernel hasn't been in that situation for the last two + decades. + + We do API breakage _inside_ the kernel all the time. We will fix + internal problems by saying "you now need to do XYZ", but then it's + about internal kernel API's, and the people who do that then also + obviously have to fix up all the in-kernel users of that API. Nobody + can say "I now broke the API you used, and now _you_ need to fix it + up". Whoever broke something gets to fix it too. + + And we simply do not break user space. + + * From `2020-05-21 + <https://lore.kernel.org/all/CAHk-=wiVi7mSrsMP=fLXQrXK_UimybW=ziLOwSzFTtoXUacWVQ@mail.gmail.com/>`_:: + + The rules about regressions have never been about any kind of + documented behavior, or where the code lives. + + The rules about regressions are always about "breaks user workflow". + + Users are literally the _only_ thing that matters. + + No amount of "you shouldn't have used this" or "that behavior was + undefined, it's your own fault your app broke" or "that used to work + simply because of a kernel bug" is at all relevant. + + Now, reality is never entirely black-and-white. So we've had things + like "serious security issue" etc that just forces us to make changes + that may break user space. But even then the rule is that we don't + really have other options that would allow things to continue. + + And obviously, if users take years to even notice that something + broke, or if we have sane ways to work around the breakage that + doesn't make for too much trouble for users (ie "ok, there are a + handful of users, and they can use a kernel command line to work + around it" kind of things) we've also been a bit less strict. + + But no, "that was documented to be broken" (whether it's because the + code was in staging or because the man-page said something else) is + irrelevant. If staging code is so useful that people end up using it, + that means that it's basically regular kernel code with a flag saying + "please clean this up". + + The other side of the coin is that people who talk about "API + stability" are entirely wrong. API's don't matter either. You can make + any changes to an API you like - as long as nobody notices. + + Again, the regression rule is not about documentation, not about + API's, and not about the phase of the moon. + + It's entirely about "we caused problems for user space that used to work". + + * From `2017-11-05 + <https://lore.kernel.org/all/CA+55aFzUvbGjD8nQ-+3oiMBx14c_6zOj2n7KLN3UsJ-qsd4Dcw@mail.gmail.com/>`_:: + + And our regression rule has never been "behavior doesn't change". + That would mean that we could never make any changes at all. + + For example, we do things like add new error handling etc all the + time, which we then sometimes even add tests for in our kselftest + directory. + + So clearly behavior changes all the time and we don't consider that a + regression per se. + + The rule for a regression for the kernel is that some real user + workflow breaks. Not some test. Not a "look, I used to be able to do + X, now I can't". + + * From `2018-08-03 + <https://lore.kernel.org/all/CA+55aFwWZX=CXmWDTkDGb36kf12XmTehmQjbiMPCqCRG2hi9kw@mail.gmail.com/>`_:: + + YOU ARE MISSING THE #1 KERNEL RULE. + + We do not regress, and we do not regress exactly because your are 100% wrong. + + And the reason you state for your opinion is in fact exactly *WHY* you + are wrong. + + Your "good reasons" are pure and utter garbage. + + The whole point of "we do not regress" is so that people can upgrade + the kernel and never have to worry about it. + + > Kernel had a bug which has been fixed + + That is *ENTIRELY* immaterial. + + Guys, whether something was buggy or not DOES NOT MATTER. + + Why? + + Bugs happen. That's a fact of life. Arguing that "we had to break + something because we were fixing a bug" is completely insane. We fix + tens of bugs every single day, thinking that "fixing a bug" means that + we can break something is simply NOT TRUE. + + So bugs simply aren't even relevant to the discussion. They happen, + they get found, they get fixed, and it has nothing to do with "we + break users". + + Because the only thing that matters IS THE USER. + + How hard is that to understand? + + Anybody who uses "but it was buggy" as an argument is entirely missing + the point. As far as the USER was concerned, it wasn't buggy - it + worked for him/her. + + Maybe it worked *because* the user had taken the bug into account, + maybe it worked because the user didn't notice - again, it doesn't + matter. It worked for the user. + + Breaking a user workflow for a "bug" is absolutely the WORST reason + for breakage you can imagine. + + It's basically saying "I took something that worked, and I broke it, + but now it's better". Do you not see how f*cking insane that statement + is? + + And without users, your program is not a program, it's a pointless + piece of code that you might as well throw away. + + Seriously. This is *why* the #1 rule for kernel development is "we + don't break users". Because "I fixed a bug" is absolutely NOT AN + ARGUMENT if that bug fix broke a user setup. You actually introduced a + MUCH BIGGER bug by "fixing" something that the user clearly didn't + even care about. + + And dammit, we upgrade the kernel ALL THE TIME without upgrading any + other programs at all. It is absolutely required, because flag-days + and dependencies are horribly bad. + + And it is also required simply because I as a kernel developer do not + upgrade random other tools that I don't even care about as I develop + the kernel, and I want any of my users to feel safe doing the same + time. + + So no. Your rule is COMPLETELY wrong. If you cannot upgrade a kernel + without upgrading some other random binary, then we have a problem. + + * From `2021-06-05 + <https://lore.kernel.org/all/CAHk-=wiUVqHN76YUwhkjZzwTdjMMJf_zN4+u7vEJjmEGh3recw@mail.gmail.com/>`_:: + + THERE ARE NO VALID ARGUMENTS FOR REGRESSIONS. + + Honestly, security people need to understand that "not working" is not + a success case of security. It's a failure case. + + Yes, "not working" may be secure. But security in that case is *pointless*. + + * From `2011-05-06 (1/3) + <https://lore.kernel.org/all/BANLkTim9YvResB+PwRp7QTK-a5VNg2PvmQ@mail.gmail.com/>`_:: + + Binary compatibility is more important. + + And if binaries don't use the interface to parse the format (or just + parse it wrongly - see the fairly recent example of adding uuid's to + /proc/self/mountinfo), then it's a regression. + + And regressions get reverted, unless there are security issues or + similar that makes us go "Oh Gods, we really have to break things". + + I don't understand why this simple logic is so hard for some kernel + developers to understand. Reality matters. Your personal wishes matter + NOT AT ALL. + + If you made an interface that can be used without parsing the + interface description, then we're stuck with the interface. Theory + simply doesn't matter. + + You could help fix the tools, and try to avoid the compatibility + issues that way. There aren't that many of them. + + From `2011-05-06 (2/3) + <https://lore.kernel.org/all/BANLkTi=KVXjKR82sqsz4gwjr+E0vtqCmvA@mail.gmail.com/>`_:: + + it's clearly NOT an internal tracepoint. By definition. It's being + used by powertop. + + From `2011-05-06 (3/3) + <https://lore.kernel.org/all/BANLkTinazaXRdGovYL7rRVp+j6HbJ7pzhg@mail.gmail.com/>`_:: + + We have programs that use that ABI and thus it's a regression if they break. + + * From `2012-07-06 <https://lore.kernel.org/all/CA+55aFwnLJ+0sjx92EGREGTWOx84wwKaraSzpTNJwPVV8edw8g@mail.gmail.com/>`_:: + + > Now this got me wondering if Debian _unstable_ actually qualifies as a + > standard distro userspace. + + Oh, if the kernel breaks some standard user space, that counts. Tons + of people run Debian unstable + + * From `2019-09-15 + <https://lore.kernel.org/lkml/CAHk-=wiP4K8DRJWsCo=20hn_6054xBamGKF2kPgUzpB5aMaofA@mail.gmail.com/>`_:: + + One _particularly_ last-minute revert is the top-most commit (ignoring + the version change itself) done just before the release, and while + it's very annoying, it's perhaps also instructive. + + What's instructive about it is that I reverted a commit that wasn't + actually buggy. In fact, it was doing exactly what it set out to do, + and did it very well. In fact it did it _so_ well that the much + improved IO patterns it caused then ended up revealing a user-visible + regression due to a real bug in a completely unrelated area. + + The actual details of that regression are not the reason I point that + revert out as instructive, though. It's more that it's an instructive + example of what counts as a regression, and what the whole "no + regressions" kernel rule means. The reverted commit didn't change any + API's, and it didn't introduce any new bugs. But it ended up exposing + another problem, and as such caused a kernel upgrade to fail for a + user. So it got reverted. + + The point here being that we revert based on user-reported _behavior_, + not based on some "it changes the ABI" or "it caused a bug" concept. + The problem was really pre-existing, and it just didn't happen to + trigger before. The better IO patterns introduced by the change just + happened to expose an old bug, and people had grown to depend on the + previously benign behavior of that old issue. + + And never fear, we'll re-introduce the fix that improved on the IO + patterns once we've decided just how to handle the fact that we had a + bad interaction with an interface that people had then just happened + to rely on incidental behavior for before. It's just that we'll have + to hash through how to do that (there are no less than three different + patches by three different developers being discussed, and there might + be more coming...). In the meantime, I reverted the thing that exposed + the problem to users for this release, even if I hope it will be + re-introduced (perhaps even backported as a stable patch) once we have + consensus about the issue it exposed. + + Take-away from the whole thing: it's not about whether you change the + kernel-userspace ABI, or fix a bug, or about whether the old code + "should never have worked in the first place". It's about whether + something breaks existing users' workflow. + + Anyway, that was my little aside on the whole regression thing. Since + it's that "first rule of kernel programming", I felt it is perhaps + worth just bringing it up every once in a while + +.. + end-of-content +.. + This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top + of the file. If you want to distribute this text under CC-BY-4.0 only, + please use "The Linux kernel developers" for author attribution and link + this as source: + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/process/handling-regressions.rst +.. + Note: Only the content of this RST file as found in the Linux kernel sources + is available under CC-BY-4.0, as versions of this text that were processed + (for example by the kernel's build system) might contain content taken from + files which use a more restrictive license. diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst index 9f1b88492bb3..3587dae4d0ef 100644 --- a/Documentation/process/index.rst +++ b/Documentation/process/index.rst @@ -25,6 +25,7 @@ Below are the essential guides that every developer should read. code-of-conduct-interpretation development-process submitting-patches + handling-regressions programming-language coding-style maintainer-handbooks @@ -48,6 +49,7 @@ Other guides to the community that are of interest to most developers are: deprecated embargoed-hardware-issues maintainers + researcher-guidelines These are some overall technical guides that have been put here for now for lack of a better place. diff --git a/Documentation/process/researcher-guidelines.rst b/Documentation/process/researcher-guidelines.rst new file mode 100644 index 000000000000..afc944e0e898 --- /dev/null +++ b/Documentation/process/researcher-guidelines.rst @@ -0,0 +1,143 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. _researcher_guidelines: + +Researcher Guidelines ++++++++++++++++++++++ + +The Linux kernel community welcomes transparent research on the Linux +kernel, the activities involved in producing it, and any other byproducts +of its development. Linux benefits greatly from this kind of research, and +most aspects of Linux are driven by research in one form or another. + +The community greatly appreciates if researchers can share preliminary +findings before making their results public, especially if such research +involves security. Getting involved early helps both improve the quality +of research and ability for Linux to improve from it. In any case, +sharing open access copies of the published research with the community +is recommended. + +This document seeks to clarify what the Linux kernel community considers +acceptable and non-acceptable practices when conducting such research. At +the very least, such research and related activities should follow +standard research ethics rules. For more background on research ethics +generally, ethics in technology, and research of developer communities +in particular, see: + +* `History of Research Ethics <https://www.unlv.edu/research/ORI-HSR/history-ethics>`_ +* `IEEE Ethics <https://www.ieee.org/about/ethics/index.html>`_ +* `Developer and Researcher Views on the Ethics of Experiments on Open-Source Projects <https://arxiv.org/pdf/2112.13217.pdf>`_ + +The Linux kernel community expects that everyone interacting with the +project is participating in good faith to make Linux better. Research on +any publicly-available artifact (including, but not limited to source +code) produced by the Linux kernel community is welcome, though research +on developers must be distinctly opt-in. + +Passive research that is based entirely on publicly available sources, +including posts to public mailing lists and commits to public +repositories, is clearly permissible. Though, as with any research, +standard ethics must still be followed. + +Active research on developer behavior, however, must be done with the +explicit agreement of, and full disclosure to, the individual developers +involved. Developers cannot be interacted with/experimented on without +consent; this, too, is standard research ethics. + +To help clarify: sending patches to developers *is* interacting +with them, but they have already consented to receiving *good faith +contributions*. Sending intentionally flawed/vulnerable patches or +contributing misleading information to discussions is not consented +to. Such communication can be damaging to the developer (e.g. draining +time, effort, and morale) and damaging to the project by eroding +the entire developer community's trust in the contributor (and the +contributor's organization as a whole), undermining efforts to provide +constructive feedback to contributors, and putting end users at risk of +software flaws. + +Participation in the development of Linux itself by researchers, as +with anyone, is welcomed and encouraged. Research into Linux code is +a common practice, especially when it comes to developing or running +analysis tools that produce actionable results. + +When engaging with the developer community, sending a patch has +traditionally been the best way to make an impact. Linux already has +plenty of known bugs -- what's much more helpful is having vetted fixes. +Before contributing, carefully read the appropriate documentation: + +* Documentation/process/development-process.rst +* Documentation/process/submitting-patches.rst +* Documentation/admin-guide/reporting-issues.rst +* Documentation/admin-guide/security-bugs.rst + +Then send a patch (including a commit log with all the details listed +below) and follow up on any feedback from other developers. + +When sending patches produced from research, the commit logs should +contain at least the following details, so that developers have +appropriate context for understanding the contribution. Answer: + +* What is the specific problem that has been found? +* How could the problem be reached on a running system? +* What effect would encountering the problem have on the system? +* How was the problem found? Specifically include details about any + testing, static or dynamic analysis programs, and any other tools or + methods used to perform the work. +* Which version of Linux was the problem found on? Using the most recent + release or a recent linux-next branch is strongly preferred (see + Documentation/process/howto.rst). +* What was changed to fix the problem, and why it is believed to be correct? +* How was the change build tested and run-time tested? +* What prior commit does this change fix? This should go in a "Fixes:" + tag as the documentation describes. +* Who else has reviewed this patch? This should go in appropriate + "Reviewed-by:" tags; see below. + +For example:: + + From: Author <author@email> + Subject: [PATCH] drivers/foo_bar: Add missing kfree() + + The error path in foo_bar driver does not correctly free the allocated + struct foo_bar_info. This can happen if the attached foo_bar device + rejects the initialization packets sent during foo_bar_probe(). This + would result in a 64 byte slab memory leak once per device attach, + wasting memory resources over time. + + This flaw was found using an experimental static analysis tool we are + developing, LeakMagic[1], which reported the following warning when + analyzing the v5.15 kernel release: + + path/to/foo_bar.c:187: missing kfree() call? + + Add the missing kfree() to the error path. No other references to + this memory exist outside the probe function, so this is the only + place it can be freed. + + x86_64 and arm64 defconfig builds with CONFIG_FOO_BAR=y using GCC + 11.2 show no new warnings, and LeakMagic no longer warns about this + code path. As we don't have a FooBar device to test with, no runtime + testing was able to be performed. + + [1] https://url/to/leakmagic/details + + Reported-by: Researcher <researcher@email> + Fixes: aaaabbbbccccdddd ("Introduce support for FooBar") + Signed-off-by: Author <author@email> + Reviewed-by: Reviewer <reviewer@email> + +If you are a first time contributor it is recommended that the patch +itself be vetted by others privately before being posted to public lists. +(This is required if you have been explicitly told your patches need +more careful internal review.) These people are expected to have their +"Reviewed-by" tag included in the resulting patch. Finding another +developer familiar with Linux contribution, especially within your own +organization, and having them help with reviews before sending them to +the public mailing lists tends to significantly improve the quality of the +resulting patches, and there by reduces the burden on other developers. + +If no one can be found to internally review patches and you need +help finding such a person, or if you have any other questions +related to this document and the developer community's expectations, +please reach out to the private Technical Advisory Board mailing list: +<tech-board@lists.linux-foundation.org>. diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index 31ea120ce531..fb496b2ebfd3 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -495,7 +495,8 @@ Using Reported-by:, Tested-by:, Reviewed-by:, Suggested-by: and Fixes: The Reported-by tag gives credit to people who find bugs and report them and it hopefully inspires them to help us again in the future. Please note that if the bug was reported in private, then ask for permission first before using the -Reported-by tag. +Reported-by tag. The tag is intended for bugs; please do not use it to credit +feature requests. A Tested-by: tag indicates that the patch has been successfully tested (in some environment) by the person named. This tag informs maintainers that diff --git a/Documentation/scheduler/index.rst b/Documentation/scheduler/index.rst index 88900aabdbf7..693eaac769a7 100644 --- a/Documentation/scheduler/index.rst +++ b/Documentation/scheduler/index.rst @@ -14,6 +14,7 @@ Linux Scheduler sched-domains sched-capacity sched-energy + schedutil sched-nice-design sched-rt-group sched-stats diff --git a/Documentation/scheduler/sched-domains.rst b/Documentation/scheduler/sched-domains.rst index 84dcdcd2911c..e57ad28301bd 100644 --- a/Documentation/scheduler/sched-domains.rst +++ b/Documentation/scheduler/sched-domains.rst @@ -37,10 +37,10 @@ rebalancing event for the current runqueue has arrived. The actual load balancing workhorse, run_rebalance_domains()->rebalance_domains(), is then run in softirq context (SCHED_SOFTIRQ). -The latter function takes two arguments: the current CPU and whether it was idle -at the time the scheduler_tick() happened and iterates over all sched domains -our CPU is on, starting from its base domain and going up the ->parent chain. -While doing that, it checks to see if the current domain has exhausted its +The latter function takes two arguments: the runqueue of current CPU and whether +the CPU was idle at the time the scheduler_tick() happened and iterates over all +sched domains our CPU is on, starting from its base domain and going up the ->parent +chain. While doing that, it checks to see if the current domain has exhausted its rebalance interval. If so, it runs load_balance() on that domain. It then checks the parent sched_domain (if it exists), and the parent of the parent and so forth. diff --git a/Documentation/scheduler/schedutil.txt b/Documentation/scheduler/schedutil.rst index 78f6b91e2291..32c7d69fc86c 100644 --- a/Documentation/scheduler/schedutil.txt +++ b/Documentation/scheduler/schedutil.rst @@ -1,11 +1,15 @@ +========= +Schedutil +========= +.. note:: -NOTE; all this assumes a linear relation between frequency and work capacity, -we know this is flawed, but it is the best workable approximation. + All this assumes a linear relation between frequency and work capacity, + we know this is flawed, but it is the best workable approximation. PELT (Per Entity Load Tracking) -------------------------------- +=============================== With PELT we track some metrics across the various scheduler entities, from individual tasks to task-group slices to CPU runqueues. As the basis for this @@ -38,8 +42,8 @@ while 'runnable' will increase to reflect the amount of contention. For more detail see: kernel/sched/pelt.c -Frequency- / CPU Invariance ---------------------------- +Frequency / CPU Invariance +========================== Because consuming the CPU for 50% at 1GHz is not the same as consuming the CPU for 50% at 2GHz, nor is running 50% on a LITTLE CPU the same as running 50% on @@ -47,7 +51,7 @@ a big CPU, we allow architectures to scale the time delta with two ratios, one Dynamic Voltage and Frequency Scaling (DVFS) ratio and one microarch ratio. For simple DVFS architectures (where software is in full control) we trivially -compute the ratio as: +compute the ratio as:: f_cur r_dvfs := ----- @@ -55,7 +59,7 @@ compute the ratio as: For more dynamic systems where the hardware is in control of DVFS we use hardware counters (Intel APERF/MPERF, ARMv8.4-AMU) to provide us this ratio. -For Intel specifically, we use: +For Intel specifically, we use:: APERF f_cur := ----- * P0 @@ -87,7 +91,7 @@ For more detail see: UTIL_EST / UTIL_EST_FASTUP --------------------------- +========================== Because periodic tasks have their averages decayed while they sleep, even though when running their expected utilization will be the same, they suffer a @@ -106,7 +110,7 @@ For more detail see: kernel/sched/fair.c:util_est_dequeue() UCLAMP ------- +====== It is possible to set effective u_min and u_max clamps on each CFS or RT task; the runqueue keeps an max aggregate of these clamps for all running tasks. @@ -115,7 +119,7 @@ For more detail see: include/uapi/linux/sched/types.h Schedutil / DVFS ----------------- +================ Every time the scheduler load tracking is updated (task wakeup, task migration, time progression) we call out to schedutil to update the hardware @@ -123,7 +127,7 @@ DVFS state. The basis is the CPU runqueue's 'running' metric, which per the above it is the frequency invariant utilization estimate of the CPU. From this we compute -a desired frequency like: +a desired frequency like:: max( running, util_est ); if UTIL_EST u_cfs := { running; otherwise @@ -135,7 +139,7 @@ a desired frequency like: f_des := min( f_max, 1.25 u * f_max ) -XXX IO-wait; when the update is due to a task wakeup from IO-completion we +XXX IO-wait: when the update is due to a task wakeup from IO-completion we boost 'u' above. This frequency is then used to select a P-state/OPP or directly munged into a @@ -153,7 +157,7 @@ For more information see: kernel/sched/cpufreq_schedutil.c NOTES ------ +===== - On low-load scenarios, where DVFS is most relevant, the 'running' numbers will closely reflect utilization. diff --git a/Documentation/sphinx/kerneldoc-preamble.sty b/Documentation/sphinx/kerneldoc-preamble.sty new file mode 100644 index 000000000000..9d0204dc38be --- /dev/null +++ b/Documentation/sphinx/kerneldoc-preamble.sty @@ -0,0 +1,226 @@ +% -*- coding: utf-8 -*- +% SPDX-License-Identifier: GPL-2.0 +% +% LaTeX preamble for "make latexdocs" or "make pdfdocs" including: +% - TOC width settings +% - Setting of tabulary (\tymin) +% - Headheight setting for fancyhdr +% - Fontfamily settings for CJK (Chinese, Japanese, and Korean) translations +% +% Note on the suffix of .sty: +% This is not implemented as a LaTeX style file, but as a file containing +% plain LaTeX code to be included into preamble. +% ".sty" is chosen because ".tex" would cause the build scripts to confuse +% this file with a LaTeX main file. +% +% Copyright (C) 2022 Akira Yokosawa + +% Custom width parameters for TOC +% - Redefine low-level commands defined in report.cls. +% - Indent of 2 chars is preserved for ease of comparison. +% Summary of changes from default params: +% Width of page number (\@pnumwidth): 1.55em -> 2.7em +% Width of chapter number: 1.5em -> 1.8em +% Indent of section number: 1.5em -> 1.8em +% Width of section number: 2.6em -> 3.2em +% Indent of sebsection number: 4.1em -> 5em +% Width of subsection number: 3.5em -> 4.3em +% +% These params can have 4 digit page counts, 2 digit chapter counts, +% section counts of 4 digits + 1 period (e.g., 18.10), and subsection counts +% of 5 digits + 2 periods (e.g., 18.7.13). +\makeatletter +%% Redefine \@pnumwidth (page number width) +\renewcommand*\@pnumwidth{2.7em} +%% Redefine \l@chapter (chapter list entry) +\renewcommand*\l@chapter[2]{% + \ifnum \c@tocdepth >\m@ne + \addpenalty{-\@highpenalty}% + \vskip 1.0em \@plus\p@ + \setlength\@tempdima{1.8em}% + \begingroup + \parindent \z@ \rightskip \@pnumwidth + \parfillskip -\@pnumwidth + \leavevmode \bfseries + \advance\leftskip\@tempdima + \hskip -\leftskip + #1\nobreak\hfil + \nobreak\hb@xt@\@pnumwidth{\hss #2% + \kern-\p@\kern\p@}\par + \penalty\@highpenalty + \endgroup + \fi} +%% Redefine \l@section and \l@subsection +\renewcommand*\l@section{\@dottedtocline{1}{1.8em}{3.2em}} +\renewcommand*\l@subsection{\@dottedtocline{2}{5em}{4.3em}} +\makeatother +%% Sphinx < 1.8 doesn't have \sphinxtableofcontentshook +\providecommand{\sphinxtableofcontentshook}{} +%% Undefine it for compatibility with Sphinx 1.7.9 +\renewcommand{\sphinxtableofcontentshook}{} % Empty the hook + +% Prevent column squeezing of tabulary. \tymin is set by Sphinx as: +% \setlength{\tymin}{3\fontcharwd\font`0 } +% , which is too short. +\setlength{\tymin}{20em} + +% Adjust \headheight for fancyhdr +\addtolength{\headheight}{1.6pt} +\addtolength{\topmargin}{-1.6pt} + +% Translations have Asian (CJK) characters which are only displayed if +% xeCJK is used +\IfFontExistsTF{Noto Sans CJK SC}{ + % Load xeCJK when CJK font is available + \usepackage{xeCJK} + % Noto CJK fonts don't provide slant shape. [AutoFakeSlant] permits + % its emulation. + % Select KR variant at the beginning of each document so that quotation + % and apostorph symbols of half-width is used in TOC of Latin documents. + \IfFontExistsTF{Noto Serif CJK KR}{ + \setCJKmainfont{Noto Serif CJK KR}[AutoFakeSlant] + }{ + \setCJKmainfont{Noto Sans CJK KR}[AutoFakeSlant] + } + \setCJKsansfont{Noto Sans CJK KR}[AutoFakeSlant] + \setCJKmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant] + % Teach xeCJK of half-width symbols + \xeCJKDeclareCharClass{HalfLeft}{`“,`‘} + \xeCJKDeclareCharClass{HalfRight}{`â€,`’} + % CJK Language-specific font choices + %% for Simplified Chinese + \IfFontExistsTF{Noto Serif CJK SC}{ + \newCJKfontfamily[SCmain]\scmain{Noto Serif CJK SC}[AutoFakeSlant] + \newCJKfontfamily[SCserif]\scserif{Noto Serif CJK SC}[AutoFakeSlant] + }{ + \newCJKfontfamily[SCmain]\scmain{Noto Sans CJK SC}[AutoFakeSlant] + \newCJKfontfamily[SCserif]\scserif{Noto Sans CJK SC}[AutoFakeSlant] + } + \newCJKfontfamily[SCsans]\scsans{Noto Sans CJK SC}[AutoFakeSlant] + \newCJKfontfamily[SCmono]\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant] + %% for Traditional Chinese + \IfFontExistsTF{Noto Serif CJK TC}{ + \newCJKfontfamily[TCmain]\tcmain{Noto Serif CJK TC}[AutoFakeSlant] + \newCJKfontfamily[TCserif]\tcserif{Noto Serif CJK TC}[AutoFakeSlant] + }{ + \newCJKfontfamily[TCmain]\tcmain{Noto Sans CJK TC}[AutoFakeSlant] + \newCJKfontfamily[TCserif]\tcserif{Noto Sans CJK TC}[AutoFakeSlant] + } + \newCJKfontfamily[TCsans]\tcsans{Noto Sans CJK TC}[AutoFakeSlant] + \newCJKfontfamily[TCmono]\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant] + %% for Korean + \IfFontExistsTF{Noto Serif CJK KR}{ + \newCJKfontfamily[KRmain]\krmain{Noto Serif CJK KR}[AutoFakeSlant] + \newCJKfontfamily[KRserif]\krserif{Noto Serif CJK KR}[AutoFakeSlant] + }{ + \newCJKfontfamily[KRmain]\krmain{Noto Sans CJK KR}[AutoFakeSlant] + \newCJKfontfamily[KRserif]\krserif{Noto Sans CJK KR}[AutoFakeSlant] + } + \newCJKfontfamily[KRsans]\krsans{Noto Sans CJK KR}[AutoFakeSlant] + \newCJKfontfamily[KRmono]\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant] + %% for Japanese + \IfFontExistsTF{Noto Serif CJK JP}{ + \newCJKfontfamily[JPmain]\jpmain{Noto Serif CJK JP}[AutoFakeSlant] + \newCJKfontfamily[JPserif]\jpserif{Noto Serif CJK JP}[AutoFakeSlant] + }{ + \newCJKfontfamily[JPmain]\jpmain{Noto Sans CJK JP}[AutoFakeSlant] + \newCJKfontfamily[JPserif]\jpserif{Noto Sans CJK JP}[AutoFakeSlant] + } + \newCJKfontfamily[JPsans]\jpsans{Noto Sans CJK JP}[AutoFakeSlant] + \newCJKfontfamily[JPmono]\jpmono{Noto Sans Mono CJK JP}[AutoFakeSlant] + % Dummy commands for Sphinx < 2.3 (no 'extrapackages' support) + \providecommand{\onehalfspacing}{} + \providecommand{\singlespacing}{} + % Define custom macros to on/off CJK + %% One and half spacing for CJK contents + \newcommand{\kerneldocCJKon}{\makexeCJKactive\onehalfspacing} + \newcommand{\kerneldocCJKoff}{\makexeCJKinactive\singlespacing} + % Define custom macros for switching CJK font setting + %% for Simplified Chinese + \newcommand{\kerneldocBeginSC}{% + \begingroup% + \scmain% + \xeCJKDeclareCharClass{FullLeft}{`“,`‘}% Full-width in SC + \xeCJKDeclareCharClass{FullRight}{`â€,`’}% Full-width in SC + \renewcommand{\CJKrmdefault}{SCserif}% + \renewcommand{\CJKsfdefault}{SCsans}% + \renewcommand{\CJKttdefault}{SCmono}% + \xeCJKsetup{CJKspace = false}% gobble white spaces by ' ' + % For CJK ascii-art alignment + \setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]% + } + \newcommand{\kerneldocEndSC}{\endgroup} + %% for Traditional Chinese + \newcommand{\kerneldocBeginTC}{% + \begingroup% + \tcmain% + \xeCJKDeclareCharClass{FullLeft}{`“,`‘}% Full-width in TC + \xeCJKDeclareCharClass{FullRight}{`â€,`’}% Full-width in TC + \renewcommand{\CJKrmdefault}{TCserif}% + \renewcommand{\CJKsfdefault}{TCsans}% + \renewcommand{\CJKttdefault}{TCmono}% + \xeCJKsetup{CJKspace = false}% gobble white spaces by ' ' + % For CJK ascii-art alignment + \setmonofont{Noto Sans Mono CJK TC}[AutoFakeSlant]% + } + \newcommand{\kerneldocEndTC}{\endgroup} + %% for Korean + \newcommand{\kerneldocBeginKR}{% + \begingroup% + \krmain% + \renewcommand{\CJKrmdefault}{KRserif}% + \renewcommand{\CJKsfdefault}{KRsans}% + \renewcommand{\CJKttdefault}{KRmono}% + % \xeCJKsetup{CJKspace = true} % true by default + % For CJK ascii-art alignment (still misaligned for Hangul) + \setmonofont{Noto Sans Mono CJK KR}[AutoFakeSlant]% + } + \newcommand{\kerneldocEndKR}{\endgroup} + %% for Japanese + \newcommand{\kerneldocBeginJP}{% + \begingroup% + \jpmain% + \renewcommand{\CJKrmdefault}{JPserif}% + \renewcommand{\CJKsfdefault}{JPsans}% + \renewcommand{\CJKttdefault}{JPmono}% + \xeCJKsetup{CJKspace = false}% gobble white space by ' ' + % For CJK ascii-art alignment + \setmonofont{Noto Sans Mono CJK JP}[AutoFakeSlant]% + } + \newcommand{\kerneldocEndJP}{\endgroup} + + % Single spacing in literal blocks + \fvset{baselinestretch=1} + % To customize \sphinxtableofcontents + \usepackage{etoolbox} + % Inactivate CJK after tableofcontents + \apptocmd{\sphinxtableofcontents}{\kerneldocCJKoff}{}{} + \xeCJKsetup{CJKspace = true}% For inter-phrase space of Korean TOC +}{ % No CJK font found + % Custom macros to on/off CJK and switch CJK fonts (Dummy) + \newcommand{\kerneldocCJKon}{} + \newcommand{\kerneldocCJKoff}{} + %% By defining \kerneldocBegin(SC|TC|KR|JP) as commands with an argument + %% and ignore the argument (#1) in their definitions, whole contents of + %% CJK chapters can be ignored. + \newcommand{\kerneldocBeginSC}[1]{% + %% Put a note on missing CJK fonts in place of zh_CN translation. + \begin{sphinxadmonition}{note}{Note on missing fonts:} + Translations of Simplified Chinese (zh\_CN), Traditional Chinese + (zh\_TW), Korean (ko\_KR), and Japanese (ja\_JP) were skipped + due to the lack of suitable font families. + + If you want them, please install ``Noto Sans CJK'' font families + by following instructions from + \sphinxcode{./scripts/sphinx-pre-install}. + Having optional ``Noto Serif CJK'' font families will improve + the looks of those translations. + \end{sphinxadmonition}} + \newcommand{\kerneldocEndSC}{} + \newcommand{\kerneldocBeginTC}[1]{} + \newcommand{\kerneldocEndTC}{} + \newcommand{\kerneldocBeginKR}[1]{} + \newcommand{\kerneldocEndKR}{} + \newcommand{\kerneldocBeginJP}[1]{} + \newcommand{\kerneldocEndJP}{} +} diff --git a/Documentation/sphinx/kfigure.py b/Documentation/sphinx/kfigure.py index 3c78828330be..24d2b2addcce 100644 --- a/Documentation/sphinx/kfigure.py +++ b/Documentation/sphinx/kfigure.py @@ -31,10 +31,13 @@ u""" * ``dot(1)``: Graphviz (https://www.graphviz.org). If Graphviz is not available, the DOT language is inserted as literal-block. + For conversion to PDF, ``rsvg-convert(1)`` of librsvg + (https://gitlab.gnome.org/GNOME/librsvg) is used when available. * SVG to PDF: To generate PDF, you need at least one of this tools: - ``convert(1)``: ImageMagick (https://www.imagemagick.org) + - ``inkscape(1)``: Inkscape (https://inkscape.org/) List of customizations: @@ -49,6 +52,7 @@ import os from os import path import subprocess from hashlib import sha1 +import re from docutils import nodes from docutils.statemachine import ViewList from docutils.parsers.rst import directives @@ -109,10 +113,20 @@ def pass_handle(self, node): # pylint: disable=W0613 # Graphviz's dot(1) support dot_cmd = None +# dot(1) -Tpdf should be used +dot_Tpdf = False # ImageMagick' convert(1) support convert_cmd = None +# librsvg's rsvg-convert(1) support +rsvg_convert_cmd = None + +# Inkscape's inkscape(1) support +inkscape_cmd = None +# Inkscape prior to 1.0 uses different command options +inkscape_ver_one = False + def setup(app): # check toolchain first @@ -160,23 +174,62 @@ def setupTools(app): This function is called once, when the builder is initiated. """ - global dot_cmd, convert_cmd # pylint: disable=W0603 + global dot_cmd, dot_Tpdf, convert_cmd, rsvg_convert_cmd # pylint: disable=W0603 + global inkscape_cmd, inkscape_ver_one # pylint: disable=W0603 kernellog.verbose(app, "kfigure: check installed tools ...") dot_cmd = which('dot') convert_cmd = which('convert') + rsvg_convert_cmd = which('rsvg-convert') + inkscape_cmd = which('inkscape') if dot_cmd: kernellog.verbose(app, "use dot(1) from: " + dot_cmd) + + try: + dot_Thelp_list = subprocess.check_output([dot_cmd, '-Thelp'], + stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as err: + dot_Thelp_list = err.output + pass + + dot_Tpdf_ptn = b'pdf' + dot_Tpdf = re.search(dot_Tpdf_ptn, dot_Thelp_list) else: kernellog.warn(app, "dot(1) not found, for better output quality install " "graphviz from https://www.graphviz.org") - if convert_cmd: - kernellog.verbose(app, "use convert(1) from: " + convert_cmd) + if inkscape_cmd: + kernellog.verbose(app, "use inkscape(1) from: " + inkscape_cmd) + inkscape_ver = subprocess.check_output([inkscape_cmd, '--version'], + stderr=subprocess.DEVNULL) + ver_one_ptn = b'Inkscape 1' + inkscape_ver_one = re.search(ver_one_ptn, inkscape_ver) + convert_cmd = None + rsvg_convert_cmd = None + dot_Tpdf = False + else: - kernellog.warn(app, - "convert(1) not found, for SVG to PDF conversion install " - "ImageMagick (https://www.imagemagick.org)") + if convert_cmd: + kernellog.verbose(app, "use convert(1) from: " + convert_cmd) + else: + kernellog.warn(app, + "Neither inkscape(1) nor convert(1) found.\n" + "For SVG to PDF conversion, " + "install either Inkscape (https://inkscape.org/) (preferred) or\n" + "ImageMagick (https://www.imagemagick.org)") + + if rsvg_convert_cmd: + kernellog.verbose(app, "use rsvg-convert(1) from: " + rsvg_convert_cmd) + kernellog.verbose(app, "use 'dot -Tsvg' and rsvg-convert(1) for DOT -> PDF conversion") + dot_Tpdf = False + else: + kernellog.verbose(app, + "rsvg-convert(1) not found.\n" + " SVG rendering of convert(1) is done by ImageMagick-native renderer.") + if dot_Tpdf: + kernellog.verbose(app, "use 'dot -Tpdf' for DOT -> PDF conversion") + else: + kernellog.verbose(app, "use 'dot -Tsvg' and convert(1) for DOT -> PDF conversion") # integrate conversion tools @@ -242,7 +295,7 @@ def convert_image(img_node, translator, src_fname=None): elif in_ext == '.svg': if translator.builder.format == 'latex': - if convert_cmd is None: + if not inkscape_cmd and convert_cmd is None: kernellog.verbose(app, "no SVG to PDF conversion available / include SVG raw.") img_node.replace_self(file2literal(src_fname)) @@ -266,7 +319,14 @@ def convert_image(img_node, translator, src_fname=None): if in_ext == '.dot': kernellog.verbose(app, 'convert DOT to: {out}/' + _name) - ok = dot2format(app, src_fname, dst_fname) + if translator.builder.format == 'latex' and not dot_Tpdf: + svg_fname = path.join(translator.builder.outdir, fname + '.svg') + ok1 = dot2format(app, src_fname, svg_fname) + ok2 = svg2pdf_by_rsvg(app, svg_fname, dst_fname) + ok = ok1 and ok2 + + else: + ok = dot2format(app, src_fname, dst_fname) elif in_ext == '.svg': kernellog.verbose(app, 'convert SVG to: {out}/' + _name) @@ -303,22 +363,70 @@ def dot2format(app, dot_fname, out_fname): return bool(exit_code == 0) def svg2pdf(app, svg_fname, pdf_fname): - """Converts SVG to PDF with ``convert(1)`` command. + """Converts SVG to PDF with ``inkscape(1)`` or ``convert(1)`` command. - Uses ``convert(1)`` from ImageMagick (https://www.imagemagick.org) for - conversion. Returns ``True`` on success and ``False`` if an error occurred. + Uses ``inkscape(1)`` from Inkscape (https://inkscape.org/) or ``convert(1)`` + from ImageMagick (https://www.imagemagick.org) for conversion. + Returns ``True`` on success and ``False`` if an error occurred. * ``svg_fname`` pathname of the input SVG file with extension (``.svg``) * ``pdf_name`` pathname of the output PDF file with extension (``.pdf``) """ cmd = [convert_cmd, svg_fname, pdf_fname] - # use stdout and stderr from parent - exit_code = subprocess.call(cmd) + cmd_name = 'convert(1)' + + if inkscape_cmd: + cmd_name = 'inkscape(1)' + if inkscape_ver_one: + cmd = [inkscape_cmd, '-o', pdf_fname, svg_fname] + else: + cmd = [inkscape_cmd, '-z', '--export-pdf=%s' % pdf_fname, svg_fname] + + try: + warning_msg = subprocess.check_output(cmd, stderr=subprocess.STDOUT) + exit_code = 0 + except subprocess.CalledProcessError as err: + warning_msg = err.output + exit_code = err.returncode + pass + if exit_code != 0: kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd))) + if warning_msg: + kernellog.warn(app, "Warning msg from %s: %s" + % (cmd_name, str(warning_msg, 'utf-8'))) + elif warning_msg: + kernellog.verbose(app, "Warning msg from %s (likely harmless):\n%s" + % (cmd_name, str(warning_msg, 'utf-8'))) + return bool(exit_code == 0) +def svg2pdf_by_rsvg(app, svg_fname, pdf_fname): + """Convert SVG to PDF with ``rsvg-convert(1)`` command. + + * ``svg_fname`` pathname of input SVG file, including extension ``.svg`` + * ``pdf_fname`` pathname of output PDF file, including extension ``.pdf`` + + Input SVG file should be the one generated by ``dot2format()``. + SVG -> PDF conversion is done by ``rsvg-convert(1)``. + + If ``rsvg-convert(1)`` is unavailable, fall back to ``svg2pdf()``. + + """ + + if rsvg_convert_cmd is None: + ok = svg2pdf(app, svg_fname, pdf_fname) + else: + cmd = [rsvg_convert_cmd, '--format=pdf', '-o', pdf_fname, svg_fname] + # use stdout and stderr from parent + exit_code = subprocess.call(cmd) + if exit_code != 0: + kernellog.warn(app, "Error #%d when calling: %s" % (exit_code, " ".join(cmd))) + ok = bool(exit_code == 0) + + return ok + # image handling # --------------------- diff --git a/Documentation/trace/osnoise-tracer.rst b/Documentation/trace/osnoise-tracer.rst index b648cb9bf1f0..963def9f97c6 100644 --- a/Documentation/trace/osnoise-tracer.rst +++ b/Documentation/trace/osnoise-tracer.rst @@ -51,7 +51,7 @@ For example:: [root@f32 ~]# cd /sys/kernel/tracing/ [root@f32 tracing]# echo osnoise > current_tracer -It is possible to follow the trace by reading the trace trace file:: +It is possible to follow the trace by reading the trace file:: [root@f32 tracing]# cat trace # tracer: osnoise @@ -108,7 +108,7 @@ The tracer has a set of options inside the osnoise directory, they are: option. - tracing_threshold: the minimum delta between two time() reads to be considered as noise, in us. When set to 0, the default value will - will be used, which is currently 5 us. + be used, which is currently 5 us. Additional Tracing ------------------ diff --git a/Documentation/translations/conf.py b/Documentation/translations/conf.py deleted file mode 100644 index 92cdbba74229..000000000000 --- a/Documentation/translations/conf.py +++ /dev/null @@ -1,12 +0,0 @@ -# -*- coding: utf-8 -*- -# SPDX-License-Identifier: GPL-2.0 - -# -- Additinal options for LaTeX output ---------------------------------- -# font config for ascii-art alignment - -latex_elements['preamble'] += ''' - \\IfFontExistsTF{Noto Sans CJK SC}{ - % For CJK ascii-art alignment - \\setmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant] - }{} -''' diff --git a/Documentation/translations/ja_JP/index.rst b/Documentation/translations/ja_JP/index.rst index 88d4d98eed15..20738c931d02 100644 --- a/Documentation/translations/ja_JP/index.rst +++ b/Documentation/translations/ja_JP/index.rst @@ -3,7 +3,7 @@ \renewcommand\thesection* \renewcommand\thesubsection* \kerneldocCJKon - \kerneldocBeginJP + \kerneldocBeginJP{ Japanese translations ===================== @@ -15,4 +15,4 @@ Japanese translations .. raw:: latex - \kerneldocEndJP + }\kerneldocEndJP diff --git a/Documentation/translations/ko_KR/index.rst b/Documentation/translations/ko_KR/index.rst index f636b482fb4c..4add6b2fe1f2 100644 --- a/Documentation/translations/ko_KR/index.rst +++ b/Documentation/translations/ko_KR/index.rst @@ -3,7 +3,7 @@ \renewcommand\thesection* \renewcommand\thesubsection* \kerneldocCJKon - \kerneldocBeginKR + \kerneldocBeginKR{ í•œêµì–´ ë²ˆì— =========== @@ -26,5 +26,4 @@ .. raw:: latex - \normalsize - \kerneldocEndKR + }\kerneldocEndKR diff --git a/Documentation/translations/zh_CN/accounting/delay-accounting.rst b/Documentation/translations/zh_CN/accounting/delay-accounting.rst index 67d5606e5401..f1849411018e 100644 --- a/Documentation/translations/zh_CN/accounting/delay-accounting.rst +++ b/Documentation/translations/zh_CN/accounting/delay-accounting.rst @@ -17,6 +17,8 @@ a) ç‰å¾…一个CPU(任务为å¯è¿è¡Œï¼‰ b) 完æˆç”±è¯¥ä»»åŠ¡å‘èµ·çš„å—I/OåŒæ¥è¯·æ±‚ c) 页é¢äº¤æ¢ d) 内å˜å›žæ”¶ +e) 页缓å˜æŠ–动 +f) 直接规整 并将这些统计信æ¯é€šè¿‡taskstats接å£æ供给用户空间。 @@ -37,10 +39,10 @@ d) 内å˜å›žæ”¶ å‘用户æ€è¿”回一个通用数æ®ç»“构,对应æ¯pid或æ¯tgid的统计信æ¯ã€‚延时计数功能填写 该数æ®ç»“构的特定å—æ®µã€‚è§ - include/linux/taskstats.h + include/uapi/linux/taskstats.h å…¶æ述了延时计数相关å—段。系统通常以计数器形å¼è¿”回 CPUã€åŒæ¥å— I/Oã€äº¤æ¢ã€å†…å˜ -回收ç‰çš„累积延时。 +回收ã€é¡µç¼“å˜æŠ–动ã€ç›´æŽ¥è§„æ•´ç‰çš„累积延时。 å–任务æŸè®¡æ•°å™¨ä¸¤ä¸ªè¿žç»è¯»æ•°çš„差值,将得到任务在该时间间隔内ç‰å¾…对应资æºçš„总延时。 @@ -72,40 +74,36 @@ kernel.task_delayacct进行开关。注æ„,åªæœ‰åœ¨å¯ç”¨å»¶æ—¶è®¡æ•°åŽå¯åŠ getdelayså‘½ä»¤çš„ä¸€èˆ¬æ ¼å¼:: - getdelays [-t tgid] [-p pid] [-c cmd...] + getdelays [-dilv] [-t tgid] [-p pid] 获å–pid为10的任务从系统å¯åŠ¨åŽçš„延时信æ¯:: - # ./getdelays -p 10 + # ./getdelays -d -p 10 (输出信æ¯å’Œä¸‹ä¾‹ç›¸ä¼¼ï¼‰ 获å–所有tgid为5的任务从系统å¯åŠ¨åŽçš„总延时信æ¯:: - # ./getdelays -t 5 - - - CPU count real total virtual total delay total - 7876 92005750 100000000 24001500 - IO count delay total - 0 0 - SWAP count delay total - 0 0 - RECLAIM count delay total - 0 0 - -获å–指定简å•å‘½ä»¤è¿è¡Œæ—¶çš„延时信æ¯:: - - # ./getdelays -c ls / - - bin data1 data3 data5 dev home media opt root srv sys usr - boot data2 data4 data6 etc lib mnt proc sbin subdomain tmp var - - - CPU count real total virtual total delay total - 6 4000250 4000000 0 - IO count delay total - 0 0 - SWAP count delay total - 0 0 - RECLAIM count delay total - 0 0 + # ./getdelays -d -t 5 + print delayacct stats ON + TGID 5 + + + CPU count real total virtual total delay total delay average + 8 7000000 6872122 3382277 0.423ms + IO count delay total delay average + 0 0 0ms + SWAP count delay total delay average + 0 0 0ms + RECLAIM count delay total delay average + 0 0 0ms + THRASHING count delay total delay average + 0 0 0ms + COMPACT count delay total delay average + 0 0 0ms + +获å–pid为1çš„IO计数,它åªå’Œ-p一起使用:: + # ./getdelays -i -p 1 + printing IO accounting + linuxrc: read=65536, write=0, cancelled_write=0 + +上é¢çš„命令与-v一起使用,å¯ä»¥èŽ·å–更多调试信æ¯ã€‚ diff --git a/Documentation/translations/zh_CN/admin-guide/index.rst b/Documentation/translations/zh_CN/admin-guide/index.rst index 548e57f4b3f1..be535ffaf4b0 100644 --- a/Documentation/translations/zh_CN/admin-guide/index.rst +++ b/Documentation/translations/zh_CN/admin-guide/index.rst @@ -20,15 +20,15 @@ Linux å†…æ ¸ç”¨æˆ·å’Œç®¡ç†å‘˜æŒ‡å— Todolist: - kernel-parameters - devices - sysctl/index +* kernel-parameters +* devices +* sysctl/index 本节介ç»CPUæ¼æ´žåŠå…¶ç¼“解措施。 Todolist: - hw-vuln/index +* hw-vuln/index 下é¢çš„一组文档,针对的是试图跟踪问题和bug的用户。 @@ -44,18 +44,18 @@ Todolist: Todolist: - reporting-bugs - ramoops - dynamic-debug-howto - kdump/index - perf/index +* reporting-bugs +* ramoops +* dynamic-debug-howto +* kdump/index +* perf/index 这是应用程åºå¼€å‘äººå‘˜æ„Ÿå…´è¶£çš„ç« èŠ‚çš„å¼€å§‹ã€‚å¯ä»¥åœ¨è¿™é‡Œæ‰¾åˆ°æ¶µç›–å†…æ ¸ABIå„个 æ–¹é¢çš„文档。 Todolist: - sysfs-rules +* sysfs-rules 本手册的其余部分包括å„ç§æŒ‡å—,介ç»å¦‚ä½•æ ¹æ®æ‚¨çš„喜好é…ç½®å†…æ ¸çš„ç‰¹å®šè¡Œä¸ºã€‚ @@ -69,61 +69,61 @@ Todolist: lockup-watchdogs unicode sysrq + mm/index Todolist: - acpi/index - aoe/index - auxdisplay/index - bcache - binderfs - binfmt-misc - blockdev/index - bootconfig - braille-console - btmrvl - cgroup-v1/index - cgroup-v2 - cifs/index - dell_rbu - device-mapper/index - edid - efi-stub - ext4 - nfs/index - gpio/index - highuid - hw_random - initrd - iostats - java - jfs - kernel-per-CPU-kthreads - laptops/index - lcd-panel-cgram - ldm - LSM/index - md - media/index - mm/index - module-signing - mono - namespaces/index - numastat - parport - perf-security - pm/index - pnp - rapidio - ras - rtc - serial-console - svga - thunderbolt - ufs - vga-softcursor - video-output - xfs +* acpi/index +* aoe/index +* auxdisplay/index +* bcache +* binderfs +* binfmt-misc +* blockdev/index +* bootconfig +* braille-console +* btmrvl +* cgroup-v1/index +* cgroup-v2 +* cifs/index +* dell_rbu +* device-mapper/index +* edid +* efi-stub +* ext4 +* nfs/index +* gpio/index +* highuid +* hw_random +* initrd +* iostats +* java +* jfs +* kernel-per-CPU-kthreads +* laptops/index +* lcd-panel-cgram +* ldm +* LSM/index +* md +* media/index +* module-signing +* mono +* namespaces/index +* numastat +* parport +* perf-security +* pm/index +* pnp +* rapidio +* ras +* rtc +* serial-console +* svga +* thunderbolt +* ufs +* vga-softcursor +* video-output +* xfs .. only:: subproject and html diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst new file mode 100644 index 000000000000..0c8276109fc0 --- /dev/null +++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/index.rst @@ -0,0 +1,28 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../../../disclaimer-zh_CN.rst + +:Original: Documentation/admin-guide/mm/damon/index.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + +============ +监测数æ®è®¿é—® +============ + +:doc:`DAMON </vm/damon/index>` å…许轻é‡çº§çš„æ•°æ®è®¿é—®ç›‘测。使用DAMON, +用户å¯ä»¥åˆ†æžä»–们系统的内å˜è®¿é—®æ¨¡å¼ï¼Œå¹¶ä¼˜åŒ–它们。 + +.. toctree:: + :maxdepth: 2 + + start + usage + reclaim + + + + diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst new file mode 100644 index 000000000000..9e541578f38d --- /dev/null +++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/reclaim.rst @@ -0,0 +1,232 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../../../disclaimer-zh_CN.rst + +:Original: Documentation/admin-guide/mm/damon/reclaim.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + +=============== +基于DAMON的回收 +=============== + +基于DAMON的回收(DAMON_RECLAIM)是一个é™æ€çš„å†…æ ¸æ¨¡å—,旨在用于轻度内å˜åŽ‹åŠ›ä¸‹çš„主动和轻 +é‡çº§çš„回收。它的目的ä¸æ˜¯å–代基于LRU列表的页é¢å›žæ”¶ï¼Œè€Œæ˜¯æœ‰é€‰æ‹©åœ°ç”¨äºŽä¸åŒç¨‹åº¦çš„内å˜åŽ‹åŠ›å’Œè¦ +求。 + +哪些地方需è¦ä¸»åŠ¨å›žæ”¶ï¼Ÿ +====================== + +在一般的内å˜è¶…é‡ä½¿ç”¨ï¼ˆover-committed systems,虚拟化相关术è¯ï¼‰çš„系统上,主动回收冷页 +有助于节çœå†…å˜å’Œå‡å°‘延迟高峰,这些延迟是由直接回收进程或kswapdçš„CPU消耗引起的,åŒæ—¶åªäº§ +生最å°çš„æ€§èƒ½ä¸‹é™ [1]_ [2]_ 。 + +基于空闲页报告 [3]_ 的内å˜è¿‡åº¦æ‰¿è¯ºçš„虚拟化系统就是很好的例åã€‚åœ¨è¿™æ ·çš„ç³»ç»Ÿä¸ï¼Œå®¢æˆ·æœº +å‘主机报告他们的空闲内å˜ï¼Œè€Œä¸»æœºåˆ™å°†æŠ¥å‘Šçš„内å˜é‡æ–°åˆ†é…ç»™å…¶ä»–å®¢æˆ·ã€‚å› æ¤ï¼Œç³»ç»Ÿçš„内å˜å¾—到了充 +分的利用。然而,客户å¯èƒ½ä¸é‚£ä¹ˆèŠ‚çœå†…å˜ï¼Œä¸»è¦æ˜¯å› ä¸ºä¸€äº›å†…æ ¸å系统和用户空间应用程åºè¢«è®¾è®¡ä¸º +使用尽å¯èƒ½å¤šçš„内å˜ã€‚然åŽï¼Œå®¢æˆ·æœºå¯èƒ½åªå‘主机报告少é‡çš„内å˜æ˜¯ç©ºé—²çš„,导致系统的内å˜åˆ©ç”¨çŽ‡ä¸‹é™ã€‚ +在客户ä¸è¿è¡Œä¸»åŠ¨å›žæ”¶å¯ä»¥ç¼“解这个问题。 + +它是如何工作的? +================ + +DAMON_RECLAIM找到在特定时间内没有被访问的内å˜åŒºåŸŸå¹¶åˆ†é¡µã€‚为了é¿å…它在分页æ“作ä¸æ¶ˆè€—过多 +çš„CPU,å¯ä»¥é…置一个速度é™åˆ¶ã€‚在这个速度é™åˆ¶ä¸‹ï¼Œå®ƒé¦–先分页出那些没有被访问过的内å˜åŒºåŸŸã€‚ç³» +统管ç†å‘˜è¿˜å¯ä»¥é…置在什么情况下这个方案应该自动激活和åœç”¨ä¸‰ä¸ªå†…å˜åŽ‹åŠ›æ°´ä½ã€‚ + +接å£: 模å—å‚æ•° +============== + +è¦ä½¿ç”¨è¿™ä¸ªåŠŸèƒ½ï¼Œä½ 首先è¦ç¡®ä¿ä½ 的系统è¿è¡Œåœ¨ä¸€ä¸ªä»¥ ``CONFIG_DAMON_RECLAIM=y`` 构建的内 +æ ¸ä¸Šã€‚ + +为了让系统管ç†å‘˜å¯ç”¨æˆ–ç¦ç”¨å®ƒï¼Œå¹¶ä¸ºç»™å®šçš„系统进行调整,DAMON_RECLAIM利用了模å—å‚数。也就 +æ˜¯è¯´ï¼Œä½ å¯ä»¥æŠŠ ``damon_reclaim.<parameter>=<value>`` æ”¾åœ¨å†…æ ¸å¯åŠ¨å‘½ä»¤è¡Œä¸Šï¼Œæˆ–者把 +适当的值写入 ``/sys/modules/damon_reclaim/parameters/<parameter>`` 文件。 + +注æ„,除 ``å¯ç”¨`` 外的å‚数值åªåœ¨DAMON_RECLAIMå¯åŠ¨æ—¶åº”ç”¨ã€‚å› æ¤ï¼Œå¦‚æžœä½ æƒ³åœ¨è¿è¡Œæ—¶åº”用新 +çš„å‚数值,而DAMON_RECLAIMå·²ç»è¢«å¯ç”¨ï¼Œä½ 应该通过 ``å¯ç”¨`` çš„å‚数文件ç¦ç”¨å’Œé‡æ–°å¯ç”¨å®ƒã€‚ +在é‡æ–°å¯ç”¨ä¹‹å‰ï¼Œåº”将新的å‚数值写入适当的å‚数值ä¸ã€‚ + +下é¢æ˜¯æ¯ä¸ªå‚æ•°çš„æ述。 + +enable +------ + +å¯ç”¨æˆ–ç¦ç”¨DAMON_RECLAIM。 + +ä½ å¯ä»¥é€šè¿‡æŠŠè¿™ä¸ªå‚数的值设置为 ``Y`` æ¥å¯ç”¨DAMON_RCLAIM,把它设置为 ``N`` å¯ä»¥ç¦ç”¨ +DAMON_RECLAIM。注æ„,由于基于水ä½çš„激活æ¡ä»¶ï¼ŒDAMON_RECLAIMä¸èƒ½è¿›è¡ŒçœŸæ£çš„监测和回收。 +这一点请å‚考下é¢å…³äºŽæ°´ä½å‚æ•°çš„æ述。 + +min_age +------- + +识别冷内å˜åŒºåŸŸçš„时间阈值,å•ä½æ˜¯å¾®ç§’。 + +如果一个内å˜åŒºåŸŸåœ¨è¿™ä¸ªæ—¶é—´æˆ–更长的时间内没有被访问,DAMON_RECLAIM会将该区域识别为冷的, +并回收它。 + +默认为120秒。 + +quota_ms +-------- + +回收的时间é™åˆ¶ï¼Œä»¥æ¯«ç§’为å•ä½ã€‚ + +DAMON_RECLAIM 试图在一个时间窗å£ï¼ˆquota_reset_interval_ms)内åªä½¿ç”¨åˆ°è¿™ä¸ªæ—¶é—´ï¼Œä»¥ +å°è¯•å›žæ”¶å†·é¡µã€‚è¿™å¯ä»¥ç”¨æ¥é™åˆ¶DAMON_RECLAIMçš„CPU消耗。如果该值为零,则该é™åˆ¶è¢«ç¦ç”¨ã€‚ + +默认为10ms。 + +quota_sz +-------- + +回收的内å˜å¤§å°é™åˆ¶ï¼Œå•ä½ä¸ºå—节。 + +DAMON_RECLAIM 收å–在一个时间窗å£ï¼ˆquota_reset_interval_ms)内试图回收的内å˜é‡ï¼Œå¹¶ +使其ä¸è¶…过这个é™åˆ¶ã€‚è¿™å¯ä»¥ç”¨æ¥é™åˆ¶CPUå’ŒIO的消耗。如果该值为零,则é™åˆ¶è¢«ç¦ç”¨ã€‚ + +默认情况下是128 MiB。 + +quota_reset_interval_ms +----------------------- + +时间/大å°é…é¢æ”¶å–é‡ç½®é—´éš”,å•ä½ä¸ºæ¯«ç§’。 + +时间(quota_ms)和大å°ï¼ˆquota_sz)的é…é¢çš„ç›®æ ‡é‡ç½®é—´éš”。也就是说,DAMON_RECLAIM在 +å°è¯•å›žæ”¶â€˜ä¸â€™è¶…过quota_ms毫秒或quota_szå—节的内å˜ã€‚ + +默认为1秒。 + +wmarks_interval +--------------- + +当DAMON_RECLAIM被å¯ç”¨ä½†ç”±äºŽå…¶æ°´ä½è§„则而ä¸æ´»è·ƒæ—¶ï¼Œåœ¨æ£€æŸ¥æ°´ä½ä¹‹å‰çš„最å°ç‰å¾…时间。 + +wmarks_high +----------- + +高水ä½çš„å¯ç”¨å†…å˜çŽ‡ï¼ˆæ¯åƒå—节)。 + +如果系统的å¯ç”¨å†…å˜ï¼ˆä»¥æ¯åƒå—节为å•ä½ï¼‰é«˜äºŽè¿™ä¸ªæ•°å€¼ï¼ŒDAMON_RECLAIM就会å˜å¾—ä¸æ´»è·ƒï¼Œæ‰€ä»¥ +它什么也ä¸åšï¼Œåªæ˜¯å®šæœŸæ£€æŸ¥æ°´ä½ã€‚ + +wmarks_mid +---------- + +ä¸é—´æ°´ä½çš„å¯ç”¨å†…å˜çŽ‡ï¼ˆæ¯åƒå—节)。 + +如果系统的空闲内å˜ï¼ˆä»¥æ¯åƒå—节为å•ä½ï¼‰åœ¨è¿™ä¸ªå’Œä½Žæ°´ä½çº¿ä¹‹é—´ï¼ŒDAMON_RECLAIM就会被激活, +å› æ¤å¼€å§‹ç›‘测和回收。 + +wmarks_low +---------- + +低水ä½çš„å¯ç”¨å†…å˜çŽ‡ï¼ˆæ¯åƒå—节)。 + +如果系统的空闲内å˜ï¼ˆä»¥æ¯åƒå—节为å•ä½ï¼‰ä½ŽäºŽè¿™ä¸ªæ•°å€¼ï¼ŒDAMON_RECLAIM就会å˜å¾—ä¸æ´»è·ƒï¼Œæ‰€ä»¥ +它除了定期检查水ä½å¤–什么都ä¸åšã€‚在这ç§æƒ…况下,系统会退回到基于LRU列表的页é¢ç²’度回收逻辑。 + +sample_interval +--------------- + +ç›‘æµ‹çš„é‡‡æ ·é—´éš”ï¼Œå•ä½æ˜¯å¾®ç§’。 + +DAMON用于监测冷内å˜çš„é‡‡æ ·é—´éš”ã€‚æ›´å¤šç»†èŠ‚è¯·å‚考DAMON文档 (:doc:`usage`) 。 + +aggr_interval +------------- + +监测的èšé›†é—´éš”,å•ä½æ˜¯å¾®ç§’。 + +DAMON对冷内å˜ç›‘测的èšé›†é—´éš”。更多细节请å‚考DAMON文档 (:doc:`usage`)。 + +min_nr_regions +-------------- + +监测区域的最å°æ•°é‡ã€‚ + +DAMON用于冷内å˜ç›‘测的最å°ç›‘测区域数。这å¯ä»¥ç”¨æ¥è®¾ç½®ç›‘测质é‡çš„下é™ã€‚但是,设 +置的太高å¯èƒ½ä¼šå¯¼è‡´ç›‘æµ‹å¼€é”€çš„å¢žåŠ ã€‚æ›´å¤šç»†èŠ‚è¯·å‚考DAMON文档 (:doc:`usage`) 。 + +max_nr_regions +-------------- + +监测区域的最大数é‡ã€‚ + +DAMON用于冷内å˜ç›‘测的最大监测区域数。这å¯ä»¥ç”¨æ¥è®¾ç½®ç›‘测开销的上é™å€¼ã€‚但是, +设置得太低å¯èƒ½ä¼šå¯¼è‡´ç›‘测质é‡ä¸å¥½ã€‚更多细节请å‚考DAMON文档 (:doc:`usage`) 。 + +monitor_region_start +-------------------- + +ç›®æ ‡å†…å˜åŒºåŸŸçš„物ç†åœ°å€èµ·ç‚¹ã€‚ + +DAMON_RECLAIM将对其进行工作的内å˜åŒºåŸŸçš„起始物ç†åœ°å€ã€‚也就是说,DAMON_RECLAIM +将在这个区域ä¸æ‰¾åˆ°å†·çš„内å˜åŒºåŸŸå¹¶è¿›è¡Œå›žæ”¶ã€‚默认情况下,该区域使用最大系统内å˜åŒºã€‚ + +monitor_region_end +------------------ + +ç›®æ ‡å†…å˜åŒºåŸŸçš„结æŸç‰©ç†åœ°å€ã€‚ + +DAMON_RECLAIM将对其进行工作的内å˜åŒºåŸŸçš„末端物ç†åœ°å€ã€‚也就是说,DAMON_RECLAIMå°† +在这个区域内找到冷的内å˜åŒºåŸŸå¹¶è¿›è¡Œå›žæ”¶ã€‚默认情况下,该区域使用最大系统内å˜åŒºã€‚ + +kdamond_pid +----------- + +DAMON线程的PID。 + +如果DAMON_RECLAIM被å¯ç”¨ï¼Œè¿™å°†æˆä¸ºå·¥ä½œçº¿ç¨‹çš„PID。å¦åˆ™ï¼Œä¸º-1。 + +nr_reclaim_tried_regions +------------------------ + +试图通过DAMON_RECLAIM回收的内å˜åŒºåŸŸçš„æ•°é‡ã€‚ + +bytes_reclaim_tried_regions +--------------------------- + +试图通过DAMON_RECLAIM回收的内å˜åŒºåŸŸçš„总å—节数。 + +nr_reclaimed_regions +-------------------- + +通过DAMON_RECLAIMæˆåŠŸå›žæ”¶çš„内å˜åŒºåŸŸçš„æ•°é‡ã€‚ + +bytes_reclaimed_regions +----------------------- + +通过DAMON_RECLAIMæˆåŠŸå›žæ”¶çš„内å˜åŒºåŸŸçš„总å—节数。 + +nr_quota_exceeds +---------------- + +超过时间/空间é…é¢é™åˆ¶çš„次数。 + +例å +==== + +下é¢çš„è¿è¡Œç¤ºä¾‹å‘½ä»¤ä½¿DAMON_RECLAIM找到30秒或更长时间没有访问的内å˜åŒºåŸŸå¹¶â€œå›žæ”¶â€ï¼Ÿ +为了é¿å…DAMON_RECLAIM在分页æ“作ä¸æ¶ˆè€—过多的CPU时间,回收被é™åˆ¶åœ¨æ¯ç§’1GiB以内。 +它还è¦æ±‚DAMON_RECLAIM在系统的å¯ç”¨å†…å˜çŽ‡è¶…过50%æ—¶ä¸åšä»»ä½•äº‹æƒ…,但如果它低于40%æ—¶ +就开始真æ£çš„工作。如果DAMON_RECLAIM没有å–å¾—è¿›å±•ï¼Œå› æ¤ç©ºé—²å†…å˜çŽ‡ä½ŽäºŽ20%,它会è¦æ±‚ +DAMON_RECLAIMå†æ¬¡ä»€ä¹ˆéƒ½ä¸åšï¼Œè¿™æ ·æˆ‘们就å¯ä»¥é€€å›žåˆ°åŸºäºŽLRU列表的页é¢ç²’度回收了:: + + # cd /sys/modules/damon_reclaim/parameters + # echo 30000000 > min_age + # echo $((1 * 1024 * 1024 * 1024)) > quota_sz + # echo 1000 > quota_reset_interval_ms + # echo 500 > wmarks_high + # echo 400 > wmarks_mid + # echo 200 > wmarks_low + # echo Y > enabled + +.. [1] https://research.google/pubs/pub48551/ +.. [2] https://lwn.net/Articles/787611/ +.. [3] https://www.kernel.org/doc/html/latest/vm/free_page_reporting.html diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst new file mode 100644 index 000000000000..67d1b49481dc --- /dev/null +++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/start.rst @@ -0,0 +1,132 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../../../disclaimer-zh_CN.rst + +:Original: Documentation/admin-guide/mm/damon/start.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + +======== +å…¥é—¨æŒ‡å— +======== + +本文通过演示DAMON的默认用户空间工具,简è¦åœ°ä»‹ç»äº†å¦‚何使用DAMON。请注æ„ï¼Œä¸ºäº†ç®€æ´ +èµ·è§ï¼Œæœ¬æ–‡æ¡£åªæ述了它的部分功能。更多细节请å‚考该工具的使用文档。 +`doc <https://github.com/awslabs/damo/blob/next/USAGE.md>`_ . + + +å‰ææ¡ä»¶ +======== + +å†…æ ¸ +---- + +é¦–å…ˆï¼Œä½ è¦ç¡®ä¿ä½ 当å‰ç³»ç»Ÿä¸è·‘çš„å†…æ ¸æž„å»ºæ—¶é€‰å®šäº†è¿™ä¸ªåŠŸèƒ½é€‰é¡¹ ``CONFIG_DAMON_*=y``. + + +用户空间工具 +------------ + +在演示ä¸ï¼Œæˆ‘们将使用DAMON的默认用户空间工具,称为DAMON Operator(DAMO)。它å¯ä»¥åœ¨ +https://github.com/awslabs/damo找到。下é¢çš„例åå‡è®¾DAMOåœ¨ä½ çš„$PATH上。当然,但 +这并ä¸æ˜¯å¼ºåˆ¶æ€§çš„。 + +å› ä¸ºDAMO使用的是DAMONçš„debugfs接å£(详情请å‚考 :doc:`usage` ä¸çš„使用方法) ä½ åº”è¯¥ +ç¡®ä¿debugfs被挂载。手动挂载它,如下所示:: + + # mount -t debugfs none /sys/kernel/debug/ + +æˆ–è€…åœ¨ä½ çš„ ``/etc/fstab`` 文件ä¸æ·»åŠ ä»¥ä¸‹ä¸€è¡Œï¼Œè¿™æ ·ä½ çš„ç³»ç»Ÿå°±å¯ä»¥åœ¨å¯åŠ¨æ—¶è‡ªåŠ¨æŒ‚è½½ +debugfs了:: + + debugfs /sys/kernel/debug debugfs defaults 0 0 + + +记录数æ®è®¿é—®æ¨¡å¼ +================ + +下é¢çš„命令记录了一个程åºçš„内å˜è®¿é—®æ¨¡å¼ï¼Œå¹¶å°†ç›‘测结果ä¿å˜åˆ°æ–‡ä»¶ä¸ã€‚ :: + + $ git clone https://github.com/sjp38/masim + $ cd masim; make; ./masim ./configs/zigzag.cfg & + $ sudo damo record -o damon.data $(pidof masim) + +命令的å‰ä¸¤è¡Œä¸‹è½½äº†ä¸€ä¸ªäººå·¥å†…å˜è®¿é—®ç”Ÿæˆå™¨ç¨‹åºå¹¶åœ¨åŽå°è¿è¡Œã€‚生æˆå™¨å°†é‡å¤åœ°é€ä¸€è®¿é—®ä¸¤ä¸ª +100 MiB大å°çš„内å˜åŒºåŸŸã€‚ä½ å¯ä»¥ç”¨ä½ 的真实工作负载æ¥ä»£æ›¿å®ƒã€‚最åŽä¸€è¡Œè¦æ±‚ ``damo`` å°† +访问模å¼è®°å½•åœ¨ ``damon.data`` 文件ä¸ã€‚ + + +将记录的模å¼å¯è§†åŒ– +================== + +ä½ å¯ä»¥åœ¨heatmapä¸ç›´è§‚地看到这ç§æ¨¡å¼ï¼Œæ˜¾ç¤ºå“ªä¸ªå†…å˜åŒºåŸŸï¼ˆX轴)何时被访问(Y轴)以åŠè®¿ +问的频率(数å—)。:: + + $ sudo damo report heats --heatmap stdout + 22222222222222222222222222222222222222211111111111111111111111111111111111111100 + 44444444444444444444444444444444444444434444444444444444444444444444444444443200 + 44444444444444444444444444444444444444433444444444444444444444444444444444444200 + 33333333333333333333333333333333333333344555555555555555555555555555555555555200 + 33333333333333333333333333333333333344444444444444444444444444444444444444444200 + 22222222222222222222222222222222222223355555555555555555555555555555555555555200 + 00000000000000000000000000000000000000288888888888888888888888888888888888888400 + 00000000000000000000000000000000000000288888888888888888888888888888888888888400 + 33333333333333333333333333333333333333355555555555555555555555555555555555555200 + 88888888888888888888888888888888888888600000000000000000000000000000000000000000 + 88888888888888888888888888888888888888600000000000000000000000000000000000000000 + 33333333333333333333333333333333333333444444444444444444444444444444444444443200 + 00000000000000000000000000000000000000288888888888888888888888888888888888888400 + [...] + # access_frequency: 0 1 2 3 4 5 6 7 8 9 + # x-axis: space (139728247021568-139728453431248: 196.848 MiB) + # y-axis: time (15256597248362-15326899978162: 1 m 10.303 s) + # resolution: 80x40 (2.461 MiB and 1.758 s for each character) + +ä½ ä¹Ÿå¯ä»¥ç›´è§‚地看到工作集的大å°åˆ†å¸ƒï¼ŒæŒ‰å¤§å°æŽ’åºã€‚:: + + $ sudo damo report wss --range 0 101 10 + # <percentile> <wss> + # target_id 18446632103789443072 + # avr: 107.708 MiB + 0 0 B | | + 10 95.328 MiB |**************************** | + 20 95.332 MiB |**************************** | + 30 95.340 MiB |**************************** | + 40 95.387 MiB |**************************** | + 50 95.387 MiB |**************************** | + 60 95.398 MiB |**************************** | + 70 95.398 MiB |**************************** | + 80 95.504 MiB |**************************** | + 90 190.703 MiB |********************************************************* | + 100 196.875 MiB |***********************************************************| + +在上述命令ä¸ä½¿ç”¨ ``--sortby`` 选项,å¯ä»¥æ˜¾ç¤ºå·¥ä½œé›†çš„大å°æ˜¯å¦‚何按时间顺åºå˜åŒ–的。:: + + $ sudo damo report wss --range 0 101 10 --sortby time + # <percentile> <wss> + # target_id 18446632103789443072 + # avr: 107.708 MiB + 0 3.051 MiB | | + 10 190.703 MiB |***********************************************************| + 20 95.336 MiB |***************************** | + 30 95.328 MiB |***************************** | + 40 95.387 MiB |***************************** | + 50 95.332 MiB |***************************** | + 60 95.320 MiB |***************************** | + 70 95.398 MiB |***************************** | + 80 95.398 MiB |***************************** | + 90 95.340 MiB |***************************** | + 100 95.398 MiB |***************************** | + + +æ•°æ®è®¿é—®æ¨¡å¼æ„ŸçŸ¥çš„内å˜ç®¡ç† +========================== + +以下三个命令使æ¯ä¸€ä¸ªå¤§å°>=4K的内å˜åŒºåŸŸåœ¨ä½ 的工作负载ä¸æ²¡æœ‰è¢«è®¿é—®>=60秒,就会被æ¢æŽ‰ã€‚ :: + + $ echo "#min-size max-size min-acc max-acc min-age max-age action" > test_scheme + $ echo "4K max 0 0 60s max pageout" >> test_scheme + $ damo schemes -c test_scheme <pid of your workload> diff --git a/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst b/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst new file mode 100644 index 000000000000..5d7533347216 --- /dev/null +++ b/Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst @@ -0,0 +1,286 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../../../disclaimer-zh_CN.rst + +:Original: Documentation/admin-guide/mm/damon/usage.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + +======== +详细用法 +======== + +DAMON 为ä¸åŒçš„用户æ供了下é¢ä¸‰ç§æŽ¥å£ã€‚ + +- *DAMON用户空间工具。* + `è¿™ <https://github.com/awslabs/damo>`_ 为有这特æƒçš„人, 如系统管ç†å‘˜ï¼Œå¸Œæœ›æœ‰ä¸€ä¸ªåˆšå¥½ + å¯ä»¥å·¥ä½œçš„人性化界é¢ã€‚ + 使用它,用户å¯ä»¥ä»¥äººæ€§åŒ–çš„æ–¹å¼ä½¿ç”¨DAMON的主è¦åŠŸèƒ½ã€‚ä¸è¿‡ï¼Œå®ƒå¯èƒ½ä¸ä¼šä¸ºç‰¹æ®Šæƒ…况进行高度调整。 + 它åŒæ—¶æ”¯æŒè™šæ‹Ÿå’Œç‰©ç†åœ°å€ç©ºé—´çš„监测。更多细节,请å‚考它的 `使用文档 + <https://github.com/awslabs/damo/blob/next/USAGE.md>`_。 +- *debugfs接å£ã€‚* + :ref:`è¿™ <debugfs_interface>` 是为那些希望更高级的使用DAMON的特æƒç”¨æˆ·ç©ºé—´ç¨‹åºå‘˜å‡†å¤‡çš„。 + 使用它,用户å¯ä»¥é€šè¿‡è¯»å–和写入特殊的debugfs文件æ¥ä½¿ç”¨DAMON的主è¦åŠŸèƒ½ã€‚å› æ¤ï¼Œä½ å¯ä»¥ç¼–写和使 + ç”¨ä½ ä¸ªæ€§åŒ–çš„DAMON debugfs包装程åºï¼Œä»£æ›¿ä½ 读/写debugfs文件。 `DAMON用户空间工具 + <https://github.com/awslabs/damo>`_ 就是这ç§ç¨‹åºçš„一个例å 它åŒæ—¶æ”¯æŒè™šæ‹Ÿå’Œç‰©ç†åœ°å€ + 空间的监测。注æ„,这个界é¢åªæ供简å•çš„监测结果 :ref:`统计 <damos_stats>`。对于详细的监测 + 结果,DAMONæ供了一个:ref:`跟踪点 <tracepoint>`。 + +- *å†…æ ¸ç©ºé—´ç¼–ç¨‹æŽ¥å£ã€‚* + :doc:`This </vm/damon/api>` è¿™æ˜¯ä¸ºå†…æ ¸ç©ºé—´ç¨‹åºå‘˜å‡†å¤‡çš„。使用它,用户å¯ä»¥é€šè¿‡ä¸ºä½ 编写内 + æ ¸ç©ºé—´çš„DAMON应用程åºï¼Œæœ€çµæ´»æœ‰æ•ˆåœ°åˆ©ç”¨DAMONçš„æ¯ä¸€ä¸ªåŠŸèƒ½ã€‚ä½ ç”šè‡³å¯ä»¥ä¸ºå„ç§åœ°å€ç©ºé—´æ‰©å±•DAMON。 + 详细情况请å‚è€ƒæŽ¥å£ :doc:`文件 </vm/damon/api>`。 + + +debugfsæŽ¥å£ +=========== + +DAMON导出了八个文件, ``attrs``, ``target_ids``, ``init_regions``, +``schemes``, ``monitor_on``, ``kdamond_pid``, ``mk_contexts`` å’Œ +``rm_contexts`` under its debugfs directory, ``<debugfs>/damon/``. + + +属性 +---- + +用户å¯ä»¥é€šè¿‡è¯»å–和写入 ``attrs`` 文件获得和设置 ``é‡‡æ ·é—´éš”`` 〠``èšé›†é—´éš”`` 〠``区域更新间隔`` +以åŠç›‘æµ‹ç›®æ ‡åŒºåŸŸçš„æœ€å°/最大数é‡ã€‚è¦è¯¦ç»†äº†è§£ç›‘测属性,请å‚考 `:doc:/vm/damon/design` 。例如, +下é¢çš„命令将这些值设置为5msã€100msã€1000msã€10å’Œ1000,然åŽå†æ¬¡æ£€æŸ¥:: + + # cd <debugfs>/damon + # echo 5000 100000 1000000 10 1000 > attrs + # cat attrs + 5000 100000 1000000 10 1000 + + +ç›®æ ‡ID +------ + +一些类型的地å€ç©ºé—´æ”¯æŒå¤šä¸ªç›‘æµ‹ç›®æ ‡ã€‚ä¾‹å¦‚ï¼Œè™šæ‹Ÿå†…å˜åœ°å€ç©ºé—´çš„监测å¯ä»¥æœ‰å¤šä¸ªè¿›ç¨‹ä½œä¸ºç›‘æµ‹ç›®æ ‡ã€‚ç”¨æˆ· +å¯ä»¥é€šè¿‡å†™å…¥ç›®æ ‡çš„相关id值æ¥è®¾ç½®ç›®æ ‡ï¼Œå¹¶é€šè¿‡è¯»å– ``target_ids`` 文件æ¥èŽ·å¾—当å‰ç›®æ ‡çš„id。在监 +测虚拟地å€ç©ºé—´çš„æƒ…å†µä¸‹ï¼Œè¿™äº›å€¼åº”è¯¥æ˜¯ç›‘æµ‹ç›®æ ‡è¿›ç¨‹çš„pid。例如,下é¢çš„命令将pid为42å’Œ4242的进程设 +ä¸ºç›‘æµ‹ç›®æ ‡ï¼Œå¹¶å†æ¬¡æ£€æŸ¥:: + + # cd <debugfs>/damon + # echo 42 4242 > target_ids + # cat target_ids + 42 4242 + +用户还å¯ä»¥é€šè¿‡åœ¨æ–‡ä»¶ä¸å†™å…¥ä¸€ä¸ªç‰¹æ®Šçš„å…³é”®å— "paddr\n" æ¥ç›‘测系统的物ç†å†…å˜åœ°å€ç©ºé—´ã€‚å› ä¸ºç‰©ç†åœ° +å€ç©ºé—´ç›‘测ä¸æ”¯æŒå¤šä¸ªç›®æ ‡ï¼Œè¯»å–文件会显示一个å‡å€¼ï¼Œå³ ``42`` ,如下图所示:: + + # cd <debugfs>/damon + # echo paddr > target_ids + # cat target_ids + 42 + +请注æ„ï¼Œè®¾ç½®ç›®æ ‡ID并ä¸å¯åŠ¨ç›‘测。 + + +åˆå§‹ç›‘æµ‹ç›®æ ‡åŒºåŸŸ +---------------- + +在虚拟地å€ç©ºé—´ç›‘测的情况下,DAMONè‡ªåŠ¨è®¾ç½®å’Œæ›´æ–°ç›‘æµ‹çš„ç›®æ ‡åŒºåŸŸï¼Œè¿™æ ·å°±å¯ä»¥è¦†ç›–ç›®æ ‡è¿›ç¨‹çš„æ•´ä¸ª +内å˜æ˜ 射。然而,用户å¯èƒ½å¸Œæœ›å°†ç›‘测区域é™åˆ¶åœ¨ç‰¹å®šçš„地å€èŒƒå›´å†…ï¼Œå¦‚å †ã€æ ˆæˆ–ç‰¹å®šçš„æ–‡ä»¶æ˜ å°„åŒºåŸŸã€‚ +或者,一些用户å¯ä»¥çŸ¥é“他们工作负载的åˆå§‹è®¿é—®æ¨¡å¼ï¼Œå› æ¤å¸Œæœ›ä¸ºâ€œè‡ªé€‚应区域调整â€è®¾ç½®æœ€ä½³åˆå§‹åŒºåŸŸã€‚ + +相比之下,DAMON在物ç†å†…å˜ç›‘测的情况下ä¸ä¼šè‡ªåŠ¨è®¾ç½®å’Œæ›´æ–°ç›‘æµ‹ç›®æ ‡åŒºåŸŸã€‚å› æ¤ï¼Œç”¨æˆ·åº”该自己设置 +ç›‘æµ‹ç›®æ ‡åŒºåŸŸã€‚ + +在这ç§æƒ…况下,用户å¯ä»¥é€šè¿‡åœ¨ ``init_regions`` 文件ä¸å†™å…¥é€‚当的值,明确地设置他们想è¦çš„åˆ +å§‹ç›‘æµ‹ç›®æ ‡åŒºåŸŸã€‚è¾“å…¥çš„æ¯ä¸€è¡Œåº”代表一个区域,形å¼å¦‚下:: + + <target idx> <start address> <end address> + +ç›®æ ‡idx应该是 ``target_ids`` 文件ä¸ç›®æ ‡çš„索引,从 ``0`` 开始,区域应该按照地å€é¡ºåºä¼ 递。 +例如,下é¢çš„å‘½ä»¤å°†è®¾ç½®å‡ ä¸ªåœ°å€èŒƒå›´ï¼Œ ``1-100`` å’Œ ``100-200`` 作为pid 42çš„åˆå§‹ç›‘æµ‹ç›®æ ‡ +区域,这是 ``target_ids`` ä¸çš„第一个(索引 ``0`` ),å¦å¤–å‡ ä¸ªåœ°å€èŒƒå›´ï¼Œ ``20-40`` å’Œ +``50-100`` 作为pid 4242的地å€ï¼Œè¿™æ˜¯ ``target_ids`` ä¸çš„第二个(索引 ``1`` ):: + + # cd <debugfs>/damon + # cat target_ids + 42 4242 + # echo "0 1 100 + 0 100 200 + 1 20 40 + 1 50 100" > init_regions + +请注æ„,这åªæ˜¯è®¾ç½®äº†åˆå§‹çš„ç›‘æµ‹ç›®æ ‡åŒºåŸŸã€‚åœ¨è™šæ‹Ÿå†…å˜ç›‘测的情况下,DAMON会在一个 ``区域更新间隔`` +åŽè‡ªåŠ¨æ›´æ–°åŒºåŸŸçš„è¾¹ç•Œã€‚å› æ¤ï¼Œåœ¨è¿™ç§æƒ…况下,如果用户ä¸å¸Œæœ›æ›´æ–°çš„è¯ï¼Œåº”该把 ``区域的更新间隔`` 设 +置得足够大。 + + +方案 +---- + +对于通常的基于DAMONçš„æ•°æ®è®¿é—®æ„ŸçŸ¥çš„内å˜ç®¡ç†ä¼˜åŒ–,用户åªæ˜¯å¸Œæœ›ç³»ç»Ÿå¯¹ç‰¹å®šè®¿é—®æ¨¡å¼çš„内å˜åŒºåŸŸåº”用内 +å˜ç®¡ç†æ“作。DAMON从用户那里接收这ç§å½¢å¼åŒ–çš„æ“ä½œæ–¹æ¡ˆï¼Œå¹¶å°†è¿™äº›æ–¹æ¡ˆåº”ç”¨åˆ°ç›®æ ‡è¿›ç¨‹ä¸ã€‚ + +用户å¯ä»¥é€šè¿‡è¯»å–和写入 ``scheme`` debugfs文件æ¥èŽ·å¾—和设置这些方案。读å–该文件还å¯ä»¥æ˜¾ç¤ºæ¯ä¸ª +方案的统计数æ®ã€‚在文件ä¸ï¼Œæ¯ä¸€ä¸ªæ–¹æ¡ˆéƒ½åº”该在æ¯ä¸€è¡Œä¸ä»¥ä¸‹åˆ—å½¢å¼è¡¨ç¤ºå‡ºæ¥:: + + <target access pattern> <action> <quota> <watermarks> + +ä½ å¯ä»¥é€šè¿‡ç®€å•åœ°åœ¨æ–‡ä»¶ä¸å†™å…¥ä¸€ä¸ªç©ºå—符串æ¥ç¦ç”¨æ–¹æ¡ˆã€‚ + +ç›®æ ‡è®¿é—®æ¨¡å¼ +~~~~~~~~~~~~ + +``<ç›®æ ‡è®¿é—®æ¨¡å¼>`` 是由三个范围构æˆçš„,形å¼å¦‚下:: + + min-size max-size min-acc max-acc min-age max-age + +具体æ¥è¯´ï¼ŒåŒºåŸŸå¤§å°çš„å—节数( `min-size` å’Œ `max-size` ),访问频率的æ¯èšåˆåŒºé—´çš„监测访问次 +数( `min-acc` å’Œ `max-acc` ),区域年龄的èšåˆåŒºé—´æ•°ï¼ˆ `min-age` å’Œ `max-age` )都被指定。 +请注æ„,这些范围是å°é—区间。 + +动作 +~~~~ + +``<action>`` 是一个预定义的内å˜ç®¡ç†åŠ¨ä½œçš„整数,DAMONå°†åº”ç”¨äºŽå…·æœ‰ç›®æ ‡è®¿é—®æ¨¡å¼çš„åŒºåŸŸã€‚æ”¯æŒ +çš„æ•°å—和它们的å«ä¹‰å¦‚下:: + + - 0: Call ``madvise()`` for the region with ``MADV_WILLNEED`` + - 1: Call ``madvise()`` for the region with ``MADV_COLD`` + - 2: Call ``madvise()`` for the region with ``MADV_PAGEOUT`` + - 3: Call ``madvise()`` for the region with ``MADV_HUGEPAGE`` + - 4: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE`` + - 5: Do nothing but count the statistics + +é…é¢ +~~~~ + +æ¯ä¸ª ``动作`` 的最佳 ``ç›®æ ‡è®¿é—®æ¨¡å¼`` å–决于工作负载,所以ä¸å®¹æ˜“找到。更糟糕的是,将æŸä¸ª +动作的方案设置得过于激进会导致严é‡çš„开销。为了é¿å…è¿™ç§å¼€é”€ï¼Œç”¨æˆ·å¯ä»¥é€šè¿‡ä¸‹é¢è¡¨æ ¼ä¸çš„ ``<quota>`` +æ¥é™åˆ¶æ–¹æ¡ˆçš„时间和大å°é…é¢:: + + <ms> <sz> <reset interval> <priority weights> + +这使得DAMON在 ``<reset interval>`` 毫秒内,尽é‡åªç”¨ ``<ms>`` 毫秒的时间对 ``ç›®æ ‡è®¿ +问模å¼`` 的内å˜åŒºåŸŸåº”用动作,并在 ``<reset interval>`` 内åªå¯¹æœ€å¤š<sz>å—节的内å˜åŒºåŸŸåº” +用动作。将 ``<ms>`` å’Œ ``<sz>`` 都设置为零,å¯ä»¥ç¦ç”¨é…é¢é™åˆ¶ã€‚ + +当预计超过é…é¢é™åˆ¶æ—¶ï¼ŒDAMONä¼šæ ¹æ® ``ç›®æ ‡è®¿é—®æ¨¡å¼`` 的大å°ã€è®¿é—®é¢‘率和年龄,对å‘çŽ°çš„å†…å˜ +区域进行优先排åºã€‚为了实现个性化的优先级,用户å¯ä»¥åœ¨ ``<优先级æƒé‡>`` ä¸è®¾ç½®è¿™ä¸‰ä¸ªå±žæ€§çš„ +æƒé‡ï¼Œå…·ä½“å½¢å¼å¦‚下:: + + <size weight> <access frequency weight> <age weight> + +æ°´ä½ +~~~~ + +有些方案需è¦æ ¹æ®ç³»ç»Ÿç‰¹å®šæŒ‡æ ‡çš„当å‰å€¼æ¥è¿è¡Œï¼Œå¦‚自由内å˜æ¯”率。对于这ç§æƒ…况,用户å¯ä»¥ä¸ºè¯¥æ¡ +件指定水ä½ã€‚:: + + <metric> <check interval> <high mark> <middle mark> <low mark> + +``<metric>`` 是一个预定义的整数,用于è¦æ£€æŸ¥çš„度é‡ã€‚支æŒçš„æ•°å—和它们的å«ä¹‰å¦‚下。 + + - 0: å¿½è§†æ°´ä½ + - 1: 系统空闲内å˜çŽ‡ (åƒåˆ†æ¯”) + +æ¯éš” ``<检查间隔>`` 微秒检查一次公制的值。 + +如果该值高于 ``<é«˜æ ‡>`` 或低于 ``<ä½Žæ ‡>`` ,该方案被åœç”¨ã€‚如果该值低于 ``<ä¸æ ‡>`` , +该方案将被激活。 + +ç»Ÿè®¡æ•°æ® +~~~~~~~~ + +它还统计æ¯ä¸ªæ–¹æ¡ˆè¢«å°è¯•åº”用的区域的总数é‡å’Œå—节数,æ¯ä¸ªæ–¹æ¡ˆè¢«æˆåŠŸåº”用的区域的两个数é‡ï¼Œä»¥ +åŠè¶…过é…é¢é™åˆ¶çš„总数é‡ã€‚这些统计数æ®å¯ç”¨äºŽåœ¨çº¿åˆ†æžæˆ–调整方案。 + +统计数æ®å¯ä»¥é€šè¿‡è¯»å–方案文件æ¥æ˜¾ç¤ºã€‚读å–è¯¥æ–‡ä»¶å°†æ˜¾ç¤ºä½ åœ¨æ¯ä¸€è¡Œä¸è¾“入的æ¯ä¸ª ``方案`` , +统计的五个数å—å°†è¢«åŠ åœ¨æ¯ä¸€è¡Œçš„末尾。 + +例å +~~~~ + +下é¢çš„命令应用了一个方案:â€å¦‚果一个大å°ä¸º[4KiB, 8KiB]的内å˜åŒºåŸŸåœ¨[10, 20]çš„èšåˆæ—¶é—´ +间隔内显示出æ¯ä¸€ä¸ªèšåˆæ—¶é—´é—´éš”[0, 5]的访问é‡ï¼Œè¯·åˆ†é¡µå‡ºè¯¥åŒºåŸŸã€‚对于分页,æ¯ç§’最多åªèƒ½ä½¿ +用10ms,而且æ¯ç§’分页ä¸èƒ½è¶…过1GiB。在这一é™åˆ¶ä¸‹ï¼Œé¦–先分页出具有较长年龄的内å˜åŒºåŸŸã€‚å¦å¤–, +æ¯5秒钟检查一次系统的å¯ç”¨å†…å˜çŽ‡ï¼Œå½“å¯ç”¨å†…å˜çŽ‡ä½ŽäºŽ50%时开始监测和分页,但如果å¯ç”¨å†…å˜çŽ‡ +大于60%,或低于30%,则åœæ¢ç›‘测“:: + + # cd <debugfs>/damon + # scheme="4096 8192 0 5 10 20 2" # target access pattern and action + # scheme+=" 10 $((1024*1024*1024)) 1000" # quotas + # scheme+=" 0 0 100" # prioritization weights + # scheme+=" 1 5000000 600 500 300" # watermarks + # echo "$scheme" > schemes + + +开关 +---- + +除éžä½ 明确地å¯åŠ¨ç›‘测,å¦åˆ™å¦‚上所述的文件设置ä¸ä¼šäº§ç”Ÿæ•ˆæžœã€‚ä½ å¯ä»¥é€šè¿‡å†™å…¥å’Œè¯»å– ``monitor_on`` +文件æ¥å¯åŠ¨ã€åœæ¢å’Œæ£€æŸ¥ç›‘测的当å‰çŠ¶æ€ã€‚写入 ``on`` 该文件å¯ä»¥å¯åŠ¨å¯¹æœ‰å±žæ€§çš„ç›®æ ‡çš„ç›‘æµ‹ã€‚å†™å…¥ +``off`` 该文件则åœæ¢è¿™äº›ç›®æ ‡ã€‚如果æ¯ä¸ªç›®æ ‡è¿›ç¨‹è¢«ç»ˆæ¢ï¼ŒDAMON也会åœæ¢ã€‚下é¢çš„示例命令开å¯ã€å…³ +é—和检查DAMON的状æ€:: + + # cd <debugfs>/damon + # echo on > monitor_on + # echo off > monitor_on + # cat monitor_on + off + +请注æ„,当监测开å¯æ—¶ï¼Œä½ ä¸èƒ½å†™åˆ°ä¸Šè¿°çš„debugfsæ–‡ä»¶ã€‚å¦‚æžœä½ åœ¨DAMONè¿è¡Œæ—¶å†™åˆ°è¿™äº›æ–‡ä»¶ï¼Œå°†ä¼šè¿” +回一个错误代ç ,如 ``-EBUSY`` 。 + + +监测线程PID +----------- + +DAMON通过一个å«åškdamondçš„å†…æ ¸çº¿ç¨‹æ¥è¿›è¡Œè¯·æ±‚ç›‘æµ‹ã€‚ä½ å¯ä»¥é€šè¿‡è¯»å– ``kdamond_pid`` 文件获 +得该线程的 ``pid`` 。当监测被 ``å…³é—`` 时,读å–该文件ä¸ä¼šè¿”回任何信æ¯:: + + # cd <debugfs>/damon + # cat monitor_on + off + # cat kdamond_pid + none + # echo on > monitor_on + # cat kdamond_pid + 18594 + + +使用多个监测线程 +---------------- + +æ¯ä¸ªç›‘测上下文都会创建一个 ``kdamond`` çº¿ç¨‹ã€‚ä½ å¯ä»¥ä½¿ç”¨ ``mk_contexts`` å’Œ ``rm_contexts`` +文件为多个 ``kdamond`` 需è¦çš„ç”¨ä¾‹åˆ›å»ºå’Œåˆ é™¤ç›‘æµ‹ä¸Šä¸‹æ–‡ã€‚ + +将新上下文的å称写入 ``mk_contexts`` 文件,在 ``DAMON debugfs`` 目录上创建一个该å称的目录。 +该目录将有该上下文的 ``DAMON debugfs`` 文件:: + + # cd <debugfs>/damon + # ls foo + # ls: cannot access 'foo': No such file or directory + # echo foo > mk_contexts + # ls foo + # attrs init_regions kdamond_pid schemes target_ids + +如果ä¸å†éœ€è¦ä¸Šä¸‹æ–‡ï¼Œä½ å¯ä»¥é€šè¿‡æŠŠä¸Šä¸‹æ–‡çš„åå—放到 ``rm_contexts`` 文件ä¸æ¥åˆ 除它和相应的目录:: + + # echo foo > rm_contexts + # ls foo + # ls: cannot access 'foo': No such file or directory + +注æ„, ``mk_contexts`` 〠``rm_contexts`` å’Œ ``monitor_on`` 文件åªåœ¨æ ¹ç›®å½•ä¸‹ã€‚ + + +监测结果的监测点 +================ + +DAMON通过一个tracepoint ``damon:damon_aggregated`` æ供监测结果. 当监测开å¯æ—¶ï¼Œä½ å¯ +以记录追踪点事件,并使用追踪点支æŒå·¥å…·å¦‚perf显示结果。比如说:: + + # echo on > monitor_on + # perf record -e damon:damon_aggregated & + # sleep 5 + # kill 9 $(pidof perf) + # echo off > monitor_on + # perf script diff --git a/Documentation/translations/zh_CN/admin-guide/mm/index.rst b/Documentation/translations/zh_CN/admin-guide/mm/index.rst new file mode 100644 index 000000000000..702271c5b683 --- /dev/null +++ b/Documentation/translations/zh_CN/admin-guide/mm/index.rst @@ -0,0 +1,49 @@ +.. include:: ../../disclaimer-zh_CN.rst + +:Original: Documentation/admin-guide/mm/index.rst + +:翻译: + + å¾é‘« xu xin <xu.xin16@zte.com.cn> + + +======== +内å˜ç®¡ç† +======== + +Linux内å˜ç®¡ç†å系统,顾åæ€ä¹‰ï¼Œæ˜¯è´Ÿè´£ç³»ç»Ÿä¸çš„内å˜ç®¡ç†ã€‚它包括了虚拟内å˜ä¸Žè¯·æ±‚ +åˆ†é¡µçš„å®žçŽ°ï¼Œå†…æ ¸å†…éƒ¨ç»“æž„å’Œç”¨æˆ·ç©ºé—´ç¨‹åºçš„内å˜åˆ†é…ã€å°†æ–‡ä»¶æ˜ 射到进程地å€ç©ºé—´ä»¥ +åŠè®¸å¤šå…¶ä»–很酷的事情。 + +Linux内å˜ç®¡ç†æ˜¯ä¸€ä¸ªå…·æœ‰è®¸å¤šå¯é…置设置的å¤æ‚系统, 且这些设置ä¸çš„大多数都å¯ä»¥é€š +过 ``/proc`` 文件系统获得,并且å¯ä»¥ä½¿ç”¨ ``sysctl`` 进行查询和调整。这些API接 +å£è¢«æ述在Documentation/admin-guide/sysctl/vm.rst文件和 `man 5 proc`_ ä¸ã€‚ + +.. _man 5 proc: http://man7.org/linux/man-pages/man5/proc.5.html + +Linux内å˜ç®¡ç†æœ‰å®ƒè‡ªå·±çš„术è¯ï¼Œå¦‚æžœä½ è¿˜ä¸ç†Ÿæ‚‰å®ƒï¼Œè¯·è€ƒè™‘阅读下é¢å‚考: +:ref:`Documentation/admin-guide/mm/concepts.rst <mm_concepts>`. + +在æ¤ç›®å½•ä¸‹ï¼Œæˆ‘们详细æ述了如何与Linux内å˜ç®¡ç†ä¸çš„å„ç§æœºåˆ¶äº¤äº’。 + +.. toctree:: + :maxdepth: 1 + + damon/index + ksm + +Todolist: +* concepts +* cma_debugfs +* hugetlbpage +* idle_page_tracking +* memory-hotplug +* nommu-mmap +* numa_memory_policy +* numaperf +* pagemap +* soft-dirty +* swap_numa +* transhuge +* userfaultfd +* zswap diff --git a/Documentation/translations/zh_CN/admin-guide/mm/ksm.rst b/Documentation/translations/zh_CN/admin-guide/mm/ksm.rst new file mode 100644 index 000000000000..4829156ef1ae --- /dev/null +++ b/Documentation/translations/zh_CN/admin-guide/mm/ksm.rst @@ -0,0 +1,148 @@ +.. include:: ../../disclaimer-zh_CN.rst + +:Original: Documentation/admin-guide/mm/ksm.rst + +:翻译: + + å¾é‘« xu xin <xu.xin16@zte.com.cn> + + +============ +å†…æ ¸åŒé¡µåˆå¹¶ +============ + + +概述 +==== + +KSM是一ç§èƒ½èŠ‚çœå†…å˜çš„æ•°æ®åŽ»é‡åŠŸèƒ½ï¼Œç”±CONFIG_KSM=yå¯ç”¨ï¼Œå¹¶åœ¨2.6.32版本时被添 +åŠ åˆ°Linuxå†…æ ¸ã€‚è¯¦è§ ``mm/ksm.c`` 的实现,以åŠhttp://lwn.net/Articles/306704 +å’Œhttps://lwn.net/Articles/330589 + +KSM最åˆç›®çš„是为了与KVM(å³è‘—åçš„å†…æ ¸å…±äº«å†…å˜ï¼‰ä¸€èµ·ä½¿ç”¨è€Œå¼€å‘的,通过共享虚拟机 +之间的公共数æ®ï¼Œå°†æ›´å¤šè™šæ‹Ÿæœºæ”¾å…¥ç‰©ç†å†…å˜ã€‚但它对于任何会生æˆå¤šä¸ªç›¸åŒæ•°æ®å®žä¾‹çš„ +应用程åºéƒ½æ˜¯å¾ˆæœ‰ç”¨çš„。 + +KSM的守护进程ksmd会定期扫æ那些已注册的用户内å˜åŒºåŸŸï¼ŒæŸ¥æ‰¾å†…容相åŒçš„页é¢ï¼Œè¿™äº› +页é¢å¯ä»¥è¢«å•ä¸ªå†™ä¿æŠ¤é¡µé¢æ›¿æ¢ï¼ˆå¦‚果进程以åŽæƒ³è¦æ›´æ–°å…¶å†…容,将自动å¤åˆ¶ï¼‰ã€‚使用: +引用:`sysfs intraface <ksm_sysfs>` 接å£æ¥é…ç½®KSM守护程åºåœ¨å•ä¸ªè¿‡ç¨‹ä¸æ‰€æ‰«æ的页 +数以åŠä¸¤ä¸ªè¿‡ç¨‹ä¹‹é—´çš„间隔时间。 + +KSMåªåˆå¹¶åŒ¿å(ç§æœ‰ï¼‰é¡µé¢ï¼Œä»Žä¸åˆå¹¶é¡µç¼“å˜ï¼ˆæ–‡ä»¶ï¼‰é¡µé¢ã€‚KSMçš„åˆå¹¶é¡µé¢æœ€åˆåªèƒ½è¢« +é”å®šåœ¨å†…æ ¸å†…å˜ä¸ï¼Œä½†çŽ°åœ¨å¯ä»¥å°±åƒå…¶ä»–用户页é¢ä¸€æ ·è¢«æ¢å‡ºï¼ˆä½†å½“它们被交æ¢å›žæ¥æ—¶å…± +äº«ä¼šè¢«ç ´å: ksmdå¿…é¡»é‡æ–°å‘现它们的身份并å†æ¬¡åˆå¹¶ï¼‰ã€‚ + +以madvise控制KSM +================ + +KSM仅在特定的地å€ç©ºé—´åŒºåŸŸæ—¶è¿è¡Œï¼Œå³åº”用程åºé€šè¿‡ä½¿ç”¨å¦‚下所示的madvise(2)系统调 +用æ¥è¯·æ±‚æŸå—地å€æˆä¸ºå¯èƒ½çš„åˆå¹¶å€™é€‰è€…的地å€ç©ºé—´:: + + int madvise(addr, length, MADV_MERGEABLE) + +应用程åºå½“然也å¯ä»¥é€šè¿‡è°ƒç”¨:: + + int madvise(addr, length, MADV_UNMERGEABLE) + +æ¥å–消该请求,并æ¢å¤ä¸ºéžå…±äº«é¡µé¢ï¼šæ¤æ—¶KSM将去除åˆå¹¶åœ¨è¯¥èŒƒå›´å†…的任何åˆå¹¶é¡µã€‚注æ„: +这个去除åˆå¹¶çš„调用å¯èƒ½çªç„¶éœ€è¦çš„内å˜é‡è¶…过实际å¯ç”¨çš„内å˜é‡-那么å¯èƒ½ä¼šå‡ºçŽ°EAGAIN +失败,但更å¯èƒ½ä¼šå”¤é†’OOM killer。 + +如果KSM未被é…置到æ£åœ¨è¿è¡Œçš„å†…æ ¸ä¸ï¼Œåˆ™madvise MADV_MERGEABLE å’Œ MADV_UNMERGEABLE +的调用åªä¼šä»¥EINVAL 失败。如果æ£åœ¨è¿è¡Œçš„å†…æ ¸æ˜¯ç”¨CONFIG_KSM=yæ–¹å¼æž„建的,那么这些 +调用通常会æˆåŠŸï¼šå³ä½¿KSM守护程åºå½“å‰æ²¡æœ‰è¿è¡Œï¼ŒMADV_MERGEABLE ä»ç„¶ä¼šåœ¨KSMå®ˆæŠ¤ç¨‹åº +å¯åŠ¨æ—¶æ³¨å†ŒèŒƒå›´ï¼Œå³ä½¿è¯¥èŒƒå›´ä¸èƒ½åŒ…å«KSM实际å¯ä»¥åˆå¹¶çš„任何页é¢ï¼Œå³ä½¿MADV_UNMERGEABLE +åº”ç”¨äºŽä»Žæœªæ ‡è®°ä¸ºMADV_MERGEABLE的范围。 + +如果一å—内å˜åŒºåŸŸå¿…须被拆分为至少一个新的MADV_MERGEABLE区域或MADV_UNMERGEABLE区域, +当该进程将超过 ``vm.max_map_count`` 的设定,则madviseå¯èƒ½è¿”回ENOMEM。(请å‚阅文档 +Documentation/admin-guide/sysctl/vm.rst)。 + +与其他madviseè°ƒç”¨ä¸€æ ·ï¼Œå®ƒä»¬åœ¨ç”¨æˆ·åœ°å€ç©ºé—´çš„æ˜ å°„åŒºåŸŸä¸Šä½¿ç”¨ï¼šå¦‚æžœæŒ‡å®šçš„èŒƒå›´åŒ…å«æœª +æ˜ å°„çš„é—´éš™ï¼ˆå°½ç®¡åœ¨ä¸é—´çš„æ˜ å°„åŒºåŸŸå·¥ä½œï¼‰ï¼Œå®ƒä»¬å°†æŠ¥å‘ŠENOMEM,如果没有足够的内å˜ç”¨äºŽ +内部结构,则å¯èƒ½ä¼šå› EAGAIN而失败。 + +KSM守护进程sysfsæŽ¥å£ +==================== + +KSM守护进程å¯ä»¥ç”±``/sys/kernel/mm/ksm/`` ä¸çš„sysfs文件控制,所有人都å¯ä»¥è¯»å–,但 +åªèƒ½ç”±root用户写入。å„接å£è§£é‡Šå¦‚下: + + +pages_to_scan + ksmd进程进入ç¡çœ å‰è¦æ‰«æ的页数。 + 例如, ``echo 100 > /sys/kernel/mm/ksm/pages_to_scan`` + + 默认值:100(该值被选择用于演示目的) + +sleep_millisecs + ksmd在下次扫æå‰åº”ä¼‘çœ å¤šå°‘æ¯«ç§’ + 例如, ``echo 20 > /sys/kernel/mm/ksm/sleep_millisecs`` + + 默认值:20(该值被选择用于演示目的) + +merge_across_nodes + 指定是å¦å¯ä»¥åˆå¹¶æ¥è‡ªä¸åŒNUMA节点的页é¢ã€‚当设置为0时,ksmä»…åˆå¹¶åœ¨ç‰©ç†ä¸Šä½ + 于åŒä¸€NUMA节点的内å˜åŒºåŸŸä¸çš„页é¢ã€‚è¿™é™ä½Žäº†è®¿é—®å…±äº«é¡µé¢çš„延迟。在有明显的 + NUMAè·ç¦»ä¸Šï¼Œå…·æœ‰æ›´å¤šèŠ‚点的系统å¯èƒ½å—益于设置该值为0时的更低延迟。而对于 + 需è¦å¯¹å†…å˜ä½¿ç”¨é‡æœ€å°åŒ–的较å°ç³»ç»Ÿæ¥è¯´ï¼Œè®¾ç½®è¯¥å€¼ä¸º1(默认设置)则å¯èƒ½ä¼šå— + 益于更大共享页é¢ã€‚在决定使用哪ç§è®¾ç½®ä¹‹å‰ï¼Œæ‚¨å¯èƒ½å¸Œæœ›æ¯”较系统在æ¯ç§è®¾ç½®ä¸‹ + 的性能。 ``merge_across_nodes`` 仅当系统ä¸æ²¡æœ‰ksm共享页é¢æ—¶ï¼Œæ‰èƒ½è¢«æ›´æ”¹è®¾ + 置:首先将接å£`run` 设置为2从而对页进行去åˆå¹¶ï¼Œç„¶åŽåœ¨ä¿®æ”¹ + ``merge_across_nodes`` åŽå†å°†â€˜run’åˆè®¾ç½®ä¸º1ï¼Œä»¥æ ¹æ®æ–°è®¾ç½®æ¥é‡æ–°åˆå¹¶ã€‚ + + 默认值:1(如早期的å‘å¸ƒç‰ˆæœ¬ä¸€æ ·åˆå¹¶è·¨ç«™ç‚¹ï¼‰ + +run + * 设置为0å¯åœæ¢ksmdè¿è¡Œï¼Œä½†ä¿ç•™åˆå¹¶é¡µé¢ï¼Œ + * 设置为1å¯è¿è¡Œksmd,例如, ``echo 1 > /sys/kernel/mm/ksm/run`` , + * 设置为2å¯åœæ¢ksmdè¿è¡Œï¼Œå¹¶ä¸”对所有目å‰å·²åˆå¹¶çš„页进行去åˆå¹¶ï¼Œä½†ä¿ç•™å¯åˆå¹¶ + 区域以供下次è¿è¡Œã€‚ + + 默认值:0(必须设置为1æ‰èƒ½æ¿€æ´»KSM,除éžç¦ç”¨äº†CONFIG_SYSFS) + +use_zero_pages + 指定是å¦åº”当特殊处ç†ç©ºé¡µï¼ˆå³é‚£äº›ä»…å«zero的已分é…页)。当该值设置为1时, + ç©ºé¡µä¸Žå†…æ ¸é›¶é¡µåˆå¹¶ï¼Œè€Œä¸æ˜¯åƒé€šå¸¸æƒ…å†µä¸‹é‚£æ ·ç©ºé¡µè‡ªèº«å½¼æ¤åˆå¹¶ã€‚è¿™å¯ä»¥æ ¹æ® + 工作负载的ä¸åŒï¼Œåœ¨å…·æœ‰ç€è‰²é›¶é¡µçš„架构上å¯ä»¥æ高性能。å¯ç”¨æ¤è®¾ç½®æ—¶åº”å°å¿ƒï¼Œ + å› ä¸ºå®ƒå¯èƒ½ä¼šé™ä½ŽæŸäº›å·¥ä½œè´Ÿè½½çš„KSM性能,比如,当待åˆå¹¶çš„候选页é¢çš„æ ¡éªŒå’Œ + 与空页é¢çš„æ ¡éªŒå’Œæ°å¥½åŒ¹é…的时候。æ¤è®¾ç½®å¯éšæ—¶æ›´æ”¹ï¼Œä»…对那些更改åŽå†åˆå¹¶ + 的页é¢æœ‰æ•ˆã€‚ + + 默认值:0(如åŒæ—©æœŸç‰ˆæœ¬çš„KSMæ£å¸¸è¡¨çŽ°ï¼‰ + +max_page_sharing + å•ä¸ªKSM页é¢å…许的最大共享站点数。这将强制执行é‡å¤æ•°æ®æ¶ˆé™¤é™åˆ¶ï¼Œä»¥é¿å…涉 + åŠé历共享KSM页é¢çš„è™šæ‹Ÿæ˜ å°„çš„è™šæ‹Ÿå†…å˜æ“作的高延迟。最å°å€¼ä¸º2ï¼Œå› ä¸ºæ–°åˆ› + 建的KSM页é¢å°†è‡³å°‘有两个共享者。该值越高,KSMåˆå¹¶å†…å˜çš„é€Ÿåº¦è¶Šå¿«ï¼ŒåŽ»é‡ + å› å也越高,但是对于任何给定的KSM页é¢ï¼Œè™šæ‹Ÿæ˜ 射的最å情况é历的速度也会 + 越慢。å‡æ…¢äº†è¿™ç§é历速度就æ„味ç€åœ¨äº¤æ¢ã€åŽ‹ç¼©ã€NUMA平衡和页é¢è¿ç§»æœŸé—´ï¼Œ + æŸäº›è™šæ‹Ÿå†…å˜æ“作将有更高的延迟,从而é™ä½Žè¿™äº›è™šæ‹Ÿå†…å˜æ“作调用者的å“应能力。 + 其他任务如果ä¸æ¶‰åŠæ‰§è¡Œè™šæ‹Ÿæ˜ å°„é历的VMæ“作,其任务调度延迟ä¸å—æ¤å‚æ•°çš„å½± + å“ï¼Œå› ä¸ºè¿™äº›é历本身是调度å‹å¥½çš„。 + +stable_node_chains_prune_millisecs + 指定KSM检查特定页é¢çš„元数æ®çš„频率(å³é‚£äº›è¾¾åˆ°è¿‡æ—¶ä¿¡æ¯æ•°æ®åŽ»é‡é™åˆ¶æ ‡å‡†çš„ + 页é¢ï¼‰å•ä½æ˜¯æ¯«ç§’。较å°çš„毫秒值将以更低的延迟æ¥é‡Šæ”¾KSM元数æ®ï¼Œä½†å®ƒä»¬å°†ä½¿ + ksmd在扫æ期间使用更多CPU。如果还没有一个KSM页é¢è¾¾åˆ° ``max_page_sharing`` + æ ‡å‡†ï¼Œé‚£å°±æ²¡æœ‰ä»€ä¹ˆç”¨ã€‚ + +KSM与MADV_MERGEABLE的工作有效性体现于 ``/sys/kernel/mm/ksm/`` 路径下的接å£ï¼š + +pages_shared + 表示多少共享页æ£åœ¨è¢«ä½¿ç”¨ +pages_sharing + 表示还有多少站点æ£åœ¨å…±äº«è¿™äº›å…±äº«é¡µï¼Œå³èŠ‚çœäº†å¤šå°‘ +pages_unshared + 表示有多少页是唯一的,但被åå¤æ£€æŸ¥ä»¥è¿›è¡Œåˆå¹¶ +pages_volatile + è¡¨ç¤ºæœ‰å¤šå°‘é¡µå› å˜åŒ–å¤ªå¿«è€Œæ— æ³•æ”¾åœ¨treeä¸ +full_scans + 表示所有å¯åˆå¹¶åŒºåŸŸå·²æ‰«æ多少次 +stable_node_chains + 达到 ``max_page_sharing`` é™åˆ¶çš„KSM页数 +stable_node_dups + é‡å¤çš„KSM页数 + +比值 ``pages_sharing/pages_shared`` 的最大值å—é™åˆ¶äºŽ ``max_page_sharing`` +的设定。è¦æƒ³å¢žåŠ 该比值,则相应地è¦å¢žåŠ ``max_page_sharing`` 的值。 diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst index d10191c45cf1..26d9913fc8b6 100644 --- a/Documentation/translations/zh_CN/core-api/index.rst +++ b/Documentation/translations/zh_CN/core-api/index.rst @@ -42,6 +42,7 @@ kref assoc_array xarray + rbtree Todolist: @@ -49,7 +50,6 @@ Todolist: idr circular-buffers - rbtree generic-radix-tree packing bus-virt-phys-mapping diff --git a/Documentation/translations/zh_CN/core-api/rbtree.rst b/Documentation/translations/zh_CN/core-api/rbtree.rst new file mode 100644 index 000000000000..a3e1555cb974 --- /dev/null +++ b/Documentation/translations/zh_CN/core-api/rbtree.rst @@ -0,0 +1,391 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/core-api/rbtree.rst + +:翻译: + + å”艺舟 Tang Yizhou <tangyeechou@gmail.com> + +========================= +Linuxä¸çš„çº¢é»‘æ ‘ï¼ˆrbtree) +========================= + + +:日期: 2007å¹´1月18æ—¥ +:作者: Rob Landley <rob@landley.net> + +ä½•ä¸ºçº¢é»‘æ ‘ï¼Œå®ƒä»¬æœ‰ä»€ä¹ˆç”¨ï¼Ÿ +-------------------------- + +çº¢é»‘æ ‘æ˜¯ä¸€ç§è‡ªå¹³è¡¡äºŒå‰æœç´¢æ ‘,被用æ¥å˜å‚¨å¯æŽ’åºçš„é”®/值数æ®å¯¹ã€‚è¿™ä¸ŽåŸºæ•°æ ‘ï¼ˆè¢«ç”¨æ¥é«˜æ•ˆ +å˜å‚¨ç¨€ç–æ•°ç»„ï¼Œå› æ¤ä½¿ç”¨é•¿æ•´åž‹ä¸‹æ ‡æ¥æ’å…¥/访问/åˆ é™¤ç»“ç‚¹ï¼‰å’Œå“ˆå¸Œè¡¨ï¼ˆæ²¡æœ‰ä¿æŒæŽ’åºå› è€Œæ— æ³• +容易地按åºé历,åŒæ—¶å¿…须调节其大å°å’Œå“ˆå¸Œå‡½æ•°ï¼Œç„¶è€Œçº¢é»‘æ ‘å¯ä»¥ä¼˜é›…地伸缩以便å˜å‚¨ä»»æ„ +æ•°é‡çš„键)ä¸åŒã€‚ + +çº¢é»‘æ ‘å’ŒAVLæ ‘ç±»ä¼¼ï¼Œä½†åœ¨æ’å…¥å’Œåˆ é™¤æ—¶æ供了更快的实时有界的最å情况性能(分别最多两次 +旋转和三次旋转,æ¥å¹³è¡¡æ ‘),查询时间轻微å˜æ…¢ï¼ˆä½†æ—¶é—´å¤æ‚度ä»ç„¶æ˜¯O(log n))。 + +引用Linuxæ¯å‘¨æ–°é—»ï¼ˆLinux Weekly News): + + å†…æ ¸ä¸æœ‰å¤šå¤„çº¢é»‘æ ‘çš„ä½¿ç”¨æ¡ˆä¾‹ã€‚æœ€åŽæœŸé™è°ƒåº¦å™¨å’Œå®Œå…¨å…¬å¹³æŽ’队(CFQ)I/O调度器利用 + çº¢é»‘æ ‘è·Ÿè¸ªè¯·æ±‚ï¼›æ•°æ®åŒ…CD/DVD驱动程åºä¹Ÿæ˜¯å¦‚æ¤ã€‚高精度时钟代ç ä½¿ç”¨ä¸€é¢—çº¢é»‘æ ‘ç»„ç»‡ + 未完æˆçš„定时器请求。ext3æ–‡ä»¶ç³»ç»Ÿç”¨çº¢é»‘æ ‘è·Ÿè¸ªç›®å½•é¡¹ã€‚è™šæ‹Ÿå†…å˜åŒºåŸŸï¼ˆVMAs)ã€epoll + 文件æ述符ã€å¯†ç å¦å¯†é’¥å’Œåœ¨â€œåˆ†å±‚令牌桶â€è°ƒåº¦å™¨ä¸çš„网络数æ®åŒ…éƒ½ç”±çº¢é»‘æ ‘è·Ÿè¸ªã€‚ + +本文档涵盖了对Linuxçº¢é»‘æ ‘å®žçŽ°çš„ä½¿ç”¨æ–¹æ³•ã€‚æ›´å¤šå…³äºŽçº¢é»‘æ ‘çš„æ€§è´¨å’Œå®žçŽ°çš„ä¿¡æ¯ï¼Œå‚è§ï¼š + + Linuxæ¯å‘¨æ–°é—»å…³äºŽçº¢é»‘æ ‘çš„æ–‡ç« + https://lwn.net/Articles/184495/ + + ç»´åŸºç™¾ç§‘çº¢é»‘æ ‘è¯æ¡ + https://en.wikipedia.org/wiki/Red-black_tree + +çº¢é»‘æ ‘çš„Linux实现 +----------------- + +Linuxçš„çº¢é»‘æ ‘å®žçŽ°åœ¨æ–‡ä»¶â€œlib/rbtree.câ€ä¸ã€‚è¦ä½¿ç”¨å®ƒï¼Œéœ€è¦â€œ#include <linux/rbtree.h>â€ã€‚ + +Linuxçš„çº¢é»‘æ ‘å®žçŽ°å¯¹é€Ÿåº¦è¿›è¡Œäº†ä¼˜åŒ–ï¼Œå› æ¤æ¯”ä¼ ç»Ÿçš„å®žçŽ°å°‘ä¸€ä¸ªé—´æŽ¥å±‚ï¼ˆæœ‰æ›´å¥½çš„ç¼“å˜å±€éƒ¨æ€§ï¼‰ã€‚ +æ¯ä¸ªrb_node结构体的实例嵌入在它管ç†çš„æ•°æ®ç»“æž„ä¸ï¼Œå› æ¤ä¸éœ€è¦é 指针æ¥åˆ†ç¦»rb_node和它 +管ç†çš„æ•°æ®ç»“æž„ã€‚ç”¨æˆ·åº”è¯¥ç¼–å†™ä»–ä»¬è‡ªå·±çš„æ ‘æœç´¢å’Œæ’入函数,æ¥è°ƒç”¨å·²æä¾›çš„çº¢é»‘æ ‘å‡½æ•°ï¼Œ +而ä¸æ˜¯ä½¿ç”¨ä¸€ä¸ªæ¯”è¾ƒå›žè°ƒå‡½æ•°æŒ‡é’ˆã€‚åŠ é”代ç ä¹Ÿç•™ç»™çº¢é»‘æ ‘çš„ç”¨æˆ·ç¼–å†™ã€‚ + +åˆ›å»ºä¸€é¢—çº¢é»‘æ ‘ +-------------- + +çº¢é»‘æ ‘ä¸çš„æ•°æ®ç»“点是包å«rb_node结构体æˆå‘˜çš„结构体:: + + struct mytype { + struct rb_node node; + char *keystring; + }; + +当处ç†ä¸€ä¸ªæŒ‡å‘内嵌rb_node结构体的指针时,包ä½rb_node的结构体å¯ç”¨æ ‡å‡†çš„container_of() +å®è®¿é—®ã€‚æ¤å¤–,个体æˆå‘˜å¯ç›´æŽ¥ç”¨rb_entry(node, type, member)访问。 + +æ¯é¢—çº¢é»‘æ ‘çš„æ ¹æ˜¯ä¸€ä¸ªrb_rootæ•°æ®ç»“构,它由以下方å¼åˆå§‹åŒ–为空: + + struct rb_root mytree = RB_ROOT; + +åœ¨ä¸€é¢—çº¢é»‘æ ‘ä¸æœç´¢å€¼ +-------------------- + +ä¸ºä½ çš„æ ‘å†™ä¸€ä¸ªæœç´¢å‡½æ•°æ˜¯ç›¸å½“简å•çš„ï¼šä»Žæ ‘æ ¹å¼€å§‹ï¼Œæ¯”è¾ƒæ¯ä¸ªå€¼ï¼Œç„¶åŽæ ¹æ®éœ€è¦ç»§ç»å‰å¾€å·¦è¾¹æˆ– +å³è¾¹çš„分支。 + +示例:: + + struct mytype *my_search(struct rb_root *root, char *string) + { + struct rb_node *node = root->rb_node; + + while (node) { + struct mytype *data = container_of(node, struct mytype, node); + int result; + + result = strcmp(string, data->keystring); + + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return data; + } + return NULL; + } + +åœ¨ä¸€é¢—çº¢é»‘æ ‘ä¸æ’å…¥æ•°æ® +---------------------- + +åœ¨æ ‘ä¸æ’入数æ®çš„æ¥éª¤åŒ…括:首先æœç´¢æ’入新结点的ä½ç½®ï¼Œç„¶åŽæ’å…¥ç»“ç‚¹å¹¶å¯¹æ ‘å†å¹³è¡¡ +("recoloring")。 + +æ’入的æœç´¢å’Œä¸Šæ–‡çš„æœç´¢ä¸åŒï¼Œå®ƒè¦æ‰¾åˆ°å«æŽ¥æ–°ç»“点的ä½ç½®ã€‚新结点也需è¦ä¸€ä¸ªæŒ‡å‘它的父节点 +的链接,以达到å†å¹³è¡¡çš„目的。 + +示例:: + + int my_insert(struct rb_root *root, struct mytype *data) + { + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct mytype *this = container_of(*new, struct mytype, node); + int result = strcmp(data->keystring, this->keystring); + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else + return FALSE; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); + + return TRUE; + } + +åœ¨ä¸€é¢—çº¢é»‘æ ‘ä¸åˆ 除或替æ¢å·²ç»å˜åœ¨çš„æ•°æ® +-------------------------------------- + +è‹¥è¦ä»Žæ ‘ä¸åˆ 除一个已ç»å˜åœ¨çš„结点,调用:: + + void rb_erase(struct rb_node *victim, struct rb_root *tree); + +示例:: + + struct mytype *data = mysearch(&mytree, "walrus"); + + if (data) { + rb_erase(&data->node, &mytree); + myfree(data); + } + +è‹¥è¦ç”¨ä¸€ä¸ªæ–°ç»“点替æ¢æ ‘ä¸ä¸€ä¸ªå·²ç»å˜åœ¨çš„键值相åŒçš„结点,调用:: + + void rb_replace_node(struct rb_node *old, struct rb_node *new, + struct rb_root *tree); + +通过这ç§æ–¹å¼æ›¿æ¢ç»“点ä¸ä¼šå¯¹æ ‘åšé‡æŽ’åºï¼šå¦‚果新结点的键值和旧结点ä¸åŒï¼Œçº¢é»‘æ ‘å¯èƒ½è¢« +ç ´å。 + +(按排åºçš„顺åºï¼‰é历å˜å‚¨åœ¨çº¢é»‘æ ‘ä¸çš„å…ƒç´ +---------------------------------------- + +我们æ供了四个函数,用于以排åºçš„æ–¹å¼éåŽ†ä¸€é¢—çº¢é»‘æ ‘çš„å†…å®¹ã€‚è¿™äº›å‡½æ•°å¯ä»¥åœ¨ä»»æ„çº¢é»‘æ ‘ +上工作,并且ä¸éœ€è¦è¢«ä¿®æ”¹æˆ–包装(除éžåŠ é”的目的):: + + struct rb_node *rb_first(struct rb_root *tree); + struct rb_node *rb_last(struct rb_root *tree); + struct rb_node *rb_next(struct rb_node *node); + struct rb_node *rb_prev(struct rb_node *node); + +è¦å¼€å§‹è¿ä»£ï¼Œéœ€è¦ä½¿ç”¨ä¸€ä¸ªæŒ‡å‘æ ‘æ ¹çš„æŒ‡é’ˆè°ƒç”¨rb_first()或rb_last()ï¼Œå®ƒå°†è¿”å›žä¸€ä¸ªæŒ‡å‘ +æ ‘ä¸ç¬¬ä¸€ä¸ªæˆ–最åŽä¸€ä¸ªå…ƒç´ 所包å«çš„节点结构的指针。è¦ç»§ç»çš„è¯ï¼Œå¯ä»¥åœ¨å½“å‰ç»“点上调用 +rb_next()或rb_prev()æ¥èŽ·å–下一个或上一个结点。当没有剩余的结点时,将返回NULL。 + +è¿ä»£å™¨å‡½æ•°è¿”回一个指å‘被嵌入的rb_node结构体的指针,由æ¤ï¼ŒåŒ…ä½rb_node的结构体å¯ç”¨ +æ ‡å‡†çš„container_of()å®è®¿é—®ã€‚æ¤å¤–,个体æˆå‘˜å¯ç›´æŽ¥ç”¨rb_entry(node, type, member) +访问。 + +示例:: + + struct rb_node *node; + for (node = rb_first(&mytree); node; node = rb_next(node)) + printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring); + +带缓å˜çš„çº¢é»‘æ ‘ +-------------- + +计算最左边(最å°çš„)结点是二å‰æœç´¢æ ‘的一个相当常è§çš„任务,例如用于éåŽ†ï¼Œæˆ–ç”¨æˆ·æ ¹æ® +他们自己的逻辑ä¾èµ–一个特定的顺åºã€‚为æ¤ï¼Œç”¨æˆ·å¯ä»¥ä½¿ç”¨'struct rb_root_cached'æ¥ä¼˜åŒ– +时间å¤æ‚度为O(logN)çš„rb_first()的调用,以简å•åœ°èŽ·å–指针,é¿å…äº†æ½œåœ¨çš„æ˜‚è´µçš„æ ‘è¿ä»£ã€‚ +维护æ“作的é¢å¤–è¿è¡Œæ—¶é—´å¼€é”€å¯å¿½ç•¥ï¼Œä¸è¿‡å†…å˜å 用较大。 + +å’Œrb_root结构体类似,带缓å˜çš„çº¢é»‘æ ‘ç”±ä»¥ä¸‹æ–¹å¼åˆå§‹åŒ–为空:: + + struct rb_root_cached mytree = RB_ROOT_CACHED; + +带缓å˜çš„çº¢é»‘æ ‘åªæ˜¯ä¸€ä¸ªå¸¸è§„çš„rb_rootï¼ŒåŠ ä¸Šä¸€ä¸ªé¢å¤–的指针æ¥ç¼“å˜æœ€å·¦è¾¹çš„节点。这使得 +rb_root_cachedå¯ä»¥å˜åœ¨äºŽrb_rootå˜åœ¨çš„任何地方,并且åªéœ€å¢žåŠ å‡ ä¸ªæŽ¥å£æ¥æ”¯æŒå¸¦ç¼“å˜çš„ +æ ‘:: + + struct rb_node *rb_first_cached(struct rb_root_cached *tree); + void rb_insert_color_cached(struct rb_node *, struct rb_root_cached *, bool); + void rb_erase_cached(struct rb_node *node, struct rb_root_cached *); + +æ“ä½œå’Œåˆ é™¤ä¹Ÿæœ‰å¯¹åº”çš„å¸¦ç¼“å˜çš„æ ‘çš„è°ƒç”¨:: + + void rb_insert_augmented_cached(struct rb_node *node, struct rb_root_cached *, + bool, struct rb_augment_callbacks *); + void rb_erase_augmented_cached(struct rb_node *, struct rb_root_cached *, + struct rb_augment_callbacks *); + + +å¯¹å¢žå¼ºåž‹çº¢é»‘æ ‘çš„æ”¯æŒ +-------------------- + +å¢žå¼ºåž‹çº¢é»‘æ ‘æ˜¯ä¸€ç§åœ¨æ¯ä¸ªç»“点里å˜å‚¨äº†â€œä¸€äº›â€é™„åŠ æ•°æ®çš„çº¢é»‘æ ‘ï¼Œå…¶ä¸ç»“点Nçš„é™„åŠ æ•°æ® +必须是以Nä¸ºæ ¹çš„åæ ‘ä¸æ‰€æœ‰ç»“ç‚¹çš„å†…å®¹çš„å‡½æ•°ã€‚å®ƒæ˜¯å»ºç«‹åœ¨çº¢é»‘æ ‘åŸºç¡€è®¾æ–½ä¹‹ä¸Šçš„å¯é€‰ç‰¹æ€§ã€‚ +想è¦ä½¿ç”¨è¿™ä¸ªç‰¹æ€§çš„çº¢é»‘æ ‘ç”¨æˆ·ï¼Œæ’å…¥å’Œåˆ é™¤ç»“ç‚¹æ—¶å¿…é¡»è°ƒç”¨å¢žå¼ºåž‹æŽ¥å£å¹¶æ供增强型回调函数。 + +å®žçŽ°å¢žå¼ºåž‹çº¢é»‘æ ‘æ“作的C文件必须包å«<linux/rbtree_augmented.h>而ä¸æ˜¯<linux/rbtree.h>。 +注æ„,linux/rbtree_augmented.hæš´éœ²äº†ä¸€äº›çº¢é»‘æ ‘å®žçŽ°çš„ç»†èŠ‚è€Œä½ ä¸åº”ä¾èµ–它们,请åšæŒ +使用文档记录的API,并且ä¸è¦åœ¨å¤´æ–‡ä»¶ä¸åŒ…å«<linux/rbtree_augmented.h>,以最å°åŒ–ä½ çš„ +用户æ„外地ä¾èµ–这些实现细节的å¯èƒ½ã€‚ + +æ’入时,用户必须更新通往被æ’入节点的路径上的增强信æ¯ï¼Œç„¶åŽåƒå¾€å¸¸ä¸€æ ·è°ƒç”¨rb_link_node(), +然åŽæ˜¯rb_augment_inserted()而ä¸æ˜¯å¹³æ—¶çš„rb_insert_color()调用。如果 +rb_augment_inserted()å†å¹³è¡¡äº†çº¢é»‘æ ‘ï¼Œå®ƒå°†å›žè°ƒè‡³ä¸€ä¸ªç”¨æˆ·æ供的函数æ¥æ›´æ–°å—å½±å“çš„ +åæ ‘ä¸Šçš„å¢žå¼ºä¿¡æ¯ã€‚ + +åˆ é™¤ä¸€ä¸ªç»“ç‚¹æ—¶ï¼Œç”¨æˆ·å¿…é¡»è°ƒç”¨rb_erase_augmented()而ä¸æ˜¯rb_erase()。 +rb_erase_augmented()回调至一个用户æ供的函数æ¥æ›´æ–°å—å½±å“çš„åæ ‘ä¸Šçš„å¢žå¼ºä¿¡æ¯ã€‚ + +在两ç§æƒ…况下,回调都是通过rb_augment_callbacks结构体æ供的。必须定义3个回调: + +- ä¸€ä¸ªä¼ æ’回调,它更新一个给定结点和它的祖先们的增强数æ®ï¼Œç›´åˆ°ä¸€ä¸ªç»™å®šçš„åœæ¢ç‚¹ + (如果是NULLï¼Œå°†æ›´æ–°ä¸€è·¯æ›´æ–°åˆ°æ ‘æ ¹ï¼‰ã€‚ + +- 一个å¤åˆ¶å›žè°ƒï¼Œå®ƒå°†ä¸€é¢—给定åæ ‘çš„å¢žå¼ºæ•°æ®å¤åˆ¶åˆ°ä¸€ä¸ªæ–°æŒ‡å®šçš„åæ ‘æ ‘æ ¹ã€‚ + +- ä¸€ä¸ªæ ‘æ—‹è½¬å›žè°ƒï¼Œå®ƒå°†ä¸€é¢—ç»™å®šçš„åæ ‘çš„å¢žå¼ºå€¼å¤åˆ¶åˆ°æ–°æŒ‡å®šçš„åæ ‘æ ‘æ ¹ä¸Šï¼Œå¹¶é‡æ–°è®¡ç®— + å…ˆå‰çš„åæ ‘æ ‘æ ¹çš„å¢žå¼ºå€¼ã€‚ + +rb_erase_augmented()编译åŽçš„代ç å¯èƒ½ä¼šå†…è”ä¼ æ’ã€å¤åˆ¶å›žè°ƒï¼Œè¿™å°†å¯¼è‡´å‡½æ•°ä½“积更大, +å› æ¤æ¯ä¸ªå¢žå¼ºåž‹çº¢é»‘æ ‘çš„ç”¨æˆ·åº”è¯¥åªæœ‰ä¸€ä¸ªrb_erase_augmented()的调用点,以é™åˆ¶ç¼–è¯‘åŽ +的代ç 大å°ã€‚ + + +使用示例 +^^^^^^^^ + +åŒºé—´æ ‘æ˜¯å¢žå¼ºåž‹çº¢é»‘æ ‘çš„ä¸€ä¸ªä¾‹å。å‚考Cormen,Leiserson,Rivestå’ŒStein写的 +ã€Šç®—æ³•å¯¼è®ºã€‹ã€‚åŒºé—´æ ‘çš„æ›´å¤šç»†èŠ‚ï¼š + +ç»å…¸çš„çº¢é»‘æ ‘åªæœ‰ä¸€ä¸ªé”®ï¼Œå®ƒä¸èƒ½ç›´æŽ¥ç”¨æ¥å˜å‚¨åƒ[lo:hi]è¿™æ ·çš„åŒºé—´èŒƒå›´ï¼Œä¹Ÿä¸èƒ½å¿«é€ŸæŸ¥æ‰¾ +与新的lo:hié‡å 的部分,或者查找是å¦æœ‰ä¸Žæ–°çš„lo:hi完全匹é…的部分。 + +ç„¶è€Œï¼Œçº¢é»‘æ ‘å¯ä»¥è¢«å¢žå¼ºï¼Œä»¥ä¸€ç§ç»“构化的方å¼æ¥å˜å‚¨è¿™ç§åŒºé—´èŒƒå›´ï¼Œä»Žè€Œä½¿é«˜æ•ˆçš„查找和 +精确匹é…æˆä¸ºå¯èƒ½ã€‚ + +这个å˜å‚¨åœ¨æ¯ä¸ªèŠ‚点ä¸çš„“é¢å¤–ä¿¡æ¯â€æ˜¯å…¶æ‰€æœ‰åŽä»£ç»“点ä¸çš„最大hi(max_hiï¼‰å€¼ã€‚è¿™ä¸ªä¿¡æ¯ +å¯ä»¥ä¿æŒåœ¨æ¯ä¸ªç»“点上,åªéœ€æŸ¥çœ‹ä¸€ä¸‹è¯¥ç»“点和它的直系å结点们。这将被用于时间å¤æ‚度 +为O(log n)的最低匹é…查找(所有å¯èƒ½çš„匹é…ä¸æœ€ä½Žçš„起始地å€ï¼‰ï¼Œå°±åƒè¿™æ ·:: + + struct interval_tree_node * + interval_tree_first_match(struct rb_root *root, + unsigned long start, unsigned long last) + { + struct interval_tree_node *node; + + if (!root->rb_node) + return NULL; + node = rb_entry(root->rb_node, struct interval_tree_node, rb); + + while (true) { + if (node->rb.rb_left) { + struct interval_tree_node *left = + rb_entry(node->rb.rb_left, + struct interval_tree_node, rb); + if (left->__subtree_last >= start) { + /* + * Some nodes in left subtree satisfy Cond2. + * Iterate to find the leftmost such node N. + * If it also satisfies Cond1, that's the match + * we are looking for. Otherwise, there is no + * matching interval as nodes to the right of N + * can't satisfy Cond1 either. + */ + node = left; + continue; + } + } + if (node->start <= last) { /* Cond1 */ + if (node->last >= start) /* Cond2 */ + return node; /* node is leftmost match */ + if (node->rb.rb_right) { + node = rb_entry(node->rb.rb_right, + struct interval_tree_node, rb); + if (node->__subtree_last >= start) + continue; + } + } + return NULL; /* No match */ + } + } + +æ’å…¥/åˆ é™¤æ˜¯é€šè¿‡ä»¥ä¸‹å¢žå¼ºåž‹å›žè°ƒæ¥å®šä¹‰çš„:: + + static inline unsigned long + compute_subtree_last(struct interval_tree_node *node) + { + unsigned long max = node->last, subtree_last; + if (node->rb.rb_left) { + subtree_last = rb_entry(node->rb.rb_left, + struct interval_tree_node, rb)->__subtree_last; + if (max < subtree_last) + max = subtree_last; + } + if (node->rb.rb_right) { + subtree_last = rb_entry(node->rb.rb_right, + struct interval_tree_node, rb)->__subtree_last; + if (max < subtree_last) + max = subtree_last; + } + return max; + } + + static void augment_propagate(struct rb_node *rb, struct rb_node *stop) + { + while (rb != stop) { + struct interval_tree_node *node = + rb_entry(rb, struct interval_tree_node, rb); + unsigned long subtree_last = compute_subtree_last(node); + if (node->__subtree_last == subtree_last) + break; + node->__subtree_last = subtree_last; + rb = rb_parent(&node->rb); + } + } + + static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new) + { + struct interval_tree_node *old = + rb_entry(rb_old, struct interval_tree_node, rb); + struct interval_tree_node *new = + rb_entry(rb_new, struct interval_tree_node, rb); + + new->__subtree_last = old->__subtree_last; + } + + static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new) + { + struct interval_tree_node *old = + rb_entry(rb_old, struct interval_tree_node, rb); + struct interval_tree_node *new = + rb_entry(rb_new, struct interval_tree_node, rb); + + new->__subtree_last = old->__subtree_last; + old->__subtree_last = compute_subtree_last(old); + } + + static const struct rb_augment_callbacks augment_callbacks = { + augment_propagate, augment_copy, augment_rotate + }; + + void interval_tree_insert(struct interval_tree_node *node, + struct rb_root *root) + { + struct rb_node **link = &root->rb_node, *rb_parent = NULL; + unsigned long start = node->start, last = node->last; + struct interval_tree_node *parent; + + while (*link) { + rb_parent = *link; + parent = rb_entry(rb_parent, struct interval_tree_node, rb); + if (parent->__subtree_last < last) + parent->__subtree_last = last; + if (start < parent->start) + link = &parent->rb.rb_left; + else + link = &parent->rb.rb_right; + } + + node->__subtree_last = last; + rb_link_node(&node->rb, rb_parent, link); + rb_insert_augmented(&node->rb, root, &augment_callbacks); + } + + void interval_tree_remove(struct interval_tree_node *node, + struct rb_root *root) + { + rb_erase_augmented(&node->rb, root, &augment_callbacks); + } diff --git a/Documentation/translations/zh_CN/devicetree/index.rst b/Documentation/translations/zh_CN/devicetree/index.rst new file mode 100644 index 000000000000..23d0b6fccd58 --- /dev/null +++ b/Documentation/translations/zh_CN/devicetree/index.rst @@ -0,0 +1,50 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/Devicetree/index.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + + +============================= +Open Firmware å’Œ Devicetree +============================= + +è¯¥æ–‡æ¡£æ˜¯æ•´ä¸ªè®¾å¤‡æ ‘æ–‡æ¡£çš„æ€»ç›®å½•ï¼Œæ ‡é¢˜ä¸å¤šæ˜¯ä¸šå†…默认的术è¯ï¼Œåˆè§å°±ç¿»è¯‘æˆä¸æ–‡ï¼Œ +æ™¦æ¶©éš¾æ‡‚ï¼Œå› æ¤å°½é‡ä¿ç•™ï¼ŒåŽé¢ç¿»è¯‘å…¶å文档时,å¯èƒ½ä¼šæ ¹æ®è¯å¢ƒï¼Œçµæ´»åœ°ç¿»è¯‘为ä¸æ–‡ã€‚ + +å†…æ ¸Devicetree的使用 +======================= +.. toctree:: + :maxdepth: 1 + + usage-model + of_unittest + +Todolist: + +* kernel-api + +Devicetree Overlays +=================== +.. toctree:: + :maxdepth: 1 + +Todolist: + +* changesets +* dynamic-resolution-notes +* overlay-notes + +Devicetree Bindings +=================== +.. toctree:: + :maxdepth: 1 + +Todolist: + +* bindings/index diff --git a/Documentation/translations/zh_CN/devicetree/of_unittest.rst b/Documentation/translations/zh_CN/devicetree/of_unittest.rst new file mode 100644 index 000000000000..abd94e771ef8 --- /dev/null +++ b/Documentation/translations/zh_CN/devicetree/of_unittest.rst @@ -0,0 +1,189 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/Devicetree/of_unittest.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + +================================= +Open Firmware Devicetree å•å…ƒæµ‹è¯• +================================= + +作者: Gaurav Minocha <gaurav.minocha.os@gmail.com> + +1. 概述 +======= + +本文档解释了执行 OF å•å…ƒæµ‹è¯•æ‰€éœ€çš„测试数æ®æ˜¯å¦‚何动æ€åœ°é™„åŠ åˆ°å®žæ—¶æ ‘ä¸Šçš„ï¼Œä¸Žæœºå™¨çš„æž¶æž„æ— å…³ã€‚ + +建议在继ç»è¯»ä¸‹åŽ»ä¹‹å‰ï¼Œå…ˆé˜…读以下文件。 + +(1) Documentation/devicetree/usage-model.rst +(2) http://www.devicetree.org/Device_Tree_Usage + +OF Selftest被设计用æ¥æµ‹è¯•æ供给设备驱动开å‘者的接å£ï¼ˆinclude/linux/of.h),以从未æ‰å¹³ +åŒ–çš„è®¾å¤‡æ ‘æ•°æ®ç»“æž„ä¸èŽ·å–设备信æ¯ç‰ã€‚这个接å£è¢«å¤§å¤šæ•°è®¾å¤‡é©±åŠ¨åœ¨å„ç§ä½¿ç”¨æƒ…况下使用。 + + +2. æµ‹è¯•æ•°æ® +=========== + +è®¾å¤‡æ ‘æºæ–‡ä»¶ï¼ˆdrivers/of/unittest-data/testcases.dts)包å«æ‰§è¡Œdrivers/of/unittest.c +ä¸è‡ªåŠ¨åŒ–å•å…ƒæµ‹è¯•æ‰€éœ€çš„测试数æ®ã€‚ç›®å‰ï¼Œä»¥ä¸‹è®¾å¤‡æ ‘æºåŒ…å«æ–‡ä»¶ï¼ˆ.dtsi)被包å«åœ¨testcases.dtä¸:: + + drivers/of/unittest-data/tests-interrupts.dtsi + drivers/of/unittest-data/tests-platform.dtsi + drivers/of/unittest-data/tests-phandle.dtsi + drivers/of/unittest-data/tests-match.dtsi + +å½“å†…æ ¸åœ¨å¯ç”¨OF_SELFTEST的情况下被构建时,那么下é¢çš„make规则:: + + $(obj)/%.dtb: $(src)/%.dts FORCE + $(call if_changed_dep, dtc) + +用于将DTæºæ–‡ä»¶ï¼ˆtestcases.dts)编译æˆäºŒè¿›åˆ¶blob(testcases.dtb),也被称为æ‰å¹³åŒ–çš„DT。 + +之åŽï¼Œä½¿ç”¨ä»¥ä¸‹è§„则将上述二进制blob包装æˆä¸€ä¸ªæ±‡ç¼–文件(testcases.dtb.S):: + + $(obj)/%.dtb.S: $(obj)/%.dtb + $(call cmd, dt_S_dtb) + +汇编文件被编译æˆä¸€ä¸ªå¯¹è±¡æ–‡ä»¶ï¼ˆtestcases.dtb.oï¼‰ï¼Œå¹¶è¢«é“¾æŽ¥åˆ°å†…æ ¸é•œåƒä¸ã€‚ + + +2.1. æ·»åŠ æµ‹è¯•æ•°æ® +----------------- + +未æ‰å¹³åŒ–çš„è®¾å¤‡æ ‘ç»“æž„ä½“: + +未æ‰å¹³åŒ–çš„è®¾å¤‡æ ‘ç”±è¿žæŽ¥çš„è®¾å¤‡èŠ‚ç‚¹ç»„æˆï¼Œå…¶æ ‘状结构形å¼å¦‚下所述:: + + // following struct members are used to construct the tree + struct device_node { + ... + struct device_node *parent; + struct device_node *child; + struct device_node *sibling; + ... + }; + +图1æ述了一个机器的未æ‰å¹³åŒ–è®¾å¤‡æ ‘çš„é€šç”¨ç»“æž„ï¼Œåªè€ƒè™‘了å节点和åŒçº§æŒ‡é’ˆã€‚å˜åœ¨å¦ä¸€ä¸ªæŒ‡é’ˆï¼Œ +``*parent`` ,用于åå‘éåŽ†è¯¥æ ‘ã€‚å› æ¤ï¼Œåœ¨ä¸€ä¸ªç‰¹å®šçš„层次上,å节点和所有的兄弟å§å¦¹èŠ‚点将 +有一个指å‘å…±åŒèŠ‚点的父指针(例如,child1ã€sibling2ã€sibling3ã€sibling4çš„çˆ¶æŒ‡é’ˆæŒ‡å‘ +æ ¹èŠ‚ç‚¹ï¼‰:: + + root ('/') + | + child1 -> sibling2 -> sibling3 -> sibling4 -> null + | | | | + | | | null + | | | + | | child31 -> sibling32 -> null + | | | | + | | null null + | | + | child21 -> sibling22 -> sibling23 -> null + | | | | + | null null null + | + child11 -> sibling12 -> sibling13 -> sibling14 -> null + | | | | + | | | null + | | | + null null child131 -> null + | + null + +Figure 1: 未æ‰å¹³åŒ–çš„è®¾å¤‡æ ‘çš„é€šç”¨ç»“æž„ + + +在执行OFå•å…ƒæµ‹è¯•ä¹‹å‰ï¼Œéœ€è¦å°†æµ‹è¯•æ•°æ®é™„åŠ åˆ°æœºå™¨çš„è®¾å¤‡æ ‘ä¸Šï¼ˆå¦‚æžœå˜åœ¨ï¼‰ã€‚å› æ¤ï¼Œå½“调用 +selftest_data_add()时,首先会读å–é€šè¿‡ä»¥ä¸‹å†…æ ¸ç¬¦å·é“¾æŽ¥åˆ°å†…æ ¸é•œåƒä¸çš„æ‰å¹³åŒ–è®¾å¤‡æ ‘ +æ•°æ®:: + + __dtb_testcases_begin - address marking the start of test data blob + __dtb_testcases_end - address marking the end of test data blob + +其次,它调用of_fdt_unflatten_tree()æ¥è§£é™¤æ‰å¹³åŒ–çš„blob。最åŽï¼Œå¦‚æžœæœºå™¨çš„è®¾å¤‡æ ‘ +(å³å®žæ—¶æ ‘)是å˜åœ¨çš„,那么它将未æ‰å¹³åŒ–的测试数æ®æ ‘é™„åŠ åˆ°å®žæ—¶æ ‘ä¸Šï¼Œå¦åˆ™å®ƒå°†è‡ªå·±ä½œä¸º +å®žæ—¶è®¾å¤‡æ ‘é™„åŠ ã€‚ + +attach_node_and_children()使用of_attach_node()å°†èŠ‚ç‚¹é™„åŠ åˆ°å®žæ—¶æ ‘ä¸Šï¼Œå¦‚ä¸‹æ‰€ +述。为了解释这一点,图2ä¸æ述的测试数æ®æ ‘è¢«é™„åŠ åˆ°å›¾1ä¸æè¿°çš„å®žæ—¶æ ‘ä¸Š:: + + root ('/') + | + testcase-data + | + test-child0 -> test-sibling1 -> test-sibling2 -> test-sibling3 -> null + | | | | + test-child01 null null null + + +Figure 2: 将测试数æ®æ ‘é™„åœ¨å®žæ—¶æ ‘ä¸Šçš„ä¾‹å。 + +æ ¹æ®ä¸Šé¢çš„æ–¹æ¡ˆï¼Œå®žæ—¶æ ‘å·²ç»å˜åœ¨ï¼Œæ‰€ä»¥ä¸éœ€è¦é™„åŠ æ ¹('/')节点。所有其他节点都是通过在 +æ¯ä¸ªèŠ‚点上调用of_attach_node()æ¥é™„åŠ çš„ã€‚ + +在函数of_attach_node()ä¸ï¼Œæ–°çš„èŠ‚ç‚¹è¢«é™„åœ¨å®žæ—¶æ ‘ä¸ç»™å®šçš„父节点的å节点上。但是,如 +果父节点已ç»æœ‰äº†ä¸€ä¸ªå©å,那么新节点就会å–代当å‰çš„å©å,并将其å˜æˆå…¶å…„弟å§å¦¹ã€‚å› æ¤ï¼Œ +当测试案例的数æ®èŠ‚点被连接到上é¢çš„å®žæ—¶æ ‘ï¼ˆå›¾1)时,最终的结构如图3所示:: + + root ('/') + | + testcase-data -> child1 -> sibling2 -> sibling3 -> sibling4 -> null + | | | | | + (...) | | | null + | | child31 -> sibling32 -> null + | | | | + | | null null + | | + | child21 -> sibling22 -> sibling23 -> null + | | | | + | null null null + | + child11 -> sibling12 -> sibling13 -> sibling14 -> null + | | | | + null null | null + | + child131 -> null + | + null + ----------------------------------------------------------------------- + + root ('/') + | + testcase-data -> child1 -> sibling2 -> sibling3 -> sibling4 -> null + | | | | | + | (...) (...) (...) null + | + test-sibling3 -> test-sibling2 -> test-sibling1 -> test-child0 -> null + | | | | + null null null test-child01 + + +Figure 3: é™„åŠ æµ‹è¯•æ¡ˆä¾‹æ•°æ®åŽçš„å®žæ—¶è®¾å¤‡æ ‘ç»“æž„ã€‚ + + +èªæ˜Žçš„读者会注æ„到,与先å‰çš„结构相比,test-child0节点æˆä¸ºæœ€åŽä¸€ä¸ªå…„弟å§å¦¹ï¼ˆå›¾2)。 +在连接了第一个test-child0节点之åŽï¼Œåˆè¿žæŽ¥äº†test-sibling1节点,该节点推动å节点 +(å³test-child0)æˆä¸ºå…„弟å§å¦¹ï¼Œå¹¶ä½¿è‡ªå·±æˆä¸ºå节点,如上所述。 + +如果å‘现一个é‡å¤çš„节点(å³å¦‚果一个具有相åŒfull_name属性的节点已ç»å˜åœ¨äºŽå®žæ—¶æ ‘ä¸ï¼‰ï¼Œ +那么该节点ä¸ä¼šè¢«é™„åŠ ï¼Œè€Œæ˜¯é€šè¿‡è°ƒç”¨å‡½æ•°update_node_properties()将其属性更新到活 +æ ‘çš„èŠ‚ç‚¹ä¸ã€‚ + + +2.2. åˆ é™¤æµ‹è¯•æ•°æ® +----------------- + +一旦测试用例执行完,selftest_data_remove被调用,以移除最åˆè¿žæŽ¥çš„设备节点(首先是 +å¶å节点被分离,然åŽå‘上移动父节点被移除,最åŽæ˜¯æ•´ä¸ªæ ‘)。selftest_data_remove() +调用detach_node_and_children(),使用of_detach_node()å°†èŠ‚ç‚¹ä»Žå®žæ—¶è®¾å¤‡æ ‘ä¸Šåˆ†ç¦»ã€‚ + +为了分离一个节点,of_detach_node()è¦ä¹ˆå°†ç»™å®šèŠ‚点的父节点的å节点指针更新为其åŒçº§èŠ‚ +点,è¦ä¹ˆæ ¹æ®æƒ…况将å‰ä¸€ä¸ªåŒçº§èŠ‚点附在给定节点的åŒçº§èŠ‚ç‚¹ä¸Šã€‚å°±è¿™æ ·å§ã€‚ :) diff --git a/Documentation/translations/zh_CN/devicetree/usage-model.rst b/Documentation/translations/zh_CN/devicetree/usage-model.rst new file mode 100644 index 000000000000..318a3c6a0114 --- /dev/null +++ b/Documentation/translations/zh_CN/devicetree/usage-model.rst @@ -0,0 +1,330 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/Devicetree/usage-model.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + + +=================== +Linux å’Œ Devicetree +=================== + +Linuxå¯¹è®¾å¤‡æ ‘æ•°æ®çš„使用模型 + +:作者: Grant Likely <grant.likely@secretlab.ca> + +è¿™ç¯‡æ–‡ç« æ述了Linuxå¦‚ä½•ä½¿ç”¨è®¾å¤‡æ ‘ã€‚å…³äºŽè®¾å¤‡æ ‘æ•°æ®æ ¼å¼çš„概述å¯ä»¥åœ¨ +devicetree.org\ [1]_ çš„è®¾å¤‡æ ‘ä½¿ç”¨é¡µé¢ä¸Šæ‰¾åˆ°ã€‚ + +.. [1] https://www.devicetree.org/specifications/ + +"Open Firmware Device Tree",或简称为Devicetree(DT),是一ç§ç”¨äºŽæ述硬 +件的数æ®ç»“æž„å’Œè¯è¨€ã€‚更确切地说,它是一ç§æ“作系统å¯è¯»çš„硬件æè¿°ï¼Œè¿™æ ·æ“ä½œç³»ç»Ÿå°±ä¸ +需è¦å¯¹æœºå™¨çš„细节进行硬编ç 。 + +从结构上看,DTæ˜¯ä¸€æ£µæ ‘ï¼Œæˆ–è€…è¯´æ˜¯å¸¦æœ‰å‘½åèŠ‚ç‚¹çš„æ— çŽ¯å›¾ï¼ŒèŠ‚ç‚¹å¯ä»¥æœ‰ä»»æ„æ•°é‡çš„命å +属性æ¥å°è£…ä»»æ„çš„æ•°æ®ã€‚还å˜åœ¨ä¸€ç§æœºåˆ¶ï¼Œå¯ä»¥åœ¨è‡ªç„¶çš„æ ‘çŠ¶ç»“æž„ä¹‹å¤–åˆ›å»ºä»Žä¸€ä¸ªèŠ‚ç‚¹åˆ° +å¦ä¸€ä¸ªèŠ‚点的任æ„链接。 + +从概念上讲,一套通用的使用惯例,称为 "bindings"(åŽæ–‡è¯‘ä¸ºç»‘å®šï¼‰ï¼Œè¢«å®šä¹‰ä¸ºæ•°æ® +åº”è¯¥å¦‚ä½•å‡ºçŽ°åœ¨æ ‘ä¸ï¼Œä»¥æ述典型的硬件特性,包括数æ®æ€»çº¿ã€ä¸æ–线ã€GPIO连接和外围 +设备。 + +å°½å¯èƒ½ä½¿ç”¨çŽ°æœ‰çš„绑定æ¥æ述硬件,以最大é™åº¦åœ°åˆ©ç”¨çŽ°æœ‰çš„支æŒä»£ç ,但由于属性和节 +点å称是简å•çš„文本å—符串,通过定义新的节点和属性æ¥æ‰©å±•çŽ°æœ‰çš„绑定或创建新的绑定 +很容易。然而,è¦è¦æƒ•çš„是,在创建一个新的绑定之å‰ï¼Œæœ€å¥½å…ˆå¯¹å·²ç»å˜åœ¨çš„东西åšä¸€äº› +功课。目å‰æœ‰ä¸¤ç§ä¸åŒçš„ã€ä¸å…¼å®¹çš„i2cæ€»çº¿çš„ç»‘å®šï¼Œè¿™æ˜¯å› ä¸ºåœ¨åˆ›å»ºæ–°çš„ç»‘å®šæ—¶æ²¡æœ‰äº‹å…ˆ +调查i2c设备在现有系统ä¸æ˜¯å¦‚何被枚举的。 + +1. åŽ†å² +------- +DT最åˆæ˜¯ç”±Open Firmware创建的,作为将数æ®ä»ŽOpen Firmwareä¼ é€’ç»™å®¢æˆ·ç¨‹åº +ï¼ˆå¦‚ä¼ é€’ç»™æ“作系统)的通信方法的一部分。æ“ä½œç³»ç»Ÿä½¿ç”¨è®¾å¤‡æ ‘åœ¨è¿è¡Œæ—¶æŽ¢æµ‹ç¡¬ä»¶çš„æ‹“ +扑结构,从而在没有硬编ç ä¿¡æ¯çš„情况下支æŒå¤§å¤šæ•°å¯ç”¨çš„硬件(å‡è®¾æ‰€æœ‰è®¾å¤‡çš„驱动程 +åºéƒ½å¯ç”¨ï¼‰ã€‚ + +由于Open Firmware通常在PowerPCå’ŒSPARCå¹³å°ä¸Šä½¿ç”¨ï¼Œé•¿æœŸä»¥æ¥ï¼Œå¯¹è¿™äº›æž¶æž„çš„ +Linux支æŒä¸€ç›´ä½¿ç”¨è®¾å¤‡æ ‘。 + +2005年,当PowerPC Linux开始大规模清ç†å¹¶åˆå¹¶32ä½å’Œ64ä½æ”¯æŒæ—¶ï¼Œå†³å®šåœ¨æ‰€æœ‰ +Powerpcå¹³å°ä¸Šè¦æ±‚DT支æŒï¼Œæ— 论它们是å¦ä½¿ç”¨Open Firmware。为了åšåˆ°è¿™ä¸€ç‚¹ï¼Œ +我们创建了一个å«åšæ‰å¹³åŒ–è®¾å¤‡æ ‘ï¼ˆFDT)的DT表示法,它å¯ä»¥ä½œä¸ºä¸€ä¸ªäºŒè¿›åˆ¶çš„blob +ä¼ é€’ç»™å†…æ ¸ï¼Œè€Œä¸éœ€è¦çœŸæ£çš„Open Firmware实现。U-Bootã€kexecå’Œå…¶ä»–å¼•å¯¼ç¨‹åº +被修改,以支æŒä¼ é€’è®¾å¤‡æ ‘äºŒè¿›åˆ¶ï¼ˆdtb)和在引导时修改dtb。DTä¹Ÿè¢«æ·»åŠ åˆ°PowerPC +引导包装器(arch/powerpc/boot/\*)ä¸ï¼Œè¿™æ ·dtbå°±å¯ä»¥è¢«åŒ…è£¹åœ¨å†…æ ¸é•œåƒä¸ï¼Œä»¥ +支æŒå¼•å¯¼çŽ°æœ‰çš„éžDT察觉的固件。 + +一段时间åŽï¼ŒFDT基础架构被普åŠåˆ°äº†æ‰€æœ‰çš„架构ä¸ã€‚åœ¨å†™è¿™ç¯‡æ–‡ç« çš„æ—¶å€™ï¼Œ6个主线架 +构(armã€microblazeã€mipsã€powerpcã€sparcå’Œx86)和1个éžä¸»çº¿æž¶æž„(ios) +有æŸç§ç¨‹åº¦çš„DT支æŒã€‚ + +1. æ•°æ®æ¨¡åž‹ +----------- +å¦‚æžœä½ è¿˜æ²¡æœ‰è¯»è¿‡è®¾å¤‡æ ‘ç”¨æ³•\ [1]_页,那么现在就去读å§ã€‚没关系,我ç‰ç€.... + +2.1 高层次视角 +-------------- +最é‡è¦çš„是è¦æ˜Žç™½ï¼ŒDTåªæ˜¯ä¸€ä¸ªæ述硬件的数æ®ç»“构。它没有什么神奇之处,也ä¸ä¼šç¥ž +奇地让所有的硬件é…置问题消失。它所åšçš„是æ供一ç§è¯è¨€ï¼Œå°†ç¡¬ä»¶é…置与Linuxå†…æ ¸ +(或任何其他æ“作系统)ä¸çš„æ¿å¡å’Œè®¾å¤‡é©±åŠ¨æ”¯æŒè§£è€¦ã€‚使用它å¯ä»¥ä½¿æ¿å¡å’Œè®¾å¤‡æ”¯æŒ +å˜æˆæ•°æ®é©±åŠ¨ï¼›æ ¹æ®ä¼ é€’åˆ°å†…æ ¸çš„æ•°æ®åšå‡ºè®¾ç½®å†³å®šï¼Œè€Œä¸æ˜¯æ ¹æ®æ¯å°æœºå™¨çš„硬编ç 选 +择。 + +ç†æƒ³æƒ…况下,数æ®é©±åŠ¨çš„å¹³å°è®¾ç½®åº”该导致更少的代ç é‡å¤ï¼Œå¹¶ä½¿å…¶æ›´å®¹æ˜“ç”¨ä¸€ä¸ªå†…æ ¸ +é•œåƒæ”¯æŒå„ç§ç¡¬ä»¶ã€‚ + +Linux使用DTæ•°æ®æœ‰ä¸‰ä¸ªä¸»è¦ç›®çš„: + +1) å¹³å°è¯†åˆ«ã€‚ +2) è¿è¡Œæ—¶é…ç½®ï¼Œä»¥åŠ +3) 设备数é‡ã€‚ + +2.2 å¹³å°è¯†åˆ« +------------ +é¦–å…ˆï¼Œå†…æ ¸å°†ä½¿ç”¨DTä¸çš„æ•°æ®æ¥è¯†åˆ«ç‰¹å®šçš„机器。在一个ç†æƒ³çš„世界里,具体的平å°å¯¹ +å†…æ ¸æ¥è¯´å¹¶ä¸é‡è¦ï¼Œå› 为所有的平å°ç»†èŠ‚éƒ½ä¼šè¢«è®¾å¤‡æ ‘ä»¥ä¸€è‡´å’Œå¯é çš„æ–¹å¼å®Œç¾Žæ述。 +但是,硬件并ä¸å®Œç¾Žï¼Œæ‰€ä»¥å†…æ ¸å¿…é¡»åœ¨æ—©æœŸå¯åŠ¨æ—¶è¯†åˆ«æœºå™¨ï¼Œä»¥ä¾¿æœ‰æœºä¼šè¿è¡Œç‰¹å®šäºŽæœº +器的修å¤ç¨‹åºã€‚ + +在大多数情况下,机器的身份是ä¸ç›¸å…³çš„ï¼Œè€Œå†…æ ¸å°†æ ¹æ®æœºå™¨çš„æ ¸å¿ƒCPU或SoCæ¥é€‰æ‹© +设置代ç 。例如,在ARM上,arch/arm/kernel/setup.cä¸çš„setup_arch()将调 +用arch/arm/kernel/devtree.cä¸çš„setup_machine_fdt(),它通过 +machine_desc表æœç´¢å¹¶é€‰æ‹©ä¸Žè®¾å¤‡æ ‘æ•°æ®æœ€åŒ¹é…çš„machine_descã€‚å®ƒé€šè¿‡æŸ¥çœ‹æ ¹ +è®¾å¤‡æ ‘èŠ‚ç‚¹ä¸çš„'compatible'属性,并将其与struct machine_descä¸çš„ +dt_compatåˆ—è¡¨ï¼ˆå¦‚æžœä½ å¥½å¥‡ï¼Œè¯¥åˆ—è¡¨å®šä¹‰åœ¨arch/arm/include/asm/mach/arch.h +ä¸ï¼‰è¿›è¡Œæ¯”较,从而确定最佳匹é…。 + +“compatible†属性包å«ä¸€ä¸ªæŽ’åºçš„å—符串列表,以机器的确切å称开始,åŽé¢æ˜¯ +一个å¯é€‰çš„与之兼容的æ¿å列表,从最兼容到最ä¸å…¼å®¹æŽ’åºã€‚例如,TI BeagleBoard +和它的åŽç»§è€…BeagleBoard xMæ¿çš„æ ¹å…¼å®¹å±žæ€§å¯èƒ½çœ‹èµ·æ¥åˆ†åˆ«ä¸º:: + + compatible = "ti,omap3-beagleboard", "ti,omap3450", "ti,omap3"; + compatible = "ti,omap3-beagleboard-xm", "ti,omap3450", "ti,omap3"; + +å…¶ä¸ "ti,map3-beagleboard-xm "指定了确切的型å·ï¼Œå®ƒè¿˜å£°ç§°å®ƒä¸ŽOMAP 3450 SoC +以åŠä¸€èˆ¬çš„OMP3系列SoCå…¼å®¹ã€‚ä½ ä¼šæ³¨æ„到,该列表从最具体的(确切的æ¿å)到最 +ä¸å…·ä½“的(SoC系列)进行排åºã€‚ + +èªæ˜Žçš„读者å¯èƒ½ä¼šæŒ‡å‡ºï¼ŒBeagle xM也å¯ä»¥å£°ç§°ä¸ŽåŽŸBeagleæ¿å…¼å®¹ã€‚然而,我们应 +该当心在æ¿çº§ä¸Šè¿™æ ·åšï¼Œå› 为通常情况下,å³ä½¿åœ¨åŒä¸€äº§å“系列ä¸ï¼Œæ¯å—æ¿éƒ½æœ‰å¾ˆé«˜ +çš„å˜åŒ–,而且当一å—æ¿å£°ç§°ä¸Žå¦ä¸€å—æ¿å…¼å®¹æ—¶ï¼Œå¾ˆéš¾ç¡®å®šåˆ°åº•æ˜¯ä»€ä¹ˆæ„æ€ã€‚对于高层 +æ¥è¯´ï¼Œæœ€å¥½æ˜¯è°¨æ…Žè¡Œäº‹ï¼Œä¸è¦å£°ç§°ä¸€å—æ¿å与å¦ä¸€å—æ¿å兼容。值得注æ„的例外是, +当一å—æ¿å是å¦ä¸€å—æ¿å的载体时,例如CPU模å—连接到一个载体æ¿ä¸Šã€‚ + +关于兼容值还有一个注æ„事项。在兼容属性ä¸ä½¿ç”¨çš„任何å—符串都必须有文件说明它 +表示什么。在Documentation/devicetree/bindingsä¸æ·»åŠ 兼容å—符串的文档。 + +åŒæ ·åœ¨ARM上,对于æ¯ä¸ªmachine_descï¼Œå†…æ ¸ä¼šæŸ¥çœ‹æ˜¯å¦æœ‰ä»»ä½•dt_compatåˆ—è¡¨æ¡ +目出现在兼容属性ä¸ã€‚如果有,那么该机器_desc就是驱动该机器的候选者。在æœç´¢ +了整个machine_descs表之åŽï¼Œsetup_machine_fdt()æ ¹æ®æ¯ä¸ªmachine_desc +在兼容属性ä¸åŒ¹é…çš„æ¡ç›®ï¼Œè¿”回 “最兼容†的machine_descã€‚å¦‚æžœæ²¡æœ‰æ‰¾åˆ°åŒ¹é… +çš„machine_desc,那么它将返回NULL。 + +这个方案背åŽçš„åŽŸå› æ˜¯è§‚å¯Ÿåˆ°ï¼Œåœ¨å¤§å¤šæ•°æƒ…å†µä¸‹ï¼Œå¦‚æžœå®ƒä»¬éƒ½ä½¿ç”¨ç›¸åŒçš„SoCæˆ–ç›¸åŒ +系列的SoC,一个机器_descå¯ä»¥æ”¯æŒå¤§é‡çš„电路æ¿ã€‚然而,ä¸å¯é¿å…地会有一些例 +外情况,å³ç‰¹å®šçš„æ¿å需è¦ç‰¹æ®Šçš„设置代ç ,这在一般情况下是没有用的。特殊情况 +å¯ä»¥é€šè¿‡åœ¨é€šç”¨è®¾ç½®ä»£ç ä¸æ˜Žç¡®æ£€æŸ¥æœ‰é—®é¢˜çš„æ¿åæ¥å¤„ç†ï¼Œä½†å¦‚æžœè¶…è¿‡å‡ ä¸ªæƒ…å†µä¸‹ï¼Œ +è¿™æ ·åšå¾ˆå¿«å°±ä¼šå˜å¾—很难看和/æˆ–æ— æ³•ç»´æŠ¤ã€‚ + +相å,兼容列表å…许通用机器_desc通过在dt_compat列表ä¸æŒ‡å®šâ€œä¸å¤ªå…¼å®¹â€çš„值 +æ¥æ供对广泛的通用æ¿çš„支æŒã€‚在上é¢çš„例åä¸ï¼Œé€šç”¨æ¿æ”¯æŒå¯ä»¥å£°ç§°ä¸Žâ€œti,ompa3†+或“ti,ompa3450â€å…¼å®¹ã€‚如果在最åˆçš„beagleboard上å‘现了一个bug,需è¦åœ¨ +早期å¯åŠ¨æ—¶ä½¿ç”¨ç‰¹æ®Šçš„å˜é€šä»£ç ,那么å¯ä»¥æ·»åŠ 一个新的machine_desc,实现å˜é€šï¼Œ +并且åªåœ¨â€œti,omap3-beagleboardâ€ä¸ŠåŒ¹é…。 + +PowerPC使用了一个ç¨å¾®ä¸åŒçš„方案,它从æ¯ä¸ªæœºå™¨_descä¸è°ƒç”¨.probe()é’©å, +并使用第一个返回TRUEçš„é’©å。然而,这ç§æ–¹æ³•æ²¡æœ‰è€ƒè™‘到兼容列表的优先级,对于 +新的架构支æŒå¯èƒ½åº”该é¿å…。 + +2.3 è¿è¡Œæ—¶é…ç½® +-------------- +在大多数情况下,DT是将数æ®ä»Žå›ºä»¶ä¼ é€’ç»™å†…æ ¸çš„å”¯ä¸€æ–¹æ³•ï¼Œæ‰€ä»¥ä¹Ÿè¢«ç”¨æ¥ä¼ 递è¿è¡Œ +时和é…置数æ®ï¼Œå¦‚å†…æ ¸å‚æ•°å—符串和initrdé•œåƒçš„ä½ç½®ã€‚ + +这些数æ®å¤§éƒ¨åˆ†éƒ½åŒ…å«åœ¨/chosen节点ä¸ï¼Œå½“å¯åŠ¨Linux时,它看起æ¥å°±åƒè¿™æ ·:: + + chosen { + bootargs = "console=ttyS0,115200 loglevel=8"; + initrd-start = <0xc8000000>; + initrd-end = <0xc8200000>; + }; + +bootargs属性包å«å†…æ ¸å‚数,initrd-\*属性定义initrd blob的地å€å’Œå¤§å°ã€‚注 +æ„initrd-end是initrdæ˜ åƒåŽçš„第一个地å€ï¼Œæ‰€ä»¥è¿™ä¸Žç»“构体资æºçš„通常è¯ä¹‰ä¸ä¸€ +致。选择的节点也å¯ä»¥é€‰æ‹©åŒ…å«ä»»æ„æ•°é‡çš„é¢å¤–属性,用于平å°ç‰¹å®šçš„é…置数æ®ã€‚ + +在早期å¯åŠ¨è¿‡ç¨‹ä¸ï¼Œæž¶æž„设置代ç 通过ä¸åŒçš„辅助回调函数多次调用 +of_scan_flat_dt()æ¥è§£æžè®¾å¤‡æ ‘æ•°æ®ï¼Œç„¶åŽè¿›è¡Œåˆ†é¡µè®¾ç½®ã€‚of_scan_flat_dt() +代ç 扫æè®¾å¤‡æ ‘ï¼Œå¹¶ä½¿ç”¨è¾…åŠ©å‡½æ•°æ¥æå–早期å¯åŠ¨æœŸé—´æ‰€éœ€çš„ä¿¡æ¯ã€‚通常情况下, +early_init_dt_scan_chosen()辅助函数用于解æžæ‰€é€‰èŠ‚ç‚¹ï¼ŒåŒ…æ‹¬å†…æ ¸å‚数, +early_init_dt_scan_root()用于åˆå§‹åŒ–DT地å€ç©ºé—´æ¨¡åž‹ï¼Œearly_init_dt_scan_memory() +用于确定å¯ç”¨RAM的大å°å’Œä½ç½®ã€‚ + +在ARM上,函数setup_machine_fdt()负责在选择支æŒæ¿åçš„æ£ç¡®machine_desc +åŽï¼Œå¯¹è®¾å¤‡æ ‘进行早期扫æ。 + +2.4 è®¾å¤‡æ•°é‡ +------------ +在电路æ¿è¢«è¯†åˆ«åŽï¼Œåœ¨æ—©æœŸé…置数æ®è¢«è§£æžåŽï¼Œå†…æ ¸åˆå§‹åŒ–å¯ä»¥ä»¥æ£å¸¸æ–¹å¼è¿›è¡Œã€‚在 +这个过程ä¸çš„æŸä¸ªæ—¶åˆ»ï¼Œunflatten_device_tree()被调用以将数æ®è½¬æ¢æˆæ›´æœ‰ +效的è¿è¡Œæ—¶è¡¨ç¤ºã€‚这也是调用机器特定设置钩å的时候,比如ARM上的machine_desc +.init_early()ã€.init_irq()å’Œ.init_machine()é’©å。本节的其余部分使用 +了ARM实现的例å,但所有架构在使用DT时都会åšå‡ 乎相åŒçš„事情。 + +从å称上å¯ä»¥çŒœåˆ°ï¼Œ.init_early()用于在å¯åŠ¨è¿‡ç¨‹æ—©æœŸéœ€è¦æ‰§è¡Œçš„任何机器特定设 +置,而.init_irq()则用于设置ä¸æ–处ç†ã€‚使用DT并ä¸ä¼šå®žè´¨æ€§åœ°æ”¹å˜è¿™ä¸¤ä¸ªå‡½æ•°çš„ +行为。如果æ供了DT,那么.init_early()å’Œ.init_irq()都能调用任何一个DT查 +询函数(of_* in include/linux/of*.h),以获得关于平å°çš„é¢å¤–æ•°æ®ã€‚ + +DT上下文ä¸æœ€æœ‰è¶£çš„é’©å是.init_machine(),它主è¦è´Ÿè´£å°†å¹³å°çš„æ•°æ®å¡«å……到 +Linux设备模型ä¸ã€‚历å²ä¸Šï¼Œè¿™åœ¨åµŒå…¥å¼å¹³å°ä¸Šæ˜¯é€šè¿‡åœ¨æ¿å¡support .c文件ä¸å®š +义一组é™æ€æ—¶é’Ÿç»“æž„ã€platform_devices和其他数æ®ï¼Œå¹¶åœ¨.init_machine()ä¸ +大é‡æ³¨å†Œæ¥å®žçŽ°çš„。当使用DT时,就ä¸ç”¨ä¸ºæ¯ä¸ªå¹³å°çš„é™æ€è®¾å¤‡è¿›è¡Œç¡¬ç¼–ç ,å¯ä»¥é€šè¿‡ +解æžDT获得设备列表,并动æ€åˆ†é…设备结构体。 + +最简å•çš„情况是,.init_machine()åªè´Ÿè´£æ³¨å†Œä¸€ä¸ªplatform_devices。 +platform_device是Linux使用的一个概念,用于ä¸èƒ½è¢«ç¡¬ä»¶æ£€æµ‹åˆ°çš„内å˜æˆ–I/Oæ˜ +射的设备,以åŠâ€œå¤åˆâ€æˆ– “虚拟â€è®¾å¤‡ï¼ˆåŽé¢ä¼šè¯¦ç»†ä»‹ç»ï¼‰ã€‚虽然DT没有“平å°è®¾å¤‡â€çš„ +术è¯ï¼Œä½†å¹³å°è®¾å¤‡å¤§è‡´å¯¹åº”äºŽæ ‘æ ¹çš„è®¾å¤‡èŠ‚ç‚¹å’Œç®€å•å†…å˜æ˜ 射总线节点的å节点。 + +现在是举例说明的好时机。下é¢æ˜¯NVIDIA Tegraæ¿çš„è®¾å¤‡æ ‘çš„ä¸€éƒ¨åˆ†:: + + /{ + compatible = "nvidia,harmony", "nvidia,tegra20"; + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&intc>; + + chosen { }; + aliases { }; + + memory { + device_type = "memory"; + reg = <0x00000000 0x40000000>; + }; + + soc { + compatible = "nvidia,tegra20-soc", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + intc: interrupt-controller@50041000 { + compatible = "nvidia,tegra20-gic"; + interrupt-controller; + #interrupt-cells = <1>; + reg = <0x50041000 0x1000>, < 0x50040100 0x0100 >; + }; + + serial@70006300 { + compatible = "nvidia,tegra20-uart"; + reg = <0x70006300 0x100>; + interrupts = <122>; + }; + + i2s1: i2s@70002800 { + compatible = "nvidia,tegra20-i2s"; + reg = <0x70002800 0x100>; + interrupts = <77>; + codec = <&wm8903>; + }; + + i2c@7000c000 { + compatible = "nvidia,tegra20-i2c"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x7000c000 0x100>; + interrupts = <70>; + + wm8903: codec@1a { + compatible = "wlf,wm8903"; + reg = <0x1a>; + interrupts = <347>; + }; + }; + }; + + sound { + compatible = "nvidia,harmony-sound"; + i2s-controller = <&i2s1>; + i2s-codec = <&wm8903>; + }; + }; + +在.init_machine()时,Tegraæ¿æ”¯æŒä»£ç 将需è¦æŸ¥çœ‹è¿™ä¸ªDT,并决定为哪些节点 +创建platform_devicesã€‚ç„¶è€Œï¼Œçœ‹ä¸€ä¸‹è¿™ä¸ªæ ‘ï¼Œå¹¶ä¸èƒ½ç«‹å³çœ‹å‡ºæ¯ä¸ªèŠ‚点代表什么 +类型的设备,甚至ä¸èƒ½çœ‹å‡ºä¸€ä¸ªèŠ‚点是å¦ä»£è¡¨ä¸€ä¸ªè®¾å¤‡ã€‚/chosenã€/aliaseså’Œ +/memory节点是信æ¯èŠ‚点,并ä¸æ述设备(尽管å¯ä»¥è¯´å†…å˜å¯ä»¥è¢«è®¤ä¸ºæ˜¯ä¸€ä¸ªè®¾å¤‡ï¼‰ã€‚ +/soc节点的å节点是内å˜æ˜ 射的设备,但是codec@1a是一个i2c设备,而sound节 +点代表的ä¸æ˜¯ä¸€ä¸ªè®¾å¤‡ï¼Œè€Œæ˜¯å…¶ä»–设备是如何连接在一起以创建音频å系统的。我知 +é“æ¯ä¸ªè®¾å¤‡æ˜¯ä»€ä¹ˆï¼Œå› 为我熟悉电路æ¿çš„è®¾è®¡ï¼Œä½†æ˜¯å†…æ ¸æ€Žä¹ˆçŸ¥é“æ¯ä¸ªèŠ‚点该怎么åšï¼Ÿ + +诀çªåœ¨äºŽï¼Œå†…æ ¸ä»Žæ ‘çš„æ ¹éƒ¨å¼€å§‹ï¼Œå¯»æ‰¾å…·æœ‰â€œå…¼å®¹â€å±žæ€§çš„节点。首先,一般认为任何 +具有“兼容â€å±žæ€§çš„节点都代表æŸç§è®¾å¤‡ï¼›å…¶æ¬¡ï¼Œå¯ä»¥è®¤ä¸ºæ ‘æ ¹çš„ä»»ä½•èŠ‚ç‚¹è¦ä¹ˆç›´æŽ¥è¿ž +接到处ç†å™¨æ€»çº¿ä¸Šï¼Œè¦ä¹ˆæ˜¯æ— 法用其他方å¼æè¿°çš„æ‚项系统设备。对于这些节点ä¸çš„ +æ¯ä¸€ä¸ªï¼ŒLinux都会分é…和注册一个platform_device,它åˆå¯èƒ½è¢«ç»‘定到一个 +platform_driver。 + +为什么为这些节点使用platform_device是一个安全的å‡è®¾ï¼Ÿå—¯ï¼Œå°±Linux对设备 +的建模方å¼è€Œè¨€ï¼Œå‡ 乎所有的总线类型都å‡å®šå…¶è®¾å¤‡æ˜¯æ€»çº¿æŽ§åˆ¶å™¨çš„å©åã€‚ä¾‹å¦‚ï¼Œæ¯ +个i2c_client是i2c_master的一个å节点。æ¯ä¸ªspi_device都是SPI总线的一 +个å节点。类似的还有USBã€PCIã€MDIOç‰ã€‚åŒæ ·çš„层次结构也出现在DTä¸ï¼ŒI2C设 +备节点åªä½œä¸ºI2C总线节点的å节点出现。åŒç†ï¼ŒSPIã€MDIOã€USBç‰ç‰ã€‚唯一ä¸éœ€ +è¦ç‰¹å®šç±»åž‹çš„父设备的设备是platform_devices(和amba_devices,但åŽé¢ä¼š +详细介ç»ï¼‰ï¼Œå®ƒä»¬å°†æ„‰å¿«åœ°è¿è¡Œåœ¨Linux/sys/devicesæ ‘çš„åº•éƒ¨ã€‚å› æ¤ï¼Œå¦‚果一个 +DT节点ä½äºŽæ ‘çš„æ ¹éƒ¨ï¼Œé‚£ä¹ˆå®ƒçœŸçš„å¯èƒ½æœ€å¥½æ³¨å†Œä¸ºplatform_device。 + +Linuxæ¿æ”¯æŒä»£ç 调用of_platform_populate(NULL, NULL, NULL, NULL)æ¥ +å¯åŠ¨æ ‘æ ¹çš„è®¾å¤‡å‘现。å‚数都是NULLï¼Œå› ä¸ºå½“ä»Žæ ‘çš„æ ¹éƒ¨å¼€å§‹æ—¶ï¼Œä¸éœ€è¦æ供一个起 +始节点(第一个NULL),一个父结构设备(最åŽä¸€ä¸ªNULLï¼‰ï¼Œè€Œä¸”æˆ‘ä»¬æ²¡æœ‰ä½¿ç”¨åŒ¹é… +表(尚未)。对于åªéœ€è¦æ³¨å†Œè®¾å¤‡çš„æ¿å,除了of_platform_populate()的调用, +.init_machine()å¯ä»¥å®Œå…¨ä¸ºç©ºã€‚ + +在Tegra的例åä¸ï¼Œè¿™è¯´æ˜Žäº†/socå’Œ/sound节点,但是SoC节点的å节点呢?它们 +ä¸åº”该也被注册为平å°è®¾å¤‡å—?对于Linux DT支æŒï¼Œä¸€èˆ¬çš„行为是å设备在驱动 +.probe()æ—¶è¢«çˆ¶è®¾å¤‡é©±åŠ¨æ³¨å†Œã€‚å› æ¤ï¼Œä¸€ä¸ªi2c总线设备驱动程åºå°†ä¸ºæ¯ä¸ªå节点 +注册一个i2c_client,一个SPI总线驱动程åºå°†æ³¨å†Œå…¶spi_deviceå节点,其他 +总线类型也是如æ¤ã€‚æ ¹æ®è¯¥æ¨¡åž‹ï¼Œå¯ä»¥ç¼–写一个与SoC节点绑定的驱动程åºï¼Œå¹¶ç®€å• +地为其æ¯ä¸ªå节点注册platform_device。æ¿å¡æ”¯æŒä»£ç 将分é…和注册一个SoC设 +备,一个(ç†è®ºä¸Šçš„)SoC设备驱动程åºå¯ä»¥ç»‘定到SoC设备,并在其.probe()é’© +ä¸ä¸º/soc/interruptcontrollerã€/soc/serialã€/soc/i2så’Œ/soc/i2c注 +册platform_devices。很简å•ï¼Œå¯¹å—? + +实际上,事实è¯æ˜Žï¼Œå°†ä¸€äº›platform_deviceçš„å设备注册为更多的platform_device +是一ç§å¸¸è§çš„模å¼ï¼Œè®¾å¤‡æ ‘支æŒä»£ç åæ˜ äº†è¿™ä¸€ç‚¹ï¼Œå¹¶ä½¿ä¸Šè¿°ä¾‹å更简å•ã€‚ +of_platform_populate()的第二个å‚数是一个of_device_id表,任何与该表 +ä¸çš„æ¡ç›®ç›¸åŒ¹é…的节点也将获得其å节点的注册。在Tegra的例åä¸ï¼Œä»£ç å¯ä»¥æ˜¯ +è¿™æ ·çš„:: + + static void __init harmony_init_machine(void) + { + /* ... */ + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); + } + +“simple-busâ€åœ¨Devicetree规范ä¸è¢«å®šä¹‰ä¸ºä¸€ä¸ªå±žæ€§ï¼Œæ„味ç€ä¸€ä¸ªç®€å•çš„内å˜æ˜ å°„ +的总线,所以of_platform_populate()代ç å¯ä»¥è¢«å†™æˆåªæ˜¯å‡è®¾ç®€å•æ€»çº¿å…¼å®¹çš„节 +点将总是被é历。然而,我们把它作为一个å‚æ•°ä¼ å…¥ï¼Œä»¥ä¾¿ç”µè·¯æ¿æ”¯æŒä»£ç å¯ä»¥éšæ—¶è¦† +盖默认行为。 + +[需è¦æ·»åŠ å…³äºŽæ·»åŠ i2c/spi/etcå设备的讨论] 。 + +附录A:AMBA设备 +--------------- + +ARM Primecell是连接到ARM AMBA总线的æŸç§è®¾å¤‡ï¼Œå®ƒåŒ…括对硬件检测和电æºç®¡ç† +的一些支æŒã€‚在Linuxä¸ï¼Œamba_deviceå’Œamba_bus_type结构体被用æ¥è¡¨ç¤º +Primecell设备。然而,棘手的一点是,AMBA总线上的所有设备并éžéƒ½æ˜¯Primecell, +而且对于Linuxæ¥è¯´ï¼Œå…¸åž‹çš„情况是amba_deviceå’Œplatform_deviceå®žä¾‹éƒ½æ˜¯åŒ +一总线段的åŒä¹‰è¯ã€‚ + +当使用DT时,这给of_platform_populate()带æ¥äº†é—®é¢˜ï¼Œå› 为它必须决定是å¦å°† +æ¯ä¸ªèŠ‚点注册为platform_device或amba_device。ä¸å¹¸çš„是,这使设备创建模型 +å˜å¾—有点å¤æ‚,但解决方案原æ¥å¹¶ä¸æ˜¯å¤ªå…·æœ‰ä¾µç•¥æ€§ã€‚如果一个节点与“arm,amba-primecell†+兼容,那么of_platform_populate()将把它注册为amba_device而ä¸æ˜¯ +platform_device。 diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst index 46e14ec9963d..88d8df957a78 100644 --- a/Documentation/translations/zh_CN/index.rst +++ b/Documentation/translations/zh_CN/index.rst @@ -5,7 +5,7 @@ \renewcommand\thesection* \renewcommand\thesubsection* \kerneldocCJKon - \kerneldocBeginSC + \kerneldocBeginSC{ .. _linux_doc_zh: @@ -56,10 +56,14 @@ TODOList: 下列文档æè¿°äº†å†…æ ¸éœ€è¦çš„å¹³å°å›ºä»¶ç›¸å…³ä¿¡æ¯ã€‚ +.. toctree:: + :maxdepth: 2 + + devicetree/index + TODOList: * firmware-guide/index -* devicetree/index 应用程åºå¼€å‘人员文档 -------------------- @@ -104,14 +108,17 @@ TODOList: :maxdepth: 2 core-api/index + accounting/index cpu-freq/index iio/index + infiniband/index + power/index + virt/index sound/index filesystems/index - virt/index - infiniband/index - accounting/index scheduler/index + vm/index + peci/index TODOList: @@ -129,7 +136,6 @@ TODOList: * netlabel/index * networking/index * pcmcia/index -* power/index * target/index * timers/index * spi/index @@ -140,7 +146,6 @@ TODOList: * gpu/index * security/index * crypto/index -* vm/index * bpf/index * usb/index * PCI/index @@ -198,4 +203,4 @@ TODOList: .. raw:: latex - \kerneldocEndSC + }\kerneldocEndSC diff --git a/Documentation/translations/zh_CN/peci/index.rst b/Documentation/translations/zh_CN/peci/index.rst new file mode 100644 index 000000000000..4f6694c828fa --- /dev/null +++ b/Documentation/translations/zh_CN/peci/index.rst @@ -0,0 +1,26 @@ +.. SPDX-License-Identifier: GPL-2.0-only +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/peci/index.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + +================= +Linux PECI å系统 +================= + +.. toctree:: + + + peci + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/translations/zh_CN/peci/peci.rst b/Documentation/translations/zh_CN/peci/peci.rst new file mode 100644 index 000000000000..a3b4f99b994c --- /dev/null +++ b/Documentation/translations/zh_CN/peci/peci.rst @@ -0,0 +1,54 @@ +.. SPDX-License-Identifier: GPL-2.0-only +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/peci/peci.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + +==== +概述 +==== + +å¹³å°çŽ¯å¢ƒæŽ§åˆ¶æŽ¥å£ï¼ˆPECI)是英特尔处ç†å™¨å’Œç®¡ç†æŽ§åˆ¶å™¨ï¼ˆå¦‚底æ¿ç®¡ç†æŽ§åˆ¶å™¨ï¼ŒBMC) +之间的一个通信接å£ã€‚PECIæ供的æœåŠ¡å…许管ç†æŽ§åˆ¶å™¨é€šè¿‡è®¿é—®å„ç§å¯„å˜å™¨æ¥é…ç½®ã€ç›‘ +控和调试平å°ã€‚它定义了一个专门的命令å议,管ç†æŽ§åˆ¶å™¨ä½œä¸ºPECIçš„å‘起者,处ç†å™¨ +作为PECIçš„å“应者。PECIå¯ä»¥ç”¨äºŽåŸºäºŽå•å¤„ç†å™¨å’Œå¤šå¤„ç†å™¨çš„系统ä¸ã€‚ + +注æ„:英特尔PECI规范没有作为专门的文件å‘布,而是作为英特尔CPU的外部设计规范 +(EDS)的一部分。外部设计规范通常是ä¸å…¬å¼€çš„。 + +PECI 线 +--------- + +PECI线接å£ä½¿ç”¨å•çº¿è¿›è¡Œè‡ªé”和数æ®ä¼ 输。它ä¸éœ€è¦ä»»ä½•é¢å¤–的控制线--物ç†å±‚是一个 +自é”çš„å•çº¿æ€»çº¿ä¿¡å·ï¼Œæ¯ä¸€ä¸ªæ¯”特都从接近零ä¼çš„空闲状æ€å¼€å§‹é©±åŠ¨ã€ä¸Šå‡è¾¹ç¼˜ã€‚驱动高 +电平信å·çš„æŒç»æ—¶é—´å¯ä»¥ç¡®å®šä½å€¼æ˜¯é€»è¾‘ “0†还是逻辑 “1â€ã€‚PECI线还包括与æ¯ä¸ªä¿¡ +æ¯å»ºç«‹çš„å¯å˜æ•°æ®é€ŸçŽ‡ã€‚ + +对于PECI线,æ¯ä¸ªå¤„ç†å™¨åŒ…将在一个定义的范围内利用唯一的ã€å›ºå®šçš„地å€ï¼Œè¯¥åœ°å€åº” +该与处ç†å™¨æ’座ID有固定的关系--如果其ä¸ä¸€ä¸ªå¤„ç†å™¨è¢«ç§»é™¤ï¼Œå®ƒä¸ä¼šå½±å“其余处ç†å™¨ +的地å€ã€‚ + +PECIå系统代ç 内嵌文档 +------------------------ + +该APIåœ¨ä»¥ä¸‹å†…æ ¸ä»£ç ä¸: + +include/linux/peci.h + +drivers/peci/internal.h + +drivers/peci/core.c + +drivers/peci/request.c + +PECI CPU 驱动 API +------------------- + +该APIåœ¨ä»¥ä¸‹å†…æ ¸ä»£ç ä¸: + +drivers/peci/cpu.c diff --git a/Documentation/translations/zh_CN/power/energy-model.rst b/Documentation/translations/zh_CN/power/energy-model.rst new file mode 100644 index 000000000000..c7da1b6aefee --- /dev/null +++ b/Documentation/translations/zh_CN/power/energy-model.rst @@ -0,0 +1,190 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/power/energy-model.rst + +:翻译: + + å”艺舟 Tang Yizhou <tangyeechou@gmail.com> + +============ +设备能é‡æ¨¡åž‹ +============ + +1. 概述 +------- + +能é‡æ¨¡åž‹ï¼ˆEM)框架是一ç§é©±åŠ¨ç¨‹åºä¸Žå†…æ ¸å系统之间的接å£ã€‚å…¶ä¸é©±åŠ¨ç¨‹åºäº†è§£ä¸åŒ +æ€§èƒ½å±‚çº§çš„è®¾å¤‡æ‰€æ¶ˆè€—çš„åŠŸçŽ‡ï¼Œè€Œå†…æ ¸å系统愿æ„使用该信æ¯åšå‡ºèƒ½é‡æ„ŸçŸ¥å†³ç–。 + +设备所消耗的功率的信æ¯æ¥æºåœ¨ä¸åŒçš„å¹³å°ä¸Šå¯èƒ½æœ‰å¾ˆå¤§çš„ä¸åŒã€‚这些功率æˆæœ¬åœ¨æŸäº› +情况下å¯ä»¥ä½¿ç”¨è®¾å¤‡æ ‘æ•°æ®æ¥ä¼°ç®—。在其它情况下,固件会更清楚。或者,用户空间å¯èƒ½ +是最清楚的。以æ¤ç±»æŽ¨ã€‚为了é¿å…æ¯ä¸€ä¸ªå®¢æˆ·ç«¯å系统对æ¯ä¸€ç§å¯èƒ½çš„ä¿¡æ¯æºè‡ªå·±é‡æ–° +实现支æŒï¼ŒEMæ¡†æž¶ä½œä¸ºä¸€ä¸ªæŠ½è±¡å±‚ä»‹å…¥ï¼Œå®ƒåœ¨å†…æ ¸ä¸å¯¹åŠŸçŽ‡æˆæœ¬è¡¨çš„æ ¼å¼è¿›è¡Œæ ‡å‡†åŒ–, +å› æ¤èƒ½å¤Ÿé¿å…多余的工作。 + +功率值å¯ä»¥ç”¨æ¯«ç“¦æˆ–“抽象刻度â€è¡¨ç¤ºã€‚多个å系统å¯èƒ½ä½¿ç”¨EM,由系统集æˆå•†æ¥æ£€æŸ¥ +功率值刻度类型的è¦æ±‚是å¦æ»¡è¶³ã€‚å¯ä»¥åœ¨èƒ½é‡æ„ŸçŸ¥è°ƒåº¦å™¨çš„文档ä¸æ‰¾åˆ°ä¸€ä¸ªä¾‹å +Documentation/scheduler/sched-energy.rst。对于一些å系统,比如çƒèƒ½æˆ– +powercap,用“抽象刻度â€æ述功率值å¯èƒ½ä¼šå¯¼è‡´é—®é¢˜ã€‚这些å系统对过去使用的功率的 +ä¼°ç®—å€¼æ›´æ„Ÿå…´è¶£ï¼Œå› æ¤å¯èƒ½éœ€è¦çœŸå®žçš„毫瓦。这些è¦æ±‚的一个例åå¯ä»¥åœ¨æ™ºèƒ½åŠŸçŽ‡åˆ†é… +Documentation/driver-api/thermal/power_allocator.rst文档ä¸æ‰¾åˆ°ã€‚ + +å†…æ ¸å系统å¯èƒ½ï¼ˆåŸºäºŽEMå†…éƒ¨æ ‡å¿—ä½ï¼‰å®žçŽ°äº†å¯¹EM注册设备是å¦å…·æœ‰ä¸ä¸€è‡´åˆ»åº¦çš„自动 +检查。è¦è®°ä½çš„é‡è¦äº‹æƒ…是,当功率值以“抽象刻度â€è¡¨ç¤ºæ—¶ï¼Œä»Žä¸æŽ¨å¯¼ä»¥æ¯«ç„¦è€³ä¸ºå•ä½ +的真实能é‡æ¶ˆè€—是ä¸å¯èƒ½çš„。 + +下图æ述了一个驱动的例å(这里是针对Arm的,但该方法适用于任何体系结构),它 +å‘EM框架æ供了功率æˆæœ¬ï¼Œæ„Ÿå…´è¶£çš„客户端å¯ä»Žä¸è¯»å–æ•°æ®:: + + +---------------+ +-----------------+ +---------------+ + | Thermal (IPA) | | Scheduler (EAS) | | Other | + +---------------+ +-----------------+ +---------------+ + | | em_cpu_energy() | + | | em_cpu_get() | + +---------+ | +---------+ + | | | + v v v + +---------------------+ + | Energy Model | + | Framework | + +---------------------+ + ^ ^ ^ + | | | em_dev_register_perf_domain() + +----------+ | +---------+ + | | | + +---------------+ +---------------+ +--------------+ + | cpufreq-dt | | arm_scmi | | Other | + +---------------+ +---------------+ +--------------+ + ^ ^ ^ + | | | + +--------------+ +---------------+ +--------------+ + | Device Tree | | Firmware | | ? | + +--------------+ +---------------+ +--------------+ + +对于CPU设备,EM框架管ç†ç€ç³»ç»Ÿä¸æ¯ä¸ªâ€œæ€§èƒ½åŸŸâ€çš„功率æˆæœ¬è¡¨ã€‚一个性能域是一组 +性能一起伸缩的CPU。性能域通常与CPUFreqç–略具有1对1æ˜ å°„ã€‚ä¸€ä¸ªæ€§èƒ½åŸŸä¸çš„ +所有CPUè¦æ±‚具有相åŒçš„微架构。ä¸åŒæ€§èƒ½åŸŸä¸çš„CPUå¯ä»¥æœ‰ä¸åŒçš„微架构。 + + +2. æ ¸å¿ƒAPI +---------- + +2.1 é…置选项 +^^^^^^^^^^^^ + +必须使能CONFIG_ENERGY_MODELæ‰èƒ½ä½¿ç”¨EM框架。 + + +2.2 性能域的注册 +^^^^^^^^^^^^^^^^ + +“高级â€EM的注册 +~~~~~~~~~~~~~~~~ + +“高级â€EMå› å®ƒå…许驱动æ供更精确的功率模型而得å。它并ä¸å—é™äºŽæ¡†æž¶ä¸çš„一些已 +实现的数å¦å…¬å¼ï¼ˆå°±åƒâ€œç®€å•â€EMé‚£æ ·ï¼‰ã€‚å®ƒå¯ä»¥æ›´å¥½åœ°åæ˜ æ¯ä¸ªæ€§èƒ½çŠ¶æ€çš„实际功率 +测é‡ã€‚å› æ¤ï¼Œåœ¨EMé™æ€åŠŸçŽ‡ï¼ˆæ¼ç”µæµåŠŸçŽ‡ï¼‰æ˜¯é‡è¦çš„情况下,应该首选这ç§æ³¨å†Œæ–¹å¼ã€‚ + +驱动程åºåº”通过以下API将性能域注册到EM框架ä¸:: + + int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, + struct em_data_callback *cb, cpumask_t *cpus, bool milliwatts); + +驱动程åºå¿…é¡»æ供一个回调函数,为æ¯ä¸ªæ€§èƒ½çŠ¶æ€è¿”回<频率,功率>å…ƒç»„ã€‚é©±åŠ¨ç¨‹åº +æ供的回调函数å¯ä»¥è‡ªç”±åœ°ä»Žä»»ä½•ç›¸å…³ä½ç½®ï¼ˆDTã€å›ºä»¶......)以åŠä»¥ä»»ä½•è¢«è®¤ä¸ºæ˜¯ +å¿…è¦çš„æ–¹å¼èŽ·å–æ•°æ®ã€‚åªæœ‰å¯¹äºŽCPU设备,驱动程åºå¿…须使用cpumask指定性能域的CPU。 +对于CPU以外的其他设备,最åŽä¸€ä¸ªå‚数必须被设置为NULL。 + +最åŽä¸€ä¸ªå‚数“milliwattsâ€ï¼ˆæ¯«ç“¦ï¼‰è®¾ç½®æˆæ£ç¡®çš„值是很é‡è¦çš„,使用EMçš„å†…æ ¸ +å系统å¯èƒ½ä¼šä¾èµ–è¿™ä¸ªæ ‡å¿—æ¥æ£€æŸ¥æ‰€æœ‰çš„EM设备是å¦ä½¿ç”¨ç›¸åŒçš„刻度。如果有ä¸åŒçš„ +刻度,这些å系统å¯èƒ½å†³å®šï¼šè¿”回è¦å‘Š/错误,åœæ¢å·¥ä½œæˆ–崩溃(panic)。 + +关于实现这个回调函数的驱动程åºçš„例å,å‚è§ç¬¬3节。或者在第2.4节阅读这个API +的更多文档。 + + +“简å•â€EM的注册 +~~~~~~~~~~~~~~~~ + +“简å•â€EM是用框架的辅助函数cpufreq_register_em_with_opp()注册的。它实现了 +一个和以下数å¦å…¬å¼ç´§å¯†ç›¸å…³çš„功率模型:: + + Power = C * V^2 * f + +使用这ç§æ–¹æ³•æ³¨å†Œçš„EMå¯èƒ½æ— 法æ£ç¡®åæ˜ çœŸå®žè®¾å¤‡çš„ç‰©ç†ç‰¹æ€§ï¼Œä¾‹å¦‚当é™æ€åŠŸçŽ‡ +(æ¼ç”µæµåŠŸçŽ‡ï¼‰å¾ˆé‡è¦æ—¶ã€‚ + + +2.3 访问性能域 +^^^^^^^^^^^^^^ + +有两个API函数æ供对能é‡æ¨¡åž‹çš„访问。em_cpu_get()以CPU id为å‚数,em_pd_get() +以设备指针为å‚数。使用哪个接å£å–决于å系统,但对于CPU设备æ¥è¯´ï¼Œè¿™ä¸¤ä¸ªå‡½æ•°éƒ½è¿” +回相åŒçš„性能域。 + +对CPU的能é‡æ¨¡åž‹æ„Ÿå…´è¶£çš„å系统å¯ä»¥é€šè¿‡em_cpu_get() API检索它。在创建性能域时 +分é…一次能é‡æ¨¡åž‹è¡¨ï¼Œå®ƒä¿å˜åœ¨å†…å˜ä¸ä¸è¢«ä¿®æ”¹ã€‚ + +一个性能域所消耗的能é‡å¯ä»¥ä½¿ç”¨em_cpu_energy() APIæ¥ä¼°ç®—。该估算å‡å®šCPU设备 +使用的CPUfreq监管器是schedutil。当å‰è¯¥è®¡ç®—ä¸èƒ½æ供给其它类型的设备。 + +关于上述API的更多细节å¯ä»¥åœ¨ ``<linux/energy_model.h>`` 或第2.4节ä¸æ‰¾åˆ°ã€‚ + + +2.4 API的细节æè¿° +^^^^^^^^^^^^^^^^^ +å‚è§ include/linux/energy_model.h å’Œ kernel/power/energy_model.c çš„kernel doc。 + +3. 驱动示例 +----------- + +CPUFreq框架支æŒä¸“用的回调函数,用于为指定的CPU(们)注册EM: +cpufreq_driver::register_em()。这个回调必须为æ¯ä¸ªç‰¹å®šçš„驱动程åºæ£ç¡®å®žçŽ°ï¼Œ +å› ä¸ºæ¡†æž¶ä¼šåœ¨è®¾ç½®è¿‡ç¨‹ä¸é€‚时地调用它。本节æ供了一个简å•çš„例å,展示CPUFreq驱动 +在能é‡æ¨¡åž‹æ¡†æž¶ä¸ä½¿ç”¨ï¼ˆå‡çš„)“fooâ€å议注册性能域。该驱动实现了一个est_power() +函数æ供给EM框架:: + + -> drivers/cpufreq/foo_cpufreq.c + + 01 static int est_power(unsigned long *mW, unsigned long *KHz, + 02 struct device *dev) + 03 { + 04 long freq, power; + 05 + 06 /* 使用“fooâ€åè®®è®¾ç½®é¢‘çŽ‡ä¸Šé™ */ + 07 freq = foo_get_freq_ceil(dev, *KHz); + 08 if (freq < 0); + 09 return freq; + 10 + 11 /* 估算相关频率下设备的功率æˆæœ¬ */ + 12 power = foo_estimate_power(dev, freq); + 13 if (power < 0); + 14 return power; + 15 + 16 /* 将这些值返回给EM框架 */ + 17 *mW = power; + 18 *KHz = freq; + 19 + 20 return 0; + 21 } + 22 + 23 static void foo_cpufreq_register_em(struct cpufreq_policy *policy) + 24 { + 25 struct em_data_callback em_cb = EM_DATA_CB(est_power); + 26 struct device *cpu_dev; + 27 int nr_opp; + 28 + 29 cpu_dev = get_cpu_device(cpumask_first(policy->cpus)); + 30 + 31 /* 查找该ç–略支æŒçš„OPPæ•°é‡ */ + 32 nr_opp = foo_get_nr_opp(policy); + 33 + 34 /* 并注册新的性能域 */ + 35 em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus, + 36 true); + 37 } + 38 + 39 static struct cpufreq_driver foo_cpufreq_driver = { + 40 .register_em = foo_cpufreq_register_em, + 41 }; diff --git a/Documentation/translations/zh_CN/power/index.rst b/Documentation/translations/zh_CN/power/index.rst new file mode 100644 index 000000000000..bc54983ba515 --- /dev/null +++ b/Documentation/translations/zh_CN/power/index.rst @@ -0,0 +1,56 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/power/index.rst + +:翻译: + + å”艺舟 Tang Yizhou <tangyeechou@gmail.com> + +======== +电æºç®¡ç† +======== + +.. toctree:: + :maxdepth: 1 + + energy-model + opp + +TODOList: + + * apm-acpi + * basic-pm-debugging + * charger-manager + * drivers-testing + * freezing-of-tasks + * pci + * pm_qos_interface + * power_supply_class + * runtime_pm + * s2ram + * suspend-and-cpuhotplug + * suspend-and-interrupts + * swsusp-and-swap-files + * swsusp-dmcrypt + * swsusp + * video + * tricks + + * userland-swsusp + + * powercap/powercap + * powercap/dtpm + + * regulator/consumer + * regulator/design + * regulator/machine + * regulator/overview + * regulator/regulator + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/translations/zh_CN/power/opp.rst b/Documentation/translations/zh_CN/power/opp.rst new file mode 100644 index 000000000000..8d6e3f6f6202 --- /dev/null +++ b/Documentation/translations/zh_CN/power/opp.rst @@ -0,0 +1,341 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/power/opp.rst + +:翻译: + + å”艺舟 Tang Yizhou <tangyeechou@gmail.com> + +====================== +æ“作性能值(OPP)库 +====================== + +(C) 2009-2010 Nishanth Menon <nm@ti.com>, å¾·å·žä»ªå™¨å…¬å¸ + +.. 目录 + + 1. 简介 + 2. OPP链表åˆå§‹æ³¨å†Œ + 3. OPPæœç´¢å‡½æ•° + 4. OPPå¯ç”¨æ€§æŽ§åˆ¶å‡½æ•° + 5. OPPæ•°æ®æ£€ç´¢å‡½æ•° + 6. æ•°æ®ç»“æž„ + +1. 简介 +======= + +1.1 何为æ“作性能值(OPP)? +------------------------------ + +当今å¤æ‚çš„å•ç‰‡ç³»ç»Ÿï¼ˆSoC)由多个å模å—组æˆï¼Œè¿™äº›å模å—会è”åˆå·¥ä½œã€‚在一个执行ä¸åŒç”¨ä¾‹ +çš„æ“作系统ä¸ï¼Œå¹¶ä¸æ˜¯SoCä¸çš„所有模å—都需è¦ä¸€ç›´ä»¥æœ€é«˜é¢‘率工作。为了促æˆè¿™ä¸€ç‚¹ï¼ŒSoCä¸ +çš„å模å—被分组为ä¸åŒåŸŸï¼Œå…许一些域以较低的电压和频率è¿è¡Œï¼Œè€Œå…¶å®ƒåŸŸåˆ™ä»¥è¾ƒé«˜çš„“电压/ +频率对â€è¿è¡Œã€‚ + +设备按域支æŒçš„由频率电压对组æˆçš„离散的元组的集åˆï¼Œè¢«ç§°ä¸ºæ“作性能值(组),或OPPs。 + +举例æ¥è¯´ï¼š + +让我们考虑一个支æŒä¸‹è¿°é¢‘率ã€ç”µåŽ‹å€¼çš„内å˜ä¿æŠ¤å•å…ƒï¼ˆMPU)设备: +{300MHz,最低电压为1V}, {800MHz,最低电压为1.2V}, {1GHz,最低电压为1.3V} + +我们能将它们表示为3个OPP,如下述{Hz, uV}元组(译注:频率的å•ä½æ˜¯èµ«å…¹ï¼Œç”µåŽ‹çš„å•ä½æ˜¯ +å¾®ä¼ï¼‰ã€‚ + +- {300000000, 1000000} +- {800000000, 1200000} +- {1000000000, 1300000} + +1.2 æ“作性能值库 +---------------- + +OPP库æ供了一组辅助函数æ¥ç»„织和查询OPPä¿¡æ¯ã€‚该库ä½äºŽdrivers/opp/目录下,其头文件 +ä½äºŽinclude/linux/pm_opp.hä¸ã€‚OPP库å¯ä»¥é€šè¿‡å¼€å¯CONFIG_PM_OPPæ¥å¯ç”¨ã€‚æŸäº›SoC, +如德州仪器的OMAP框架å…许在ä¸éœ€è¦cpufreq的情况下å¯é€‰åœ°åœ¨æŸä¸€OPP下å¯åŠ¨ã€‚ + +OPP库的典型用法如下:: + + (用户) -> 注册一个默认的OPPé›†åˆ -> (库) + (SoC框架) -> 在必è¦çš„情况下,对æŸäº›OPP进行修改 -> OPP layer + -> æœç´¢/检索信æ¯çš„查询 -> + +OPP层期望æ¯ä¸ªåŸŸç”±ä¸€ä¸ªå”¯ä¸€çš„设备指针æ¥è¡¨ç¤ºã€‚SoC框架在OPP层为æ¯ä¸ªè®¾å¤‡æ³¨å†Œäº†ä¸€ç»„åˆå§‹ +OPP。这个链表的长度被期望是一个最优化的å°æ•°å—,通常æ¯ä¸ªè®¾å¤‡å¤§çº¦5个。åˆå§‹é“¾è¡¨åŒ…å«äº† +一个OPP集åˆï¼Œè¿™ä¸ªé›†åˆè¢«æœŸæœ›èƒ½åœ¨ç³»ç»Ÿä¸å®‰å…¨ä½¿èƒ½ã€‚ + +关于OPPå¯ç”¨æ€§çš„说明 +^^^^^^^^^^^^^^^^^^^ + +éšç€ç³»ç»Ÿçš„è¿è¡Œï¼ŒSoC框架å¯èƒ½ä¼šåŸºäºŽå„ç§å¤–éƒ¨å› ç´ é€‰æ‹©è®©æŸäº›OPP在æ¯ä¸ªè®¾å¤‡ä¸Šå¯ç”¨æˆ–ä¸å¯ç”¨ï¼Œ +示例:温度管ç†æˆ–其它异常场景ä¸ï¼ŒSoC框架å¯èƒ½ä¼šé€‰æ‹©ç¦ç”¨ä¸€ä¸ªè¾ƒé«˜é¢‘率的OPPä»¥å®‰å…¨åœ°ç»§ç» +è¿è¡Œï¼Œç›´åˆ°è¯¥OPP被é‡æ–°å¯ç”¨ï¼ˆå¦‚æžœå¯èƒ½ï¼‰ã€‚ + +OPP库在它的实现ä¸è¾¾æˆäº†è¿™ä¸ªæ¦‚念。以下æ“作函数åªèƒ½å¯¹å¯ç”¨çš„OPP使用: +dev_pm_opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, +dev_pm_opp_get_freq, dev_pm_opp_get_opp_count。 + +dev_pm_opp_find_freq_exact是用æ¥æŸ¥æ‰¾OPP指针的,该指针å¯è¢«ç”¨åœ¨dev_pm_opp_enable/ +disable函数,使一个OPP在被需è¦æ—¶å˜ä¸ºå¯ç”¨ã€‚ + +è¦å‘Šï¼šå¦‚果对一个设备调用dev_pm_opp_enable/disable函数,OPP库的用户应该使用 +dev_pm_opp_get_opp_countæ¥åˆ·æ–°OPPçš„å¯ç”¨æ€§è®¡æ•°ã€‚触å‘这些的具体机制,或者对有ä¾èµ–çš„ +å系统(比如cpufreq)的通知机制,都是由使用OPP库的SoC特定框架酌情处ç†çš„。在这些æ“作 +ä¸ï¼ŒåŒæ ·éœ€è¦æ³¨æ„刷新cpufreq表。 + +2. OPP链表åˆå§‹æ³¨å†Œ +================== +SoC的实现会è¿ä»£è°ƒç”¨dev_pm_opp_add函数æ¥å¢žåŠ æ¯ä¸ªè®¾å¤‡çš„OPP。预期SoC框架将以最优的 +æ–¹å¼æ³¨å†ŒOPPæ¡ç›® - 典型的数å—范围å°äºŽ5。通过注册OPP生æˆçš„OPP链表,在整个设备è¿è¡Œè¿‡ç¨‹ +ä¸ç”±OPP库维护。SoC框架éšåŽå¯ä»¥ä½¿ç”¨dev_pm_opp_enable / disable函数动æ€åœ° +控制OPPçš„å¯ç”¨æ€§ã€‚ + +dev_pm_opp_add + 为设备指针所指å‘çš„ç‰¹å®šåŸŸæ·»åŠ ä¸€ä¸ªæ–°çš„OPP。OPPæ˜¯ç”¨é¢‘çŽ‡å’Œç”µåŽ‹å®šä¹‰çš„ã€‚ä¸€æ—¦å®Œæˆ + æ·»åŠ ï¼ŒOPP被认为是å¯ç”¨çš„,å¯ä»¥ç”¨dev_pm_opp_enable/disable函数æ¥æŽ§åˆ¶å…¶å¯ç”¨æ€§ã€‚ + OPP库内部用dev_pm_opp结构体å˜å‚¨å¹¶ç®¡ç†è¿™äº›ä¿¡æ¯ã€‚这个函数å¯ä»¥è¢«SoCæ¡†æž¶æ ¹æ®SoC + 的使用环境的需求æ¥å®šä¹‰ä¸€ä¸ªæœ€ä¼˜é“¾è¡¨ã€‚ + + è¦å‘Šï¼š + ä¸è¦åœ¨ä¸æ–上下文使用这个函数。 + + 示例:: + + soc_pm_init() + { + /* åšä¸€äº›äº‹æƒ… */ + r = dev_pm_opp_add(mpu_dev, 1000000, 900000); + if (!r) { + pr_err("%s: unable to register mpu opp(%d)\n", r); + goto no_cpufreq; + } + /* åšä¸€äº›å’Œcpufreq相关的事情 */ + no_cpufreq: + /* åšå‰©ä½™çš„事情 */ + } + +3. OPPæœç´¢å‡½æ•° +============== +cpufreqç‰é«˜å±‚框架对频率进行æ“ä½œï¼Œä¸ºäº†å°†é¢‘çŽ‡æ˜ å°„åˆ°ç›¸åº”çš„OPP,OPP库æ供了便利的函数 +æ¥æœç´¢OPP库内部管ç†çš„OPP链表。这些æœç´¢å‡½æ•°å¦‚果找到匹é…çš„OPP,将返回指å‘该OPP的指针, +å¦åˆ™è¿”å›žé”™è¯¯ã€‚è¿™äº›é”™è¯¯é¢„è®¡ç”±æ ‡å‡†çš„é”™è¯¯æ£€æŸ¥ï¼Œå¦‚IS_ERR()æ¥å¤„ç†ï¼Œå¹¶ç”±è°ƒç”¨è€…采å–适当的 +行动。 + +这些函数的调用者应在使用完OPPåŽè°ƒç”¨dev_pm_opp_put()。å¦åˆ™ï¼ŒOPP的内å˜å°†æ°¸è¿œä¸ä¼š +被释放,并导致内å˜æ³„露。 + +dev_pm_opp_find_freq_exact + æ ¹æ® *精确的* 频率和å¯ç”¨æ€§æ¥æœç´¢OPP。这个函数对默认ä¸å¯ç”¨çš„OPP特别有用。 + 例å:在SoC框架检测到更高频率å¯ç”¨çš„情况下,它å¯ä»¥ä½¿ç”¨è¿™ä¸ªå‡½æ•°åœ¨è°ƒç”¨ + dev_pm_opp_enable之å‰æ‰¾åˆ°OPP:: + + opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false); + dev_pm_opp_put(opp); + /* ä¸è¦æ“作指针.. åªæ˜¯åšæœ‰æ•ˆæ€§æ£€æŸ¥.. */ + if (IS_ERR(opp)) { + pr_err("frequency not disabled!\n"); + /* 触å‘åˆé€‚çš„æ“作.. */ + } else { + dev_pm_opp_enable(dev,1000000000); + } + + 注æ„: + 这是唯一一个å¯ä»¥æœç´¢ä¸å¯ç”¨OPP的函数。 + +dev_pm_opp_find_freq_floor + æœç´¢ä¸€ä¸ª *最多* æ供指定频率的å¯ç”¨OPP。这个函数在æœç´¢è¾ƒå°çš„匹é…或按频率 + 递å‡çš„顺åºæ“作OPPä¿¡æ¯æ—¶å¾ˆæœ‰ç”¨ã€‚ + 例å:è¦æ‰¾çš„一个设备的最高OPP:: + + freq = ULONG_MAX; + opp = dev_pm_opp_find_freq_floor(dev, &freq); + dev_pm_opp_put(opp); + +dev_pm_opp_find_freq_ceil + æœç´¢ä¸€ä¸ª *最少* æ供指定频率的å¯ç”¨OPP。这个函数在æœç´¢è¾ƒå¤§çš„匹é…或按频率 + 递增的顺åºæ“作OPPä¿¡æ¯æ—¶å¾ˆæœ‰ç”¨ã€‚ + 例1:找到一个设备最å°çš„OPP:: + + freq = 0; + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + dev_pm_opp_put(opp); + + 例: 一个SoCçš„cpufreq_driver->target的简易实现:: + + soc_cpufreq_target(..) + { + /* åšç–略检查ç‰æ“作 */ + /* 找到和请求最接近的频率 */ + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + dev_pm_opp_put(opp); + if (!IS_ERR(opp)) + soc_switch_to_freq_voltage(freq); + else + /* 当ä¸èƒ½æ»¡è¶³è¯·æ±‚时,è¦åšçš„事 */ + /* åšå…¶å®ƒäº‹ */ + } + +4. OPPå¯ç”¨æ€§æŽ§åˆ¶å‡½æ•° +==================== +在OPP库ä¸æ³¨å†Œçš„默认OPPé“¾è¡¨ä¹Ÿè®¸æ— æ³•æ»¡è¶³æ‰€æœ‰å¯èƒ½çš„场景。OPP库æ供了一套函数æ¥ä¿®æ”¹ +OPP链表ä¸çš„æŸä¸ªOPPçš„å¯ç”¨æ€§ã€‚这使得SoC框架能够精细地动æ€æŽ§åˆ¶å“ªä¸€ç»„OPP是å¯ç”¨äºŽæ“作 +的。设计这些函数的目的是在诸如考虑温度时 *暂时地* åˆ é™¤æŸä¸ªOPPï¼ˆä¾‹å¦‚ï¼Œåœ¨æ¸©åº¦ä¸‹é™ +之å‰ä¸è¦ä½¿ç”¨æŸOPP)。 + +è¦å‘Šï¼š + ä¸è¦åœ¨ä¸æ–上下文使用这些函数。 + +dev_pm_opp_enable + 使一个OPPå¯ç”¨äºŽæ“作。 + 例å:å‡è®¾1GHzçš„OPPåªæœ‰åœ¨SoC温度低于æŸä¸ªé˜ˆå€¼æ—¶æ‰å¯ç”¨ã€‚SoC框架的实现å¯èƒ½ + 会选择åšä»¥ä¸‹äº‹æƒ…:: + + if (cur_temp < temp_low_thresh) { + /* è‹¥1GHz未使能,则使能 */ + opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false); + dev_pm_opp_put(opp); + /* 仅仅是错误检查 */ + if (!IS_ERR(opp)) + ret = dev_pm_opp_enable(dev, 1000000000); + else + goto try_something_else; + } + +dev_pm_opp_disable + 使一个OPPä¸å¯ç”¨äºŽæ“作。 + 例å:å‡è®¾1GHzçš„OPPåªæœ‰åœ¨SoC温度高于æŸä¸ªé˜ˆå€¼æ—¶æ‰å¯ç”¨ã€‚SoC框架的实现å¯èƒ½ + 会选择åšä»¥ä¸‹äº‹æƒ…:: + + if (cur_temp > temp_high_thresh) { + /* è‹¥1GHzå·²ä½¿èƒ½ï¼Œåˆ™å…³é— */ + opp = dev_pm_opp_find_freq_exact(dev, 1000000000, true); + dev_pm_opp_put(opp); + /* 仅仅是错误检查 */ + if (!IS_ERR(opp)) + ret = dev_pm_opp_disable(dev, 1000000000); + else + goto try_something_else; + } + +5. OPPæ•°æ®æ£€ç´¢å‡½æ•° +================== +由于OPP库对OPPä¿¡æ¯è¿›è¡Œäº†æŠ½è±¡åŒ–处ç†ï¼Œå› æ¤éœ€è¦ä¸€ç»„函数æ¥ä»Ždev_pm_opp结构体ä¸æå– +ä¿¡æ¯ã€‚一旦使用æœç´¢å‡½æ•°æ£€ç´¢åˆ°ä¸€ä¸ªOPP指针,以下函数就å¯ä»¥è¢«SoC框架用æ¥æ£€ç´¢OPP层 +内部æè¿°çš„ä¿¡æ¯ã€‚ + +dev_pm_opp_get_voltage + 检索OPP指针æ述的电压。 + 例å: 当cpufreq切æ¢åˆ°åˆ°ä¸åŒé¢‘率时,SoC框架需è¦ç”¨ç¨³åŽ‹å™¨æ¡†æž¶å°†OPPæè¿° + 的电压设置到æ供电压的电æºç®¡ç†èŠ¯ç‰‡ä¸:: + + soc_switch_to_freq_voltage(freq) + { + /* åšä¸€äº›äº‹æƒ… */ + opp = dev_pm_opp_find_freq_ceil(dev, &freq); + v = dev_pm_opp_get_voltage(opp); + dev_pm_opp_put(opp); + if (v) + regulator_set_voltage(.., v); + /* åšå…¶å®ƒäº‹ */ + } + +dev_pm_opp_get_freq + 检索OPP指针æ述的频率。 + 例å:比方说,SoCæ¡†æž¶ä½¿ç”¨äº†å‡ ä¸ªè¾…åŠ©å‡½æ•°ï¼Œé€šè¿‡è¿™äº›å‡½æ•°ï¼Œæˆ‘ä»¬å¯ä»¥å°†OPP + æŒ‡é’ˆä¼ å…¥ï¼Œè€Œä¸æ˜¯ä¼ å…¥é¢å¤–çš„å‚数,用æ¥å¤„ç†ä¸€ç³»åˆ—æ•°æ®å‚æ•°:: + + soc_cpufreq_target(..) + { + /* åšä¸€äº›äº‹æƒ….. */ + max_freq = ULONG_MAX; + max_opp = dev_pm_opp_find_freq_floor(dev,&max_freq); + requested_opp = dev_pm_opp_find_freq_ceil(dev,&freq); + if (!IS_ERR(max_opp) && !IS_ERR(requested_opp)) + r = soc_test_validity(max_opp, requested_opp); + dev_pm_opp_put(max_opp); + dev_pm_opp_put(requested_opp); + /* åšå…¶å®ƒäº‹ */ + } + soc_test_validity(..) + { + if(dev_pm_opp_get_voltage(max_opp) < dev_pm_opp_get_voltage(requested_opp)) + return -EINVAL; + if(dev_pm_opp_get_freq(max_opp) < dev_pm_opp_get_freq(requested_opp)) + return -EINVAL; + /* åšä¸€äº›äº‹æƒ….. */ + } + +dev_pm_opp_get_opp_count + 检索æŸä¸ªè®¾å¤‡å¯ç”¨çš„OPPæ•°é‡ã€‚ + 例å:å‡è®¾SoCä¸çš„一个å处ç†å™¨éœ€è¦çŸ¥é“æŸä¸ªè¡¨ä¸çš„å¯ç”¨é¢‘率,主处ç†å™¨å¯ä»¥ + 按如下方å¼å‘出通知:: + + soc_notify_coproc_available_frequencies() + { + /* åšä¸€äº›äº‹æƒ… */ + num_available = dev_pm_opp_get_opp_count(dev); + speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL); + /* 按å‡åºå¡«å……表 */ + freq = 0; + while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) { + speeds[i] = freq; + freq++; + i++; + dev_pm_opp_put(opp); + } + + soc_notify_coproc(AVAILABLE_FREQs, speeds, num_available); + /* åšå…¶å®ƒäº‹ */ + } + +6. æ•°æ®ç»“æž„ +=========== +通常,一个SoC包å«å¤šä¸ªå¯å˜ç”µåŽ‹åŸŸã€‚æ¯ä¸ªåŸŸç”±ä¸€ä¸ªè®¾å¤‡æŒ‡é’ˆæ述。和OPP之间的关系å¯ä»¥ +按以下方å¼æè¿°:: + + SoC + |- device 1 + | |- opp 1 (availability, freq, voltage) + | |- opp 2 .. + ... ... + | `- opp n .. + |- device 2 + ... + `- device m + +OPP库维护ç€ä¸€ä¸ªå†…部链表,SoC框架使用上文æè¿°çš„å„个函数æ¥å¡«å……和访问。然而,æè¿° +真实OPP和域的结构体是OPP库自身的内部组æˆï¼Œä»¥å…许åˆé€‚的抽象在ä¸åŒç³»ç»Ÿä¸å¾—到å¤ç”¨ã€‚ + +struct dev_pm_opp + OPP库的内部数æ®ç»“构,用于表示一个OPP。除了频率ã€ç”µåŽ‹ã€å¯ç”¨æ€§ä¿¡æ¯å¤–, + 它还包å«OPP库è¿è¡Œæ‰€éœ€çš„内部统计信æ¯ã€‚指å‘这个结构体的指针被æ供给 + 用户(比如SoC框架)使用,在与OPP层的交互ä¸ä½œä¸ºOPPçš„æ ‡è¯†ç¬¦ã€‚ + + è¦å‘Šï¼š + 结构体dev_pm_opp的指针ä¸åº”该由用户解æžæˆ–修改。一个实例的默认值由 + dev_pm_opp_add填充,但OPPçš„å¯ç”¨æ€§ç”±dev_pm_opp_enable/disable函数 + 修改。 + +struct device + 这用于å‘OPPå±‚æ ‡è¯†ä¸€ä¸ªåŸŸã€‚è®¾å¤‡çš„æ€§è´¨å’Œå®ƒçš„å®žçŽ°æ˜¯ç”±OPP库的用户决定的, + 如SoC框架。 + +总体æ¥è¯´ï¼Œä»¥ä¸€ä¸ªç®€åŒ–的视角看,对数æ®ç»“æž„çš„æ“作å¯ä»¥æ述为下é¢å„图:: + + åˆå§‹åŒ– / 修改: + +-----+ /- dev_pm_opp_enable + dev_pm_opp_add --> | opp | <------- + | +-----+ \- dev_pm_opp_disable + \-------> domain_info(device) + + æœç´¢å‡½æ•°: + /-- dev_pm_opp_find_freq_ceil ---\ +-----+ + domain_info<---- dev_pm_opp_find_freq_exact -----> | opp | + \-- dev_pm_opp_find_freq_floor ---/ +-----+ + + 检索函数: + +-----+ /- dev_pm_opp_get_voltage + | opp | <--- + +-----+ \- dev_pm_opp_get_freq + + domain_info <- dev_pm_opp_get_opp_count diff --git a/Documentation/translations/zh_CN/riscv/index.rst b/Documentation/translations/zh_CN/riscv/index.rst index bbf5d7b3777a..614cde0c0997 100644 --- a/Documentation/translations/zh_CN/riscv/index.rst +++ b/Documentation/translations/zh_CN/riscv/index.rst @@ -18,6 +18,7 @@ RISC-V 体系结构 :maxdepth: 1 boot-image-header + vm-layout pmu patch-acceptance diff --git a/Documentation/translations/zh_CN/riscv/vm-layout.rst b/Documentation/translations/zh_CN/riscv/vm-layout.rst new file mode 100644 index 000000000000..585cb89317a3 --- /dev/null +++ b/Documentation/translations/zh_CN/riscv/vm-layout.rst @@ -0,0 +1,67 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/riscv/vm-layout.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +============================ +RISC-V Linux上的虚拟内å˜å¸ƒå±€ +============================ + +:作者: Alexandre Ghiti <alex@ghiti.fr> +:日期: 12 February 2021 + +这份文件æ述了RISC-V Linuxå†…æ ¸ä½¿ç”¨çš„è™šæ‹Ÿå†…å˜å¸ƒå±€ã€‚ + +32ä½ RISC-V Linux å†…æ ¸ +====================== + +RISC-V Linux Kernel SV32 +------------------------ + +TODO + +64ä½ RISC-V Linux å†…æ ¸ +====================== + +RISC-V特æƒæž¶æž„文档指出,64ä½åœ°å€ "必须使第63-48ä½å€¼éƒ½ç‰äºŽç¬¬47ä½ï¼Œå¦åˆ™å°†å‘生缺页异常。":这将虚 +拟地å€ç©ºé—´åˆ†æˆä¸¤åŠï¼Œä¸é—´æœ‰ä¸€ä¸ªéžå¸¸å¤§çš„洞,下åŠéƒ¨åˆ†æ˜¯ç”¨æˆ·ç©ºé—´æ‰€åœ¨çš„地方,上åŠéƒ¨åˆ†æ˜¯RISC-V Linux +å†…æ ¸æ‰€åœ¨çš„åœ°æ–¹ã€‚ + +RISC-V Linux Kernel SV39 +------------------------ + +:: + + ======================================================================================================================== + å¼€å§‹åœ°å€ | å移 | 结æŸåœ°å€ | å¤§å° | 虚拟内å˜åŒºåŸŸæè¿° + ======================================================================================================================== + | | | | + 0000000000000000 | 0 | 0000003fffffffff | 256 GB | 用户空间虚拟内å˜ï¼Œæ¯ä¸ªå†…å˜ç®¡ç†å™¨ä¸åŒ + __________________|____________|__________________|_________|___________________________________________________________ + | | | | + 0000004000000000 | +256 GB | ffffffbfffffffff | ~16M TB | ... 巨大的ã€å‡ 乎64ä½å®½çš„ç›´åˆ°å†…æ ¸æ˜ å°„çš„-256GB地方 + | | | | 开始å移的éžç»å…¸è™šæ‹Ÿå†…å˜åœ°å€ç©ºæ´žã€‚ + | | | | + __________________|____________|__________________|_________|___________________________________________________________ + | + | å†…æ ¸ç©ºé—´çš„è™šæ‹Ÿå†…å˜ï¼Œåœ¨æ‰€æœ‰è¿›ç¨‹ä¹‹é—´å…±äº«: + ____________________________________________________________|___________________________________________________________ + | | | | + ffffffc6fee00000 | -228 GB | ffffffc6feffffff | 2 MB | fixmap + ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io + ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap + ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space + ffffffd800000000 | -160 GB | fffffff6ffffffff | 124 GB | ç›´æŽ¥æ˜ å°„æ‰€æœ‰ç‰©ç†å†…å˜ + fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan + __________________|____________|__________________|_________|____________________________________________________________ + | + | + ____________________________________________________________|____________________________________________________________ + | | | | + ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF + ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel + __________________|____________|__________________|_________|____________________________________________________________ diff --git a/Documentation/translations/zh_CN/scheduler/index.rst b/Documentation/translations/zh_CN/scheduler/index.rst index f8f8f35d53c7..12bf3bd02ccf 100644 --- a/Documentation/translations/zh_CN/scheduler/index.rst +++ b/Documentation/translations/zh_CN/scheduler/index.rst @@ -5,6 +5,7 @@ :翻译: å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + å”艺舟 Tang Yizhou <tangyeechou@gmail.com> :æ ¡è¯‘: @@ -23,16 +24,14 @@ Linux调度器 sched-design-CFS sched-domains sched-capacity - + sched-energy + sched-nice-design + sched-stats TODOList: - sched-bwc sched-deadline - sched-energy - sched-nice-design sched-rt-group - sched-stats text_files diff --git a/Documentation/translations/zh_CN/scheduler/sched-energy.rst b/Documentation/translations/zh_CN/scheduler/sched-energy.rst new file mode 100644 index 000000000000..fdbf6cfeea93 --- /dev/null +++ b/Documentation/translations/zh_CN/scheduler/sched-energy.rst @@ -0,0 +1,351 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/scheduler/sched-energy.rst + +:翻译: + + å”艺舟 Tang Yizhou <tangyeechou@gmail.com> + +============ +能é‡æ„ŸçŸ¥è°ƒåº¦ +============ + +1. 简介 +------- + +能é‡æ„ŸçŸ¥è°ƒåº¦ï¼ˆEAS)使调度器有能力预测其决ç–对CPU所消耗的能é‡çš„å½±å“。EASä¾é +一个能é‡æ¨¡åž‹ï¼ˆEM)æ¥ä¸ºæ¯ä¸ªä»»åŠ¡é€‰æ‹©ä¸€ä¸ªèŠ‚能的CPU,åŒæ—¶æœ€å°åŒ–对åžå率的影å“。 +本文档致力于介ç»ä»‹ç»EAS是如何工作的,它背åŽçš„主è¦è®¾è®¡å†³ç–是什么,以åŠä½¿å…¶è¿è¡Œ +所需的æ¡ä»¶ç»†èŠ‚。 + +在进一æ¥é˜…读之å‰ï¼Œè¯·æ³¨æ„,在撰写本文时:: + + /!\ EASä¸æ”¯æŒå¯¹ç§°CPUæ‹“æ‰‘çš„å¹³å° /!\ + +EASåªåœ¨å¼‚æž„CPU拓扑结构(如Arm大å°æ ¸ï¼Œbig.LITTLE)上è¿è¡Œã€‚å› ä¸ºåœ¨è¿™ç§æƒ…况下, +通过调度æ¥èŠ‚约能é‡çš„潜力是最大的。 + +EAS实际使用的EMä¸æ˜¯ç”±è°ƒåº¦å™¨ç»´æŠ¤çš„,而是一个专门的框架。关于这个框架的细节和 +它æ供的内容,请å‚考其文档(è§Documentation/power/energy-model.rst)。 + + +2. èƒŒæ™¯å’Œæœ¯è¯ +------------- + +从一开始就说清楚定义: + - èƒ½é‡ = [焦耳] ï¼ˆæ¯”å¦‚ä¾›ç”µè®¾å¤‡ä¸Šçš„ç”µæ± æ供的资æºï¼‰ + - 功率 = 能é‡/时间 = [焦耳/秒] = [瓦特] + + EASçš„ç›®æ ‡æ˜¯æœ€å°åŒ–能é‡æ¶ˆè€—,åŒæ—¶ä»èƒ½å°†å·¥ä½œå®Œæˆã€‚也就是说,我们è¦æœ€å¤§åŒ–:: + + 性能 [指令数/秒] + ---------------- + 功率 [瓦特] + +它ç‰æ•ˆäºŽæœ€å°åŒ–:: + + èƒ½é‡ [焦耳] + ----------- + 指令数 + +åŒæ—¶ä»ç„¶èŽ·å¾—“良好â€çš„性能。当å‰è°ƒåº¦å™¨åªè€ƒè™‘æ€§èƒ½ç›®æ ‡ï¼Œå› æ¤è¯¥å¼å本质上是一个 +å¯é€‰çš„ä¼˜åŒ–ç›®æ ‡ï¼Œå®ƒåŒæ—¶è€ƒè™‘äº†ä¸¤ä¸ªç›®æ ‡ï¼šèƒ½é‡æ•ˆçŽ‡å’Œæ€§èƒ½ã€‚ + +引入EM的想法是为了让调度器评估其决ç–çš„å½±å“,而ä¸æ˜¯ç›²ç›®åœ°åº”用å¯èƒ½ä»…在部分 +å¹³å°æœ‰æ£é¢æ•ˆæžœçš„节能技术。åŒæ—¶ï¼ŒEM必须尽å¯èƒ½çš„简å•ï¼Œä»¥æœ€å°åŒ–调度器的时延 +å½±å“。 + +简而言之,EAS改å˜äº†CFS任务分é…ç»™CPUçš„æ–¹å¼ã€‚当调度器决定一个任务应该在哪里 +è¿è¡Œæ—¶ï¼ˆåœ¨å”¤é†’期间),EM被用æ¥åœ¨ä¸æŸå®³ç³»ç»ŸåžåçŽ‡çš„æƒ…å†µä¸‹ï¼Œä»Žå‡ ä¸ªè¾ƒå¥½çš„å€™é€‰ +CPUä¸æŒ‘选一个ç»é¢„测能é‡æ¶ˆè€—最优的CPU。EAS的预测ä¾èµ–于对平å°æ‹“æ‰‘ç»“æž„ç‰¹å®šå…ƒç´ +的了解,包括CPU的“算力â€ï¼Œä»¥åŠå®ƒä»¬å„自的能é‡æˆæœ¬ã€‚ + + +3. æ‹“æ‰‘ä¿¡æ¯ +----------- + +EAS(以åŠè°ƒåº¦å™¨çš„剩余部分)使用“算力â€çš„概念æ¥åŒºåˆ†ä¸åŒè®¡ç®—åžå率的CPU。一个 +CPU的“算力â€ä»£è¡¨äº†å®ƒåœ¨æœ€é«˜é¢‘率下è¿è¡Œæ—¶èƒ½å®Œæˆçš„工作é‡ï¼Œä¸”è¿™ä¸ªå€¼æ˜¯ç›¸å¯¹ç³»ç»Ÿä¸ +算力最大的CPU而言的。算力值被归一化为1024以内,并且å¯ä¸Žç”±å®žä½“负载跟踪 +(PELT)机制算出的利用率信å·åšå¯¹æ¯”。由于有算力值和利用率值,EAS能够估计一个 +任务/CPU有多大/有多忙,并在评估性能与能é‡æ—¶å°†å…¶è€ƒè™‘在内。CPU算力由特定体系 +结构实现的arch_scale_cpu_capacity()回调函数æ供。 + +EAS使用的其余平å°ä¿¡æ¯æ˜¯ç›´æŽ¥ä»Žèƒ½é‡æ¨¡åž‹ï¼ˆEM)框架ä¸è¯»å–的。一个平å°çš„EMæ˜¯ä¸€å¼ +表,表ä¸æ¯é¡¹ä»£è¡¨ç³»ç»Ÿä¸ä¸€ä¸ªâ€œæ€§èƒ½åŸŸâ€çš„功率æˆæœ¬ã€‚(若è¦äº†è§£æ›´å¤šå…³äºŽæ€§èƒ½åŸŸçš„细节, +è§Documentation/power/energy-model.rst) + +当调度域被建立或é‡æ–°å»ºç«‹æ—¶ï¼Œè°ƒåº¦å™¨ç®¡ç†å¯¹æ‹“扑代ç ä¸EM对象的引用。对于æ¯ä¸ªæ ¹åŸŸ +(rd),调度器维护一个与当å‰rd->span相交的所有性能域的å•å‘链表。链表ä¸çš„æ¯ä¸ª +节点都包å«ä¸€ä¸ªæŒ‡å‘EM框架所æ供的结构体em_perf_domain的指针。 + +é“¾è¡¨è¢«é™„åŠ åœ¨æ ¹åŸŸä¸Šï¼Œä»¥åº”å¯¹ç‹¬å çš„cpusetçš„é…置。由于独å çš„cpuset的边界ä¸ä¸€å®šä¸Ž +性能域的边界一致,ä¸åŒæ ¹åŸŸçš„链表å¯èƒ½åŒ…å«é‡å¤çš„å…ƒç´ ã€‚ + +示例1 + 让我们考虑一个有12个CPUçš„å¹³å°ï¼Œåˆ†æˆ3个性能域,(pd0,pd4å’Œpd8),按以下 + æ–¹å¼ç»„织:: + + CPUs: 0 1 2 3 4 5 6 7 8 9 10 11 + PDs: |--pd0--|--pd4--|---pd8---| + RDs: |----rd1----|-----rd2-----| + + 现在,考虑用户空间决定将系统分æˆä¸¤ä¸ªç‹¬å çš„cpusetsï¼Œå› æ¤åˆ›å»ºäº†ä¸¤ä¸ªç‹¬ç«‹çš„æ ¹åŸŸï¼Œ + æ¯ä¸ªæ ¹åŸŸåŒ…å«6个CPUã€‚è¿™ä¸¤ä¸ªæ ¹åŸŸåœ¨ä¸Šå›¾ä¸è¢«è¡¨ç¤ºä¸ºrd1å’Œrd2。由于pd4与rd1å’Œrd2 + 都有交集,它将åŒæ—¶å‡ºçŽ°äºŽé™„åŠ åœ¨è¿™ä¸¤ä¸ªæ ¹åŸŸçš„â€œ->pdâ€é“¾è¡¨ä¸: + + * rd1->pd: pd0 -> pd4 + * rd2->pd: pd4 -> pd8 + + 请注æ„,调度器将为pd4创建两个é‡å¤çš„链表节点(æ¯ä¸ªé“¾è¡¨ä¸å„一个)。然而这 + 两个节点æŒæœ‰æŒ‡å‘åŒä¸€ä¸ªEM框架的共享数æ®ç»“构的指针。 + +由于对这些链表的访问å¯èƒ½ä¸Žçƒæ’æ‹”åŠå…¶å®ƒäº‹ä»¶å¹¶å‘å‘ç”Ÿï¼Œå› æ¤å®ƒä»¬å—RCUé”ä¿æŠ¤ï¼Œå°±åƒ +被调度器æ“控的拓扑结构体ä¸å‰©ä¸‹å—æ®µä¸€æ ·ã€‚ + +EASåŒæ ·ç»´æŠ¤äº†ä¸€ä¸ªé™æ€é”®ï¼ˆsched_energy_presentï¼‰ï¼Œå½“è‡³å°‘æœ‰ä¸€ä¸ªæ ¹åŸŸæ»¡è¶³EAS +å¯åŠ¨çš„所有æ¡ä»¶æ—¶ï¼Œè¿™ä¸ªé”®å°±ä¼šè¢«å¯åŠ¨ã€‚在第6节ä¸æ€»ç»“了这些æ¡ä»¶ã€‚ + + +4. 能é‡æ„ŸçŸ¥ä»»åŠ¡æ”¾ç½® +------------------- + +EAS覆盖了CFS的任务唤醒平衡代ç 。在唤醒平衡时,它使用平å°çš„EMå’ŒPELTä¿¡å·æ¥é€‰æ‹©èŠ‚能 +çš„ç›®æ ‡CPU。当EAS被å¯ç”¨æ—¶ï¼Œselect_task_rq_fair()调用find_energy_efficient_cpu() +æ¥åšä»»åŠ¡æ”¾ç½®å†³å®šã€‚这个函数寻找在æ¯ä¸ªæ€§èƒ½åŸŸä¸å¯»æ‰¾å…·æœ‰æœ€é«˜å‰©ä½™ç®—力(CPU算力 - CPU +利用率)的CPUï¼Œå› ä¸ºå®ƒèƒ½è®©æˆ‘ä»¬ä¿æŒæœ€ä½Žçš„频率。然åŽï¼Œè¯¥å‡½æ•°æ£€æŸ¥å°†ä»»åŠ¡æ”¾åœ¨æ–°CPU相较 +ä¾ç„¶æ”¾åœ¨ä¹‹å‰æ´»åŠ¨çš„prev_cpu是å¦å¯ä»¥èŠ‚çœèƒ½é‡ã€‚ + +如果唤醒的任务被è¿ç§»ï¼Œfind_energy_efficient_cpu()使用compute_energy()æ¥ä¼°ç®— +系统将消耗多少能é‡ã€‚compute_energy()检查å„CPU当å‰çš„利用率情况,并å°è¯•è°ƒæ•´æ¥ +“模拟â€ä»»åŠ¡è¿ç§»ã€‚EM框架æ供了API em_pd_energy()计算æ¯ä¸ªæ€§èƒ½åŸŸåœ¨ç»™å®šçš„利用率æ¡ä»¶ +下的预期能é‡æ¶ˆè€—。 + +下é¢è¯¦ç»†ä»‹ç»ä¸€ä¸ªä¼˜åŒ–能é‡æ¶ˆè€—的任务放置决ç–的例å。 + +示例2 + 让我们考虑一个有两个独立性能域的(伪)平å°ï¼Œæ¯ä¸ªæ€§èƒ½åŸŸå«æœ‰2个CPU。CPU0å’ŒCPU1 + 是å°æ ¸ï¼ŒCPU2å’ŒCPU3æ˜¯å¤§æ ¸ã€‚ + + 调度器必须决定将任务P放在哪个CPU上,这个任务的util_avg = 200(平å‡åˆ©ç”¨çŽ‡ï¼‰ï¼Œ + prev_cpu = 0(上一次è¿è¡Œåœ¨CPU0)。 + + ç›®å‰CPU的利用率情况如下图所示。CPU 0-3çš„util_avg分别为400ã€100ã€600å’Œ500。 + æ¯ä¸ªæ€§èƒ½åŸŸæœ‰ä¸‰ä¸ªæ“作性能值(OPP)。与æ¯ä¸ªOPP相关的CPU算力和功率æˆæœ¬åˆ—åœ¨èƒ½é‡ + 模型表ä¸ã€‚Pçš„util_avg在图ä¸æ˜¾ç¤ºä¸º"PP":: + + CPU util. + 1024 - - - - - - - Energy Model + +-----------+-------------+ + | Little | Big | + 768 ============= +-----+-----+------+------+ + | Cap | Pwr | Cap | Pwr | + +-----+-----+------+------+ + 512 =========== - ##- - - - - | 170 | 50 | 512 | 400 | + ## ## | 341 | 150 | 768 | 800 | + 341 -PP - - - - ## ## | 512 | 300 | 1024 | 1700 | + PP ## ## +-----+-----+------+------+ + 170 -## - - - - ## ## + ## ## ## ## + ------------ ------------- + CPU0 CPU1 CPU2 CPU3 + + Current OPP: ===== Other OPP: - - - util_avg (100 each): ## + + + find_energy_efficient_cpu()将首先在两个性能域ä¸å¯»æ‰¾å…·æœ‰æœ€å¤§å‰©ä½™ç®—力的CPU。 + 在这个例åä¸æ˜¯CPU1å’ŒCPU3。然åŽï¼Œå®ƒå°†ä¼°ç®—,当P被放在它们ä¸çš„ä»»æ„一个时,系统的 + èƒ½è€—ï¼Œå¹¶æ£€æŸ¥è¿™æ ·åšæ˜¯å¦ä¼šæ¯”把P放在CPU0上节çœä¸€äº›èƒ½é‡ã€‚EASå‡å®šOPPséµå¾ªåˆ©ç”¨çŽ‡ + (这与CPUFreq监管器schedutil的行为一致。关于这个问题的更多细节,è§ç¬¬6节)。 + + **情况1. P被è¿ç§»åˆ°CPU1**:: + + 1024 - - - - - - - + + Energy calculation: + 768 ============= * CPU0: 200 / 341 * 150 = 88 + * CPU1: 300 / 341 * 150 = 131 + * CPU2: 600 / 768 * 800 = 625 + 512 - - - - - - - ##- - - - - * CPU3: 500 / 768 * 800 = 520 + ## ## => total_energy = 1364 + 341 =========== ## ## + PP ## ## + 170 -## - - PP- ## ## + ## ## ## ## + ------------ ------------- + CPU0 CPU1 CPU2 CPU3 + + + **情况2. P被è¿ç§»åˆ°CPU3**:: + + 1024 - - - - - - - + + Energy calculation: + 768 ============= * CPU0: 200 / 341 * 150 = 88 + * CPU1: 100 / 341 * 150 = 43 + PP * CPU2: 600 / 768 * 800 = 625 + 512 - - - - - - - ##- - -PP - * CPU3: 700 / 768 * 800 = 729 + ## ## => total_energy = 1485 + 341 =========== ## ## + ## ## + 170 -## - - - - ## ## + ## ## ## ## + ------------ ------------- + CPU0 CPU1 CPU2 CPU3 + + **情况3. Pä¾æ—§ç•™åœ¨prev_cpu/CPU0**:: + + 1024 - - - - - - - + + Energy calculation: + 768 ============= * CPU0: 400 / 512 * 300 = 234 + * CPU1: 100 / 512 * 300 = 58 + * CPU2: 600 / 768 * 800 = 625 + 512 =========== - ##- - - - - * CPU3: 500 / 768 * 800 = 520 + ## ## => total_energy = 1437 + 341 -PP - - - - ## ## + PP ## ## + 170 -## - - - - ## ## + ## ## ## ## + ------------ ------------- + CPU0 CPU1 CPU2 CPU3 + + 从这些计算结果æ¥çœ‹ï¼Œæƒ…况1的总能é‡æœ€ä½Žã€‚所以从节约能é‡çš„角度看,CPU1是最佳候选 + 者。 + +å¤§æ ¸é€šå¸¸æ¯”å°æ ¸æ›´è€—ç”µï¼Œå› æ¤ä¸»è¦åœ¨ä»»åŠ¡ä¸é€‚åˆåœ¨å°æ ¸è¿è¡Œæ—¶ä½¿ç”¨ã€‚然而,å°æ ¸å¹¶ä¸æ€»æ˜¯æ¯” +å¤§æ ¸èŠ‚èƒ½ã€‚ä¸¾ä¾‹æ¥è¯´ï¼Œå¯¹äºŽæŸäº›ç³»ç»Ÿï¼Œå°æ ¸çš„高OPPå¯èƒ½æ¯”å¤§æ ¸çš„ä½ŽOPP能é‡æ¶ˆè€—æ›´é«˜ã€‚å› æ¤ï¼Œ +如果å°æ ¸åœ¨æŸä¸€ç‰¹å®šæ—¶é—´ç‚¹åˆšå¥½æœ‰è¶³å¤Ÿçš„利用率,在æ¤åˆ»è¢«å”¤é†’çš„å°ä»»åŠ¡æ”¾åœ¨å¤§æ ¸æ‰§è¡Œå¯èƒ½ +会更节能,尽管它在å°æ ¸ä¸Šè¿è¡Œä¹Ÿæ˜¯åˆé€‚的。 + +å³ä½¿åœ¨å¤§æ ¸æ‰€æœ‰OPP都ä¸å¦‚å°æ ¸OPP节能的情况下,在æŸäº›ç‰¹å®šæ¡ä»¶ä¸‹ï¼Œä»¤å°ä»»åŠ¡è¿è¡Œåœ¨å¤§æ ¸ +上ä¾ç„¶å¯èƒ½èŠ‚能。事实上,将一个任务放在一个å°æ ¸ä¸Šå¯èƒ½å¯¼è‡´æ•´ä¸ªæ€§èƒ½åŸŸçš„OPPæ高,这将 +å¢žåŠ å·²ç»åœ¨è¯¥æ€§èƒ½åŸŸè¿è¡Œçš„任务的能é‡æˆæœ¬ã€‚å¦‚æžœå”¤é†’çš„ä»»åŠ¡è¢«æ”¾åœ¨ä¸€ä¸ªå¤§æ ¸ä¸Šï¼Œå®ƒçš„æ‰§è¡Œ +æˆæœ¬å¯èƒ½æ¯”放在å°æ ¸ä¸Šæ›´é«˜ï¼Œä½†è¿™ä¸ä¼šå½±å“å°æ ¸ä¸Šçš„其它任务,这些任务将继ç»ä»¥è¾ƒä½Žçš„OPP +è¿è¡Œã€‚å› æ¤ï¼Œå½“考虑CPU消耗的总能é‡æ—¶ï¼Œåœ¨å¤§æ ¸ä¸Šè¿è¡Œä¸€ä¸ªä»»åŠ¡çš„é¢å¤–æˆæœ¬å¯èƒ½å°äºŽä¸ºæ‰€æœ‰ +其它è¿è¡Œåœ¨å°æ ¸çš„任务æ高OPPçš„æˆæœ¬ã€‚ + +上é¢çš„例åå‡ ä¹Žä¸å¯èƒ½ä»¥ä¸€ç§é€šç”¨çš„æ–¹å¼å¾—到æ£ç¡®çš„结果;åŒæ—¶ï¼Œå¯¹äºŽæ‰€æœ‰å¹³å°ï¼Œåœ¨ä¸çŸ¥é“ +系统所有CPUæ¯ä¸ªä¸åŒOPPçš„è¿è¡Œæˆæœ¬æ—¶ï¼Œä¹Ÿæ— 法得到æ£ç¡®çš„结果。得益于基于EM的设计, +EAS应该能够æ£ç¡®å¤„ç†è¿™äº›é—®é¢˜è€Œä¸ä¼šå¼•å‘太多麻烦。然而,为了确ä¿å¯¹é«˜åˆ©ç”¨çŽ‡åœºæ™¯çš„ +åžåçŽ‡é€ æˆçš„å½±å“最å°åŒ–,EASåŒæ ·å®žçŽ°äº†å¦å¤–一ç§å«â€œè¿‡åº¦åˆ©ç”¨çŽ‡â€çš„机制。 + + +5. 过度利用率 +------------- + +从一般的角度æ¥çœ‹ï¼ŒEAS能æ供最大帮助的是那些涉åŠä½Žã€ä¸CPU利用率的使用场景。æ¯å½“CPU +密集型的长任务è¿è¡Œï¼Œå®ƒä»¬å°†éœ€è¦æ‰€æœ‰çš„å¯ç”¨CPU算力,调度器将没有什么办法æ¥èŠ‚çœèƒ½é‡åŒæ—¶ +åˆä¸æŸå®³åžå率。为了é¿å…EASæŸå®³æ€§èƒ½ï¼Œä¸€æ—¦CPU被使用的算力超过80%ï¼Œå®ƒå°†è¢«æ ‡è®°ä¸ºâ€œè¿‡åº¦ +利用â€ã€‚åªè¦æ ¹åŸŸä¸æ²¡æœ‰CPU是过度利用状æ€ï¼Œè´Ÿè½½å‡è¡¡è¢«ç¦ç”¨ï¼Œè€ŒEAS将覆盖唤醒平衡代ç 。 +EAS很å¯èƒ½å°†è´Ÿè½½æ”¾ç½®åœ¨ç³»ç»Ÿä¸èƒ½é‡æ•ˆçŽ‡æœ€é«˜çš„CPU而ä¸æ˜¯å…¶å®ƒCPU上,åªè¦ä¸æŸå®³åžå率。 +å› æ¤ï¼Œè´Ÿè½½å‡è¡¡å™¨è¢«ç¦ç”¨ä»¥é˜²æ¢å®ƒæ‰“ç ´EASå‘现的节能任务放置。当系统未处于过度利用状æ€æ—¶ï¼Œ +è¿™æ ·åšæ˜¯å®‰å…¨çš„ï¼Œå› ä¸ºä½ŽäºŽ80%的临界点æ„味ç€ï¼š + + a. 所有的CPU都有一些空闲时间,所以EAS使用的利用率信å·å¾ˆå¯èƒ½å‡†ç¡®åœ°ä»£è¡¨å„ç§ä»»åŠ¡ + 的“大å°â€ã€‚ + b. 所有任务,ä¸ç®¡å®ƒä»¬çš„nice值是多大,都应该被æ供了足够多的CPU算力。 + c. 既然有多余的算力,那么所有的任务都必须定期阻塞/ä¼‘çœ ï¼Œåœ¨å”¤é†’æ—¶è¿›è¡Œå¹³è¡¡å°±è¶³å¤Ÿ + 了。 + +åªè¦ä¸€ä¸ªCPU利用率超过80%的临界点,上述三个å‡è®¾ä¸è‡³å°‘有一个是ä¸æ£ç¡®çš„。在这ç§æƒ…况下, +æ•´ä¸ªæ ¹åŸŸçš„â€œè¿‡åº¦åˆ©ç”¨â€æ ‡å¿—被设置,EAS被ç¦ç”¨ï¼Œè´Ÿè½½å‡è¡¡å™¨è¢«é‡æ–°å¯ç”¨ã€‚é€šè¿‡è¿™æ ·åšï¼Œè°ƒåº¦å™¨ +åˆå›žåˆ°äº†åœ¨CPU密集的æ¡ä»¶ä¸‹åŸºäºŽè´Ÿè½½çš„算法åšè´Ÿè½½å‡è¡¡ã€‚这更好地尊é‡äº†ä»»åŠ¡çš„nice值。 + +由于过度利用率的概念在很大程度上ä¾èµ–于检测系统ä¸æ˜¯å¦æœ‰ä¸€äº›ç©ºé—²æ—¶é—´ï¼Œæ‰€ä»¥å¿…须考虑 +(比CFS)更高优先级的调度类(以åŠä¸æ–)“窃å–â€çš„CPU算力。åƒè¿™æ ·ï¼Œå¯¹è¿‡åº¦ä½¿ç”¨çŽ‡çš„检测 +ä¸ä»…è¦è€ƒè™‘CFS任务使用的算力,还需è¦è€ƒè™‘其它调度类和ä¸æ–。 + + +6. EASçš„ä¾èµ–å’Œè¦æ±‚ +------------------ + +能é‡æ„ŸçŸ¥è°ƒåº¦ä¾èµ–系统的CPU具有特定的硬件属性,以åŠå†…æ ¸ä¸çš„其它特性被å¯ç”¨ã€‚本节列出 +了这些ä¾èµ–,并对如何满足这些ä¾èµ–æ供了æ示。 + + +6.1 - éžå¯¹ç§°CPU拓扑 +^^^^^^^^^^^^^^^^^^^ + + +如简介所æ,目å‰åªæœ‰éžå¯¹ç§°CPU拓扑结构的平å°æ”¯æŒEAS。通过在è¿è¡Œæ—¶æŸ¥è¯¢ +SD_ASYM_CPUCAPACITY_FULLæ ‡å¿—ä½æ˜¯å¦åœ¨åˆ›å»ºè°ƒåº¦åŸŸæ—¶å·²è®¾ç½®æ¥æ£€æŸ¥è¿™ä¸€è¦æ±‚是å¦æ»¡è¶³ã€‚ + +å‚阅Documentation/scheduler/sched-capacity.rst以了解在sched_domain层次结构 +ä¸è®¾ç½®æ¤æ ‡å¿—ä½æ‰€éœ€æ»¡è¶³çš„è¦æ±‚。 + +请注æ„,EAS并éžä»Žæ ¹æœ¬ä¸Šä¸ŽSMPä¸å…¼å®¹ï¼Œä½†åœ¨SMPå¹³å°ä¸Šè¿˜æ²¡æœ‰è§‚察到明显的节能。这一 +é™åˆ¶å¯ä»¥åœ¨å°†æ¥è¿›è¡Œä¿®æ”¹ï¼Œå¦‚果被è¯æ˜Žä¸æ˜¯è¿™æ ·çš„è¯ã€‚ + + +6.2 - 当å‰çš„能é‡æ¨¡åž‹ +^^^^^^^^^^^^^^^^^^^^ + +EAS使用一个平å°çš„EMæ¥ä¼°ç®—调度决ç–对能é‡çš„å½±å“ã€‚å› æ¤ï¼Œä½ çš„å¹³å°å¿…é¡»å‘EM框架æä¾› +能é‡æˆæœ¬è¡¨ï¼Œä»¥å¯åŠ¨EAS。è¦åšåˆ°è¿™ä¸€ç‚¹ï¼Œè¯·å‚阅文档 +Documentation/power/energy-model.rstä¸çš„独立EM框架部分。 + +å¦è¯·æ³¨æ„,调度域需è¦åœ¨EM注册åŽé‡å»ºï¼Œä»¥ä¾¿å¯åŠ¨EAS。 + +EAS使用EM对能é‡ä½¿ç”¨çŽ‡è¿›è¡Œé¢„测决ç–ï¼Œå› æ¤å®ƒåœ¨æ£€æŸ¥ä»»åŠ¡æ”¾ç½®çš„å¯èƒ½é€‰é¡¹æ—¶æ›´åŠ æ³¨é‡ +差异。对于EASæ¥è¯´ï¼ŒEM的功率值是以毫瓦还是以“抽象刻度â€ä¸ºå•ä½è¡¨ç¤ºå¹¶ä¸é‡è¦ã€‚ + + + +6.3 - 能é‡æ¨¡åž‹å¤æ‚度 +^^^^^^^^^^^^^^^^^^^^ + +任务唤醒路径是时延æ•æ„Ÿçš„。当一个平å°çš„EM太å¤æ‚(太多CPUï¼Œå¤ªå¤šæ€§èƒ½åŸŸï¼Œå¤ªå¤šçŠ¶æ€ +ç‰ï¼‰ï¼Œåœ¨å”¤é†’路径ä¸ä½¿ç”¨å®ƒçš„æˆæœ¬å°±ä¼šå‡é«˜åˆ°ä¸å¯æŽ¥å—。能é‡æ„ŸçŸ¥å”¤é†’算法的å¤æ‚度为: + + C = Nd * (Nc + Ns) + +å…¶ä¸ï¼šNd是性能域的数é‡ï¼›Nc是CPUçš„æ•°é‡ï¼›Ns是OPP的总数(例如:对于两个性能域, +æ¯ä¸ªåŸŸæœ‰4个OPP,则Ns = 8)。 + +当调度域建立时,å¤æ‚æ€§æ£€æŸ¥æ˜¯åœ¨æ ¹åŸŸä¸Šè¿›è¡Œçš„ã€‚å¦‚æžœä¸€ä¸ªæ ¹åŸŸçš„å¤æ‚度Cæ°å¥½é«˜äºŽå®Œå…¨ +主观设定的EM_MAX_COMPLEXITY阈值(在本文写作时,是2048),则EASä¸ä¼šåœ¨æ¤æ ¹åŸŸ +å¯åŠ¨ã€‚ + +å¦‚æžœä½ çš„å¹³å°çš„能é‡æ¨¡åž‹çš„å¤æ‚度太高,EASæ— æ³•åœ¨è¿™ä¸ªæ ¹åŸŸä¸Šä½¿ç”¨ï¼Œä½†ä½ çœŸçš„æƒ³ç”¨ï¼Œ +é‚£ä¹ˆä½ å°±åªå‰©ä¸‹ä¸¤ä¸ªå¯èƒ½çš„选择: + + 1. å°†ä½ çš„ç³»ç»Ÿæ‹†åˆ†æˆåˆ†ç¦»çš„ã€è¾ƒå°çš„ã€ä½¿ç”¨ç‹¬å cpusetçš„æ ¹åŸŸï¼Œå¹¶åœ¨æ¯ä¸ªæ ¹åŸŸå±€éƒ¨ + å¯ç”¨EAS。这个方案的好处是开箱å³ç”¨ï¼Œä½†ç¼ºç‚¹æ˜¯æ— æ³•åœ¨æ ¹åŸŸä¹‹é—´å®žçŽ°è´Ÿè½½å‡è¡¡ï¼Œ + è¿™å¯èƒ½ä¼šå¯¼è‡´æ€»ä½“系统负载ä¸å‡è¡¡ã€‚ + 2. æ交补ä¸ä»¥é™ä½ŽEAS唤醒算法的å¤æ‚度,从而使其能够在åˆç†çš„时间内处ç†æ›´å¤§ + çš„EM。 + + +6.4 - Schedutil监管器 +^^^^^^^^^^^^^^^^^^^^^ + +EAS试图预测CPU在ä¸ä¹…çš„å°†æ¥ä¼šåœ¨å“ªä¸ªOPP下è¿è¡Œï¼Œä»¥ä¼°ç®—它们的能é‡æ¶ˆè€—。为了åšåˆ° +这一点,它å‡å®šCPUçš„OPPè·ŸéšCPU利用率å˜åŒ–而å˜åŒ–。 + +尽管在实践ä¸å¾ˆéš¾å¯¹è¿™ä¸€å‡è®¾çš„准确性æ供硬性ä¿è¯ï¼ˆå› 为,举例æ¥è¯´ç¡¬ä»¶å¯èƒ½ä¸ä¼šåš +它被è¦æ±‚åšçš„事情),相对于其他CPUFreq监管器,schedutil至少_请求_使用利用率 +ä¿¡å·è®¡ç®—çš„é¢‘çŽ‡ã€‚å› æ¤ï¼Œä¸ŽEAS一起使用的唯一åˆç†çš„监管器是schedutilï¼Œå› ä¸ºå®ƒæ˜¯ +唯一一个在频率请求和能é‡é¢„测之间æä¾›æŸç§ç¨‹åº¦çš„一致性的监管器。 + +ä¸æ”¯æŒå°†EAS与schedutil之外的任何其它监管器一起使用。 + + +6.5 刻度ä¸å˜æ€§ä½¿ç”¨çŽ‡ä¿¡å· +^^^^^^^^^^^^^^^^^^^^^^^^ + +为了对ä¸åŒçš„CPU和所有的性能状æ€åšå‡ºå‡†ç¡®çš„预测,EAS需è¦é¢‘率ä¸å˜çš„å’ŒCPUä¸å˜çš„ +PELTä¿¡å·ã€‚这些信å·å¯ä»¥é€šè¿‡æ¯ä¸ªä½“系结构定义的arch_scale{cpu,freq}_capacity() +回调函数获å–。 + +ä¸æ”¯æŒåœ¨æ²¡æœ‰å®žçŽ°è¿™ä¸¤ä¸ªå›žè°ƒå‡½æ•°çš„å¹³å°ä¸Šä½¿ç”¨EAS。 + + +6.6 多线程(SMT) +^^^^^^^^^^^^^^^^^ + +当å‰å®žçŽ°çš„EAS是ä¸æ„ŸçŸ¥SMTçš„ï¼Œå› æ¤æ— 法利用多线程硬件节约能é‡ã€‚EAS认为线程是独立的 +CPU,这实际上对性能和能é‡æ¶ˆè€—都是ä¸åˆ©çš„。 + +ä¸æ”¯æŒåœ¨SMT上使用EAS。 diff --git a/Documentation/translations/zh_CN/scheduler/sched-nice-design.rst b/Documentation/translations/zh_CN/scheduler/sched-nice-design.rst new file mode 100644 index 000000000000..9107f0c0b979 --- /dev/null +++ b/Documentation/translations/zh_CN/scheduler/sched-nice-design.rst @@ -0,0 +1,99 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/scheduler/sched-nice-design.rst + +:翻译: + + å”艺舟 Tang Yizhou <tangyeechou@gmail.com> + +===================== +调度器nice值设计 +===================== + +本文档解释了新的Linux调度器ä¸ä¿®æ”¹å’Œç²¾ç®€åŽçš„nice级别的实现æ€è·¯ã€‚ + +Linuxçš„nice级别总是éžå¸¸è„†å¼±ï¼Œäººä»¬æŒç»ä¸æ–åœ°ç¼ ç€æˆ‘们,让nice +19的任务å 用 +æ›´å°‘çš„CPU时间。 + +ä¸å¹¸çš„是,在旧的调度器ä¸ï¼Œè¿™ä¸æ˜¯é‚£ä¹ˆå®¹æ˜“实现的(å¦åˆ™æˆ‘们早就åšåˆ°äº†ï¼‰ï¼Œå› 为对 +nice级别的支æŒåœ¨åŽ†å²ä¸Šæ˜¯ä¸Žæ—¶é—´ç‰‡é•¿åº¦è€¦åˆçš„,而时间片å•ä½æ˜¯ç”±HZæ»´ç”驱动的, +所以最å°çš„时间片是1/HZ。 + +在O(1)调度器ä¸ï¼ˆ2003年),我们改å˜äº†è´Ÿçš„nice级别,使它们比2.4å†…æ ¸æ›´å¼º +(人们对这一å˜åŒ–很满æ„),而且我们还故æ„æ ¡æ£äº†çº¿æ€§æ—¶é—´ç‰‡å‡†åˆ™ï¼Œä½¿å¾—nice +19 +的级别 _æ£å¥½_ 是1 jiffy。为了让大家更好地ç†è§£å®ƒï¼Œæ—¶é—´ç‰‡çš„å›¾ä¼šæ˜¯è¿™æ ·çš„ï¼ˆè´¨é‡ +ä¸ä½³çš„ASCII艺术æ醒ï¼ï¼‰:: + + + A + \ | [timeslice length] + \ | + \ | + \ | + \ | + \|___100msecs + |^ . _ + | ^ . _ + | ^ . _ + -*----------------------------------*-----> [nice level] + -20 | +19 + | + | + +å› æ¤ï¼Œå¦‚果有人真的想renice任务,相较线性规则,+19ä¼šç»™å‡ºæ›´å¤§çš„æ•ˆæžœï¼ˆæ”¹å˜ +ABIæ¥æ‰©å±•ä¼˜å…ˆçº§çš„解决方案在早期就被放弃了)。 + +è¿™ç§æ–¹æ³•åœ¨ä¸€å®šç¨‹åº¦ä¸Šå¥æ•ˆäº†ä¸€æ®µæ—¶é—´ï¼Œä½†åŽæ¥HZ=1000时,它导致1 jiffy为 +1ms,这æ„味ç€0.1%çš„CPU使用率,我们认为这有点过度。过度 _ä¸æ˜¯_ å› ä¸ºå®ƒè¡¨ç¤º +çš„CPU使用率过å°ï¼Œè€Œæ˜¯å› 为它引å‘了过于频ç¹ï¼ˆæ¯æ¯«ç§’1次)的é‡æ–°è°ƒåº¦ï¼ˆå› æ¤ä¼š +ç ´å缓å˜ï¼Œç‰ç‰ã€‚请记ä½ï¼Œç¡¬ä»¶æ›´å¼±ã€cacheæ›´å°æ˜¯å¾ˆä¹…以å‰çš„事了,当时人们在 +nice +19级别è¿è¡Œæ•°é‡é¢‡å¤šçš„应用程åºï¼‰ã€‚ + +å› æ¤ï¼Œå¯¹äºŽHZ=1000,我们将nice +19改为5æ¯«ç§’ï¼Œå› ä¸ºè¿™æ„Ÿè§‰åƒæ˜¯æ£ç¡®çš„æœ€å° +粒度——这相当于5%çš„CPU利用率。但nice +19çš„æ ¹æœ¬çš„HZæ•æ„Ÿå±žæ€§ä¾ç„¶ä¿æŒä¸å˜ï¼Œ +我们没有收到过关于nice +19在CPU利用率方é¢å¤ª _å¼±_ 的任何抱怨,我们åªæ”¶åˆ° +过它(ä¾ç„¶ï¼‰å¤ª _强_ 的抱怨 :-)。 + +总结一下:我们一直想让niceå„级别一致性更强,但在HZå’Œjiffiesçš„é™åˆ¶ä¸‹ï¼Œä»¥åŠ +nice级别与时间片ã€è°ƒåº¦ç²’度耦åˆæ˜¯ä»¤äººè®¨åŽŒçš„è®¾è®¡ï¼Œè¿™ä¸€ç›®æ ‡å¹¶ä¸çœŸæ£å¯è¡Œã€‚ + +第二个关于Linux nice级别支æŒçš„抱怨是(ä¸é‚£ä¹ˆé¢‘ç¹ï¼Œä½†ä»ç„¶å®šæœŸå‘生),它 +在原点周围的ä¸å¯¹ç§°æ€§ï¼ˆä½ å¯ä»¥åœ¨ä¸Šé¢çš„图片ä¸çœ‹åˆ°ï¼‰ï¼Œæˆ–者更准确地说:事实上 +nice级别的行为å–决于 _ç»å¯¹çš„_ nice级别,而nice应用程åºæŽ¥å£æœ¬èº«ä»Žæ ¹æœ¬ä¸Š +说是“相对â€çš„: + + int nice(int inc); + + asmlinkage long sys_nice(int increment) + +(第一个是glibc的应用程åºæŽ¥å£ï¼Œç¬¬äºŒä¸ªæ˜¯syscall的应用程åºæŽ¥å£ï¼‰ +注æ„,“incâ€æ˜¯ç›¸å¯¹å½“å‰nice级别而言的,类似bash的“niceâ€å‘½ä»¤ç‰å·¥å…·æ˜¯è¿™ä¸ª +相对性应用程åºæŽ¥å£çš„é•œåƒã€‚ + +在旧的调度器ä¸ï¼Œä¸¾ä¾‹æ¥è¯´ï¼Œå¦‚æžœä½ ä»¥nice +1å¯åŠ¨ä¸€ä¸ªä»»åŠ¡ï¼Œå¹¶ä»¥nice +2å¯åŠ¨ +å¦ä¸€ä¸ªä»»åŠ¡ï¼Œè¿™ä¸¤ä¸ªä»»åŠ¡çš„CPU分é…å°†å–决于父外壳程åºçš„nice级别——如果它是 +nice -10,那么CPU的分é…ä¸åŒäºŽ+5或+10。 + +第三个关于Linux nice级别支æŒçš„抱怨是,负数nice级别“ä¸å¤Ÿæœ‰åŠ›â€ï¼Œä»¥å¾ˆå¤šäºº +ä¸å¾—ä¸è¯‰è¯¸äºŽå®žæ—¶è°ƒåº¦ä¼˜å…ˆçº§æ¥è¿è¡ŒéŸ³é¢‘(和其它多媒体)应用程åºï¼Œæ¯”如 +SCHED_FIFOã€‚ä½†è¿™ä¹Ÿé€ æˆäº†å…¶å®ƒé—®é¢˜ï¼šSCHED_FIFO未被è¯æ˜Žæ˜¯å…于饥饿的,一个 +有问题的SCHED_FIFO应用程åºä¹Ÿä¼šé”ä½è¿è¡Œè‰¯å¥½çš„系统。 + +v2.6.23ç‰ˆå†…æ ¸çš„æ–°è°ƒåº¦å™¨è§£å†³äº†è¿™ä¸‰ç§ç±»åž‹çš„抱怨: + +为了解决第一个抱怨(nice级别ä¸å¤Ÿâ€œæœ‰åŠ›â€ï¼‰ï¼Œè°ƒåº¦å™¨ä¸Žâ€œæ—¶é—´ç‰‡â€ã€HZ的概念 +解耦(调度粒度被处ç†æˆä¸€ä¸ªå’Œniceçº§åˆ«ç‹¬ç«‹çš„æ¦‚å¿µï¼‰ï¼Œå› æ¤æœ‰å¯èƒ½å®žçŽ°æ›´å¥½ã€ +更一致的nice +19支æŒï¼šåœ¨æ–°çš„调度器ä¸ï¼Œnice +19的任务得到一个HZæ— å…³çš„ +1.5%CPU使用率,而ä¸æ˜¯æ—§ç‰ˆè°ƒåº¦å™¨ä¸3%-5%-9%çš„å¯å˜èŒƒå›´ã€‚ + +为了解决第二个抱怨(niceå„级别ä¸ä¸€è‡´ï¼‰ï¼Œæ–°è°ƒåº¦å™¨ä»¤è°ƒç”¨nice(1)对å„任务的 +CPU利用率有相åŒçš„å½±å“ï¼Œæ— è®ºå…¶ç»å¯¹nice级别如何。所以在新调度器上,è¿è¡Œä¸€ä¸ª +nice +10和一个nice +11的任务会与è¿è¡Œä¸€ä¸ªnice -5和一个nice -4的任务的 +CPU利用率分割是相åŒçš„(一个会得到55%çš„CPU,å¦ä¸€ä¸ªä¼šå¾—到45%)。这是为什么 +nice级别被改为“乘法â€ï¼ˆæˆ–æŒ‡æ•°ï¼‰â€”â€”è¿™æ ·çš„è¯ï¼Œä¸ç®¡ä½ 从哪个级别开始,“相对†+ç»“æžœå°†æ€»æ˜¯ä¸€æ ·çš„ã€‚ + +第三个抱怨(负数nice级别ä¸å¤Ÿâ€œæœ‰åŠ›â€ï¼Œå¹¶è¿«ä½¿éŸ³é¢‘应用程åºåœ¨æ›´å±é™©çš„ +SCHED_FIFO调度ç–略下è¿è¡Œï¼‰å‡ 乎被新的调度器自动解决了:更强的负数级别 +具有é‡æ–°æ ¡æ£nice级别动æ€èŒƒå›´çš„自动化副作用。 diff --git a/Documentation/translations/zh_CN/scheduler/sched-stats.rst b/Documentation/translations/zh_CN/scheduler/sched-stats.rst new file mode 100644 index 000000000000..1c68c3d1c283 --- /dev/null +++ b/Documentation/translations/zh_CN/scheduler/sched-stats.rst @@ -0,0 +1,156 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/scheduler/sched-stats.rst + +:翻译: + + å”艺舟 Tang Yizhou <tangyeechou@gmail.com> + +============== +è°ƒåº¦å™¨ç»Ÿè®¡æ•°æ® +============== + +第15版schedstats去掉了sched_yield的一些计数器:yld_exp_empty,yld_act_empty +å’Œyld_both_empty。在其它方é¢å’Œç¬¬14版完全相åŒã€‚ + +第14版schedstats包括对sched_domains(译注:调度域)的支æŒï¼Œè¯¥ç‰¹æ€§è¿›å…¥å†…æ ¸ +主线2.6.20,ä¸è¿‡è¿™ä¸€ç‰ˆschedstats与2.6.13-2.6.19å†…æ ¸çš„ç‰ˆæœ¬12的统计数æ®æ˜¯å®Œå…¨ +相åŒçš„ï¼ˆå†…æ ¸æœªå‘布第13版)。有些计数器按æ¯ä¸ªè¿è¡Œé˜Ÿåˆ—统计是更有æ„义的,其它则 +按æ¯ä¸ªè°ƒåº¦åŸŸç»Ÿè®¡æ˜¯æ›´æœ‰æ„义的。注æ„,调度域(以åŠå®ƒä»¬çš„附属信æ¯ï¼‰ä»…åœ¨å¼€å¯ +CONFIG_SMP的机器上是相关的和å¯ç”¨çš„。 + +在第14版schedstatä¸ï¼Œæ¯ä¸ªè¢«åˆ—出的CPU至少会有一级域统计数æ®ï¼Œä¸”很å¯èƒ½æœ‰ä¸€ä¸ª +以上的域。在这个实现ä¸ï¼ŒåŸŸæ²¡æœ‰ç‰¹åˆ«çš„åå—,但是编å·æœ€é«˜çš„域通常在机器上所有的 +CPU上仲è£å¹³è¡¡ï¼Œè€Œdomain0是最紧密èšç„¦çš„域,有时仅在一对CPU之间进行平衡。æ¤æ—¶ï¼Œ +没有任何体系结构需è¦3层以上的域。域统计数æ®ä¸çš„第一个å—段是一个ä½å›¾ï¼Œè¡¨æ˜Žå“ªäº› +CPUå—该域的影å“。 + +这些å—段是计数器,而且åªèƒ½é€’增。使用这些å—段的程åºå°†éœ€è¦ä»ŽåŸºçº¿è§‚测开始,然åŽåœ¨ +åŽç»æ¯ä¸€ä¸ªè§‚测ä¸è®¡ç®—出计数器的å˜åŒ–。一个能以这ç§æ–¹å¼å¤„ç†å…¶ä¸å¾ˆå¤šå—段的perl脚本 +å¯è§ + + http://eaglet.pdxhosts.com/rick/linux/schedstat/ + +请注æ„ï¼Œä»»ä½•è¿™æ ·çš„è„šæœ¬éƒ½å¿…é¡»æ˜¯ç‰¹å®šäºŽç‰ˆæœ¬çš„ï¼Œæ”¹å˜ç‰ˆæœ¬çš„主è¦åŽŸå› æ˜¯è¾“å‡ºæ ¼å¼çš„å˜åŒ–。 +对于那些希望编写自己的脚本的人,å¯ä»¥å‚考这里æè¿°çš„å„个å—段。 + +CPUç»Ÿè®¡æ•°æ® +----------- +cpu<N> 1 2 3 4 5 6 7 8 9 + +第一个å—段是sched_yield()的统计数æ®ï¼š + + 1) sched_yield()被调用了#次 + +接下æ¥çš„三个是schedule()的统计数æ®ï¼š + + 2) 这个å—段是一个过时的数组过期计数,在O(1)调度器ä¸ä½¿ç”¨ã€‚为了ABI兼容性, + 我们ä¿ç•™äº†å®ƒï¼Œä½†å®ƒæ€»æ˜¯è¢«è®¾ç½®ä¸º0。 + 3) schedule()被调用了#次 + 4) 调用schedule()导致处ç†å™¨å˜ä¸ºç©ºé—²äº†#次 + +接下æ¥çš„两个是try_to_wake_up()的统计数æ®ï¼š + + 5) try_to_wake_up()被调用了#次 + 6) 调用try_to_wake_up()导致本地CPU被唤醒了#次 + +接下æ¥çš„三个统计数æ®æ述了调度延迟: + + 7) 本处ç†å™¨è¿è¡Œä»»åŠ¡çš„总时间,å•ä½æ˜¯jiffies + 8) 本处ç†å™¨ä»»åŠ¡ç‰å¾…è¿è¡Œçš„时间,å•ä½æ˜¯jiffies + 9) 本CPUè¿è¡Œäº†#个时间片 + +åŸŸç»Ÿè®¡æ•°æ® +---------- + +对于æ¯ä¸ªè¢«æè¿°çš„CPU,和它相关的æ¯ä¸€ä¸ªè°ƒåº¦åŸŸå‡ä¼šäº§ç”Ÿä¸‹é¢ä¸€è¡Œæ•°æ®ï¼ˆæ³¨æ„,如果 +CONFIG_SMP没有被定义,那么*没有*调度域被使用,这些行ä¸ä¼šå‡ºçŽ°åœ¨è¾“出ä¸ï¼‰ã€‚ + +domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 + +第一个å—段是一个ä½æŽ©ç ,表明该域在æ“作哪些CPU。 + +接下æ¥çš„24个å—段是load_balance()函数的å„个统计数æ®ï¼ŒæŒ‰ç©ºé—²ç±»åž‹åˆ†ç»„(空闲, +ç¹å¿™ï¼Œæ–°ç©ºé—²ï¼‰ï¼š + + + 1) 当CPU空闲时,load_balance()在这个调度域ä¸è¢«è°ƒç”¨äº†#次 + 2) 当CPU空闲时,load_balance()在这个调度域ä¸è¢«è°ƒç”¨ï¼Œä½†æ˜¯å‘çŽ°è´Ÿè½½æ— éœ€ + å‡è¡¡#次 + 3) 当CPU空闲时,load_balance()在这个调度域ä¸è¢«è°ƒç”¨ï¼Œè¯•å›¾è¿ç§»1个或更多 + 任务且失败了#次 + 4) 当CPU空闲时,load_balance()在这个调度域ä¸è¢«è°ƒç”¨ï¼Œå‘现ä¸å‡è¡¡ï¼ˆå¦‚果有) + #次 + 5) 当CPU空闲时,pull_task()在这个调度域ä¸è¢«è°ƒç”¨#次 + 6) 当CPUç©ºé—²æ—¶ï¼Œå°½ç®¡ç›®æ ‡ä»»åŠ¡æ˜¯çƒç¼“å˜çŠ¶æ€ï¼Œpull_task()ä¾ç„¶è¢«è°ƒç”¨#次 + 7) 当CPU空闲时,load_balance()在这个调度域ä¸è¢«è°ƒç”¨ï¼Œæœªèƒ½æ‰¾åˆ°æ›´ç¹å¿™çš„ + 队列#次 + 8) 当CPU空闲时,在调度域ä¸æ‰¾åˆ°äº†æ›´ç¹å¿™çš„队列,但未找到更ç¹å¿™çš„调度组 + #次 + 9) 当CPUç¹å¿™æ—¶ï¼Œload_balance()在这个调度域ä¸è¢«è°ƒç”¨äº†#次 + 10) 当CPUç¹å¿™æ—¶ï¼Œload_balance()在这个调度域ä¸è¢«è°ƒç”¨ï¼Œä½†æ˜¯å‘çŽ°è´Ÿè½½æ— éœ€ + å‡è¡¡#次 + 11) 当CPUç¹å¿™æ—¶ï¼Œload_balance()在这个调度域ä¸è¢«è°ƒç”¨ï¼Œè¯•å›¾è¿ç§»1个或更多 + 任务且失败了#次 + 12) 当CPUç¹å¿™æ—¶ï¼Œload_balance()在这个调度域ä¸è¢«è°ƒç”¨ï¼Œå‘现ä¸å‡è¡¡ï¼ˆå¦‚果有) + #次 + 13) 当CPUç¹å¿™æ—¶ï¼Œpull_task()在这个调度域ä¸è¢«è°ƒç”¨#次 + 14) 当CPUç¹å¿™æ—¶ï¼Œå°½ç®¡ç›®æ ‡ä»»åŠ¡æ˜¯çƒç¼“å˜çŠ¶æ€ï¼Œpull_task()ä¾ç„¶è¢«è°ƒç”¨#次 + 15) 当CPUç¹å¿™æ—¶ï¼Œload_balance()在这个调度域ä¸è¢«è°ƒç”¨ï¼Œæœªèƒ½æ‰¾åˆ°æ›´ç¹å¿™çš„ + 队列#次 + 16) 当CPUç¹å¿™æ—¶ï¼Œåœ¨è°ƒåº¦åŸŸä¸æ‰¾åˆ°äº†æ›´ç¹å¿™çš„队列,但未找到更ç¹å¿™çš„调度组 + #次 + 17) 当CPU新空闲时,load_balance()在这个调度域ä¸è¢«è°ƒç”¨äº†#次 + 18) 当CPU新空闲时,load_balance()在这个调度域ä¸è¢«è°ƒç”¨ï¼Œä½†æ˜¯å‘çŽ°è´Ÿè½½æ— éœ€ + å‡è¡¡#次 + 19) 当CPU新空闲时,load_balance()在这个调度域ä¸è¢«è°ƒç”¨ï¼Œè¯•å›¾è¿ç§»1个或更多 + 任务且失败了#次 + 20) 当CPU新空闲时,load_balance()在这个调度域ä¸è¢«è°ƒç”¨ï¼Œå‘现ä¸å‡è¡¡ï¼ˆå¦‚果有) + #次 + 21) 当CPU新空闲时,pull_task()在这个调度域ä¸è¢«è°ƒç”¨#次 + 22) 当CPUæ–°ç©ºé—²æ—¶ï¼Œå°½ç®¡ç›®æ ‡ä»»åŠ¡æ˜¯çƒç¼“å˜çŠ¶æ€ï¼Œpull_task()ä¾ç„¶è¢«è°ƒç”¨#次 + 23) 当CPU新空闲时,load_balance()在这个调度域ä¸è¢«è°ƒç”¨ï¼Œæœªèƒ½æ‰¾åˆ°æ›´ç¹å¿™çš„ + 队列#次 + 24) 当CPU新空闲时,在调度域ä¸æ‰¾åˆ°äº†æ›´ç¹å¿™çš„队列,但未找到更ç¹å¿™çš„调度组 + #次 + +接下æ¥çš„3个å—段是active_load_balance()函数的å„个统计数æ®ï¼š + + 25) active_load_balance()被调用了#次 + 26) active_load_balance()被调用,试图è¿ç§»1个或更多任务且失败了#次 + 27) active_load_balance()被调用,æˆåŠŸè¿ç§»äº†#次任务 + +接下æ¥çš„3个å—段是sched_balance_exec()函数的å„个统计数æ®ï¼š + + 28) sbe_cntä¸å†è¢«ä½¿ç”¨ + 29) sbe_balancedä¸å†è¢«ä½¿ç”¨ + 30) sbe_pushedä¸å†è¢«ä½¿ç”¨ + +接下æ¥çš„3个å—段是sched_balance_fork()函数的å„个统计数æ®ï¼š + + 31) sbf_cntä¸å†è¢«ä½¿ç”¨ + 32) sbf_balancedä¸å†è¢«ä½¿ç”¨ + 33) sbf_pushedä¸å†è¢«ä½¿ç”¨ + +接下æ¥çš„3个å—段是try_to_wake_up()函数的å„个统计数æ®ï¼š + + 34) 在这个调度域ä¸è°ƒç”¨try_to_wake_up()唤醒任务时,任务在调度域ä¸ä¸€ä¸ª + 和上次è¿è¡Œä¸åŒçš„æ–°CPU上è¿è¡Œäº†#次 + 35) 在这个调度域ä¸è°ƒç”¨try_to_wake_up()唤醒任务时,任务被è¿ç§»åˆ°å‘生唤醒 + çš„CPU次数为#ï¼Œå› ä¸ºè¯¥ä»»åŠ¡åœ¨åŽŸCPU是冷缓å˜çŠ¶æ€ + 36) 在这个调度域ä¸è°ƒç”¨try_to_wake_up()唤醒任务时,引å‘被动负载å‡è¡¡#次 + +/proc/<pid>/schedstat +--------------------- +schedstatsè¿˜æ·»åŠ äº†ä¸€ä¸ªæ–°çš„/proc/<pid>/schedstat文件,æ¥æ供一些进程级的 +相åŒä¿¡æ¯ã€‚这个文件ä¸ï¼Œæœ‰ä¸‰ä¸ªå—段与该进程相关: + + 1) 在CPU上è¿è¡ŒèŠ±è´¹çš„时间 + 2) 在è¿è¡Œé˜Ÿåˆ—上ç‰å¾…的时间 + 3) 在CPU上è¿è¡Œäº†#个时间片 + +å¯ä»¥å¾ˆå®¹æ˜“地编写一个程åºï¼Œåˆ©ç”¨è¿™äº›é¢å¤–çš„å—段æ¥æŠ¥å‘Šä¸€ä¸ªç‰¹å®šçš„进程或一组进程在 +调度器ç–ç•¥ä¸‹çš„è¡¨çŽ°å¦‚ä½•ã€‚è¿™æ ·çš„ç¨‹åºçš„一个简å•ç‰ˆæœ¬å¯åœ¨ä¸‹é¢çš„链接找到 + + http://eaglet.pdxhosts.com/rick/linux/schedstat/v12/latency.c diff --git a/Documentation/translations/zh_CN/vm/active_mm.rst b/Documentation/translations/zh_CN/vm/active_mm.rst new file mode 100644 index 000000000000..366609ea4f37 --- /dev/null +++ b/Documentation/translations/zh_CN/vm/active_mm.rst @@ -0,0 +1,85 @@ +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/vm/active_mm.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + + +========= +Active MM +========= + +这是一å°linux之父回å¤å¼€å‘者的一å°é‚®ä»¶ï¼Œæ‰€ä»¥ç¿»è¯‘时我尽é‡ä¿æŒé‚®ä»¶æ ¼å¼çš„完整。 + +:: + + List: linux-kernel + Subject: Re: active_mm + From: Linus Torvalds <torvalds () transmeta ! com> + Date: 1999-07-30 21:36:24 + + å› ä¸ºæˆ‘å¹¶ä¸ç»å¸¸å†™è§£é‡Šï¼Œæ‰€ä»¥å·²ç»æŠ„é€åˆ°linux-kernel邮件列表,而当我åšè¿™äº›ï¼Œ + 且更多的人在阅读它们时,我觉得棒æžäº†ã€‚ + + 1999å¹´7月30æ—¥ 星期五, David Mosberger 写é“: + > + > 是å¦æœ‰ä¸€ä¸ªç®€çŸçš„æ述,说明task_structä¸çš„ + > "mm" å’Œ "active_mm"应该如何使用? (如果 + > 这个问题在邮件列表ä¸è®¨è®ºè¿‡ï¼Œæˆ‘表示æ‰æ„--我刚 + > 刚度å‡å›žæ¥ï¼Œæœ‰ä¸€æ®µæ—¶é—´æ²¡èƒ½å…³æ³¨linux-kernel了)。 + + 基本上,新的设定是: + + - 我们有“真实地å€ç©ºé—´â€å’Œâ€œåŒ¿å地å€ç©ºé—´â€ã€‚区别在于,匿å地å€ç©ºé—´æ ¹æœ¬ä¸å…³å¿ƒç”¨ + 户级页表,所以当我们åšä¸Šä¸‹æ–‡åˆ‡æ¢åˆ°åŒ¿å地å€ç©ºé—´æ—¶ï¼Œæˆ‘们åªæ˜¯è®©ä»¥å‰çš„地å€ç©ºé—´ + 处于活动状æ€ã€‚ + + 一个“匿å地å€ç©ºé—´â€çš„明显用途是任何ä¸éœ€è¦ä»»ä½•ç”¨æˆ·æ˜ 射的线程--æ‰€æœ‰çš„å†…æ ¸çº¿ + 程基本上都属于这一类,但å³ä½¿æ˜¯â€œçœŸæ£çš„â€çº¿ç¨‹ä¹Ÿå¯ä»¥æš‚æ—¶è¯´åœ¨ä¸€å®šæ—¶é—´å†…å®ƒä»¬ä¸ + 会对用户空间感兴趣,调度器ä¸å¦¨è¯•ç€é¿å…在切æ¢VM状æ€ä¸Šæµªè´¹æ—¶é—´ã€‚ç›®å‰åªæœ‰è€ + å¼çš„bdflush sync能åšåˆ°è¿™ä¸€ç‚¹ã€‚ + + - “tsk->mmâ€ æŒ‡å‘ â€œçœŸå®žåœ°å€ç©ºé—´â€ã€‚对于一个匿å进程æ¥è¯´ï¼Œtsk->mm将是NULL, + å…¶é€»è¾‘åŽŸå› æ˜¯åŒ¿åè¿›ç¨‹å®žé™…ä¸Šæ ¹æœ¬å°± “没有†真æ£çš„地å€ç©ºé—´ã€‚ + + - 然而,我们显然需è¦è·Ÿè¸ªæˆ‘ä»¬ä¸ºè¿™æ ·çš„åŒ¿å用户“å·ç”¨â€äº†å“ªä¸ªåœ°å€ç©ºé—´ã€‚为æ¤ï¼Œæˆ‘们 + 有 “tsk->active_mmâ€ï¼Œå®ƒæ˜¾ç¤ºäº†å½“å‰æ´»åŠ¨çš„地å€ç©ºé—´æ˜¯ä»€ä¹ˆã€‚ + + 规则是,对于一个有真实地å€ç©ºé—´çš„进程(å³tsk->mm是 non-NULL),active_mm + 显然必须与真实的mm相åŒã€‚ + + 对于一个匿å进程,tsk->mm == NULL,而tsk->active_mm是匿å进程è¿è¡Œæ—¶ + “借用â€çš„mm。当匿å进程被调度走时,借用的地å€ç©ºé—´è¢«è¿”回并清除。 + + 为了支æŒæ‰€æœ‰è¿™äº›ï¼Œâ€œstruct mm_structâ€çŽ°åœ¨æœ‰ä¸¤ä¸ªè®¡æ•°å™¨ï¼šä¸€ä¸ªæ˜¯ “mm_users†+ 计数器,å³æœ‰å¤šå°‘ “真æ£çš„地å€ç©ºé—´ç”¨æˆ·â€ï¼Œå¦ä¸€ä¸ªæ˜¯ “mm_countâ€è®¡æ•°å™¨ï¼Œå³ “lazy†+ 用户(å³åŒ¿å用户)的数é‡ï¼Œå¦‚果有任何真æ£çš„ç”¨æˆ·ï¼Œåˆ™åŠ 1。 + + 通常情况下,至少有一个真æ£çš„用户,但也å¯èƒ½æ˜¯çœŸæ£çš„用户在å¦ä¸€ä¸ªCPU上退出,而 + 一个lazy的用户ä»åœ¨æ´»åŠ¨ï¼Œæ‰€ä»¥ä½ å®žé™…ä¸Šå¾—åˆ°çš„æƒ…å†µæ˜¯ï¼Œä½ æœ‰ä¸€ä¸ªåœ°å€ç©ºé—´ **åª** + 被lazy的用户使用。这通常是一个çŸæš‚的生命周期状æ€ï¼Œå› 为一旦这个线程被安排给一 + 个真æ£çš„线程,这个 “僵尸†mmå°±ä¼šè¢«é‡Šæ”¾ï¼Œå› ä¸º “mm_countâ€å˜æˆäº†é›¶ã€‚ + + å¦å¤–,一个新的规则是,**没有人** å†æŠŠ “init_mm†作为一个真æ£çš„MM了。 + “init_mmâ€åº”该被认为åªæ˜¯ä¸€ä¸ª “没有其他上下文时的lazy上下文â€ï¼Œäº‹å®žä¸Šï¼Œå®ƒä¸» + è¦æ˜¯åœ¨å¯åŠ¨æ—¶ä½¿ç”¨ï¼Œå½“时还没有真æ£çš„VMè¢«åˆ›å»ºã€‚å› æ¤ï¼Œç”¨æ¥æ£€æŸ¥çš„代ç + + if (current->mm == &init_mm) + + 一般æ¥è¯´ï¼Œåº”该用 + + if (!current->mm) + + å–代上é¢çš„写法(这更有æ„义--测试基本上是 “我们是å¦æœ‰ä¸€ä¸ªç”¨æˆ·çŽ¯å¢ƒâ€ï¼Œå¹¶ä¸”通常 + 由缺页异常处ç†ç¨‹åºå’Œç±»ä¼¼çš„东西æ¥å®Œæˆï¼‰ã€‚ + + 总之,我刚æ‰åœ¨ftp.kernel.org上放了一个pre-patch-2.3.13-1ï¼Œå› ä¸ºå®ƒç¨å¾®æ”¹ + å˜äº†æŽ¥å£ä»¥é€‚é…alpha(è°ä¼šæƒ³åˆ°å‘¢ï¼Œä½†alpha体系结构上下文切æ¢ä»£ç 实际上最终是 + 最丑陋的之一--ä¸åƒå…¶ä»–架构的MM和寄å˜å™¨çŠ¶æ€æ˜¯åˆ†å¼€çš„,alphaçš„PALcode将两者 + 连接起æ¥ï¼Œä½ 需è¦åŒæ—¶åˆ‡æ¢ä¸¤è€…)。 + + (文档æ¥æº http://marc.info/?l=linux-kernel&m=93337278602211&w=2) diff --git a/Documentation/translations/zh_CN/vm/balance.rst b/Documentation/translations/zh_CN/vm/balance.rst new file mode 100644 index 000000000000..e98a47ef24a8 --- /dev/null +++ b/Documentation/translations/zh_CN/vm/balance.rst @@ -0,0 +1,81 @@ +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/vm/balance.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + + +======== +内å˜å¹³è¡¡ +======== + +2000å¹´1月开始,作者:Kanoj Sarcar <kanoj@sgi.com> + +对于 !__GFP_HIGH å’Œ !__GFP_KSWAPD_RECLAIM 以åŠéž __GFP_IO 的分é…,需è¦è¿›è¡Œ +内å˜å¹³è¡¡ã€‚ + +调用者é¿å…å›žæ”¶çš„ç¬¬ä¸€ä¸ªåŽŸå› æ˜¯è°ƒç”¨è€…ç”±äºŽæŒæœ‰è‡ªæ—‹é”或处于ä¸æ–环境ä¸è€Œæ— 法ç¡çœ 。第二个 +åŽŸå› å¯èƒ½æ˜¯ï¼Œè°ƒç”¨è€…æ„¿æ„在ä¸äº§ç”Ÿé¡µé¢å›žæ”¶å¼€é”€çš„情况下分é…失败。这å¯èƒ½å‘生在有0阶回退 +选项的机会主义高阶分é…请求ä¸ã€‚在这ç§æƒ…况下,调用者å¯èƒ½ä¹Ÿå¸Œæœ›é¿å…唤醒kswapd。 + +__GFP_IO分é…请求是为了防æ¢æ–‡ä»¶ç³»ç»Ÿæ»é”。 + +在没有éžç¡çœ 分é…请求的情况下,åšå¹³è¡¡ä¼¼ä¹Žæ˜¯æœ‰å®³çš„。页é¢å›žæ”¶å¯ä»¥è¢«æ‡’散地å¯åŠ¨ï¼Œä¹Ÿå°±æ˜¯ +说,åªæœ‰åœ¨éœ€è¦çš„时候(也就是区域的空闲内å˜ä¸º0),而ä¸æ˜¯è®©å®ƒæˆä¸ºä¸€ä¸ªä¸»åŠ¨çš„过程。 + +ä¹Ÿå°±æ˜¯è¯´ï¼Œå†…æ ¸åº”è¯¥å°è¯•ä»Žç›´æŽ¥æ˜ å°„æ± ä¸æ»¡è¶³å¯¹ç›´æŽ¥æ˜ 射页的请求,而ä¸æ˜¯å›žé€€åˆ°dmaæ± ä¸ï¼Œ +è¿™æ ·å°±å¯ä»¥ä¿æŒdmaæ± ä¸ºdma请求(ä¸ç®¡æ˜¯ä¸æ˜¯åŽŸåçš„ï¼‰æ‰€å¡«å……ã€‚ç±»ä¼¼çš„äº‰è®ºä¹Ÿé€‚ç”¨äºŽé«˜å†…å˜ +å’Œç›´æŽ¥æ˜ å°„çš„é¡µé¢ã€‚相å,如果有很多空闲的dma页,最好是通过从dmaæ± ä¸åˆ†é…一个æ¥æ»¡è¶³ +常规的内å˜è¯·æ±‚,而ä¸æ˜¯äº§ç”Ÿå¸¸è§„区域平衡的开销。 + +在2.2ä¸ï¼Œåªæœ‰å½“空闲页总数低于总内å˜çš„1/64时,æ‰ä¼šå¯åŠ¨å†…å˜å¹³è¡¡/页é¢å›žæ”¶ã€‚如果dma +和常规内å˜çš„比例åˆé€‚,å³ä½¿dma区完全空了,也很å¯èƒ½ä¸ä¼šè¿›è¡Œå¹³è¡¡ã€‚2.2å·²ç»åœ¨ä¸åŒå†…å˜ +大å°çš„生产机器上è¿è¡Œï¼Œå³ä½¿æœ‰è¿™ä¸ªé—®é¢˜å˜åœ¨ï¼Œä¼¼ä¹Žä¹Ÿåšå¾—ä¸é”™ã€‚在2.3ä¸ï¼Œç”±äºŽHIGHMEMçš„ +å˜åœ¨ï¼Œè¿™ä¸ªé—®é¢˜å˜å¾—æ›´åŠ ä¸¥é‡ã€‚ + +在2.3ä¸ï¼ŒåŒºåŸŸå¹³è¡¡å¯ä»¥ç”¨ä¸¤ç§æ–¹å¼ä¹‹ä¸€æ¥å®Œæˆï¼šæ ¹æ®åŒºåŸŸçš„大å°ï¼ˆå¯èƒ½æ˜¯ä½Žçº§åŒºåŸŸçš„大å°ï¼‰ï¼Œ +我们å¯ä»¥åœ¨åˆå§‹åŒ–阶段决定在平衡任何区域时应该争å–多少空闲页。好的方é¢æ˜¯ï¼Œåœ¨å¹³è¡¡çš„æ—¶ +候,我们ä¸éœ€è¦çœ‹ä½Žçº§åŒºçš„大å°ï¼Œåçš„æ–¹é¢æ˜¯ï¼Œæˆ‘们å¯èƒ½ä¼šå› 为忽略低级区å¯èƒ½è¾ƒä½Žçš„使用率 +而åšè¿‡äºŽé¢‘ç¹çš„平衡。å¦å¤–,åªè¦å¯¹åˆ†é…程åºç¨ä½œä¿®æ”¹ï¼Œå°±æœ‰å¯èƒ½å°†memclass()å®ç®€åŒ–为一 +个简å•çš„ç‰å¼ã€‚ + +å¦ä¸€ä¸ªå¯èƒ½çš„解决方案是,我们åªåœ¨ä¸€ä¸ªåŒº **å’Œ** 其所有低级区的空闲内å˜ä½ŽäºŽè¯¥åŒºåŠå…¶ +低级区总内å˜çš„1/64时进行平衡。这就解决了2.2的平衡问题,并尽å¯èƒ½åœ°ä¿æŒäº†ä¸Ž2.2行为 +的接近。å¦å¤–,平衡算法在å„ç§æž¶æž„上的工作方å¼ä¹Ÿæ˜¯ä¸€æ ·çš„,这些架构有ä¸åŒæ•°é‡å’Œç±»åž‹çš„ +内å˜åŒºã€‚如果我们想å˜å¾—更花哨一点,我们å¯ä»¥åœ¨æœªæ¥ä¸ºä¸åŒåŒºåŸŸçš„自由页é¢åˆ†é…ä¸åŒçš„æƒé‡ã€‚ + +请注æ„,如果普通区的大å°ä¸Ždma区相比是巨大的,那么在决定是å¦å¹³è¡¡æ™®é€šåŒºçš„时候,考虑 +空闲的dma页就å˜å¾—ä¸é‚£ä¹ˆé‡è¦äº†ã€‚那么第一个解决方案就å˜å¾—更有å¸å¼•åŠ›ã€‚ + +所附的补ä¸å®žçŽ°äº†ç¬¬äºŒä¸ªè§£å†³æ–¹æ¡ˆã€‚它还 “修å¤â€äº†ä¸¤ä¸ªé—®é¢˜ï¼šé¦–先,在低内å˜æ¡ä»¶ä¸‹ï¼Œkswapd +被唤醒,就åƒ2.2ä¸çš„éžç¡çœ 分é…。第二,HIGHMEM区也被平衡了,以便给replace_with_highmem() +一个争å–获得HIGHMEM页的机会,åŒæ—¶ç¡®ä¿HIGHMEM分é…ä¸ä¼šè½å›žæ™®é€šåŒºã€‚这也确ä¿äº†HIGHMEM +页ä¸ä¼šè¢«æ³„露(例如,在一个HIGHMEM页在交æ¢ç¼“å˜ä¸ä½†æ²¡æœ‰è¢«ä»»ä½•äººä½¿ç”¨çš„情况下)。 + +kswapd还需è¦çŸ¥é“它应该平衡哪些区。kswapd主è¦æ˜¯åœ¨æ— 法进行平衡的情况下需è¦çš„,å¯èƒ½ +æ˜¯å› ä¸ºæ‰€æœ‰çš„åˆ†é…请求都æ¥è‡ªä¸æ–上下文,而所有的进程上下文都在ç¡çœ 。对于2.3, +kswapd并ä¸çœŸæ£éœ€è¦å¹³è¡¡é«˜å†…å˜åŒºï¼Œå› 为ä¸æ–上下文并ä¸è¯·æ±‚高内å˜é¡µã€‚kswapd看zone +结构体ä¸çš„zone_wake_kswapdå—段æ¥å†³å®šä¸€ä¸ªåŒºæ˜¯å¦éœ€è¦å¹³è¡¡ã€‚ + +如果从进程内å˜å’Œshmä¸å·å–页é¢å¯ä»¥å‡è½»è¯¥é¡µé¢èŠ‚点ä¸ä»»ä½•åŒºçš„内å˜åŽ‹åŠ›ï¼Œè€Œè¯¥åŒºçš„内å˜åŽ‹åŠ› +å·²ç»ä½ŽäºŽå…¶æ°´ä½ï¼Œåˆ™ä¼šè¿›è¡Œå·å–。 + +watemark[WMARK_MIN/WMARK_LOW/WMARK_HIGH]/low_on_memory/zone_wake_kswapd: +这些是æ¯ä¸ªåŒºçš„å—段,用于确定一个区何时需è¦å¹³è¡¡ã€‚当页é¢æ•°ä½ŽäºŽæ°´ä½[WMARK_MIN]时, +hysteric çš„å—段low_on_memory被设置。这个å—段会一直被设置,直到空闲页数å˜æˆæ°´ä½ +[WMARK_HIGH]。当low_on_memory被设置时,页é¢åˆ†é…请求将å°è¯•é‡Šæ”¾è¯¥åŒºåŸŸçš„一些页é¢ï¼ˆå¦‚æžœ +请求ä¸è®¾ç½®äº†GFP_WAIT)。与æ¤ç›¸å的是,决定唤醒kswapd以释放一些区的页。这个决定ä¸æ˜¯åŸºäºŽ +hysteresis 的,而是当空闲页的数é‡ä½ŽäºŽwatermark[WMARK_LOW]时就会进行;在这ç§æƒ…况下, +zone_wake_kswapd也被设置。 + + +我所å¬åˆ°çš„(超棒的)想法: + +1. 动æ€ç»åŽ†åº”该影å“平衡:å¯ä»¥è·Ÿè¸ªä¸€ä¸ªåŒºçš„失败请求的数é‡ï¼Œå¹¶å馈到平衡方案ä¸ï¼ˆjalvo@mbay.net)。 + +2. 实现一个类似于replace_with_highmem()çš„replace_with_regular(),以ä¿ç•™dma页é¢ã€‚ + (lkd@tantalophile.demon.co.uk) diff --git a/Documentation/translations/zh_CN/vm/damon/api.rst b/Documentation/translations/zh_CN/vm/damon/api.rst new file mode 100644 index 000000000000..21143eea4ebe --- /dev/null +++ b/Documentation/translations/zh_CN/vm/damon/api.rst @@ -0,0 +1,32 @@ +.. SPDX-License-Identifier: GPL-2.0 + +:Original: Documentation/vm/damon/api.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + + +======= +APIå‚考 +======= + +å†…æ ¸ç©ºé—´çš„ç¨‹åºå¯ä»¥ä½¿ç”¨ä¸‹é¢çš„APIæ¥ä½¿ç”¨DAMONçš„æ¯ä¸ªåŠŸèƒ½ã€‚ä½ æ‰€éœ€è¦åšçš„就是引用 ``damon.h`` , +它ä½äºŽæºä»£ç æ ‘çš„include/linux/。 + +结构体 +====== + +该APIåœ¨ä»¥ä¸‹å†…æ ¸ä»£ç ä¸: + +include/linux/damon.h + + +函数 +==== + +该APIåœ¨ä»¥ä¸‹å†…æ ¸ä»£ç ä¸: + +mm/damon/core.c diff --git a/Documentation/translations/zh_CN/vm/damon/design.rst b/Documentation/translations/zh_CN/vm/damon/design.rst new file mode 100644 index 000000000000..05f66c02740a --- /dev/null +++ b/Documentation/translations/zh_CN/vm/damon/design.rst @@ -0,0 +1,139 @@ +.. SPDX-License-Identifier: GPL-2.0 + +:Original: Documentation/vm/damon/design.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + + +==== +设计 +==== + +å¯é…置的层 +========== + +DAMONæ供了数æ®è®¿é—®ç›‘控功能,åŒæ—¶ä½¿å…¶å‡†ç¡®æ€§å’Œå¼€é”€å¯æŽ§ã€‚基本的访问监控需è¦ä¾èµ–äºŽç›®æ ‡åœ°å€ç©ºé—´ +并为之优化的基元。å¦ä¸€æ–¹é¢ï¼Œä½œä¸ºDAMONçš„æ ¸å¿ƒï¼Œå‡†ç¡®æ€§å’Œå¼€é”€çš„æƒè¡¡æœºåˆ¶æ˜¯åœ¨çº¯é€»è¾‘空间ä¸ã€‚DAMON +将这两部分分离在ä¸åŒçš„层ä¸ï¼Œå¹¶å®šä¹‰äº†å®ƒçš„接å£ï¼Œä»¥å…许å„ç§ä½Žå±‚æ¬¡çš„åŸºå…ƒå®žçŽ°ä¸Žæ ¸å¿ƒé€»è¾‘çš„é…置。 + +由于这ç§åˆ†ç¦»çš„设计和å¯é…置的接å£ï¼Œç”¨æˆ·å¯ä»¥é€šè¿‡é…ç½®æ ¸å¿ƒé€»è¾‘å’Œé€‚å½“çš„ä½Žçº§åŸºå…ƒå®žçŽ°æ¥æ‰©å±•DAMONçš„ +任何地å€ç©ºé—´ã€‚如果没有æä¾›åˆé€‚的,用户å¯ä»¥è‡ªå·±å®žçŽ°åŸºå…ƒã€‚ + +例如,物ç†å†…å˜ã€è™šæ‹Ÿå†…å˜ã€äº¤æ¢ç©ºé—´ã€é‚£äº›ç‰¹å®šçš„进程ã€NUMA节点ã€æ–‡ä»¶å’Œæ”¯æŒçš„内å˜è®¾å¤‡å°†è¢«æ”¯æŒã€‚ +å¦å¤–,如果æŸäº›æž¶æž„或设备支æŒç‰¹æ®Šçš„优化访问检查基元,这些基元将很容易被é…置。 + + +特定地å€ç©ºé—´åŸºå…ƒçš„å‚考实现 +========================== + +基本访问监测的低级基元被定义为两部分。: + +1. 确定地å€ç©ºé—´çš„ç›‘æµ‹ç›®æ ‡åœ°å€èŒƒå›´ +2. ç›®æ ‡ç©ºé—´ä¸ç‰¹å®šåœ°å€èŒƒå›´çš„访问检查。 + +DAMONç›®å‰ä¸ºç‰©ç†å’Œè™šæ‹Ÿåœ°å€ç©ºé—´æ供了基元的实现。下é¢ä¸¤ä¸ªå°èŠ‚æ述了这些工作的方å¼ã€‚ + + +基于VMAçš„ç›®æ ‡åœ°å€èŒƒå›´æž„é€ +------------------------- + +这仅仅是针对虚拟地å€ç©ºé—´åŸºå…ƒçš„实现。对于物ç†åœ°å€ç©ºé—´ï¼Œåªæ˜¯è¦æ±‚ç”¨æˆ·æ‰‹åŠ¨è®¾ç½®ç›‘æŽ§ç›®æ ‡åœ°å€èŒƒå›´ã€‚ + +在进程的超级巨大的虚拟地å€ç©ºé—´ä¸ï¼Œåªæœ‰å°éƒ¨åˆ†è¢«æ˜ 射到物ç†å†…å˜å¹¶è¢«è®¿é—®ã€‚å› æ¤ï¼Œè·Ÿè¸ªæœªæ˜ 射的地 +å€åŒºåŸŸåªæ˜¯ä¸€ç§æµªè´¹ã€‚然而,由于DAMONå¯ä»¥ä½¿ç”¨è‡ªé€‚应区域调整机制æ¥å¤„ç†ä¸€å®šç¨‹åº¦çš„噪声,所以严 +æ ¼æ¥è¯´ï¼Œè·Ÿè¸ªæ¯ä¸€ä¸ªæ˜ 射并ä¸æ˜¯å¿…须的,但在æŸäº›æƒ…å†µä¸‹ç”šè‡³ä¼šäº§ç”Ÿå¾ˆé«˜çš„å¼€é”€ã€‚ä¹Ÿå°±æ˜¯è¯´ï¼Œç›‘æµ‹ç›®æ ‡ +å†…éƒ¨è¿‡äºŽå·¨å¤§çš„æœªæ˜ å°„åŒºåŸŸåº”è¯¥è¢«ç§»é™¤ï¼Œä»¥ä¸å 用自适应机制的时间。 + +å‡ºäºŽè¿™ä¸ªåŽŸå› ï¼Œè¿™ä¸ªå®žçŽ°å°†å¤æ‚çš„æ˜ å°„è½¬æ¢ä¸ºä¸‰ä¸ªä¸åŒçš„区域,覆盖地å€ç©ºé—´çš„æ¯ä¸ªæ˜ 射区域。这三个 +区域之间的两个空隙是给定地å€ç©ºé—´ä¸ä¸¤ä¸ªæœ€å¤§çš„æœªæ˜ å°„åŒºåŸŸã€‚è¿™ä¸¤ä¸ªæœ€å¤§çš„æœªæ˜ å°„åŒºåŸŸæ˜¯å †å’Œæœ€ä¸Šé¢ +çš„mmap()区域之间的间隙,以åŠåœ¨å¤§å¤šæ•°æƒ…况下最下é¢çš„mmap()åŒºåŸŸå’Œå †ä¹‹é—´çš„é—´éš™ã€‚å› ä¸ºè¿™äº›é—´éš™ +在通常的地å€ç©ºé—´ä¸æ˜¯å¼‚常巨大的,排除这些间隙就足以åšå‡ºåˆç†çš„æƒè¡¡ã€‚下é¢è¯¦ç»†è¯´æ˜Žäº†è¿™ä¸€ç‚¹:: + + <heap> + <BIG UNMAPPED REGION 1> + <uppermost mmap()-ed region> + (small mmap()-ed regions and munmap()-ed regions) + <lowermost mmap()-ed region> + <BIG UNMAPPED REGION 2> + <stack> + + +基于PTE访问ä½çš„访问检查 +----------------------- + +物ç†å’Œè™šæ‹Ÿåœ°å€ç©ºé—´çš„实现都使用PTE Accessed-bit进行基本访问检查。唯一的区别在于从地å€ä¸ +找到相关的PTE访问ä½çš„æ–¹å¼ã€‚虚拟地å€çš„实现是为该地å€çš„ç›®æ ‡ä»»åŠ¡æŸ¥æ‰¾é¡µè¡¨ï¼Œè€Œç‰©ç†åœ°å€çš„实现则 +是查找与该地å€æœ‰æ˜ 射关系的æ¯ä¸€ä¸ªé¡µè¡¨ã€‚通过这ç§æ–¹å¼ï¼Œå®žçŽ°è€…æ‰¾åˆ°å¹¶æ¸…é™¤ä¸‹ä¸€ä¸ªé‡‡æ ·ç›®æ ‡åœ°å€çš„ä½ï¼Œ +并检查该ä½æ˜¯å¦åœ¨ä¸€ä¸ªé‡‡æ ·å‘¨æœŸåŽå†æ¬¡è®¾ç½®ã€‚è¿™å¯èƒ½ä¼šå¹²æ‰°å…¶ä»–使用访问ä½çš„å†…æ ¸å系统,å³ç©ºé—²é¡µè·Ÿ +踪和回收逻辑。为了é¿å…è¿™ç§å¹²æ‰°ï¼ŒDAMON使其与空闲页é¢è·Ÿè¸ªç›¸äº’排斥,并使用 ``PG_idle`` å’Œ +``PG_young`` 页é¢æ ‡å¿—æ¥è§£å†³ä¸Žå›žæ”¶é€»è¾‘的冲çªï¼Œå°±åƒç©ºé—²é¡µé¢è·Ÿè¸ªé‚£æ ·ã€‚ + + +独立于地å€ç©ºé—´çš„æ ¸å¿ƒæœºåˆ¶ +======================== + +下é¢å››ä¸ªéƒ¨åˆ†åˆ†åˆ«æ述了DAMONçš„æ ¸å¿ƒæœºåˆ¶å’Œäº”ä¸ªç›‘æµ‹å±žæ€§ï¼Œå³ ``é‡‡æ ·é—´éš”`` 〠``èšé›†é—´éš”`` 〠+``区域更新间隔`` 〠``最å°åŒºåŸŸæ•°`` å’Œ ``最大区域数`` 。 + + +访问频率监测 +------------ + +DAMON的输出显示了在给定的时间内哪些页é¢çš„访问频率是多少。访问频率的分辨率是通过设置 +``é‡‡æ ·é—´éš”`` å’Œ ``èšé›†é—´éš”`` æ¥æŽ§åˆ¶çš„。详细地说,DAMON检查æ¯ä¸ª ``é‡‡æ ·é—´éš”`` å¯¹æ¯ +个页é¢çš„访问,并将结果汇总。æ¢å¥è¯è¯´ï¼Œè®¡ç®—æ¯ä¸ªé¡µé¢çš„访问次数。在æ¯ä¸ª ``èšåˆé—´éš”`` 过 +去åŽï¼ŒDAMON调用先å‰ç”±ç”¨æˆ·æ³¨å†Œçš„回调函数,以便用户å¯ä»¥é˜…读èšåˆçš„结果,然åŽå†æ¸…除这些结 +果。这å¯ä»¥ç”¨ä»¥ä¸‹ç®€å•çš„伪代ç æ¥æè¿°:: + + while monitoring_on: + for page in monitoring_target: + if accessed(page): + nr_accesses[page] += 1 + if time() % aggregation_interval == 0: + for callback in user_registered_callbacks: + callback(monitoring_target, nr_accesses) + for page in monitoring_target: + nr_accesses[page] = 0 + sleep(sampling interval) + +è¿™ç§æœºåˆ¶çš„监测开销将éšç€ç›®æ ‡å·¥ä½œè´Ÿè½½è§„模的增长而任æ„å¢žåŠ ã€‚ + + +åŸºäºŽåŒºåŸŸçš„æŠ½æ ·è°ƒæŸ¥ +------------------ + +为了é¿å…å¼€é”€çš„æ— é™åˆ¶å¢žåŠ ,DAMONå°†å‡å®šå…·æœ‰ç›¸åŒè®¿é—®é¢‘率的相邻页é¢å½’入一个区域。åªè¦ä¿æŒ +这个å‡è®¾ï¼ˆä¸€ä¸ªåŒºåŸŸå†…的页é¢å…·æœ‰ç›¸åŒçš„访问频率),该区域内就åªéœ€è¦æ£€æŸ¥ä¸€ä¸ªé¡µé¢ã€‚å› æ¤ï¼Œå¯¹ +于æ¯ä¸ª ``é‡‡æ ·é—´éš”`` ,DAMON在æ¯ä¸ªåŒºåŸŸä¸éšæœºæŒ‘选一个页é¢ï¼Œç‰å¾…一个 ``é‡‡æ ·é—´éš”`` ,检 +查该页é¢æ˜¯å¦åŒæ—¶è¢«è®¿é—®ï¼Œå¦‚æžœè¢«è®¿é—®åˆ™å¢žåŠ è¯¥åŒºåŸŸçš„è®¿é—®é¢‘çŽ‡ã€‚å› æ¤ï¼Œç›‘测开销是å¯ä»¥é€šè¿‡è®¾ç½® +区域的数é‡æ¥æŽ§åˆ¶çš„。DAMONå…许用户设置最å°å’Œæœ€å¤§çš„区域数é‡æ¥è¿›è¡Œæƒè¡¡ã€‚ + +然而,如果å‡è®¾æ²¡æœ‰å¾—到ä¿è¯ï¼Œè¿™ä¸ªæ–¹æ¡ˆå°±ä¸èƒ½ä¿æŒè¾“出的质é‡ã€‚ + + +适应性区域调整 +-------------- + +å³ä½¿æœ€åˆçš„ç›‘æµ‹ç›®æ ‡åŒºåŸŸè¢«å¾ˆå¥½åœ°æž„å»ºä»¥æ»¡è¶³å‡è®¾ï¼ˆåŒä¸€åŒºåŸŸå†…的页é¢å…·æœ‰ç›¸ä¼¼çš„访问频率),数 +æ®è®¿é—®æ¨¡å¼ä¹Ÿä¼šè¢«åŠ¨æ€åœ°æ”¹å˜ã€‚这将导致监测质é‡ä¸‹é™ã€‚为了尽å¯èƒ½åœ°ä¿æŒå‡è®¾ï¼ŒDAMONæ ¹æ®æ¯ä¸ª +区域的访问频率自适应地进行åˆå¹¶å’Œæ‹†åˆ†ã€‚ + +对于æ¯ä¸ª ``èšé›†åŒºé—´`` ,它比较相邻区域的访问频率,如果频率差异较å°ï¼Œå°±åˆå¹¶è¿™äº›åŒºåŸŸã€‚ +然åŽï¼Œåœ¨å®ƒæŠ¥å‘Šå¹¶æ¸…除æ¯ä¸ªåŒºåŸŸçš„èšåˆæŽ¥å…¥é¢‘率åŽï¼Œå¦‚果区域总数ä¸è¶…过用户指定的最大区域数, +它将æ¯ä¸ªåŒºåŸŸæ‹†åˆ†ä¸ºä¸¤ä¸ªæˆ–三个区域。 + +通过这ç§æ–¹å¼ï¼ŒDAMONæ供了其最佳的质é‡å’Œæœ€å°çš„开销,åŒæ—¶ä¿æŒäº†ç”¨æˆ·ä¸ºå…¶æƒè¡¡è®¾å®šçš„ç•Œé™ã€‚ + + +动æ€ç›®æ ‡ç©ºé—´æ›´æ–°å¤„ç† +-------------------- + +ç›‘æµ‹ç›®æ ‡åœ°å€èŒƒå›´å¯ä»¥åŠ¨æ€æ”¹å˜ã€‚例如,虚拟内å˜å¯ä»¥åŠ¨æ€åœ°è¢«æ˜ å°„å’Œè§£æ˜ å°„ã€‚ç‰©ç†å†…å˜å¯ä»¥è¢« +çƒæ’拔。 + +由于在æŸäº›æƒ…况下å˜åŒ–å¯èƒ½ç›¸å½“频ç¹ï¼ŒDAMON检查动æ€å†…å˜æ˜ å°„çš„å˜åŒ–,并仅在用户指定的时间 +间隔( ``区域更新间隔`` ï¼‰å†…å°†å…¶åº”ç”¨äºŽæŠ½è±¡çš„ç›®æ ‡åŒºåŸŸã€‚ diff --git a/Documentation/translations/zh_CN/vm/damon/faq.rst b/Documentation/translations/zh_CN/vm/damon/faq.rst new file mode 100644 index 000000000000..07b4ac19407d --- /dev/null +++ b/Documentation/translations/zh_CN/vm/damon/faq.rst @@ -0,0 +1,48 @@ +.. SPDX-License-Identifier: GPL-2.0 + +:Original: Documentation/vm/damon/faq.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + + +======== +常è§é—®é¢˜ +======== + +为什么是一个新的å系统,而ä¸æ˜¯æ‰©å±•perf或其他用户空间工具? +========================================================== + +é¦–å…ˆï¼Œå› ä¸ºå®ƒéœ€è¦å°½å¯èƒ½çš„è½»é‡çº§ï¼Œä»¥ä¾¿å¯ä»¥åœ¨çº¿ä½¿ç”¨ï¼Œæ‰€ä»¥åº”该é¿å…任何ä¸å¿…è¦çš„å¼€é”€ï¼Œå¦‚å†…æ ¸-用户 +空间的上下文切æ¢æˆæœ¬ã€‚第二,DAMONçš„ç›®æ ‡æ˜¯è¢«åŒ…æ‹¬å†…æ ¸åœ¨å†…çš„å…¶ä»–ç¨‹åºæ‰€ä½¿ç”¨ã€‚å› æ¤ï¼Œå¯¹ç‰¹å®šå·¥å…· +(如perf)的ä¾èµ–性是ä¸å¯å–的。这就是DAMONåœ¨å†…æ ¸ç©ºé—´å®žçŽ°çš„ä¸¤ä¸ªæœ€å¤§çš„åŽŸå› ã€‚ + + +“闲置页é¢è·Ÿè¸ªâ€ 或 “perf mem†å¯ä»¥æ›¿ä»£DAMONå—? +============================================== + +闲置页跟踪是物ç†åœ°å€ç©ºé—´è®¿é—®æ£€æŸ¥çš„一个低层次的原始方法。“perf memâ€ä¹Ÿæ˜¯ç±»ä¼¼çš„,尽管它å¯ä»¥ +ä½¿ç”¨é‡‡æ ·æ¥å‡å°‘开销。å¦ä¸€æ–¹é¢ï¼ŒDAMON是一个更高层次的框架,用于监控å„ç§åœ°å€ç©ºé—´ã€‚它专注于内 +å˜ç®¡ç†ä¼˜åŒ–,并æä¾›å¤æ‚的精度/开销处ç†æœºåˆ¶ã€‚å› æ¤ï¼Œâ€œç©ºé—²é¡µé¢è·Ÿè¸ªâ€ å’Œ “perf mem†å¯ä»¥æä¾› +DAMON输出的一个å集,但ä¸èƒ½æ›¿ä»£DAMON。 + + +DAMON是å¦åªæ”¯æŒè™šæ‹Ÿå†…å˜ï¼Ÿ +========================= + +ä¸ï¼ŒDAMONçš„æ ¸å¿ƒæ˜¯ç‹¬ç«‹äºŽåœ°å€ç©ºé—´çš„。用户å¯ä»¥åœ¨DAMONæ ¸å¿ƒä¸Šå®žçŽ°å’Œé…置特定地å€ç©ºé—´çš„低级原始 +éƒ¨åˆ†ï¼ŒåŒ…æ‹¬ç›‘æµ‹ç›®æ ‡åŒºåŸŸçš„æž„é€ å’Œå®žé™…çš„è®¿é—®æ£€æŸ¥ã€‚é€šè¿‡è¿™ç§æ–¹å¼ï¼ŒDAMON用户å¯ä»¥ç”¨ä»»ä½•è®¿é—®æ£€æŸ¥æŠ€ +术æ¥ç›‘测任何地å€ç©ºé—´ã€‚ + +尽管如æ¤ï¼ŒDAMON默认为虚拟内å˜å’Œç‰©ç†å†…å˜æ供了基于vma/rmap跟踪和PTE访问ä½æ£€æŸ¥çš„地å€ç©ºé—´ +相关功能的实现,以供å‚考和方便使用。 + + +我å¯ä»¥ç®€å•åœ°ç›‘测页é¢çš„粒度å—? +============================== + +æ˜¯çš„ï¼Œä½ å¯ä»¥é€šè¿‡è®¾ç½® ``min_nr_regions`` 属性高于工作集大å°é™¤ä»¥é¡µé¢å¤§å°çš„值æ¥å®žçŽ°ã€‚ +å› ä¸ºç›‘è§†ç›®æ ‡åŒºåŸŸçš„å¤§å°è¢«å¼ºåˆ¶ä¸º ``>=page size`` ,所以区域分割ä¸ä¼šäº§ç”Ÿä»»ä½•å½±å“。 diff --git a/Documentation/translations/zh_CN/vm/damon/index.rst b/Documentation/translations/zh_CN/vm/damon/index.rst new file mode 100644 index 000000000000..84d36d90c9b0 --- /dev/null +++ b/Documentation/translations/zh_CN/vm/damon/index.rst @@ -0,0 +1,33 @@ +.. SPDX-License-Identifier: GPL-2.0 + +:Original: Documentation/vm/damon/index.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + + +========================== +DAMON:æ•°æ®è®¿é—®ç›‘视器 +========================== + +DAMON是Linuxå†…æ ¸çš„ä¸€ä¸ªæ•°æ®è®¿é—®ç›‘控框架å系统。DAMONçš„æ ¸å¿ƒæœºåˆ¶ä½¿å…¶æˆä¸º +ï¼ˆè¯¥æ ¸å¿ƒæœºåˆ¶è¯¦è§(Documentation/translations/zh_CN/vm/damon/design.rst)) + + - *准确度* (监测输出对DRAM级别的内å˜ç®¡ç†è¶³å¤Ÿæœ‰ç”¨ï¼›ä½†å¯èƒ½ä¸é€‚åˆCPU Cache级别), + - *è½»é‡çº§* (监控开销低到å¯ä»¥åœ¨çº¿åº”ç”¨ï¼‰ï¼Œä»¥åŠ + - *å¯æ‰©å±•* ï¼ˆæ— è®ºç›®æ ‡å·¥ä½œè´Ÿè½½çš„å¤§å°ï¼Œå¼€é”€çš„上é™å€¼éƒ½åœ¨æ’定范围内)。 + +å› æ¤ï¼Œåˆ©ç”¨è¿™ä¸ªæ¡†æž¶ï¼Œå†…æ ¸çš„å†…å˜ç®¡ç†æœºåˆ¶å¯ä»¥åšå‡ºé«˜çº§å†³ç–。会导致高数æ®è®¿é—®ç›‘控开销的实 +验性内å˜ç®¡ç†ä¼˜åŒ–工作å¯ä»¥å†æ¬¡è¿›è¡Œã€‚åŒæ—¶ï¼Œåœ¨ç”¨æˆ·ç©ºé—´ï¼Œæœ‰ä¸€äº›ç‰¹æ®Šå·¥ä½œè´Ÿè½½çš„用户å¯ä»¥ç¼–写 +个性化的应用程åºï¼Œä»¥ä¾¿æ›´å¥½åœ°äº†è§£å’Œä¼˜åŒ–他们的工作负载和系统。 + +.. toctree:: + :maxdepth: 2 + + faq + design + api + diff --git a/Documentation/translations/zh_CN/vm/free_page_reporting.rst b/Documentation/translations/zh_CN/vm/free_page_reporting.rst new file mode 100644 index 000000000000..31d6c34b956b --- /dev/null +++ b/Documentation/translations/zh_CN/vm/free_page_reporting.rst @@ -0,0 +1,38 @@ +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/vm/_free_page_reporting.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + +========== +空闲页报告 +========== + +空闲页报告是一个API,设备å¯ä»¥é€šè¿‡å®ƒæ¥æ³¨å†ŒæŽ¥æ”¶ç³»ç»Ÿå½“å‰æœªä½¿ç”¨çš„页é¢åˆ—表。这在虚拟 +化的情况下是很有用的,客户机能够使用这些数æ®æ¥é€šçŸ¥ç®¡ç†å™¨å®ƒä¸å†ä½¿ç”¨å†…å˜ä¸çš„æŸäº›é¡µ +é¢ã€‚ + +对于驱动,通常是气çƒé©±åŠ¨è¦ä½¿ç”¨è¿™ä¸ªåŠŸèƒ½ï¼Œå®ƒå°†åˆ†é…å’Œåˆå§‹åŒ–一个page_reporting_dev_info +结构体。它è¦å¡«å……的结构体ä¸çš„å—段是用于处ç†æ•£ç‚¹åˆ—表的 "report" 函数指针。它还必 +é¡»ä¿è¯æ¯æ¬¡è°ƒç”¨è¯¥å‡½æ•°æ—¶èƒ½å¤„ç†è‡³å°‘相当于PAGE_REPORTING_CAPACITY的散点列表æ¡ç›®ã€‚ +å‡è®¾æ²¡æœ‰å…¶ä»–页é¢æŠ¥å‘Šè®¾å¤‡å·²ç»æ³¨å†Œï¼Œ 对page_reporting_register的调用将å‘报告框 +架注册页é¢æŠ¥å‘ŠæŽ¥å£ã€‚ + +一旦注册,页é¢æŠ¥å‘ŠAPI将开始å‘驱动报告æˆæ‰¹çš„页é¢ã€‚API将在接å£è¢«æ³¨å†ŒåŽ2秒开始报告 +页é¢ï¼Œå¹¶åœ¨ä»»ä½•è¶³å¤Ÿé«˜çš„页é¢è¢«é‡Šæ”¾ä¹‹åŽ2秒继ç»æŠ¥å‘Šã€‚ + +报告的页é¢å°†è¢«å˜å‚¨åœ¨ä¼ 递给报告函数的散列表ä¸ï¼Œæœ€åŽä¸€ä¸ªæ¡ç›®çš„结æŸä½è¢«è®¾ç½®åœ¨æ¡ç›® +nent-1ä¸ã€‚ 当页é¢è¢«æŠ¥å‘Šå‡½æ•°å¤„ç†æ—¶ï¼Œåˆ†é…å™¨å°†æ— æ³•è®¿é—®å®ƒä»¬ã€‚ä¸€æ—¦æŠ¥å‘Šå‡½æ•°å®Œæˆï¼Œè¿™äº› +页将被返回到它们所获得的自由区域。 + +在移除使用空闲页报告的驱动之å‰ï¼Œæœ‰å¿…è¦è°ƒç”¨page_reporting_unregister,以移除 +ç›®å‰è¢«ç©ºé—²é¡µæŠ¥å‘Šä½¿ç”¨çš„page_reporting_dev_infoç»“æž„ä½“ã€‚è¿™æ ·åšå°†é˜»æ¢è¿›ä¸€æ¥çš„报 +告通过该接å£å‘出。如果å¦ä¸€ä¸ªé©±åŠ¨æˆ–åŒä¸€é©±åŠ¨è¢«æ³¨å†Œï¼Œå®ƒå°±æœ‰å¯èƒ½æ¢å¤å‰ä¸€ä¸ªé©±åŠ¨åœ¨æŠ¥å‘Š +空闲页方é¢çš„工作。 + + +Alexander Duyck, 2019å¹´12月04æ—¥ diff --git a/Documentation/translations/zh_CN/vm/highmem.rst b/Documentation/translations/zh_CN/vm/highmem.rst new file mode 100644 index 000000000000..018838e58c3e --- /dev/null +++ b/Documentation/translations/zh_CN/vm/highmem.rst @@ -0,0 +1,128 @@ +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/vm/highmem.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + +========== +高内å˜å¤„ç† +========== + +作者: Peter Zijlstra <a.p.zijlstra@chello.nl> + +.. contents:: :local: + +高内å˜æ˜¯ä»€ä¹ˆï¼Ÿ +============== + +当物ç†å†…å˜çš„大å°æŽ¥è¿‘或超过虚拟内å˜çš„最大大å°æ—¶ï¼Œå°±ä¼šä½¿ç”¨é«˜å†…å˜ï¼ˆhighmem)。在这一点上,内 +æ ¸ä¸å¯èƒ½åœ¨ä»»ä½•æ—¶å€™éƒ½ä¿æŒæ‰€æœ‰å¯ç”¨çš„物ç†å†…å˜çš„æ˜ å°„ã€‚è¿™æ„味ç€å†…æ ¸éœ€è¦å¼€å§‹ä½¿ç”¨å®ƒæƒ³è®¿é—®çš„物ç†å†… +å˜çš„ä¸´æ—¶æ˜ å°„ã€‚ + +æ²¡æœ‰è¢«æ°¸ä¹…æ˜ å°„è¦†ç›–çš„é‚£éƒ¨åˆ†ï¼ˆç‰©ç†ï¼‰å†…å˜å°±æ˜¯æˆ‘们所说的 "高内å˜"。对于这个边界的确切ä½ç½®ï¼Œæœ‰ +å„ç§æž¶æž„上的é™åˆ¶ã€‚ + +例如,在i386架构ä¸ï¼Œæˆ‘ä»¬é€‰æ‹©å°†å†…æ ¸æ˜ å°„åˆ°æ¯ä¸ªè¿›ç¨‹çš„è™šæ‹Ÿç©ºé—´ï¼Œè¿™æ ·æˆ‘ä»¬å°±ä¸å¿…ä¸ºå†…æ ¸çš„è¿›å…¥/退 +出付出全部的TLB作废代价。这æ„味ç€å¯ç”¨çš„虚拟内å˜ç©ºé—´ï¼ˆi386上为4GiBï¼‰å¿…é¡»åœ¨ç”¨æˆ·å’Œå†…æ ¸ç©ºé—´ä¹‹ +间进行划分。 + +使用这ç§æ–¹æ³•çš„æž¶æž„çš„ä¼ ç»Ÿåˆ†é…æ–¹å¼æ˜¯3:1,3GiB用于用户空间,顶部的1GiBç”¨äºŽå†…æ ¸ç©ºé—´ã€‚:: + + +--------+ 0xffffffff + | Kernel | + +--------+ 0xc0000000 + | | + | User | + | | + +--------+ 0x00000000 + +è¿™æ„味ç€å†…æ ¸åœ¨ä»»ä½•æ—¶å€™æœ€å¤šå¯ä»¥æ˜ å°„1GiB的物ç†å†…å˜ï¼Œä½†æ˜¯ç”±äºŽæˆ‘们需è¦è™šæ‹Ÿåœ°å€ç©ºé—´æ¥åšå…¶ä»–事 +情--包括访问其余物ç†å†…å˜çš„ä¸´æ—¶æ˜ å°„--å®žé™…çš„ç›´æŽ¥æ˜ å°„é€šå¸¸ä¼šæ›´å°‘ï¼ˆé€šå¸¸åœ¨~896MiBå·¦å³ï¼‰ã€‚ + +其他有mmä¸Šä¸‹æ–‡æ ‡ç¾çš„TLB的架构å¯ä»¥æœ‰ç‹¬ç«‹çš„å†…æ ¸å’Œç”¨æˆ·æ˜ å°„ã€‚ç„¶è€Œï¼Œä¸€äº›ç¡¬ä»¶ï¼ˆå¦‚ä¸€äº›ARM)在使 +用mmä¸Šä¸‹æ–‡æ ‡ç¾æ—¶ï¼Œå…¶è™šæ‹Ÿç©ºé—´æœ‰é™ã€‚ + + +ä¸´æ—¶è™šæ‹Ÿæ˜ å°„ +============ + +å†…æ ¸åŒ…å«å‡ ç§åˆ›å»ºä¸´æ—¶æ˜ 射的方法。: + +* vmap(). è¿™å¯ä»¥ç”¨æ¥å°†å¤šä¸ªç‰©ç†é¡µé•¿æœŸæ˜ 射到一个连ç»çš„虚拟空间。它需è¦synchronization + æ¥è§£é™¤æ˜ 射。 + +* kmap(). è¿™å…许对å•ä¸ªé¡µé¢è¿›è¡ŒçŸæœŸæ˜ 射。它需è¦synchronization,但在一定程度上被摊销。 + 当以嵌套方å¼ä½¿ç”¨æ—¶ï¼Œå®ƒä¹Ÿå¾ˆå®¹æ˜“出现æ»é”ï¼Œå› æ¤ä¸å»ºè®®åœ¨æ–°ä»£ç ä¸ä½¿ç”¨å®ƒã€‚ + +* kmap_atomic(). è¿™å…许对å•ä¸ªé¡µé¢è¿›è¡Œéžå¸¸çŸçš„æ—¶é—´æ˜ å°„ã€‚ç”±äºŽæ˜ å°„è¢«é™åˆ¶åœ¨å‘布它的CPU上, + 它表现得很好,但å‘å¸ƒä»»åŠ¡å› æ¤è¢«è¦æ±‚留在该CPU上直到它完æˆï¼Œä»¥å…其他任务å–ä»£å®ƒçš„æ˜ å°„ã€‚ + + kmap_atomic() 也å¯ä»¥ç”±ä¸æ–ä¸Šä¸‹æ–‡ä½¿ç”¨ï¼Œå› ä¸ºå®ƒä¸ç¡çœ ,而且调用者å¯èƒ½åœ¨è°ƒç”¨kunmap_atomic() + 之åŽæ‰ç¡çœ 。 + + å¯ä»¥å‡è®¾k[un]map_atomic()ä¸ä¼šå¤±è´¥ã€‚ + + +使用kmap_atomic +=============== + +何时何地使用 kmap_atomic() 是很直接的。当代ç 想è¦è®¿é—®ä¸€ä¸ªå¯èƒ½ä»Žé«˜å†…å˜ï¼ˆè§__GFP_HIGHMEM) +分é…的页é¢çš„内容时,例如在页缓å˜ä¸çš„页é¢ï¼Œå°±ä¼šä½¿ç”¨å®ƒã€‚该API有两个函数,它们的使用方å¼ä¸Ž +下é¢ç±»ä¼¼:: + + /* 找到感兴趣的页é¢ã€‚ */ + struct page *page = find_get_page(mapping, offset); + + /* 获得对该页内容的访问æƒã€‚ */ + void *vaddr = kmap_atomic(page); + + /* 对该页的内容åšä¸€äº›å¤„ç†ã€‚ */ + memset(vaddr, 0, PAGE_SIZE); + + /* 解除该页é¢çš„æ˜ å°„ã€‚ */ + kunmap_atomic(vaddr); + +注æ„,kunmap_atomic()调用的是kmap_atomic()调用的结果而ä¸æ˜¯å‚数。 + +å¦‚æžœä½ éœ€è¦æ˜ 射两个页é¢ï¼Œå› ä¸ºä½ æƒ³ä»Žä¸€ä¸ªé¡µé¢å¤åˆ¶åˆ°å¦ä¸€ä¸ªé¡µé¢ï¼Œä½ 需è¦ä¿æŒkmap_atomic调用严 +æ ¼åµŒå¥—ï¼Œå¦‚:: + + vaddr1 = kmap_atomic(page1); + vaddr2 = kmap_atomic(page2); + + memcpy(vaddr1, vaddr2, PAGE_SIZE); + + kunmap_atomic(vaddr2); + kunmap_atomic(vaddr1); + + +ä¸´æ—¶æ˜ å°„çš„æˆæœ¬ +============== + +åˆ›å»ºä¸´æ—¶æ˜ å°„çš„ä»£ä»·å¯èƒ½ç›¸å½“高。体系架构必须æ“ä½œå†…æ ¸çš„é¡µè¡¨ã€æ•°æ®TLBå’Œ/或MMU的寄å˜å™¨ã€‚ + +如果CONFIG_HIGHMEMæ²¡æœ‰è¢«è®¾ç½®ï¼Œé‚£ä¹ˆå†…æ ¸ä¼šå°è¯•ç”¨ä¸€ç‚¹è®¡ç®—æ¥åˆ›å»ºæ˜ 射,将页é¢ç»“构地å€è½¬æ¢æˆ +指å‘页é¢å†…容的指针,而ä¸æ˜¯åŽ»æ£é¼“æ˜ å°„ã€‚åœ¨è¿™ç§æƒ…å†µä¸‹ï¼Œè§£æ˜ å°„æ“作å¯èƒ½æ˜¯ä¸€ä¸ªç©ºæ“作。 + +如果CONFIG_MMU没有被设置,那么就ä¸å¯èƒ½æœ‰ä¸´æ—¶æ˜ 射和高内å˜ã€‚在这ç§æƒ…况下,也将使用计算方法。 + + +i386 PAE +======== + +在æŸäº›æƒ…况下,i386 架构将å…è®¸ä½ åœ¨ 32 ä½æœºå™¨ä¸Šå®‰è£…多达 64GiB 的内å˜ã€‚但这有一些åŽæžœ: + +* Linux需è¦ä¸ºç³»ç»Ÿä¸çš„æ¯ä¸ªé¡µé¢å»ºç«‹ä¸€ä¸ªé¡µå¸§ç»“构,而且页帧需è¦é©»åœ¨æ°¸ä¹…æ˜ å°„ä¸ï¼Œè¿™æ„味ç€ï¼š + +* ä½ æœ€å¤šå¯ä»¥æœ‰896M/sizeof(struct page)页帧;由于页结构体是32å—节的,所以最终会有 + 112Gçš„é¡µï¼›ç„¶è€Œï¼Œå†…æ ¸éœ€è¦åœ¨å†…å˜ä¸å˜å‚¨æ›´å¤šçš„页帧...... + +* PAEä½¿ä½ çš„é¡µè¡¨å˜å¤§--这使系统å˜æ…¢ï¼Œå› 为更多的数æ®éœ€è¦åœ¨TLBå¡«å……ç‰æ–¹é¢è¢«è®¿é—®ã€‚一个好处 + 是,PAE有更多的PTEä½ï¼Œå¯ä»¥æä¾›åƒNXå’ŒPATè¿™æ ·çš„é«˜çº§åŠŸèƒ½ã€‚ + +ä¸€èˆ¬çš„å»ºè®®æ˜¯ï¼Œä½ ä¸è¦åœ¨32ä½æœºå™¨ä¸Šä½¿ç”¨è¶…过8GiB的空间--尽管更多的空间å¯èƒ½å¯¹ä½ å’Œä½ çš„å·¥ä½œ +é‡æœ‰ç”¨ï¼Œä½†ä½ å‡ ä¹Žæ˜¯é ä½ è‡ªå·±--ä¸è¦æŒ‡æœ›å†…æ ¸å¼€å‘者真的会很关心事情的进展情况。 diff --git a/Documentation/translations/zh_CN/vm/index.rst b/Documentation/translations/zh_CN/vm/index.rst new file mode 100644 index 000000000000..a1d2f0356cc1 --- /dev/null +++ b/Documentation/translations/zh_CN/vm/index.rst @@ -0,0 +1,53 @@ +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/vm/index.rst + +:翻译: + + å¸å»¶è…¾ Yanteng Si <siyanteng@loongson.cn> + +:æ ¡è¯‘: + +================= +Linux内å˜ç®¡ç†æ–‡æ¡£ +================= + +这是一个关于Linux内å˜ç®¡ç†ï¼ˆmm)å系统内部的文档集,其ä¸æœ‰ä¸åŒå±‚次的细节,包括注释 +和邮件列表的回å¤ï¼Œç”¨äºŽé˜è¿°æ•°æ®ç»“æž„å’Œç®—æ³•çš„åŸºæœ¬æƒ…å†µã€‚å¦‚æžœä½ æ£åœ¨å¯»æ‰¾å…³äºŽç®€å•åˆ†é…内å˜çš„建 +议,请å‚阅(Documentation/translations/zh_CN/core-api/memory-allocation.rst)。 +对于控制和调整指å—,请å‚阅(Documentation/admin-guide/mm/index)。 +TODO:待引用文档集被翻译完毕åŽè¯·åŠæ—¶ä¿®æ”¹æ¤å¤„) + +.. toctree:: + :maxdepth: 1 + + active_mm + balance + damon/index + free_page_reporting + highmem + ksm + +TODOLIST: +* arch_pgtable_helpers +* free_page_reporting +* frontswap +* hmm +* hwpoison +* hugetlbfs_reserv +* memory-model +* mmu_notifier +* numa +* overcommit-accounting +* page_migration +* page_frags +* page_owner +* page_table_check +* remap_file_pages +* slub +* split_page_table_lock +* transhuge +* unevictable-lru +* vmalloced-kernel-stacks +* z3fold +* zsmalloc diff --git a/Documentation/translations/zh_CN/vm/ksm.rst b/Documentation/translations/zh_CN/vm/ksm.rst new file mode 100644 index 000000000000..83b0c73984da --- /dev/null +++ b/Documentation/translations/zh_CN/vm/ksm.rst @@ -0,0 +1,70 @@ +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/vm/ksm.rst + +:翻译: + + å¾é‘« xu xin <xu.xin16@zte.com.cn> + +============ +å†…æ ¸åŒé¡µåˆå¹¶ +============ + +KSM 是一ç§èŠ‚çœå†…å˜çš„æ•°æ®åŽ»é‡åŠŸèƒ½ï¼Œç”±CONFIG_KSM=yå¯ç”¨ï¼Œå¹¶åœ¨2.6.32ç‰ˆæœ¬æ—¶è¢«æ·»åŠ +到Linuxå†…æ ¸ã€‚è¯¦è§ ``mm/ksm.c`` 的实现,以åŠhttp://lwn.net/Articles/306704å’Œ +https://lwn.net/Articles/330589 + +KSM的用户空间的接å£åœ¨Documentation/translations/zh_CN/admin-guide/mm/ksm.rst +文档ä¸æœ‰æ述。 + +设计 +==== + +概述 +---- + +概述内容请è§mm/ksm.c文档ä¸çš„“DOC: Overview†+ +é€†æ˜ å°„ +------ +KSM维护ç€ç¨³å®šæ ‘ä¸çš„KSMé¡µçš„é€†æ˜ å°„ä¿¡æ¯ã€‚ + +当KSM页é¢çš„共享数å°äºŽ ``max_page_sharing`` 的虚拟内å˜åŒºåŸŸ(VMAs)时,则代表了 +KSMé¡µçš„ç¨³å®šæ ‘å…¶ä¸çš„节点指å‘了一个rmap_item结构体类型的列表。åŒæ—¶ï¼Œè¿™ä¸ªKSM页 +çš„ ``page->mapping`` 指å‘äº†è¯¥ç¨³å®šæ ‘èŠ‚ç‚¹ã€‚ + +如果共享数超过了阈值,KSMå°†ç»™ç¨³å®šæ ‘æ·»åŠ ç¬¬äºŒä¸ªç»´åº¦ã€‚ç¨³å®šæ ‘å°±å˜æˆé“¾æŽ¥ä¸€ä¸ªæˆ–多 +ä¸ªç¨³å®šæ ‘"副本"çš„"链"。æ¯ä¸ªå‰¯æœ¬éƒ½ä¿ç•™KSMé¡µçš„é€†æ˜ å°„ä¿¡æ¯ï¼Œå…¶ä¸ ``page->mapping`` +指å‘该"副本"。 + +æ¯ä¸ªé“¾ä»¥åŠé“¾æŽ¥åˆ°è¯¥é“¾ä¸çš„所有"副本"强制ä¸å˜çš„是,它们代表了相åŒçš„写ä¿æŠ¤å†…å˜ +内容,尽管任ä¸ä¸€ä¸ª"副本"是由åŒä¸€ç‰‡å†…å˜åŒºçš„ä¸åŒçš„KSMå¤åˆ¶é¡µæ‰€æŒ‡å‘的。 + +è¿™æ ·ä¸€æ¥ï¼Œç›¸æ¯”ä¸Žæ— é™çš„é€†æ˜ å°„é“¾è¡¨ï¼Œç¨³å®šæ ‘çš„æŸ¥æ‰¾è®¡ç®—å¤æ‚性ä¸å—å½±å“ã€‚ä½†åœ¨ç¨³å®šæ ‘ +本身ä¸ä¸èƒ½æœ‰é‡å¤çš„KSM页é¢å†…容ä»ç„¶æ˜¯å¼ºåˆ¶è¦æ±‚。 + +ç”± ``max_page_sharing`` 强制决定的数æ®åŽ»é‡é™åˆ¶æ˜¯å¿…è¦çš„,以æ¤æ¥é¿å…è™šæ‹Ÿå†…å˜ +rmap链表å˜å¾—过大。rmapçš„é历具有O(N)çš„å¤æ‚度,其ä¸N是共享页é¢çš„rmap_é¡¹ï¼ˆå³ +è™šæ‹Ÿæ˜ å°„ï¼‰çš„æ•°é‡ï¼Œè€Œè¿™ä¸ªå…±äº«é¡µé¢çš„节点数é‡åˆè¢« ``max_page_sharing`` 所é™åˆ¶ã€‚ +å› æ¤ï¼Œè¿™æœ‰æ•ˆåœ°å°†çº¿æ€§O(N)计算å¤æ‚度从rmapé历ä¸åˆ†æ•£åˆ°ä¸åŒçš„KSM页é¢ä¸Šã€‚ksmdè¿› +程在稳定节点"链"上的é历也是O(N),但这个Næ˜¯ç¨³å®šæ ‘"副本"çš„æ•°é‡ï¼Œè€Œä¸æ˜¯rmap项 +çš„æ•°é‡ï¼Œå› æ¤å®ƒå¯¹ksmd性能没有显著影å“ã€‚å®žé™…ä¸Šï¼Œæœ€ä½³ç¨³å®šæ ‘"副本"的候选节点将 +ä¿ç•™åœ¨"副本"列表的开头。 + +``max_page_sharing`` 的值设置得高了会促使更快的内å˜åˆå¹¶ï¼ˆå› 为将有更少的稳定 +æ ‘å‰¯æœ¬æŽ’é˜Ÿè¿›å…¥ç¨³å®šèŠ‚ç‚¹chain->hlist)和更高的数æ®åŽ»é‡ç³»æ•°ï¼Œä½†ä»£ä»·æ˜¯åœ¨äº¤æ¢ã€åŽ‹ +缩ã€NUMA平衡和页é¢è¿ç§»è¿‡ç¨‹ä¸å¯èƒ½å¯¼è‡´KSM页的最大rmapé历速度较慢。 + +``stable_node_dups/stable_node_chains`` çš„æ¯”å€¼è¿˜å— ``max_page_sharing`` 调控 +çš„å½±å“,高比值å¯èƒ½æ„味ç€ç¨³å®šèŠ‚点dupä¸å˜åœ¨ç¢Žç‰‡ï¼Œè¿™å¯ä»¥é€šè¿‡åœ¨ksmdä¸å¼•å…¥ç¢Žç‰‡ç®— +法æ¥è§£å†³ï¼Œè¯¥ç®—法将rmap项从一个稳定节点dupé‡å®šä½åˆ°å¦ä¸€ä¸ªç¨³å®šèŠ‚点dup,以便释放 +那些仅包å«æžå°‘rmap项的稳定节点"dup",但这å¯èƒ½ä¼šå¢žåŠ ksmd进程的CPUä½¿ç”¨çŽ‡ï¼Œå¹¶å¯ +能会å‡æ…¢åº”用程åºåœ¨KSM页é¢ä¸Šçš„åªè¯»è®¡ç®—。 + +KSM会定期扫æ稳定节点"链"ä¸é“¾æŽ¥çš„æ‰€æœ‰ç¨³å®šæ ‘"副本"ï¼Œä»¥ä¾¿åˆ å‡è¿‡æ—¶äº†çš„稳定节点。 +è¿™ç§æ‰«æ的频率由 ``stable_node_chains_prune_millisecs`` 这个sysfs 接å£å®šä¹‰ã€‚ + +å‚考 +==== +å†…æ ¸ä»£ç 请è§mm/ksm.c。 +涉åŠçš„函数(mm_slot ksm_scan stable_node rmap_item)。 diff --git a/Documentation/translations/zh_TW/index.rst b/Documentation/translations/zh_TW/index.rst index f56f78ba7860..e1ce9d8c06f8 100644 --- a/Documentation/translations/zh_TW/index.rst +++ b/Documentation/translations/zh_TW/index.rst @@ -5,7 +5,7 @@ \renewcommand\thesection* \renewcommand\thesubsection* \kerneldocCJKon - \kerneldocBeginTC + \kerneldocBeginTC{ .. _linux_doc_zh_tw: @@ -174,4 +174,4 @@ TODOList: .. raw:: latex - \kerneldocEndTC + }\kerneldocEndTC diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index e6fce2cbd99e..dfbc27d17ff7 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -256,7 +256,7 @@ Code Seq# Include File Comments 'l' 00-3F linux/tcfs_fs.h transparent cryptographic file system <http://web.archive.org/web/%2A/http://mikonos.dia.unisa.it/tcfs> 'l' 40-7F linux/udf_fs_i.h in development: - <http://sourceforge.net/projects/linux-udf/> + <https://github.com/pali/udftools> 'm' 00-09 linux/mmtimer.h conflict! 'm' all linux/mtio.h conflict! 'm' all linux/soundcard.h conflict! diff --git a/Documentation/virt/uml/user_mode_linux_howto_v2.rst b/Documentation/virt/uml/user_mode_linux_howto_v2.rst index 2cafd3c3c6cb..d5ad96c795f6 100644 --- a/Documentation/virt/uml/user_mode_linux_howto_v2.rst +++ b/Documentation/virt/uml/user_mode_linux_howto_v2.rst @@ -664,7 +664,11 @@ one is input, the second one output. * The fd channel - use file descriptor numbers for input/output. Example: ``con1=fd:0,fd:1.`` -* The port channel - listen on TCP port number. Example: ``con1=port:4321`` +* The port channel - start a telnet server on TCP port number. Example: + ``con1=port:4321``. The host must have /usr/sbin/in.telnetd (usually part of + a telnetd package) and the port-helper from the UML utilities (see the + information for the xterm channel below). UML will not boot until a client + connects. * The pty and pts channels - use system pty/pts. diff --git a/Documentation/vm/page_owner.rst b/Documentation/vm/page_owner.rst index 9837fc8147dd..905555e3e483 100644 --- a/Documentation/vm/page_owner.rst +++ b/Documentation/vm/page_owner.rst @@ -26,9 +26,9 @@ fragmentation statistics can be obtained through gfp flag information of each page. It is already implemented and activated if page owner is enabled. Other usages are more than welcome. -page owner is disabled in default. So, if you'd like to use it, you need -to add "page_owner=on" into your boot cmdline. If the kernel is built -with page owner and page owner is disabled in runtime due to no enabling +page owner is disabled by default. So, if you'd like to use it, you need +to add "page_owner=on" to your boot cmdline. If the kernel is built +with page owner and page owner is disabled in runtime due to not enabling boot option, runtime overhead is marginal. If disabled in runtime, it doesn't require memory to store owner information, so there is no runtime memory overhead. And, page owner inserts just two unlikely branches into @@ -85,7 +85,7 @@ Usage cat /sys/kernel/debug/page_owner > page_owner_full.txt ./page_owner_sort page_owner_full.txt sorted_page_owner.txt - The general output of ``page_owner_full.txt`` is as follows: + The general output of ``page_owner_full.txt`` is as follows:: Page allocated via order XXX, ... PFN XXX ... @@ -100,7 +100,7 @@ Usage and pages of buf, and finally sorts them according to the times. See the result about who allocated each page - in the ``sorted_page_owner.txt``. General output: + in the ``sorted_page_owner.txt``. General output:: XXX times, XXX pages: Page allocated via order XXX, ... diff --git a/Documentation/x86/sva.rst b/Documentation/x86/sva.rst index 076efd51ef1f..2e9b8b0f9a0f 100644 --- a/Documentation/x86/sva.rst +++ b/Documentation/x86/sva.rst @@ -104,18 +104,47 @@ The MSR must be configured on each logical CPU before any application thread can interact with a device. Threads that belong to the same process share the same page tables, thus the same MSR value. -PASID is cleared when a process is created. The PASID allocation and MSR -programming may occur long after a process and its threads have been created. -One thread must call iommu_sva_bind_device() to allocate the PASID for the -process. If a thread uses ENQCMD without the MSR first being populated, a #GP -will be raised. The kernel will update the PASID MSR with the PASID for all -threads in the process. A single process PASID can be used simultaneously -with multiple devices since they all share the same address space. - -One thread can call iommu_sva_unbind_device() to free the allocated PASID. -The kernel will clear the PASID MSR for all threads belonging to the process. - -New threads inherit the MSR value from the parent. +PASID Life Cycle Management +=========================== + +PASID is initialized as INVALID_IOASID (-1) when a process is created. + +Only processes that access SVA-capable devices need to have a PASID +allocated. This allocation happens when a process opens/binds an SVA-capable +device but finds no PASID for this process. Subsequent binds of the same, or +other devices will share the same PASID. + +Although the PASID is allocated to the process by opening a device, +it is not active in any of the threads of that process. It's loaded to the +IA32_PASID MSR lazily when a thread tries to submit a work descriptor +to a device using the ENQCMD. + +That first access will trigger a #GP fault because the IA32_PASID MSR +has not been initialized with the PASID value assigned to the process +when the device was opened. The Linux #GP handler notes that a PASID has +been allocated for the process, and so initializes the IA32_PASID MSR +and returns so that the ENQCMD instruction is re-executed. + +On fork(2) or exec(2) the PASID is removed from the process as it no +longer has the same address space that it had when the device was opened. + +On clone(2) the new task shares the same address space, so will be +able to use the PASID allocated to the process. The IA32_PASID is not +preemptively initialized as the PASID value might not be allocated yet or +the kernel does not know whether this thread is going to access the device +and the cleared IA32_PASID MSR reduces context switch overhead by xstate +init optimization. Since #GP faults have to be handled on any threads that +were created before the PASID was assigned to the mm of the process, newly +created threads might as well be treated in a consistent way. + +Due to complexity of freeing the PASID and clearing all IA32_PASID MSRs in +all threads in unbind, free the PASID lazily only on mm exit. + +If a process does a close(2) of the device file descriptor and munmap(2) +of the device MMIO portal, then the driver will unbind the device. The +PASID is still marked VALID in the PASID_MSR for any threads in the +process that accessed the device. But this is harmless as without the +MMIO portal they cannot submit new work to the device. Relationships ============= diff --git a/MAINTAINERS b/MAINTAINERS index e127c2fb08a7..c27a9ea1af1b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1769,7 +1769,7 @@ T: git https://github.com/AsahiLinux/linux.git F: Documentation/devicetree/bindings/arm/apple.yaml F: Documentation/devicetree/bindings/arm/apple/* F: Documentation/devicetree/bindings/i2c/apple,i2c.yaml -F: Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml +F: Documentation/devicetree/bindings/interrupt-controller/apple,* F: Documentation/devicetree/bindings/mailbox/apple,mailbox.yaml F: Documentation/devicetree/bindings/pci/apple,pcie.yaml F: Documentation/devicetree/bindings/pinctrl/apple,pinctrl.yaml @@ -6307,8 +6307,8 @@ T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/vboxvideo/ DRM DRIVER FOR VMWARE VIRTUAL GPU -M: "VMware Graphics" <linux-graphics-maintainer@vmware.com> M: Zack Rusin <zackr@vmware.com> +R: VMware Graphics Reviewers <linux-graphics-maintainer@vmware.com> L: dri-devel@lists.freedesktop.org S: Supported T: git git://anongit.freedesktop.org/drm/drm-misc @@ -10455,6 +10455,8 @@ KERNEL REGRESSIONS M: Thorsten Leemhuis <linux@leemhuis.info> L: regressions@lists.linux.dev S: Supported +F: Documentation/admin-guide/reporting-regressions.rst +F: Documentation/process/handling-regressions.rst KERNEL SELFTEST FRAMEWORK M: Shuah Khan <shuah@kernel.org> @@ -13376,6 +13378,7 @@ F: net/core/drop_monitor.c NETWORKING DRIVERS M: "David S. Miller" <davem@davemloft.net> M: Jakub Kicinski <kuba@kernel.org> +M: Paolo Abeni <pabeni@redhat.com> L: netdev@vger.kernel.org S: Maintained Q: https://patchwork.kernel.org/project/netdevbpf/list/ @@ -13422,6 +13425,7 @@ F: tools/testing/selftests/drivers/net/dsa/ NETWORKING [GENERAL] M: "David S. Miller" <davem@davemloft.net> M: Jakub Kicinski <kuba@kernel.org> +M: Paolo Abeni <pabeni@redhat.com> L: netdev@vger.kernel.org S: Maintained Q: https://patchwork.kernel.org/project/netdevbpf/list/ @@ -14603,8 +14607,9 @@ F: include/uapi/linux/ppdev.h PARAVIRT_OPS INTERFACE M: Juergen Gross <jgross@suse.com> -M: Deep Shah <sdeep@vmware.com> -M: "VMware, Inc." <pv-drivers@vmware.com> +M: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu> +R: Alexey Makhalov <amakhalov@vmware.com> +R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> L: virtualization@lists.linux-foundation.org L: x86@kernel.org S: Supported @@ -16321,6 +16326,8 @@ F: tools/testing/selftests/resctrl/ READ-COPY UPDATE (RCU) M: "Paul E. McKenney" <paulmck@kernel.org> +M: Frederic Weisbecker <frederic@kernel.org> (kernel/rcu/tree_nocb.h) +M: Neeraj Upadhyay <quic_neeraju@quicinc.com> (kernel/rcu/tasks.h) M: Josh Triplett <josh@joshtriplett.org> R: Steven Rostedt <rostedt@goodmis.org> R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> @@ -20638,30 +20645,33 @@ F: tools/testing/vsock/ VMWARE BALLOON DRIVER M: Nadav Amit <namit@vmware.com> -M: "VMware, Inc." <pv-drivers@vmware.com> +R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> L: linux-kernel@vger.kernel.org S: Maintained F: drivers/misc/vmw_balloon.c VMWARE HYPERVISOR INTERFACE -M: Deep Shah <sdeep@vmware.com> -M: "VMware, Inc." <pv-drivers@vmware.com> +M: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu> +M: Alexey Makhalov <amakhalov@vmware.com> +R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> L: virtualization@lists.linux-foundation.org +L: x86@kernel.org S: Supported +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vmware F: arch/x86/include/asm/vmware.h F: arch/x86/kernel/cpu/vmware.c VMWARE PVRDMA DRIVER M: Bryan Tan <bryantan@vmware.com> M: Vishnu Dasa <vdasa@vmware.com> -M: VMware PV-Drivers <pv-drivers@vmware.com> +R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> L: linux-rdma@vger.kernel.org S: Maintained F: drivers/infiniband/hw/vmw_pvrdma/ VMware PVSCSI driver M: Vishal Bhakta <vbhakta@vmware.com> -M: VMware PV-Drivers <pv-drivers@vmware.com> +R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> L: linux-scsi@vger.kernel.org S: Maintained F: drivers/scsi/vmw_pvscsi.c @@ -20669,7 +20679,7 @@ F: drivers/scsi/vmw_pvscsi.h VMWARE VIRTUAL PTP CLOCK DRIVER M: Vivek Thampi <vithampi@vmware.com> -M: "VMware, Inc." <pv-drivers@vmware.com> +R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> L: netdev@vger.kernel.org S: Supported F: drivers/ptp/ptp_vmw.c @@ -20677,14 +20687,15 @@ F: drivers/ptp/ptp_vmw.c VMWARE VMCI DRIVER M: Jorgen Hansen <jhansen@vmware.com> M: Vishnu Dasa <vdasa@vmware.com> +R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> L: linux-kernel@vger.kernel.org -L: pv-drivers@vmware.com (private) S: Maintained F: drivers/misc/vmw_vmci/ VMWARE VMMOUSE SUBDRIVER -M: "VMware Graphics" <linux-graphics-maintainer@vmware.com> -M: "VMware, Inc." <pv-drivers@vmware.com> +M: Zack Rusin <zackr@vmware.com> +R: VMware Graphics Reviewers <linux-graphics-maintainer@vmware.com> +R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> L: linux-input@vger.kernel.org S: Maintained F: drivers/input/mouse/vmmouse.c @@ -20692,7 +20703,7 @@ F: drivers/input/mouse/vmmouse.h VMWARE VMXNET3 ETHERNET DRIVER M: Ronak Doshi <doshir@vmware.com> -M: pv-drivers@vmware.com +R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> L: netdev@vger.kernel.org S: Maintained F: drivers/net/vmxnet3/ @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Superb Owl # *DOCUMENTATION* diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 956a26d52a4c..0a11bacffc1f 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -3482,8 +3482,7 @@ ti,timer-pwm; }; }; - - target-module@2c000 { /* 0x4882c000, ap 17 02.0 */ + timer15_target: target-module@2c000 { /* 0x4882c000, ap 17 02.0 */ compatible = "ti,sysc-omap4-timer", "ti,sysc"; reg = <0x2c000 0x4>, <0x2c010 0x4>; @@ -3511,7 +3510,7 @@ }; }; - target-module@2e000 { /* 0x4882e000, ap 19 14.0 */ + timer16_target: target-module@2e000 { /* 0x4882e000, ap 19 14.0 */ compatible = "ti,sysc-omap4-timer", "ti,sysc"; reg = <0x2e000 0x4>, <0x2e010 0x4>; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 42bff117656c..97ce0c4f1df7 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1339,20 +1339,20 @@ }; /* Local timers, see ARM architected timer wrap erratum i940 */ -&timer3_target { +&timer15_target { ti,no-reset-on-init; ti,no-idle; timer@0 { - assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER3_CLKCTRL 24>; + assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>; assigned-clock-parents = <&timer_sys_clk_div>; }; }; -&timer4_target { +&timer16_target { ti,no-reset-on-init; ti,no-idle; timer@0 { - assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 24>; + assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>; assigned-clock-parents = <&timer_sys_clk_div>; }; }; diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 6d0cb0f7bc54..fe249ea91908 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -164,47 +164,6 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) return phys; } -static void __init arm_initrd_init(void) -{ -#ifdef CONFIG_BLK_DEV_INITRD - phys_addr_t start; - unsigned long size; - - initrd_start = initrd_end = 0; - - if (!phys_initrd_size) - return; - - /* - * Round the memory region to page boundaries as per free_initrd_mem() - * This allows us to detect whether the pages overlapping the initrd - * are in use, but more importantly, reserves the entire set of pages - * as we don't want these pages allocated for other purposes. - */ - start = round_down(phys_initrd_start, PAGE_SIZE); - size = phys_initrd_size + (phys_initrd_start - start); - size = round_up(size, PAGE_SIZE); - - if (!memblock_is_region_memory(start, size)) { - pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n", - (u64)start, size); - return; - } - - if (memblock_is_region_reserved(start, size)) { - pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n", - (u64)start, size); - return; - } - - memblock_reserve(start, size); - - /* Now convert initrd to virtual addresses */ - initrd_start = __phys_to_virt(phys_initrd_start); - initrd_end = initrd_start + phys_initrd_size; -#endif -} - #ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND void check_cpu_icache_size(int cpuid) { @@ -226,7 +185,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc) /* Register the kernel text, kernel data and initrd with memblock. */ memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START); - arm_initrd_init(); + reserve_initrd_mem(); arm_mm_memblock_reserve(); diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 7c9e395b77f7..ec52b776f926 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -18,7 +18,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO32 ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8 ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ - -z max-page-size=4096 -nostdlib -shared $(ldflags-y) \ + -z max-page-size=4096 -shared $(ldflags-y) \ --hash-style=sysv --build-id=sha1 \ -T diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c842878f8133..a555f409ba95 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -10,6 +10,7 @@ config ARM64 select ACPI_SPCR_TABLE if ACPI select ACPI_PPTT if ACPI select ARCH_HAS_DEBUG_WX + select ARCH_BINFMT_ELF_EXTRA_PHDRS select ARCH_BINFMT_ELF_STATE select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION @@ -891,13 +892,17 @@ config CAVIUM_ERRATUM_23144 If unsure, say Y. config CAVIUM_ERRATUM_23154 - bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed" + bool "Cavium errata 23154 and 38545: GICv3 lacks HW synchronisation" default y help - The gicv3 of ThunderX requires a modified version for + The ThunderX GICv3 implementation requires a modified version for reading the IAR status to ensure data synchronization (access to icc_iar1_el1 is not sync'ed before and after). + It also suffers from erratum 38545 (also present on Marvell's + OcteonTX and OcteonTX2), resulting in deactivated interrupts being + spuriously presented to the CPU interface. + If unsure, say Y. config CAVIUM_ERRATUM_27456 diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index 19afbc91020a..9f8f4145db88 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -97,6 +97,18 @@ <AIC_FIQ AIC_TMR_HV_VIRT IRQ_TYPE_LEVEL_HIGH>; }; + pmu-e { + compatible = "apple,icestorm-pmu"; + interrupt-parent = <&aic>; + interrupts = <AIC_FIQ AIC_CPU_PMU_E IRQ_TYPE_LEVEL_HIGH>; + }; + + pmu-p { + compatible = "apple,firestorm-pmu"; + interrupt-parent = <&aic>; + interrupts = <AIC_FIQ AIC_CPU_PMU_P IRQ_TYPE_LEVEL_HIGH>; + }; + clkref: clock-ref { compatible = "fixed-clock"; #clock-cells = <0>; @@ -213,6 +225,18 @@ interrupt-controller; reg = <0x2 0x3b100000 0x0 0x8000>; power-domains = <&ps_aic>; + + affinities { + e-core-pmu-affinity { + apple,fiq-index = <AIC_CPU_PMU_E>; + cpus = <&cpu0 &cpu1 &cpu2 &cpu3>; + }; + + p-core-pmu-affinity { + apple,fiq-index = <AIC_CPU_PMU_P>; + cpus = <&cpu4 &cpu5 &cpu6 &cpu7>; + }; + }; }; pmgr: power-management@23b700000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi index 3ed1f2c51cad..18e529118476 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi @@ -253,18 +253,18 @@ interrupt-controller; reg = <0x14 4>; interrupt-map = - <0 0 &gic 0 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, - <1 0 &gic 0 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, - <2 0 &gic 0 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, - <3 0 &gic 0 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, - <4 0 &gic 0 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, - <5 0 &gic 0 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, - <6 0 &gic 0 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, - <7 0 &gic 0 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, - <8 0 &gic 0 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, - <9 0 &gic 0 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, - <10 0 &gic 0 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, - <11 0 &gic 0 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; + <0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, + <1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, + <2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, + <3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, + <4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, + <5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, + <6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, + <7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, + <8 0 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, + <9 0 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, + <10 0 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, + <11 0 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; interrupt-map-mask = <0xffffffff 0x0>; }; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index 3cb9c21d2775..1282b61da8a5 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -293,18 +293,18 @@ interrupt-controller; reg = <0x14 4>; interrupt-map = - <0 0 &gic 0 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, - <1 0 &gic 0 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, - <2 0 &gic 0 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, - <3 0 &gic 0 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, - <4 0 &gic 0 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, - <5 0 &gic 0 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, - <6 0 &gic 0 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, - <7 0 &gic 0 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, - <8 0 &gic 0 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, - <9 0 &gic 0 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, - <10 0 &gic 0 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, - <11 0 &gic 0 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; + <0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, + <1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, + <2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, + <3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, + <4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, + <5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, + <6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, + <7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, + <8 0 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, + <9 0 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, + <10 0 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, + <11 0 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; interrupt-map-mask = <0xffffffff 0x0>; }; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi index 7032505f5ef3..3c611cb4f5fe 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi @@ -680,18 +680,18 @@ interrupt-controller; reg = <0x14 4>; interrupt-map = - <0 0 &gic 0 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, - <1 0 &gic 0 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, - <2 0 &gic 0 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, - <3 0 &gic 0 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, - <4 0 &gic 0 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, - <5 0 &gic 0 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, - <6 0 &gic 0 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, - <7 0 &gic 0 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, - <8 0 &gic 0 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, - <9 0 &gic 0 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, - <10 0 &gic 0 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, - <11 0 &gic 0 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; + <0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, + <1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, + <2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, + <3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, + <4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, + <5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, + <6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, + <7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, + <8 0 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, + <9 0 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, + <10 0 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, + <11 0 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; interrupt-map-mask = <0xffffffff 0x0>; }; }; diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h new file mode 100644 index 000000000000..99483b19b99f --- /dev/null +++ b/arch/arm64/include/asm/apple_m1_pmu.h @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef __ASM_APPLE_M1_PMU_h +#define __ASM_APPLE_M1_PMU_h + +#include <linux/bits.h> +#include <asm/sysreg.h> + +/* Counters */ +#define SYS_IMP_APL_PMC0_EL1 sys_reg(3, 2, 15, 0, 0) +#define SYS_IMP_APL_PMC1_EL1 sys_reg(3, 2, 15, 1, 0) +#define SYS_IMP_APL_PMC2_EL1 sys_reg(3, 2, 15, 2, 0) +#define SYS_IMP_APL_PMC3_EL1 sys_reg(3, 2, 15, 3, 0) +#define SYS_IMP_APL_PMC4_EL1 sys_reg(3, 2, 15, 4, 0) +#define SYS_IMP_APL_PMC5_EL1 sys_reg(3, 2, 15, 5, 0) +#define SYS_IMP_APL_PMC6_EL1 sys_reg(3, 2, 15, 6, 0) +#define SYS_IMP_APL_PMC7_EL1 sys_reg(3, 2, 15, 7, 0) +#define SYS_IMP_APL_PMC8_EL1 sys_reg(3, 2, 15, 9, 0) +#define SYS_IMP_APL_PMC9_EL1 sys_reg(3, 2, 15, 10, 0) + +/* Core PMC control register */ +#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0) +#define PMCR0_CNT_ENABLE_0_7 GENMASK(7, 0) +#define PMCR0_IMODE GENMASK(10, 8) +#define PMCR0_IMODE_OFF 0 +#define PMCR0_IMODE_PMI 1 +#define PMCR0_IMODE_AIC 2 +#define PMCR0_IMODE_HALT 3 +#define PMCR0_IMODE_FIQ 4 +#define PMCR0_IACT BIT(11) +#define PMCR0_PMI_ENABLE_0_7 GENMASK(19, 12) +#define PMCR0_STOP_CNT_ON_PMI BIT(20) +#define PMCR0_CNT_GLOB_L2C_EVT BIT(21) +#define PMCR0_DEFER_PMI_TO_ERET BIT(22) +#define PMCR0_ALLOW_CNT_EN_EL0 BIT(30) +#define PMCR0_CNT_ENABLE_8_9 GENMASK(33, 32) +#define PMCR0_PMI_ENABLE_8_9 GENMASK(45, 44) + +#define SYS_IMP_APL_PMCR1_EL1 sys_reg(3, 1, 15, 1, 0) +#define PMCR1_COUNT_A64_EL0_0_7 GENMASK(15, 8) +#define PMCR1_COUNT_A64_EL1_0_7 GENMASK(23, 16) +#define PMCR1_COUNT_A64_EL0_8_9 GENMASK(41, 40) +#define PMCR1_COUNT_A64_EL1_8_9 GENMASK(49, 48) + +#define SYS_IMP_APL_PMCR2_EL1 sys_reg(3, 1, 15, 2, 0) +#define SYS_IMP_APL_PMCR3_EL1 sys_reg(3, 1, 15, 3, 0) +#define SYS_IMP_APL_PMCR4_EL1 sys_reg(3, 1, 15, 4, 0) + +#define SYS_IMP_APL_PMESR0_EL1 sys_reg(3, 1, 15, 5, 0) +#define PMESR0_EVT_CNT_2 GENMASK(7, 0) +#define PMESR0_EVT_CNT_3 GENMASK(15, 8) +#define PMESR0_EVT_CNT_4 GENMASK(23, 16) +#define PMESR0_EVT_CNT_5 GENMASK(31, 24) + +#define SYS_IMP_APL_PMESR1_EL1 sys_reg(3, 1, 15, 6, 0) +#define PMESR1_EVT_CNT_6 GENMASK(7, 0) +#define PMESR1_EVT_CNT_7 GENMASK(15, 8) +#define PMESR1_EVT_CNT_8 GENMASK(23, 16) +#define PMESR1_EVT_CNT_9 GENMASK(31, 24) + +#define SYS_IMP_APL_PMSR_EL1 sys_reg(3, 1, 15, 13, 0) +#define PMSR_OVERFLOW GENMASK(9, 0) + +#endif /* __ASM_APPLE_M1_PMU_h */ diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 4ad22c3135db..8bd5afc7b692 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -53,17 +53,36 @@ static inline u64 gic_read_iar_common(void) * The gicv3 of ThunderX requires a modified version for reading the * IAR status to ensure data synchronization (access to icc_iar1_el1 * is not sync'ed before and after). + * + * Erratum 38545 + * + * When a IAR register read races with a GIC interrupt RELEASE event, + * GIC-CPU interface could wrongly return a valid INTID to the CPU + * for an interrupt that is already released(non activated) instead of 0x3ff. + * + * To workaround this, return a valid interrupt ID only if there is a change + * in the active priority list after the IAR read. + * + * Common function used for both the workarounds since, + * 1. On Thunderx 88xx 1.x both erratas are applicable. + * 2. Having extra nops doesn't add any side effects for Silicons where + * erratum 23154 is not applicable. */ static inline u64 gic_read_iar_cavium_thunderx(void) { - u64 irqstat; + u64 irqstat, apr; + apr = read_sysreg_s(SYS_ICC_AP1R0_EL1); nops(8); irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1); nops(4); mb(); - return irqstat; + /* Max priority groups implemented is only 32 */ + if (likely(apr != read_sysreg_s(SYS_ICC_AP1R0_EL1))) + return irqstat; + + return 0x3ff; } static inline void gic_write_ctlr(u32 val) diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index 09e43272ccb0..d1bb5e71df25 100644 --- a/arch/arm64/include/asm/archrandom.h +++ b/arch/arm64/include/asm/archrandom.h @@ -42,13 +42,47 @@ static inline bool __arm64_rndr(unsigned long *v) return ok; } +static inline bool __arm64_rndrrs(unsigned long *v) +{ + bool ok; + + /* + * Reads of RNDRRS set PSTATE.NZCV to 0b0000 on success, + * and set PSTATE.NZCV to 0b0100 otherwise. + */ + asm volatile( + __mrs_s("%0", SYS_RNDRRS_EL0) "\n" + " cset %w1, ne\n" + : "=r" (*v), "=r" (ok) + : + : "cc"); + + return ok; +} + static inline bool __must_check arch_get_random_long(unsigned long *v) { + /* + * Only support the generic interface after we have detected + * the system wide capability, avoiding complexity with the + * cpufeature code and with potential scheduling between CPUs + * with and without the feature. + */ + if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v)) + return true; return false; } static inline bool __must_check arch_get_random_int(unsigned int *v) { + if (cpus_have_const_cap(ARM64_HAS_RNG)) { + unsigned long val; + + if (__arm64_rndr(&val)) { + *v = val; + return true; + } + } return false; } @@ -71,12 +105,11 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v) } /* - * Only support the generic interface after we have detected - * the system wide capability, avoiding complexity with the - * cpufeature code and with potential scheduling between CPUs - * with and without the feature. + * RNDRRS is not backed by an entropy source but by a DRBG that is + * reseeded after each invocation. This is not a 100% fit but good + * enough to implement this API if no other entropy source exists. */ - if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v)) + if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndrrs(v)) return true; return false; @@ -96,7 +129,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v) } if (cpus_have_const_cap(ARM64_HAS_RNG)) { - if (__arm64_rndr(&val)) { + if (__arm64_rndrrs(&val)) { *v = val; return true; } diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h index f1bba5fc61c4..ead62f7dd269 100644 --- a/arch/arm64/include/asm/asm_pointer_auth.h +++ b/arch/arm64/include/asm/asm_pointer_auth.h @@ -60,6 +60,9 @@ alternative_else_nop_endif .macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3 mrs \tmp1, id_aa64isar1_el1 ubfx \tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8 + mrs_s \tmp2, SYS_ID_AA64ISAR2_EL1 + ubfx \tmp2, \tmp2, #ID_AA64ISAR2_APA3_SHIFT, #4 + orr \tmp1, \tmp1, \tmp2 cbz \tmp1, .Lno_addr_auth\@ mov_q \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 6ebdc0f834a7..8c5a61aeaf8e 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -542,11 +542,6 @@ alternative_endif #define EXPORT_SYMBOL_NOKASAN(name) EXPORT_SYMBOL(name) #endif -#ifdef CONFIG_KASAN_HW_TAGS -#define EXPORT_SYMBOL_NOHWKASAN(name) -#else -#define EXPORT_SYMBOL_NOHWKASAN(name) EXPORT_SYMBOL_NOKASAN(name) -#endif /* * Emit a 64-bit absolute little endian symbol reference in a way that * ensures that it will be resolved at build time, even when building a diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index a77b5f49b3a6..c62e7e5e2f0c 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -356,6 +356,7 @@ struct arm64_cpu_capabilities { struct { /* Feature register checking */ u32 sys_reg; u8 field_pos; + u8 field_width; u8 min_field_value; u8 hwcap_type; bool sign; @@ -576,6 +577,8 @@ static inline u64 arm64_ftr_reg_user_value(const struct arm64_ftr_reg *reg) static inline int __attribute_const__ cpuid_feature_extract_field_width(u64 features, int field, int width, bool sign) { + if (WARN_ON_ONCE(!width)) + width = 4; return (sign) ? cpuid_feature_extract_signed_field_width(features, field, width) : cpuid_feature_extract_unsigned_field_width(features, field, width); @@ -883,6 +886,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) extern struct arm64_ftr_override id_aa64mmfr1_override; extern struct arm64_ftr_override id_aa64pfr1_override; extern struct arm64_ftr_override id_aa64isar1_override; +extern struct arm64_ftr_override id_aa64isar2_override; u32 get_kvm_ipa_limit(void); void dump_cpu_features(void); diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index bfbf0c4c7c5e..232b439cbaf3 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -88,6 +88,13 @@ #define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 #define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3 #define CAVIUM_CPU_PART_THUNDERX2 0x0AF +/* OcteonTx2 series */ +#define CAVIUM_CPU_PART_OCTX2_98XX 0x0B1 +#define CAVIUM_CPU_PART_OCTX2_96XX 0x0B2 +#define CAVIUM_CPU_PART_OCTX2_95XX 0x0B3 +#define CAVIUM_CPU_PART_OCTX2_95XXN 0x0B4 +#define CAVIUM_CPU_PART_OCTX2_95XXMM 0x0B5 +#define CAVIUM_CPU_PART_OCTX2_95XXO 0x0B6 #define BRCM_CPU_PART_BRAHMA_B53 0x100 #define BRCM_CPU_PART_VULCAN 0x516 @@ -132,6 +139,12 @@ #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) +#define MIDR_OCTX2_98XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_98XX) +#define MIDR_OCTX2_96XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_96XX) +#define MIDR_OCTX2_95XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XX) +#define MIDR_OCTX2_95XXN MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXN) +#define MIDR_OCTX2_95XXMM MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXMM) +#define MIDR_OCTX2_95XXO MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXO) #define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2) #define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53) #define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 657c921fd784..00c291067e57 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -34,18 +34,6 @@ */ #define BREAK_INSTR_SIZE AARCH64_INSN_SIZE -/* - * BRK instruction encoding - * The #imm16 value should be placed at bits[20:5] within BRK ins - */ -#define AARCH64_BREAK_MON 0xd4200000 - -/* - * BRK instruction for provoking a fault on purpose - * Unlike kgdb, #imm16 value with unallocated handler is used for faulting. - */ -#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5)) - #define AARCH64_BREAK_KGDB_DYN_DBG \ (AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5)) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index f68fbb207473..8db5ec0089db 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -108,6 +108,7 @@ #define KERNEL_HWCAP_ECV __khwcap2_feature(ECV) #define KERNEL_HWCAP_AFP __khwcap2_feature(AFP) #define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES) +#define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/insn-def.h b/arch/arm64/include/asm/insn-def.h index 2c075f615c6a..1a7d0d483698 100644 --- a/arch/arm64/include/asm/insn-def.h +++ b/arch/arm64/include/asm/insn-def.h @@ -3,7 +3,21 @@ #ifndef __ASM_INSN_DEF_H #define __ASM_INSN_DEF_H +#include <asm/brk-imm.h> + /* A64 instructions are always 32 bits. */ #define AARCH64_INSN_SIZE 4 +/* + * BRK instruction encoding + * The #imm16 value should be placed at bits[20:5] within BRK ins + */ +#define AARCH64_BREAK_MON 0xd4200000 + +/* + * BRK instruction for provoking a fault on purpose + * Unlike kgdb, #imm16 value with unallocated handler is used for faulting. + */ +#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5)) + #endif /* __ASM_INSN_DEF_H */ diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index b02f0c328c8e..1e5760d567ae 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -206,7 +206,9 @@ enum aarch64_insn_ldst_type { AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX, AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX, AARCH64_INSN_LDST_LOAD_EX, + AARCH64_INSN_LDST_LOAD_ACQ_EX, AARCH64_INSN_LDST_STORE_EX, + AARCH64_INSN_LDST_STORE_REL_EX, }; enum aarch64_insn_adsb_type { @@ -281,6 +283,36 @@ enum aarch64_insn_adr_type { AARCH64_INSN_ADR_TYPE_ADR, }; +enum aarch64_insn_mem_atomic_op { + AARCH64_INSN_MEM_ATOMIC_ADD, + AARCH64_INSN_MEM_ATOMIC_CLR, + AARCH64_INSN_MEM_ATOMIC_EOR, + AARCH64_INSN_MEM_ATOMIC_SET, + AARCH64_INSN_MEM_ATOMIC_SWP, +}; + +enum aarch64_insn_mem_order_type { + AARCH64_INSN_MEM_ORDER_NONE, + AARCH64_INSN_MEM_ORDER_ACQ, + AARCH64_INSN_MEM_ORDER_REL, + AARCH64_INSN_MEM_ORDER_ACQREL, +}; + +enum aarch64_insn_mb_type { + AARCH64_INSN_MB_SY, + AARCH64_INSN_MB_ST, + AARCH64_INSN_MB_LD, + AARCH64_INSN_MB_ISH, + AARCH64_INSN_MB_ISHST, + AARCH64_INSN_MB_ISHLD, + AARCH64_INSN_MB_NSH, + AARCH64_INSN_MB_NSHST, + AARCH64_INSN_MB_NSHLD, + AARCH64_INSN_MB_OSH, + AARCH64_INSN_MB_OSHST, + AARCH64_INSN_MB_OSHLD, +}; + #define __AARCH64_INSN_FUNCS(abbr, mask, val) \ static __always_inline bool aarch64_insn_is_##abbr(u32 code) \ { \ @@ -304,6 +336,11 @@ __AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400) __AARCH64_INSN_FUNCS(load_post, 0x3FE00C00, 0x38400400) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) __AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000) +__AARCH64_INSN_FUNCS(ldclr, 0x3F20FC00, 0x38201000) +__AARCH64_INSN_FUNCS(ldeor, 0x3F20FC00, 0x38202000) +__AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000) +__AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000) +__AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) @@ -475,13 +512,6 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, enum aarch64_insn_register state, enum aarch64_insn_size_type size, enum aarch64_insn_ldst_type type); -u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, - enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size); -u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size); u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, enum aarch64_insn_register src, int imm, enum aarch64_insn_variant variant, @@ -542,6 +572,42 @@ u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base, enum aarch64_insn_prfm_type type, enum aarch64_insn_prfm_target target, enum aarch64_insn_prfm_policy policy); +#ifdef CONFIG_ARM64_LSE_ATOMICS +u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_atomic_op op, + enum aarch64_insn_mem_order_type order); +u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_order_type order); +#else +static inline +u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_atomic_op op, + enum aarch64_insn_mem_order_type order) +{ + return AARCH64_BREAK_FAULT; +} + +static inline +u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_order_type order) +{ + return AARCH64_BREAK_FAULT; +} +#endif +u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type); + s32 aarch64_get_branch_offset(u32 insn); u32 aarch64_set_branch_offset(u32 insn, s32 offset); diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 01d47c5886dc..1767ded83888 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -355,8 +355,8 @@ ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) -#define CPACR_EL1_FPEN (3 << 20) #define CPACR_EL1_TTA (1 << 28) -#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN) +#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\ + CPACR_EL1_ZEN_EL1EN) #endif /* __ARM64_KVM_ARM_H__ */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 462882f356c7..aa7fa2a08f06 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -118,6 +118,7 @@ extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val); diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index b77e9b3f5371..43f8c25b3fda 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -39,28 +39,4 @@ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ bti c ; -/* - * Annotate a function as position independent, i.e., safe to be called before - * the kernel virtual mapping is activated. - */ -#define SYM_FUNC_START_PI(x) \ - SYM_FUNC_START_ALIAS(__pi_##x); \ - SYM_FUNC_START(x) - -#define SYM_FUNC_START_WEAK_PI(x) \ - SYM_FUNC_START_ALIAS(__pi_##x); \ - SYM_FUNC_START_WEAK(x) - -#define SYM_FUNC_START_WEAK_ALIAS_PI(x) \ - SYM_FUNC_START_ALIAS(__pi_##x); \ - SYM_START(x, SYM_L_WEAK, SYM_A_ALIGN) - -#define SYM_FUNC_END_PI(x) \ - SYM_FUNC_END(x); \ - SYM_FUNC_END_ALIAS(__pi_##x) - -#define SYM_FUNC_END_ALIAS_PI(x) \ - SYM_FUNC_END_ALIAS(x); \ - SYM_FUNC_END_ALIAS(__pi_##x) - #endif diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 5d10051c3e62..29c85810ae69 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -17,12 +17,10 @@ #include <asm/cpucaps.h> extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; -extern struct static_key_false arm64_const_caps_ready; -static inline bool system_uses_lse_atomics(void) +static __always_inline bool system_uses_lse_atomics(void) { - return (static_branch_likely(&arm64_const_caps_ready)) && - static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]); + return static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]); } #define __lse_ll_sc_body(op, ...) \ diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index a11ccadd47d2..094701ec5500 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -1,8 +1,8 @@ SECTIONS { #ifdef CONFIG_ARM64_MODULE_PLTS - .plt 0 (NOLOAD) : { BYTE(0) } - .init.plt 0 (NOLOAD) : { BYTE(0) } - .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) } + .plt 0 : { BYTE(0) } + .init.plt 0 : { BYTE(0) } + .text.ftrace_trampoline 0 : { BYTE(0) } #endif #ifdef CONFIG_KASAN_SW_TAGS diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h index 626d359b396e..14ee86b019c2 100644 --- a/arch/arm64/include/asm/mte-def.h +++ b/arch/arm64/include/asm/mte-def.h @@ -11,6 +11,7 @@ #define MTE_TAG_SHIFT 56 #define MTE_TAG_SIZE 4 #define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT) +#define MTE_PAGE_TAG_STORAGE (MTE_GRANULES_PER_PAGE * MTE_TAG_SIZE / 8) #define __MTE_PREAMBLE ARM64_ASM_PREAMBLE ".arch_extension memtag\n" diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 075539f5f1c8..adcb937342f1 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -11,7 +11,9 @@ #ifndef __ASSEMBLY__ #include <linux/bitfield.h> +#include <linux/kasan-enabled.h> #include <linux/page-flags.h> +#include <linux/sched.h> #include <linux/types.h> #include <asm/pgtable-types.h> @@ -86,6 +88,26 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child, #endif /* CONFIG_ARM64_MTE */ +static inline void mte_disable_tco_entry(struct task_struct *task) +{ + if (!system_supports_mte()) + return; + + /* + * Re-enable tag checking (TCO set on exception entry). This is only + * necessary if MTE is enabled in either the kernel or the userspace + * task in synchronous or asymmetric mode (SCTLR_EL1.TCF0 bit 0 is set + * for both). With MTE disabled in the kernel and disabled or + * asynchronous in userspace, tag check faults (including in uaccesses) + * are not reported, therefore there is no need to re-enable checking. + * This is beneficial on microarchitectures where re-enabling TCO is + * expensive. + */ + if (kasan_hw_tags_enabled() || + (task->thread.sctlr_user & (1UL << SCTLR_EL1_TCF0_SHIFT))) + asm volatile(SET_PSTATE_TCO(0)); +} + #ifdef CONFIG_KASAN_HW_TAGS /* Whether the MTE asynchronous mode is enabled. */ DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode); diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 4ef6f19331f9..3eaf462f5752 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -15,70 +15,70 @@ /* * Common architectural and microarchitectural event numbers. */ -#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00 -#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x01 -#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x02 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04 -#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x05 -#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x06 -#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x07 -#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08 -#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x09 -#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x0A -#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x0B -#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x0C -#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x0D -#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x0E -#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x0F -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10 -#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11 -#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12 -#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x13 -#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x14 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x15 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x16 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x17 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x18 -#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x19 -#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x1A -#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B -#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x1C -#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x1D -#define ARMV8_PMUV3_PERFCTR_CHAIN 0x1E -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x1F -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x20 -#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x21 -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x22 -#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x23 -#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x24 -#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x25 -#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x26 -#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x27 -#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x28 -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x29 -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x2A -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x2B -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x2C -#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x2D -#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x2E -#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F -#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x30 -#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x31 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x32 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x33 -#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x34 -#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x35 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x36 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x37 -#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x38 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x39 -#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x3A -#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x3B -#define ARMV8_PMUV3_PERFCTR_STALL 0x3C -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x3D -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x3E -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x3F +#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x0000 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x0001 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x0002 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x0003 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x0004 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x0005 +#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x0006 +#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x0007 +#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x0008 +#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x0009 +#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x000A +#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x000B +#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x000C +#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x000D +#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x000E +#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x000F +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x0010 +#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x0011 +#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x0012 +#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x0013 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x0014 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x0015 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x0016 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x0017 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x0018 +#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x0019 +#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x001A +#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x001B +#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x001C +#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x001D +#define ARMV8_PMUV3_PERFCTR_CHAIN 0x001E +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x001F +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x0020 +#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x0021 +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x0022 +#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x0023 +#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x0024 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x0025 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x0026 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x0027 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x0028 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x0029 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x002A +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x002B +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x002C +#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x002D +#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x002E +#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x002F +#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x0030 +#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x0031 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x0032 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x0033 +#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x0034 +#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x0035 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x0036 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x0037 +#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x0038 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x0039 +#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x003A +#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x003B +#define ARMV8_PMUV3_PERFCTR_STALL 0x003C +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x003D +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x003E +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x003F /* Statistical profiling extension microarchitectural events */ #define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000 @@ -96,6 +96,20 @@ #define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A #define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B +/* Trace buffer events */ +#define ARMV8_PMUV3_PERFCTR_TRB_WRAP 0x400C +#define ARMV8_PMUV3_PERFCTR_TRB_TRIG 0x400E + +/* Trace unit events */ +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT0 0x4010 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT1 0x4011 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT2 0x4012 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT3 0x4013 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4 0x4018 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5 0x4019 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6 0x401A +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7 0x401B + /* additional latency from alignment events */ #define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020 #define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021 @@ -107,91 +121,91 @@ #define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026 /* ARMv8 recommended implementation defined event types */ -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x40 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x41 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x42 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x43 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x44 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x45 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x46 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x47 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x48 - -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x4C -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x4D -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x4E -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x4F -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x50 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x51 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x52 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x53 - -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x56 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x57 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x58 - -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x5C -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x5D -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x5E -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x5F -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x60 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x61 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x62 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x63 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x64 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x65 -#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x66 -#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x67 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x68 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x69 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x6A - -#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x6C -#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x6D -#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x6E -#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x6F -#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x70 -#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x71 -#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x72 -#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x73 -#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x74 -#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x75 -#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x76 -#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x77 -#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x78 -#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x79 -#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x7A - -#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x7C -#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x7D -#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x7E - -#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x81 -#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x82 -#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x83 -#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x84 - -#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x86 -#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x87 -#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x88 - -#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x8A -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x8B -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x8C -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x8D -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x8E -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x8F -#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x90 -#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x91 - -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0xA0 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0xA1 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0xA2 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0xA3 - -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0xA6 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0xA7 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0xA8 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x0040 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x0042 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x0044 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x0045 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x0046 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x0047 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x0048 + +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x0050 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x0051 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x0052 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x0053 + +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x0056 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x0057 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x0058 + +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x005C +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x005D +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x005E +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x005F +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x0062 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x0063 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x0064 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x0065 +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x0066 +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x0067 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x0068 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x0069 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x006A + +#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x006C +#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x006D +#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x006E +#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x006F +#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x0070 +#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x0071 +#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x0072 +#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x0073 +#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x0074 +#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x0075 +#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x0076 +#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x0077 +#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x0078 +#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x0079 +#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x007A + +#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x007C +#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x007D +#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x007E + +#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x0081 +#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x0082 +#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x0083 +#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x0084 + +#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x0086 +#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x0087 +#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x0088 + +#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x008A +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x008B +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x008C +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x008D +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x008E +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x008F +#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x0090 +#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x0091 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0x00A0 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0x00A1 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0x00A2 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0x00A3 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0x00A6 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0x00A7 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0x00A8 /* * Per-CPU PMCR: config reg diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 40085e53f573..66671ff05183 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -273,6 +273,8 @@ #define TCR_NFD1 (UL(1) << 54) #define TCR_E0PD0 (UL(1) << 55) #define TCR_E0PD1 (UL(1) << 56) +#define TCR_TCMA0 (UL(1) << 57) +#define TCR_TCMA1 (UL(1) << 58) /* * TTBR. diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 6f41b65f9962..73e38d9a540c 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -21,6 +21,7 @@ #define MTE_CTRL_TCF_SYNC (1UL << 16) #define MTE_CTRL_TCF_ASYNC (1UL << 17) +#define MTE_CTRL_TCF_ASYMM (1UL << 18) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 86e0cc9b9c68..aa3d3607d5c8 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -67,7 +67,8 @@ struct bp_hardening_data { DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); -static inline void arm64_apply_bp_hardening(void) +/* Called during entry so must be __always_inline */ +static __always_inline void arm64_apply_bp_hardening(void) { struct bp_hardening_data *d; diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index 95f7686b728d..3a3264ff47b9 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -12,13 +12,11 @@ extern char *strrchr(const char *, int c); #define __HAVE_ARCH_STRCHR extern char *strchr(const char *, int c); -#ifndef CONFIG_KASAN_HW_TAGS #define __HAVE_ARCH_STRCMP extern int strcmp(const char *, const char *); #define __HAVE_ARCH_STRNCMP extern int strncmp(const char *, const char *, __kernel_size_t); -#endif #define __HAVE_ARCH_STRLEN extern __kernel_size_t strlen(const char *); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 932d45b17877..c7ca3a105528 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -774,6 +774,8 @@ /* id_aa64isar2 */ #define ID_AA64ISAR2_CLEARBHB_SHIFT 28 +#define ID_AA64ISAR2_APA3_SHIFT 12 +#define ID_AA64ISAR2_GPA3_SHIFT 8 #define ID_AA64ISAR2_RPRES_SHIFT 4 #define ID_AA64ISAR2_WFXT_SHIFT 0 @@ -787,6 +789,16 @@ #define ID_AA64ISAR2_WFXT_NI 0x0 #define ID_AA64ISAR2_WFXT_SUPPORTED 0x2 +#define ID_AA64ISAR2_APA3_NI 0x0 +#define ID_AA64ISAR2_APA3_ARCHITECTED 0x1 +#define ID_AA64ISAR2_APA3_ARCH_EPAC 0x2 +#define ID_AA64ISAR2_APA3_ARCH_EPAC2 0x3 +#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC 0x4 +#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC_CMB 0x5 + +#define ID_AA64ISAR2_GPA3_NI 0x0 +#define ID_AA64ISAR2_GPA3_ARCHITECTED 0x1 + /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 @@ -1099,13 +1111,11 @@ #define ZCR_ELx_LEN_SIZE 9 #define ZCR_ELx_LEN_MASK 0x1ff +#define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ +#define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ + #define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ -#define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) - -/* TCR EL1 Bit Definitions */ -#define SYS_TCR_EL1_TCMA1 (BIT(58)) -#define SYS_TCR_EL1_TCMA0 (BIT(57)) /* GCR_EL1 Definitions */ #define SYS_GCR_EL1_RRND (BIT(16)) diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h index f64613a96d53..bc9a2145f419 100644 --- a/arch/arm64/include/asm/vectors.h +++ b/arch/arm64/include/asm/vectors.h @@ -56,14 +56,14 @@ enum arm64_bp_harden_el1_vectors { DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector); #ifndef CONFIG_UNMAP_KERNEL_AT_EL0 -#define TRAMP_VALIAS 0 +#define TRAMP_VALIAS 0ul #endif static inline const char * arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot) { if (arm64_kernel_unmapped_at_el0()) - return (char *)TRAMP_VALIAS + SZ_2K * slot; + return (char *)(TRAMP_VALIAS + SZ_2K * slot); WARN_ON_ONCE(slot == EL1_VECTOR_KPTI); diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index f03731847d9d..99cb5d383048 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -78,5 +78,6 @@ #define HWCAP2_ECV (1 << 19) #define HWCAP2_AFP (1 << 20) #define HWCAP2_RPRES (1 << 21) +#define HWCAP2_MTE3 (1 << 22) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 88b3e2a21408..986837d7ec82 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o +obj-$(CONFIG_ELF_CORE) += elfcore.o obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \ cpu-reset.o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a401180e8d66..4c9b5b4b7a0b 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -214,6 +214,21 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { }; #endif +#ifdef CONFIG_CAVIUM_ERRATUM_23154 +const struct midr_range cavium_erratum_23154_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_THUNDERX), + MIDR_ALL_VERSIONS(MIDR_THUNDERX_81XX), + MIDR_ALL_VERSIONS(MIDR_THUNDERX_83XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_98XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_96XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXN), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXMM), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXO), + {}, +}; +#endif + #ifdef CONFIG_CAVIUM_ERRATUM_27456 const struct midr_range cavium_erratum_27456_cpus[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ @@ -425,10 +440,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_CAVIUM_ERRATUM_23154 { - /* Cavium ThunderX, pass 1.x */ - .desc = "Cavium erratum 23154", + .desc = "Cavium errata 23154 and 38545", .capability = ARM64_WORKAROUND_CAVIUM_23154, - ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + ERRATA_MIDR_RANGE_LIST(cavium_erratum_23154_cpus), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 @@ -611,7 +626,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "ARM erratum 2077057", .capability = ARM64_WORKAROUND_2077057, - .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2), }, #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d33687673f6b..d72c4b4d389c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -232,6 +232,10 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_APA3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_GPA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -602,6 +606,7 @@ static const struct arm64_ftr_bits ftr_raz[] = { struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override; struct arm64_ftr_override __ro_after_init id_aa64pfr1_override; struct arm64_ftr_override __ro_after_init id_aa64isar1_override; +struct arm64_ftr_override __ro_after_init id_aa64isar2_override; static const struct __ftr_reg_entry { u32 sys_id; @@ -650,6 +655,8 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1, &id_aa64isar1_override), ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2, + &id_aa64isar2_override), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), @@ -1313,7 +1320,9 @@ u64 __read_sysreg_by_encoding(u32 sys_id) static bool feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) { - int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign); + int val = cpuid_feature_extract_field_width(reg, entry->field_pos, + entry->field_width, + entry->sign); return val >= entry->min_field_value; } @@ -1787,14 +1796,6 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); } -static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) -{ - u64 val = read_sysreg_s(SYS_CLIDR_EL1); - - /* Check that CLIDR_EL1.LOU{U,IS} are both 0 */ - WARN_ON(CLIDR_LOUU(val) || CLIDR_LOUIS(val)); -} - #ifdef CONFIG_ARM64_PAN static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) { @@ -1841,21 +1842,27 @@ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, /* Now check for the secondary CPUs with SCOPE_LOCAL_CPU scope */ sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg), entry->field_pos, entry->sign); - return sec_val == boot_val; + return (sec_val >= entry->min_field_value) && (sec_val == boot_val); } static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry, int scope) { - return has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH], scope) || - has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); + bool api = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); + bool apa = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope); + bool apa3 = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope); + + return apa || apa3 || api; } static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, int __unused) { - return __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH) || - __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF); + bool gpi = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF); + bool gpa = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5); + bool gpa3 = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3); + + return gpa || gpa3 || gpi; } #endif /* CONFIG_ARM64_PTR_AUTH */ @@ -1957,6 +1964,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -1967,6 +1975,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR0_EL1, .field_pos = ID_AA64MMFR0_ECV_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -1978,6 +1987,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, .cpu_enable = cpu_enable_pan, @@ -1991,6 +2001,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 3, }, @@ -2003,6 +2014,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 2, }, @@ -2027,6 +2039,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL0_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, }, #ifdef CONFIG_KVM @@ -2038,6 +2051,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL1_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, }, { @@ -2058,6 +2072,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { */ .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_CSV3_SHIFT, + .field_width = 4, .min_field_value = 1, .matches = unmap_kernel_at_el0, .cpu_enable = kpti_install_ng_mappings, @@ -2077,6 +2092,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .field_width = 4, .min_field_value = 1, }, { @@ -2087,6 +2103,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .field_width = 4, .min_field_value = 2, }, #endif @@ -2098,6 +2115,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_SVE_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_SVE, .matches = has_cpuid_feature, .cpu_enable = sve_kernel_enable, @@ -2112,6 +2130,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_RAS_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_RAS_V1, .cpu_enable = cpu_clear_disr, }, @@ -2130,6 +2149,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_AMU_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_AMU, .cpu_enable = cpu_amu_enable, }, @@ -2154,9 +2174,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR2_FWB_SHIFT, + .field_width = 4, .min_field_value = 1, .matches = has_cpuid_feature, - .cpu_enable = cpu_has_fwb, }, { .desc = "ARMv8.4 Translation Table Level", @@ -2165,6 +2185,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR2_TTL_SHIFT, + .field_width = 4, .min_field_value = 1, .matches = has_cpuid_feature, }, @@ -2175,6 +2196,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_TLB_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = ID_AA64ISAR0_TLB_RANGE, }, @@ -2193,6 +2215,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR1_HADBS_SHIFT, + .field_width = 4, .min_field_value = 2, .matches = has_hw_dbm, .cpu_enable = cpu_enable_hw_dbm, @@ -2205,6 +2228,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_CRC32_SHIFT, + .field_width = 4, .min_field_value = 1, }, { @@ -2214,6 +2238,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_SSBS_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, }, @@ -2226,6 +2251,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR2_CNP_SHIFT, + .field_width = 4, .min_field_value = 1, .cpu_enable = cpu_enable_cnp, }, @@ -2237,27 +2263,41 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .field_pos = ID_AA64ISAR1_SB_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, #ifdef CONFIG_ARM64_PTR_AUTH { - .desc = "Address authentication (architected algorithm)", - .capability = ARM64_HAS_ADDRESS_AUTH_ARCH, + .desc = "Address authentication (architected QARMA5 algorithm)", + .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_APA_SHIFT, + .field_width = 4, .min_field_value = ID_AA64ISAR1_APA_ARCHITECTED, .matches = has_address_auth_cpucap, }, { + .desc = "Address authentication (architected QARMA3 algorithm)", + .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .sys_reg = SYS_ID_AA64ISAR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64ISAR2_APA3_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64ISAR2_APA3_ARCHITECTED, + .matches = has_address_auth_cpucap, + }, + { .desc = "Address authentication (IMP DEF algorithm)", .capability = ARM64_HAS_ADDRESS_AUTH_IMP_DEF, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_API_SHIFT, + .field_width = 4, .min_field_value = ID_AA64ISAR1_API_IMP_DEF, .matches = has_address_auth_cpucap, }, @@ -2267,22 +2307,35 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_address_auth_metacap, }, { - .desc = "Generic authentication (architected algorithm)", - .capability = ARM64_HAS_GENERIC_AUTH_ARCH, + .desc = "Generic authentication (architected QARMA5 algorithm)", + .capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_GPA_SHIFT, + .field_width = 4, .min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED, .matches = has_cpuid_feature, }, { + .desc = "Generic authentication (architected QARMA3 algorithm)", + .capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .sys_reg = SYS_ID_AA64ISAR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64ISAR2_GPA3_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64ISAR2_GPA3_ARCHITECTED, + .matches = has_cpuid_feature, + }, + { .desc = "Generic authentication (IMP DEF algorithm)", .capability = ARM64_HAS_GENERIC_AUTH_IMP_DEF, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_GPI_SHIFT, + .field_width = 4, .min_field_value = ID_AA64ISAR1_GPI_IMP_DEF, .matches = has_cpuid_feature, }, @@ -2303,6 +2356,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = can_use_gic_priorities, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -2314,6 +2368,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, + .field_width = 4, .field_pos = ID_AA64MMFR2_E0PD_SHIFT, .matches = has_cpuid_feature, .min_field_value = 1, @@ -2328,6 +2383,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_RNDR_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -2345,6 +2401,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = bti_enable, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_BT_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR1_BT_BTI, .sign = FTR_UNSIGNED, }, @@ -2357,6 +2414,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_MTE_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR1_MTE, .sign = FTR_UNSIGNED, .cpu_enable = cpu_enable_mte, @@ -2368,6 +2426,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_MTE_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR1_MTE_ASYMM, .sign = FTR_UNSIGNED, }, @@ -2379,16 +2438,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_LRCPC_SHIFT, + .field_width = 4, .matches = has_cpuid_feature, .min_field_value = 1, }, {}, }; -#define HWCAP_CPUID_MATCH(reg, field, s, min_value) \ +#define HWCAP_CPUID_MATCH(reg, field, width, s, min_value) \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ + .field_width = width, \ .sign = s, \ .min_field_value = min_value, @@ -2398,10 +2459,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .hwcap_type = cap_type, \ .hwcap = cap, \ -#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ +#define HWCAP_CAP(reg, field, width, s, min_value, cap_type, cap) \ { \ __HWCAP_CAP(#cap, cap_type, cap) \ - HWCAP_CPUID_MATCH(reg, field, s, min_value) \ + HWCAP_CPUID_MATCH(reg, field, width, s, min_value) \ } #define HWCAP_MULTI_CAP(list, cap_type, cap) \ @@ -2421,11 +2482,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_APA_ARCHITECTED) + 4, FTR_UNSIGNED, + ID_AA64ISAR1_APA_ARCHITECTED) + }, + { + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR2_APA3_ARCHITECTED) }, { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF) + 4, FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF) }, {}, }; @@ -2433,77 +2499,82 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = { { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED) + 4, FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED) + }, + { + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_GPA3_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR2_GPA3_ARCHITECTED) }, { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF) + 4, FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF) }, {}, }; #endif static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), - HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), + HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), #endif - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_BTI - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI), #endif #ifdef CONFIG_ARM64_PTR_AUTH HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA), HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG), #endif #ifdef CONFIG_ARM64_MTE - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE_ASYMM, CAP_HWCAP, KERNEL_HWCAP_MTE3), #endif /* CONFIG_ARM64_MTE */ - HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), - HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), - HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), + HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), + HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), + HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), {}, }; @@ -2532,15 +2603,15 @@ static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope) static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), - HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), + HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), /* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */ - HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), - HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), #endif {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 591c18a889a5..330b92ea863a 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -97,6 +97,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_ECV] = "ecv", [KERNEL_HWCAP_AFP] = "afp", [KERNEL_HWCAP_RPRES] = "rpres", + [KERNEL_HWCAP_MTE3] = "mte3", }; #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c index 314391a156ee..2b65aae332ce 100644 --- a/arch/arm64/kernel/crash_core.c +++ b/arch/arm64/kernel/crash_core.c @@ -20,6 +20,12 @@ void arch_crash_save_vmcoreinfo(void) { VMCOREINFO_NUMBER(VA_BITS); /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */ + vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); + vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); + vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); + vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); + vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); + vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n", kimage_voffset); vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c new file mode 100644 index 000000000000..3ed39c61a510 --- /dev/null +++ b/arch/arm64/kernel/elfcore.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/coredump.h> +#include <linux/elfcore.h> +#include <linux/kernel.h> +#include <linux/mm.h> + +#include <asm/cpufeature.h> +#include <asm/mte.h> + +#ifndef VMA_ITERATOR +#define VMA_ITERATOR(name, mm, addr) \ + struct mm_struct *name = mm +#define for_each_vma(vmi, vma) \ + for (vma = vmi->mmap; vma; vma = vma->vm_next) +#endif + +#define for_each_mte_vma(vmi, vma) \ + if (system_supports_mte()) \ + for_each_vma(vmi, vma) \ + if (vma->vm_flags & VM_MTE) + +static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_DONTDUMP) + return 0; + + return vma_pages(vma) * MTE_PAGE_TAG_STORAGE; +} + +/* Derived from dump_user_range(); start/end must be page-aligned */ +static int mte_dump_tag_range(struct coredump_params *cprm, + unsigned long start, unsigned long end) +{ + unsigned long addr; + + for (addr = start; addr < end; addr += PAGE_SIZE) { + char tags[MTE_PAGE_TAG_STORAGE]; + struct page *page = get_dump_page(addr); + + /* + * get_dump_page() returns NULL when encountering an empty + * page table entry that would otherwise have been filled with + * the zero page. Skip the equivalent tag dump which would + * have been all zeros. + */ + if (!page) { + dump_skip(cprm, MTE_PAGE_TAG_STORAGE); + continue; + } + + /* + * Pages mapped in user space as !pte_access_permitted() (e.g. + * PROT_EXEC only) may not have the PG_mte_tagged flag set. + */ + if (!test_bit(PG_mte_tagged, &page->flags)) { + put_page(page); + dump_skip(cprm, MTE_PAGE_TAG_STORAGE); + continue; + } + + mte_save_page_tags(page_address(page), tags); + put_page(page); + if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) + return 0; + } + + return 1; +} + +Elf_Half elf_core_extra_phdrs(void) +{ + struct vm_area_struct *vma; + int vma_count = 0; + VMA_ITERATOR(vmi, current->mm, 0); + + for_each_mte_vma(vmi, vma) + vma_count++; + + return vma_count; +} + +int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) +{ + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, current->mm, 0); + + for_each_mte_vma(vmi, vma) { + struct elf_phdr phdr; + + phdr.p_type = PT_ARM_MEMTAG_MTE; + phdr.p_offset = offset; + phdr.p_vaddr = vma->vm_start; + phdr.p_paddr = 0; + phdr.p_filesz = mte_vma_tag_dump_size(vma); + phdr.p_memsz = vma->vm_end - vma->vm_start; + offset += phdr.p_filesz; + phdr.p_flags = 0; + phdr.p_align = 0; + + if (!dump_emit(cprm, &phdr, sizeof(phdr))) + return 0; + } + + return 1; +} + +size_t elf_core_extra_data_size(void) +{ + struct vm_area_struct *vma; + size_t data_size = 0; + VMA_ITERATOR(vmi, current->mm, 0); + + for_each_mte_vma(vmi, vma) + data_size += mte_vma_tag_dump_size(vma); + + return data_size; +} + +int elf_core_write_extra_data(struct coredump_params *cprm) +{ + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, current->mm, 0); + + for_each_mte_vma(vmi, vma) { + if (vma->vm_flags & VM_DONTDUMP) + continue; + + if (!mte_dump_tag_range(cprm, vma->vm_start, vma->vm_end)) + return 0; + } + + return 1; +} diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index ef7fcefb96bd..7093b578e325 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -6,6 +6,7 @@ */ #include <linux/context_tracking.h> +#include <linux/kasan.h> #include <linux/linkage.h> #include <linux/lockdep.h> #include <linux/ptrace.h> @@ -56,6 +57,7 @@ static void noinstr enter_from_kernel_mode(struct pt_regs *regs) { __enter_from_kernel_mode(regs); mte_check_tfsr_entry(); + mte_disable_tco_entry(current); } /* @@ -103,6 +105,7 @@ static __always_inline void __enter_from_user_mode(void) CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); trace_hardirqs_off_finish(); + mte_disable_tco_entry(current); } static __always_inline void enter_from_user_mode(struct pt_regs *regs) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 4a3a653df07e..ede028dee81b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -307,6 +307,7 @@ alternative_else_nop_endif str w21, [sp, #S_SYSCALLNO] .endif +#ifdef CONFIG_ARM64_PSEUDO_NMI /* Save pmr */ alternative_if ARM64_HAS_IRQ_PRIO_MASKING mrs_s x20, SYS_ICC_PMR_EL1 @@ -314,12 +315,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET msr_s SYS_ICC_PMR_EL1, x20 alternative_else_nop_endif - - /* Re-enable tag checking (TCO set on exception entry) */ -#ifdef CONFIG_ARM64_MTE -alternative_if ARM64_MTE - SET_PSTATE_TCO(0) -alternative_else_nop_endif #endif /* @@ -337,6 +332,7 @@ alternative_else_nop_endif disable_daif .endif +#ifdef CONFIG_ARM64_PSEUDO_NMI /* Restore pmr */ alternative_if ARM64_HAS_IRQ_PRIO_MASKING ldr x20, [sp, #S_PMR_SAVE] @@ -346,6 +342,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING dsb sy // Ensure priority change is seen by redistributor .L__skip_pmr_sync\@: alternative_else_nop_endif +#endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index d8e606fe3c21..8a2ceb591686 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -17,7 +17,7 @@ #define FTR_DESC_NAME_LEN 20 #define FTR_DESC_FIELD_LEN 10 #define FTR_ALIAS_NAME_LEN 30 -#define FTR_ALIAS_OPTION_LEN 80 +#define FTR_ALIAS_OPTION_LEN 116 struct ftr_set_desc { char name[FTR_DESC_NAME_LEN]; @@ -71,6 +71,16 @@ static const struct ftr_set_desc isar1 __initconst = { }, }; +static const struct ftr_set_desc isar2 __initconst = { + .name = "id_aa64isar2", + .override = &id_aa64isar2_override, + .fields = { + { "gpa3", ID_AA64ISAR2_GPA3_SHIFT }, + { "apa3", ID_AA64ISAR2_APA3_SHIFT }, + {} + }, +}; + extern struct arm64_ftr_override kaslr_feature_override; static const struct ftr_set_desc kaslr __initconst = { @@ -88,6 +98,7 @@ static const struct ftr_set_desc * const regs[] __initconst = { &mmfr1, &pfr1, &isar1, + &isar2, &kaslr, }; @@ -100,7 +111,8 @@ static const struct { { "arm64.nobti", "id_aa64pfr1.bt=0" }, { "arm64.nopauth", "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 " - "id_aa64isar1.api=0 id_aa64isar1.apa=0" }, + "id_aa64isar1.api=0 id_aa64isar1.apa=0 " + "id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0" }, { "arm64.nomte", "id_aa64pfr1.mte=0" }, { "nokaslr", "kaslr.disabled=1" }, }; diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index f418ebc65f95..78b3e0f8e997 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -186,6 +186,11 @@ void mte_check_tfsr_el1(void) } #endif +/* + * This is where we actually resolve the system and process MTE mode + * configuration into an actual value in SCTLR_EL1 that affects + * userspace. + */ static void mte_update_sctlr_user(struct task_struct *task) { /* @@ -199,9 +204,20 @@ static void mte_update_sctlr_user(struct task_struct *task) unsigned long pref, resolved_mte_tcf; pref = __this_cpu_read(mte_tcf_preferred); + /* + * If there is no overlap between the system preferred and + * program requested values go with what was requested. + */ resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl; sctlr &= ~SCTLR_EL1_TCF0_MASK; - if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC) + /* + * Pick an actual setting. The order in which we check for + * set bits and map into register values determines our + * default order. + */ + if (resolved_mte_tcf & MTE_CTRL_TCF_ASYMM) + sctlr |= SCTLR_EL1_TCF0_ASYMM; + else if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC) sctlr |= SCTLR_EL1_TCF0_ASYNC; else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC) sctlr |= SCTLR_EL1_TCF0_SYNC; @@ -253,6 +269,9 @@ void mte_thread_switch(struct task_struct *next) mte_update_sctlr_user(next); mte_update_gcr_excl(next); + /* TCO may not have been disabled on exception entry for the current task. */ + mte_disable_tco_entry(next); + /* * Check if an async tag exception occurred at EL1. * @@ -293,6 +312,17 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg) if (arg & PR_MTE_TCF_SYNC) mte_ctrl |= MTE_CTRL_TCF_SYNC; + /* + * If the system supports it and both sync and async modes are + * specified then implicitly enable asymmetric mode. + * Userspace could see a mix of both sync and async anyway due + * to differing or changing defaults on CPUs. + */ + if (cpus_have_cap(ARM64_MTE_ASYMM) && + (arg & PR_MTE_TCF_ASYNC) && + (arg & PR_MTE_TCF_SYNC)) + mte_ctrl |= MTE_CTRL_TCF_ASYMM; + task->thread.mte_ctrl = mte_ctrl; if (task == current) { preempt_disable(); @@ -467,6 +497,8 @@ static ssize_t mte_tcf_preferred_show(struct device *dev, return sysfs_emit(buf, "async\n"); case MTE_CTRL_TCF_SYNC: return sysfs_emit(buf, "sync\n"); + case MTE_CTRL_TCF_ASYMM: + return sysfs_emit(buf, "asymm\n"); default: return sysfs_emit(buf, "???\n"); } @@ -482,6 +514,8 @@ static ssize_t mte_tcf_preferred_store(struct device *dev, tcf = MTE_CTRL_TCF_ASYNC; else if (sysfs_streq(buf, "sync")) tcf = MTE_CTRL_TCF_SYNC; + else if (cpus_have_cap(ARM64_MTE_ASYMM) && sysfs_streq(buf, "asymm")) + tcf = MTE_CTRL_TCF_ASYMM; else return -EINVAL; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index cab678ed6618..cb69ff1e6138 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -242,6 +242,16 @@ static struct attribute *armv8_pmuv3_event_attrs[] = { ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD), ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS), ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD), + ARMV8_EVENT_ATTR(trb_wrap, ARMV8_PMUV3_PERFCTR_TRB_WRAP), + ARMV8_EVENT_ATTR(trb_trig, ARMV8_PMUV3_PERFCTR_TRB_TRIG), + ARMV8_EVENT_ATTR(trcextout0, ARMV8_PMUV3_PERFCTR_TRCEXTOUT0), + ARMV8_EVENT_ATTR(trcextout1, ARMV8_PMUV3_PERFCTR_TRCEXTOUT1), + ARMV8_EVENT_ATTR(trcextout2, ARMV8_PMUV3_PERFCTR_TRCEXTOUT2), + ARMV8_EVENT_ATTR(trcextout3, ARMV8_PMUV3_PERFCTR_TRCEXTOUT3), + ARMV8_EVENT_ATTR(cti_trigout4, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4), + ARMV8_EVENT_ATTR(cti_trigout5, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5), + ARMV8_EVENT_ATTR(cti_trigout6, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6), + ARMV8_EVENT_ATTR(cti_trigout7, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7), ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT), ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT), ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT), diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 5369e649fa79..7fa97df55e3a 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -635,7 +635,8 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) return -EINVAL; if (system_supports_mte()) - valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK; + valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \ + | PR_MTE_TAG_MASK; if (arg & ~valid_mask) return -EINVAL; diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 6d45c63c6454..5777929d35bf 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -233,17 +233,20 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn) __this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT); } -static void call_smc_arch_workaround_1(void) +/* Called during entry so must be noinstr */ +static noinstr void call_smc_arch_workaround_1(void) { arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void call_hvc_arch_workaround_1(void) +/* Called during entry so must be noinstr */ +static noinstr void call_hvc_arch_workaround_1(void) { arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void qcom_link_stack_sanitisation(void) +/* Called during entry so must be noinstr */ +static noinstr void qcom_link_stack_sanitisation(void) { u64 tmp; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index d8aaf4b6f432..50fe8eaf7df0 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -11,7 +11,6 @@ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/signal.h> -#include <linux/personality.h> #include <linux/freezer.h> #include <linux/stddef.h> #include <linux/uaccess.h> @@ -577,10 +576,12 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, { int err; - err = sigframe_alloc(user, &user->fpsimd_offset, - sizeof(struct fpsimd_context)); - if (err) - return err; + if (system_supports_fpsimd()) { + err = sigframe_alloc(user, &user->fpsimd_offset, + sizeof(struct fpsimd_context)); + if (err) + return err; + } /* fault information, if valid */ if (add_all || current->thread.fault_code) { diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index db5159a3055f..12c6864e51e1 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -9,7 +9,6 @@ #include <linux/compat.h> #include <linux/cpufeature.h> -#include <linux/personality.h> #include <linux/sched.h> #include <linux/sched/signal.h> #include <linux/slab.h> diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 70fc42470f13..bb878f52ca0a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -9,7 +9,6 @@ #include <linux/bug.h> #include <linux/context_tracking.h> #include <linux/signal.h> -#include <linux/personality.h> #include <linux/kallsyms.h> #include <linux/kprobes.h> #include <linux/spinlock.h> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4dca6ffd03d4..946e401ef6b0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1867,6 +1867,7 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits) kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1); kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1); + kvm_nvhe_sym(id_aa64isar2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 701cfb964905..6379a1e3e6e5 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -174,9 +174,9 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) /* Valid trap. Switch the context: */ if (has_vhe()) { - reg = CPACR_EL1_FPEN; + reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN; if (sve_guest) - reg |= CPACR_EL1_ZEN; + reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; sysreg_clear_set(cpacr_el1, 0, reg); } else { diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index eea1f6a53723..5ad626527d41 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -192,6 +192,11 @@ ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \ ) +#define PVM_ID_AA64ISAR2_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) \ + ) + u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code); diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 958734f4d6b0..0c367eb5f4e2 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -7,7 +7,8 @@ #include <asm/assembler.h> #include <asm/alternative.h> -SYM_FUNC_START_PI(dcache_clean_inval_poc) +SYM_FUNC_START(__pi_dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_inval_poc) +SYM_FUNC_END(__pi_dcache_clean_inval_poc) +SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc) diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 792cf6e6ac92..33f5181af330 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -22,6 +22,7 @@ u64 id_aa64pfr0_el1_sys_val; u64 id_aa64pfr1_el1_sys_val; u64 id_aa64isar0_el1_sys_val; u64 id_aa64isar1_el1_sys_val; +u64 id_aa64isar2_el1_sys_val; u64 id_aa64mmfr0_el1_sys_val; u64 id_aa64mmfr1_el1_sys_val; u64 id_aa64mmfr2_el1_sys_val; @@ -183,6 +184,17 @@ static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) return id_aa64isar1_el1_sys_val & allow_mask; } +static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu) +{ + u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW; + + if (!vcpu_has_ptrauth(vcpu)) + allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) | + ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3)); + + return id_aa64isar2_el1_sys_val & allow_mask; +} + static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu) { u64 set_mask; @@ -225,6 +237,8 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) return get_pvm_id_aa64isar0(vcpu); case SYS_ID_AA64ISAR1_EL1: return get_pvm_id_aa64isar1(vcpu); + case SYS_ID_AA64ISAR2_EL1: + return get_pvm_id_aa64isar2(vcpu); case SYS_ID_AA64MMFR0_EL1: return get_pvm_id_aa64mmfr0(vcpu); case SYS_ID_AA64MMFR1_EL1: diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 54af47005e45..262dfe03134d 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -41,7 +41,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val = read_sysreg(cpacr_el1); val |= CPACR_EL1_TTA; - val &= ~CPACR_EL1_ZEN; + val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN); /* * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to @@ -56,9 +56,9 @@ static void __activate_traps(struct kvm_vcpu *vcpu) if (update_fp_enabled(vcpu)) { if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; + val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; } else { - val &= ~CPACR_EL1_FPEN; + val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN); __activate_traps_fpsimd32(vcpu); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 4dc2fba316ff..baa65292bbc2 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1097,6 +1097,11 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI)); break; + case SYS_ID_AA64ISAR2_EL1: + if (!vcpu_has_ptrauth(vcpu)) + val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) | + ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3)); + break; case SYS_ID_AA64DFR0_EL1: /* Limit debug to ARMv8.0 */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER); diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S index 1fd5d790ab80..ebde40e7fa2b 100644 --- a/arch/arm64/lib/clear_page.S +++ b/arch/arm64/lib/clear_page.S @@ -14,7 +14,7 @@ * Parameters: * x0 - dest */ -SYM_FUNC_START_PI(clear_page) +SYM_FUNC_START(__pi_clear_page) mrs x1, dczid_el0 tbnz x1, #4, 2f /* Branch if DC ZVA is prohibited */ and w1, w1, #0xf @@ -35,5 +35,6 @@ SYM_FUNC_START_PI(clear_page) tst x0, #(PAGE_SIZE - 1) b.ne 2b ret -SYM_FUNC_END_PI(clear_page) +SYM_FUNC_END(__pi_clear_page) +SYM_FUNC_ALIAS(clear_page, __pi_clear_page) EXPORT_SYMBOL(clear_page) diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index 29144f4cd449..c336d2ffdec5 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -17,7 +17,7 @@ * x0 - dest * x1 - src */ -SYM_FUNC_START_PI(copy_page) +SYM_FUNC_START(__pi_copy_page) alternative_if ARM64_HAS_NO_HW_PREFETCH // Prefetch three cache lines ahead. prfm pldl1strm, [x1, #128] @@ -75,5 +75,6 @@ alternative_else_nop_endif stnp x16, x17, [x0, #112 - 256] ret -SYM_FUNC_END_PI(copy_page) +SYM_FUNC_END(__pi_copy_page) +SYM_FUNC_ALIAS(copy_page, __pi_copy_page) EXPORT_SYMBOL(copy_page) diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index fccfe363e567..5e90887deec4 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -578,10 +578,16 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, switch (type) { case AARCH64_INSN_LDST_LOAD_EX: + case AARCH64_INSN_LDST_LOAD_ACQ_EX: insn = aarch64_insn_get_load_ex_value(); + if (type == AARCH64_INSN_LDST_LOAD_ACQ_EX) + insn |= BIT(15); break; case AARCH64_INSN_LDST_STORE_EX: + case AARCH64_INSN_LDST_STORE_REL_EX: insn = aarch64_insn_get_store_ex_value(); + if (type == AARCH64_INSN_LDST_STORE_REL_EX) + insn |= BIT(15); break; default: pr_err("%s: unknown load/store exclusive encoding %d\n", __func__, type); @@ -603,12 +609,65 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, state); } -u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, - enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size) +#ifdef CONFIG_ARM64_LSE_ATOMICS +static u32 aarch64_insn_encode_ldst_order(enum aarch64_insn_mem_order_type type, + u32 insn) { - u32 insn = aarch64_insn_get_ldadd_value(); + u32 order; + + switch (type) { + case AARCH64_INSN_MEM_ORDER_NONE: + order = 0; + break; + case AARCH64_INSN_MEM_ORDER_ACQ: + order = 2; + break; + case AARCH64_INSN_MEM_ORDER_REL: + order = 1; + break; + case AARCH64_INSN_MEM_ORDER_ACQREL: + order = 3; + break; + default: + pr_err("%s: unknown mem order %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn &= ~GENMASK(23, 22); + insn |= order << 22; + + return insn; +} + +u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_atomic_op op, + enum aarch64_insn_mem_order_type order) +{ + u32 insn; + + switch (op) { + case AARCH64_INSN_MEM_ATOMIC_ADD: + insn = aarch64_insn_get_ldadd_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_CLR: + insn = aarch64_insn_get_ldclr_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_EOR: + insn = aarch64_insn_get_ldeor_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_SET: + insn = aarch64_insn_get_ldset_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_SWP: + insn = aarch64_insn_get_swp_value(); + break; + default: + pr_err("%s: unimplemented mem atomic op %d\n", __func__, op); + return AARCH64_BREAK_FAULT; + } switch (size) { case AARCH64_INSN_SIZE_32: @@ -621,6 +680,8 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, insn = aarch64_insn_encode_ldst_size(size, insn); + insn = aarch64_insn_encode_ldst_order(order, insn); + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, result); @@ -631,17 +692,68 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, value); } -u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size) +static u32 aarch64_insn_encode_cas_order(enum aarch64_insn_mem_order_type type, + u32 insn) { - /* - * STADD is simply encoded as an alias for LDADD with XZR as - * the destination register. - */ - return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address, - value, size); + u32 order; + + switch (type) { + case AARCH64_INSN_MEM_ORDER_NONE: + order = 0; + break; + case AARCH64_INSN_MEM_ORDER_ACQ: + order = BIT(22); + break; + case AARCH64_INSN_MEM_ORDER_REL: + order = BIT(15); + break; + case AARCH64_INSN_MEM_ORDER_ACQREL: + order = BIT(15) | BIT(22); + break; + default: + pr_err("%s: unknown mem order %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn &= ~(BIT(15) | BIT(22)); + insn |= order; + + return insn; +} + +u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_order_type order) +{ + u32 insn; + + switch (size) { + case AARCH64_INSN_SIZE_32: + case AARCH64_INSN_SIZE_64: + break; + default: + pr_err("%s: unimplemented size encoding %d\n", __func__, size); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_get_cas_value(); + + insn = aarch64_insn_encode_ldst_size(size, insn); + + insn = aarch64_insn_encode_cas_order(order, insn); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, + result); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, + address); + + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn, + value); } +#endif static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type, enum aarch64_insn_prfm_target target, @@ -1379,7 +1491,7 @@ static u32 aarch64_encode_immediate(u64 imm, * Compute the rotation to get a continuous set of * ones, with the first bit set at position 0 */ - ror = fls(~imm); + ror = fls64(~imm); } /* @@ -1456,3 +1568,48 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn); return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm); } + +u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) +{ + u32 opt; + u32 insn; + + switch (type) { + case AARCH64_INSN_MB_SY: + opt = 0xf; + break; + case AARCH64_INSN_MB_ST: + opt = 0xe; + break; + case AARCH64_INSN_MB_LD: + opt = 0xd; + break; + case AARCH64_INSN_MB_ISH: + opt = 0xb; + break; + case AARCH64_INSN_MB_ISHST: + opt = 0xa; + break; + case AARCH64_INSN_MB_ISHLD: + opt = 0x9; + break; + case AARCH64_INSN_MB_NSH: + opt = 0x7; + break; + case AARCH64_INSN_MB_NSHST: + opt = 0x6; + break; + case AARCH64_INSN_MB_NSHLD: + opt = 0x5; + break; + default: + pr_err("%s: unknown dmb type %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_get_dmb_value(); + insn &= ~GENMASK(11, 8); + insn |= (opt << 8); + + return insn; +} diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S index 7c2276fdab54..37a9f2a4f7f4 100644 --- a/arch/arm64/lib/memchr.S +++ b/arch/arm64/lib/memchr.S @@ -38,7 +38,7 @@ .p2align 4 nop -SYM_FUNC_START_WEAK_PI(memchr) +SYM_FUNC_START(__pi_memchr) and chrin, chrin, #0xff lsr wordcnt, cntin, #3 cbz wordcnt, L(byte_loop) @@ -71,5 +71,6 @@ CPU_LE( rev tmp, tmp) L(not_found): mov result, #0 ret -SYM_FUNC_END_PI(memchr) +SYM_FUNC_END(__pi_memchr) +SYM_FUNC_ALIAS_WEAK(memchr, __pi_memchr) EXPORT_SYMBOL_NOKASAN(memchr) diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S index 7d956384222f..a5ccf2c55f91 100644 --- a/arch/arm64/lib/memcmp.S +++ b/arch/arm64/lib/memcmp.S @@ -32,7 +32,7 @@ #define tmp1 x7 #define tmp2 x8 -SYM_FUNC_START_WEAK_PI(memcmp) +SYM_FUNC_START(__pi_memcmp) subs limit, limit, 8 b.lo L(less8) @@ -134,6 +134,6 @@ L(byte_loop): b.eq L(byte_loop) sub result, data1w, data2w ret - -SYM_FUNC_END_PI(memcmp) +SYM_FUNC_END(__pi_memcmp) +SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp) EXPORT_SYMBOL_NOKASAN(memcmp) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index b82fd64ee1e1..4ab48d49c451 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -57,10 +57,7 @@ The loop tail is handled by always copying 64 bytes from the end. */ -SYM_FUNC_START_ALIAS(__memmove) -SYM_FUNC_START_WEAK_ALIAS_PI(memmove) -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK_PI(memcpy) +SYM_FUNC_START(__pi_memcpy) add srcend, src, count add dstend, dstin, count cmp count, 128 @@ -241,12 +238,16 @@ L(copy64_from_start): stp B_l, B_h, [dstin, 16] stp C_l, C_h, [dstin] ret +SYM_FUNC_END(__pi_memcpy) -SYM_FUNC_END_PI(memcpy) -EXPORT_SYMBOL(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) +SYM_FUNC_ALIAS(__memcpy, __pi_memcpy) EXPORT_SYMBOL(__memcpy) -SYM_FUNC_END_ALIAS_PI(memmove) -EXPORT_SYMBOL(memmove) -SYM_FUNC_END_ALIAS(__memmove) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +EXPORT_SYMBOL(memcpy) + +SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy) + +SYM_FUNC_ALIAS(__memmove, __pi_memmove) EXPORT_SYMBOL(__memmove) +SYM_FUNC_ALIAS_WEAK(memmove, __memmove) +EXPORT_SYMBOL(memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index a9c1c9a01ea9..a5aebe82ad73 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -42,8 +42,7 @@ dst .req x8 tmp3w .req w9 tmp3 .req x9 -SYM_FUNC_START_ALIAS(__memset) -SYM_FUNC_START_WEAK_PI(memset) +SYM_FUNC_START(__pi_memset) mov dst, dstin /* Preserve return value. */ and A_lw, val, #255 orr A_lw, A_lw, A_lw, lsl #8 @@ -202,7 +201,10 @@ SYM_FUNC_START_WEAK_PI(memset) ands count, count, zva_bits_x b.ne .Ltail_maybe_long ret -SYM_FUNC_END_PI(memset) -EXPORT_SYMBOL(memset) -SYM_FUNC_END_ALIAS(__memset) +SYM_FUNC_END(__pi_memset) + +SYM_FUNC_ALIAS(__memset, __pi_memset) EXPORT_SYMBOL(__memset) + +SYM_FUNC_ALIAS_WEAK(memset, __pi_memset) +EXPORT_SYMBOL(memset) diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S index f531dcb95174..8590af3c98c0 100644 --- a/arch/arm64/lib/mte.S +++ b/arch/arm64/lib/mte.S @@ -134,7 +134,7 @@ SYM_FUNC_END(mte_copy_tags_to_user) /* * Save the tags in a page * x0 - page address - * x1 - tag storage + * x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes */ SYM_FUNC_START(mte_save_page_tags) multitag_transfer_size x7, x5 @@ -158,7 +158,7 @@ SYM_FUNC_END(mte_save_page_tags) /* * Restore the tags in a page * x0 - page address - * x1 - tag storage + * x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes */ SYM_FUNC_START(mte_restore_page_tags) multitag_transfer_size x7, x5 diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S index 1f47eae3b0d6..94ee67a6b212 100644 --- a/arch/arm64/lib/strchr.S +++ b/arch/arm64/lib/strchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of first occurrence of 'c' or 0 */ -SYM_FUNC_START_WEAK(strchr) +SYM_FUNC_START(__pi_strchr) and w1, w1, #0xff 1: ldrb w2, [x0], #1 cmp w2, w1 @@ -28,5 +28,7 @@ SYM_FUNC_START_WEAK(strchr) cmp w2, w1 csel x0, x0, xzr, eq ret -SYM_FUNC_END(strchr) +SYM_FUNC_END(__pi_strchr) + +SYM_FUNC_ALIAS_WEAK(strchr, __pi_strchr) EXPORT_SYMBOL_NOKASAN(strchr) diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index 83bcad72ec97..9b89b4533607 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -1,9 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2012-2021, Arm Limited. + * Copyright (c) 2012-2022, Arm Limited. * * Adapted from the original at: - * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/strcmp.S + * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S */ #include <linux/linkage.h> @@ -11,166 +11,180 @@ /* Assumptions: * - * ARMv8-a, AArch64 + * ARMv8-a, AArch64. + * MTE compatible. */ #define L(label) .L ## label #define REP8_01 0x0101010101010101 #define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 -/* Parameters and result. */ #define src1 x0 #define src2 x1 #define result x0 -/* Internal variables. */ #define data1 x2 #define data1w w2 #define data2 x3 #define data2w w3 #define has_nul x4 #define diff x5 +#define off1 x5 #define syndrome x6 -#define tmp1 x7 -#define tmp2 x8 -#define tmp3 x9 -#define zeroones x10 -#define pos x11 - - /* Start of performance-critical section -- one 64B cache line. */ - .align 6 -SYM_FUNC_START_WEAK_PI(strcmp) - eor tmp1, src1, src2 - mov zeroones, #REP8_01 - tst tmp1, #7 +#define tmp x6 +#define data3 x7 +#define zeroones x8 +#define shift x9 +#define off2 x10 + +/* On big-endian early bytes are at MSB and on little-endian LSB. + LS_FW means shifting towards early bytes. */ +#ifdef __AARCH64EB__ +# define LS_FW lsl +#else +# define LS_FW lsr +#endif + +/* NUL detection works on the principle that (X - 1) & (~X) & 0x80 + (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and + can be done in parallel across the entire word. + Since carry propagation makes 0x1 bytes before a NUL byte appear + NUL too in big-endian, byte-reverse the data before the NUL check. */ + + +SYM_FUNC_START(__pi_strcmp) + sub off2, src2, src1 + mov zeroones, REP8_01 + and tmp, src1, 7 + tst off2, 7 b.ne L(misaligned8) - ands tmp1, src1, #7 - b.ne L(mutual_align) - /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 - (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - can be done in parallel across the entire word. */ + cbnz tmp, L(mutual_align) + + .p2align 4 + L(loop_aligned): - ldr data1, [src1], #8 - ldr data2, [src2], #8 + ldr data2, [src1, off2] + ldr data1, [src1], 8 L(start_realigned): - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ +#ifdef __AARCH64EB__ + rev tmp, data1 + sub has_nul, tmp, zeroones + orr tmp, tmp, REP8_7f +#else + sub has_nul, data1, zeroones + orr tmp, data1, REP8_7f +#endif + bics has_nul, has_nul, tmp /* Non-zero if NUL terminator. */ + ccmp data1, data2, 0, eq + b.eq L(loop_aligned) +#ifdef __AARCH64EB__ + rev has_nul, has_nul +#endif + eor diff, data1, data2 orr syndrome, diff, has_nul - cbz syndrome, L(loop_aligned) - /* End of performance-critical section -- one 64B cache line. */ - L(end): -#ifndef __AARCH64EB__ +#ifndef __AARCH64EB__ rev syndrome, syndrome rev data1, data1 - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. - Shifting left now will bring the critical information into the - top bits. */ - clz pos, syndrome rev data2, data2 - lsl data1, data1, pos - lsl data2, data2, pos - /* But we need to zero-extend (char is unsigned) the value and then - perform a signed 32-bit subtraction. */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 - ret -#else - /* For big-endian we cannot use the trick with the syndrome value - as carry-propagation can corrupt the upper bits if the trailing - bytes in the string contain 0x01. */ - /* However, if there is no NUL byte in the dword, we can generate - the result directly. We can't just subtract the bytes as the - MSB might be significant. */ - cbnz has_nul, 1f - cmp data1, data2 - cset result, ne - cneg result, result, lo - ret -1: - /* Re-compute the NUL-byte detection, using a byte-reversed value. */ - rev tmp3, data1 - sub tmp1, tmp3, zeroones - orr tmp2, tmp3, #REP8_7f - bic has_nul, tmp1, tmp2 - rev has_nul, has_nul - orr syndrome, diff, has_nul - clz pos, syndrome - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. +#endif + clz shift, syndrome + /* The most-significant-non-zero bit of the syndrome marks either the + first bit that is different, or the top bit of the first zero byte. Shifting left now will bring the critical information into the top bits. */ - lsl data1, data1, pos - lsl data2, data2, pos + lsl data1, data1, shift + lsl data2, data2, shift /* But we need to zero-extend (char is unsigned) the value and then perform a signed 32-bit subtraction. */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 + lsr data1, data1, 56 + sub result, data1, data2, lsr 56 ret -#endif + + .p2align 4 L(mutual_align): /* Sources are mutually aligned, but are not currently at an alignment boundary. Round down the addresses and then mask off - the bytes that preceed the start point. */ - bic src1, src1, #7 - bic src2, src2, #7 - lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ - ldr data1, [src1], #8 - neg tmp1, tmp1 /* Bits to alignment -64. */ - ldr data2, [src2], #8 - mov tmp2, #~0 -#ifdef __AARCH64EB__ - /* Big-endian. Early bytes are at MSB. */ - lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ -#else - /* Little-endian. Early bytes are at LSB. */ - lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ -#endif - orr data1, data1, tmp2 - orr data2, data2, tmp2 + the bytes that precede the start point. */ + bic src1, src1, 7 + ldr data2, [src1, off2] + ldr data1, [src1], 8 + neg shift, src2, lsl 3 /* Bits to alignment -64. */ + mov tmp, -1 + LS_FW tmp, tmp, shift + orr data1, data1, tmp + orr data2, data2, tmp b L(start_realigned) L(misaligned8): /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always - checking to make sure that we don't access beyond page boundary in - SRC2. */ - tst src1, #7 - b.eq L(loop_misaligned) + checking to make sure that we don't access beyond the end of SRC2. */ + cbz tmp, L(src1_aligned) L(do_misaligned): - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - cmp data1w, #1 - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ + ldrb data1w, [src1], 1 + ldrb data2w, [src2], 1 + cmp data1w, 0 + ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */ b.ne L(done) - tst src1, #7 + tst src1, 7 b.ne L(do_misaligned) -L(loop_misaligned): - /* Test if we are within the last dword of the end of a 4K page. If - yes then jump back to the misaligned loop to copy a byte at a time. */ - and tmp1, src2, #0xff8 - eor tmp1, tmp1, #0xff8 - cbz tmp1, L(do_misaligned) - ldr data1, [src1], #8 - ldr data2, [src2], #8 - - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ +L(src1_aligned): + neg shift, src2, lsl 3 + bic src2, src2, 7 + ldr data3, [src2], 8 +#ifdef __AARCH64EB__ + rev data3, data3 +#endif + lsr tmp, zeroones, shift + orr data3, data3, tmp + sub has_nul, data3, zeroones + orr tmp, data3, REP8_7f + bics has_nul, has_nul, tmp + b.ne L(tail) + + sub off1, src2, src1 + + .p2align 4 + +L(loop_unaligned): + ldr data3, [src1, off1] + ldr data2, [src1, off2] +#ifdef __AARCH64EB__ + rev data3, data3 +#endif + sub has_nul, data3, zeroones + orr tmp, data3, REP8_7f + ldr data1, [src1], 8 + bics has_nul, has_nul, tmp + ccmp data1, data2, 0, eq + b.eq L(loop_unaligned) + + lsl tmp, has_nul, shift +#ifdef __AARCH64EB__ + rev tmp, tmp +#endif + eor diff, data1, data2 + orr syndrome, diff, tmp + cbnz syndrome, L(end) +L(tail): + ldr data1, [src1] + neg shift, shift + lsr data2, data3, shift + lsr has_nul, has_nul, shift +#ifdef __AARCH64EB__ + rev data2, data2 + rev has_nul, has_nul +#endif + eor diff, data1, data2 orr syndrome, diff, has_nul - cbz syndrome, L(loop_misaligned) b L(end) L(done): sub result, data1, data2 ret - -SYM_FUNC_END_PI(strcmp) -EXPORT_SYMBOL_NOHWKASAN(strcmp) +SYM_FUNC_END(__pi_strcmp) +SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp) +EXPORT_SYMBOL_NOKASAN(strcmp) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 1648790e91b3..4919fe81ae54 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -79,7 +79,7 @@ whether the first fetch, which may be misaligned, crosses a page boundary. */ -SYM_FUNC_START_WEAK_PI(strlen) +SYM_FUNC_START(__pi_strlen) and tmp1, srcin, MIN_PAGE_SIZE - 1 mov zeroones, REP8_01 cmp tmp1, MIN_PAGE_SIZE - 16 @@ -208,6 +208,6 @@ L(page_cross): csel data1, data1, tmp4, eq csel data2, data2, tmp2, eq b L(page_cross_entry) - -SYM_FUNC_END_PI(strlen) +SYM_FUNC_END(__pi_strlen) +SYM_FUNC_ALIAS_WEAK(strlen, __pi_strlen) EXPORT_SYMBOL_NOKASAN(strlen) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index e42bcfcd37e6..fe7bbc0b42a7 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -1,9 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2013-2021, Arm Limited. + * Copyright (c) 2013-2022, Arm Limited. * * Adapted from the original at: - * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/strncmp.S + * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strncmp.S */ #include <linux/linkage.h> @@ -11,14 +11,14 @@ /* Assumptions: * - * ARMv8-a, AArch64 + * ARMv8-a, AArch64. + * MTE compatible. */ #define L(label) .L ## label #define REP8_01 0x0101010101010101 #define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 /* Parameters and result. */ #define src1 x0 @@ -39,12 +39,26 @@ #define tmp3 x10 #define zeroones x11 #define pos x12 -#define limit_wd x13 -#define mask x14 -#define endloop x15 +#define mask x13 +#define endloop x14 #define count mask +#define offset pos +#define neg_offset x15 -SYM_FUNC_START_WEAK_PI(strncmp) +/* Define endian dependent shift operations. + On big-endian early bytes are at MSB and on little-endian LSB. + LS_FW means shifting towards early bytes. + LS_BK means shifting towards later bytes. + */ +#ifdef __AARCH64EB__ +#define LS_FW lsl +#define LS_BK lsr +#else +#define LS_FW lsr +#define LS_BK lsl +#endif + +SYM_FUNC_START(__pi_strncmp) cbz limit, L(ret0) eor tmp1, src1, src2 mov zeroones, #REP8_01 @@ -52,9 +66,6 @@ SYM_FUNC_START_WEAK_PI(strncmp) and count, src1, #7 b.ne L(misaligned8) cbnz count, L(mutual_align) - /* Calculate the number of full and partial words -1. */ - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ - lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */ /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and @@ -64,56 +75,52 @@ L(loop_aligned): ldr data1, [src1], #8 ldr data2, [src2], #8 L(start_realigned): - subs limit_wd, limit_wd, #1 + subs limit, limit, #8 sub tmp1, data1, zeroones orr tmp2, data1, #REP8_7f eor diff, data1, data2 /* Non-zero if differences found. */ - csinv endloop, diff, xzr, pl /* Last Dword or differences. */ + csinv endloop, diff, xzr, hi /* Last Dword or differences. */ bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ ccmp endloop, #0, #0, eq b.eq L(loop_aligned) /* End of main loop */ - /* Not reached the limit, must have found the end or a diff. */ - tbz limit_wd, #63, L(not_limit) - - /* Limit % 8 == 0 => all bytes significant. */ - ands limit, limit, #7 - b.eq L(not_limit) - - lsl limit, limit, #3 /* Bits -> bytes. */ - mov mask, #~0 -#ifdef __AARCH64EB__ - lsr mask, mask, limit -#else - lsl mask, mask, limit -#endif - bic data1, data1, mask - bic data2, data2, mask - - /* Make sure that the NUL byte is marked in the syndrome. */ - orr has_nul, has_nul, mask - -L(not_limit): +L(full_check): +#ifndef __AARCH64EB__ orr syndrome, diff, has_nul - -#ifndef __AARCH64EB__ + add limit, limit, 8 /* Rewind limit to before last subs. */ +L(syndrome_check): + /* Limit was reached. Check if the NUL byte or the difference + is before the limit. */ rev syndrome, syndrome rev data1, data1 - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. - Shifting left now will bring the critical information into the - top bits. */ clz pos, syndrome rev data2, data2 lsl data1, data1, pos + cmp limit, pos, lsr #3 lsl data2, data2, pos /* But we need to zero-extend (char is unsigned) the value and then perform a signed 32-bit subtraction. */ lsr data1, data1, #56 sub result, data1, data2, lsr #56 + csel result, result, xzr, hi ret #else + /* Not reached the limit, must have found the end or a diff. */ + tbz limit, #63, L(not_limit) + add tmp1, limit, 8 + cbz limit, L(not_limit) + + lsl limit, tmp1, #3 /* Bits -> bytes. */ + mov mask, #~0 + lsr mask, mask, limit + bic data1, data1, mask + bic data2, data2, mask + + /* Make sure that the NUL byte is marked in the syndrome. */ + orr has_nul, has_nul, mask + +L(not_limit): /* For big-endian we cannot use the trick with the syndrome value as carry-propagation can corrupt the upper bits if the trailing bytes in the string contain 0x01. */ @@ -134,10 +141,11 @@ L(not_limit): rev has_nul, has_nul orr syndrome, diff, has_nul clz pos, syndrome - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. + /* The most-significant-non-zero bit of the syndrome marks either the + first bit that is different, or the top bit of the first zero byte. Shifting left now will bring the critical information into the top bits. */ +L(end_quick): lsl data1, data1, pos lsl data2, data2, pos /* But we need to zero-extend (char is unsigned) the value and then @@ -159,22 +167,12 @@ L(mutual_align): neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */ ldr data2, [src2], #8 mov tmp2, #~0 - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ -#ifdef __AARCH64EB__ - /* Big-endian. Early bytes are at MSB. */ - lsl tmp2, tmp2, tmp3 /* Shift (count & 63). */ -#else - /* Little-endian. Early bytes are at LSB. */ - lsr tmp2, tmp2, tmp3 /* Shift (count & 63). */ -#endif - and tmp3, limit_wd, #7 - lsr limit_wd, limit_wd, #3 - /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */ - add limit, limit, count - add tmp3, tmp3, count + LS_FW tmp2, tmp2, tmp3 /* Shift (count & 63). */ + /* Adjust the limit and ensure it doesn't overflow. */ + adds limit, limit, count + csinv limit, limit, xzr, lo orr data1, data1, tmp2 orr data2, data2, tmp2 - add limit_wd, limit_wd, tmp3, lsr #3 b L(start_realigned) .p2align 4 @@ -197,13 +195,11 @@ L(done): /* Align the SRC1 to a dword by doing a bytewise compare and then do the dword loop. */ L(try_misaligned_words): - lsr limit_wd, limit, #3 - cbz count, L(do_misaligned) + cbz count, L(src1_aligned) neg count, count and count, count, #7 sub limit, limit, count - lsr limit_wd, limit, #3 L(page_end_loop): ldrb data1w, [src1], #1 @@ -214,48 +210,101 @@ L(page_end_loop): subs count, count, #1 b.hi L(page_end_loop) -L(do_misaligned): - /* Prepare ourselves for the next page crossing. Unlike the aligned - loop, we fetch 1 less dword because we risk crossing bounds on - SRC2. */ - mov count, #8 - subs limit_wd, limit_wd, #1 - b.lo L(done_loop) -L(loop_misaligned): - and tmp2, src2, #0xff8 - eor tmp2, tmp2, #0xff8 - cbz tmp2, L(page_end_loop) + /* The following diagram explains the comparison of misaligned strings. + The bytes are shown in natural order. For little-endian, it is + reversed in the registers. The "x" bytes are before the string. + The "|" separates data that is loaded at one time. + src1 | a a a a a a a a | b b b c c c c c | . . . + src2 | x x x x x a a a a a a a a b b b | c c c c c . . . + + After shifting in each step, the data looks like this: + STEP_A STEP_B STEP_C + data1 a a a a a a a a b b b c c c c c b b b c c c c c + data2 a a a a a a a a b b b 0 0 0 0 0 0 0 0 c c c c c + The bytes with "0" are eliminated from the syndrome via mask. + + Align SRC2 down to 16 bytes. This way we can read 16 bytes at a + time from SRC2. The comparison happens in 3 steps. After each step + the loop can exit, or read from SRC1 or SRC2. */ +L(src1_aligned): + /* Calculate offset from 8 byte alignment to string start in bits. No + need to mask offset since shifts are ignoring upper bits. */ + lsl offset, src2, #3 + bic src2, src2, #0xf + mov mask, -1 + neg neg_offset, offset ldr data1, [src1], #8 - ldr data2, [src2], #8 - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - ccmp diff, #0, #0, eq - b.ne L(not_limit) - subs limit_wd, limit_wd, #1 - b.pl L(loop_misaligned) + ldp tmp1, tmp2, [src2], #16 + LS_BK mask, mask, neg_offset + and neg_offset, neg_offset, #63 /* Need actual value for cmp later. */ + /* Skip the first compare if data in tmp1 is irrelevant. */ + tbnz offset, 6, L(misaligned_mid_loop) -L(done_loop): - /* We found a difference or a NULL before the limit was reached. */ - and limit, limit, #7 - cbz limit, L(not_limit) - /* Read the last word. */ - sub src1, src1, 8 - sub src2, src2, 8 - ldr data1, [src1, limit] - ldr data2, [src2, limit] - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f +L(loop_misaligned): + /* STEP_A: Compare full 8 bytes when there is enough data from SRC2.*/ + LS_FW data2, tmp1, offset + LS_BK tmp1, tmp2, neg_offset + subs limit, limit, #8 + orr data2, data2, tmp1 /* 8 bytes from SRC2 combined from two regs.*/ + sub has_nul, data1, zeroones eor diff, data1, data2 /* Non-zero if differences found. */ - bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - ccmp diff, #0, #0, eq - b.ne L(not_limit) + orr tmp3, data1, #REP8_7f + csinv endloop, diff, xzr, hi /* If limit, set to all ones. */ + bic has_nul, has_nul, tmp3 /* Non-zero if NUL byte found in SRC1. */ + orr tmp3, endloop, has_nul + cbnz tmp3, L(full_check) + + ldr data1, [src1], #8 +L(misaligned_mid_loop): + /* STEP_B: Compare first part of data1 to second part of tmp2. */ + LS_FW data2, tmp2, offset +#ifdef __AARCH64EB__ + /* For big-endian we do a byte reverse to avoid carry-propagation + problem described above. This way we can reuse the has_nul in the + next step and also use syndrome value trick at the end. */ + rev tmp3, data1 + #define data1_fixed tmp3 +#else + #define data1_fixed data1 +#endif + sub has_nul, data1_fixed, zeroones + orr tmp3, data1_fixed, #REP8_7f + eor diff, data2, data1 /* Non-zero if differences found. */ + bic has_nul, has_nul, tmp3 /* Non-zero if NUL terminator. */ +#ifdef __AARCH64EB__ + rev has_nul, has_nul +#endif + cmp limit, neg_offset, lsr #3 + orr syndrome, diff, has_nul + bic syndrome, syndrome, mask /* Ignore later bytes. */ + csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */ + cbnz tmp3, L(syndrome_check) + + /* STEP_C: Compare second part of data1 to first part of tmp1. */ + ldp tmp1, tmp2, [src2], #16 + cmp limit, #8 + LS_BK data2, tmp1, neg_offset + eor diff, data2, data1 /* Non-zero if differences found. */ + orr syndrome, diff, has_nul + and syndrome, syndrome, mask /* Ignore earlier bytes. */ + csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */ + cbnz tmp3, L(syndrome_check) + + ldr data1, [src1], #8 + sub limit, limit, #8 + b L(loop_misaligned) + +#ifdef __AARCH64EB__ +L(syndrome_check): + clz pos, syndrome + cmp pos, limit, lsl #3 + b.lo L(end_quick) +#endif L(ret0): mov result, #0 ret - -SYM_FUNC_END_PI(strncmp) -EXPORT_SYMBOL_NOHWKASAN(strncmp) +SYM_FUNC_END(__pi_strncmp) +SYM_FUNC_ALIAS_WEAK(strncmp, __pi_strncmp) +EXPORT_SYMBOL_NOKASAN(strncmp) diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S index b72913a99038..d5ac0e10a01d 100644 --- a/arch/arm64/lib/strnlen.S +++ b/arch/arm64/lib/strnlen.S @@ -47,7 +47,7 @@ limit_wd .req x14 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 -SYM_FUNC_START_WEAK_PI(strnlen) +SYM_FUNC_START(__pi_strnlen) cbz limit, .Lhit_limit mov zeroones, #REP8_01 bic src, srcin, #15 @@ -156,5 +156,7 @@ CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */ .Lhit_limit: mov len, limit ret -SYM_FUNC_END_PI(strnlen) +SYM_FUNC_END(__pi_strnlen) + +SYM_FUNC_ALIAS_WEAK(strnlen, __pi_strnlen) EXPORT_SYMBOL_NOKASAN(strnlen) diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S index 13132d1ed6d1..a5123cf0ce12 100644 --- a/arch/arm64/lib/strrchr.S +++ b/arch/arm64/lib/strrchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of last occurrence of 'c' or 0 */ -SYM_FUNC_START_WEAK_PI(strrchr) +SYM_FUNC_START(__pi_strrchr) mov x3, #0 and w1, w1, #0xff 1: ldrb w2, [x0], #1 @@ -29,5 +29,6 @@ SYM_FUNC_START_WEAK_PI(strrchr) b 1b 2: mov x0, x3 ret -SYM_FUNC_END_PI(strrchr) +SYM_FUNC_END(__pi_strrchr) +SYM_FUNC_ALIAS_WEAK(strrchr, __pi_strrchr) EXPORT_SYMBOL_NOKASAN(strrchr) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 7d0563db4201..0ea6cc25dc66 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -107,10 +107,11 @@ SYM_FUNC_END(icache_inval_pou) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(dcache_clean_inval_poc) +SYM_FUNC_START(__pi_dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_inval_poc) +SYM_FUNC_END(__pi_dcache_clean_inval_poc) +SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc) /* * dcache_clean_pou(start, end) @@ -140,7 +141,7 @@ SYM_FUNC_END(dcache_clean_pou) * - start - kernel start address of region * - end - kernel end address of region */ -SYM_FUNC_START_PI(dcache_inval_poc) +SYM_FUNC_START(__pi_dcache_inval_poc) dcache_line_size x2, x3 sub x3, x2, #1 tst x1, x3 // end cache line aligned? @@ -158,7 +159,8 @@ SYM_FUNC_START_PI(dcache_inval_poc) b.lo 2b dsb sy ret -SYM_FUNC_END_PI(dcache_inval_poc) +SYM_FUNC_END(__pi_dcache_inval_poc) +SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc) /* * dcache_clean_poc(start, end) @@ -169,10 +171,11 @@ SYM_FUNC_END_PI(dcache_inval_poc) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(dcache_clean_poc) +SYM_FUNC_START(__pi_dcache_clean_poc) dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_poc) +SYM_FUNC_END(__pi_dcache_clean_poc) +SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc) /* * dcache_clean_pop(start, end) @@ -183,13 +186,14 @@ SYM_FUNC_END_PI(dcache_clean_poc) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(dcache_clean_pop) +SYM_FUNC_START(__pi_dcache_clean_pop) alternative_if_not ARM64_HAS_DCPOP b dcache_clean_poc alternative_else_nop_endif dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_pop) +SYM_FUNC_END(__pi_dcache_clean_pop) +SYM_FUNC_ALIAS(dcache_clean_pop, __pi_dcache_clean_pop) /* * __dma_flush_area(start, size) @@ -199,11 +203,12 @@ SYM_FUNC_END_PI(dcache_clean_pop) * - start - virtual start address of region * - size - size in question */ -SYM_FUNC_START_PI(__dma_flush_area) +SYM_FUNC_START(__pi___dma_flush_area) add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__dma_flush_area) +SYM_FUNC_END(__pi___dma_flush_area) +SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area) /* * __dma_map_area(start, size, dir) @@ -211,12 +216,13 @@ SYM_FUNC_END_PI(__dma_flush_area) * - size - size of region * - dir - DMA direction */ -SYM_FUNC_START_PI(__dma_map_area) +SYM_FUNC_START(__pi___dma_map_area) add x1, x0, x1 cmp w2, #DMA_FROM_DEVICE b.eq __pi_dcache_inval_poc b __pi_dcache_clean_poc -SYM_FUNC_END_PI(__dma_map_area) +SYM_FUNC_END(__pi___dma_map_area) +SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area) /* * __dma_unmap_area(start, size, dir) @@ -224,9 +230,10 @@ SYM_FUNC_END_PI(__dma_map_area) * - size - size of region * - dir - DMA direction */ -SYM_FUNC_START_PI(__dma_unmap_area) +SYM_FUNC_START(__pi___dma_unmap_area) add x1, x0, x1 cmp w2, #DMA_TO_DEVICE b.ne __pi_dcache_inval_poc ret -SYM_FUNC_END_PI(__dma_unmap_area) +SYM_FUNC_END(__pi___dma_unmap_area) +SYM_FUNC_ALIAS(__dma_unmap_area, __pi___dma_unmap_area) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 2aaf950b906c..a06c6ac770d4 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -52,6 +52,13 @@ void __sync_icache_dcache(pte_t pte) { struct page *page = pte_page(pte); + /* + * HugeTLB pages are always fully mapped, so only setting head page's + * PG_dcache_clean flag is enough. + */ + if (PageHuge(page)) + page = compound_head(page); + if (!test_bit(PG_dcache_clean, &page->flags)) { sync_icache_aliases((unsigned long)page_address(page), (unsigned long)page_address(page) + diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index ffb9c229610a..a33aba91ad89 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -56,25 +56,34 @@ void __init arm64_hugetlb_cma_reserve(void) } #endif /* CONFIG_CMA */ -#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION -bool arch_hugetlb_migration_supported(struct hstate *h) +static bool __hugetlb_valid_size(unsigned long size) { - size_t pagesize = huge_page_size(h); - - switch (pagesize) { + switch (size) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: return pud_sect_supported(); #endif - case PMD_SIZE: case CONT_PMD_SIZE: + case PMD_SIZE: case CONT_PTE_SIZE: return true; } - pr_warn("%s: unrecognized huge page size 0x%lx\n", - __func__, pagesize); + return false; } + +#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION +bool arch_hugetlb_migration_supported(struct hstate *h) +{ + size_t pagesize = huge_page_size(h); + + if (!__hugetlb_valid_size(pagesize)) { + pr_warn("%s: unrecognized huge page size 0x%lx\n", + __func__, pagesize); + return false; + } + return true; +} #endif int pmd_huge(pmd_t pmd) @@ -506,16 +515,5 @@ arch_initcall(hugetlbpage_init); bool __init arch_hugetlb_valid_size(unsigned long size) { - switch (size) { -#ifndef __PAGETABLE_PMD_FOLDED - case PUD_SIZE: - return pud_sect_supported(); -#endif - case CONT_PMD_SIZE: - case PMD_SIZE: - case CONT_PTE_SIZE: - return true; - } - - return false; + return __hugetlb_valid_size(size); } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index db63cc885771..9e26ec80d317 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -61,8 +61,34 @@ EXPORT_SYMBOL(memstart_addr); * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4). * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory, * otherwise it is empty. + * + * Memory reservation for crash kernel either done early or deferred + * depending on DMA memory zones configs (ZONE_DMA) -- + * + * In absence of ZONE_DMA configs arm64_dma_phys_limit initialized + * here instead of max_zone_phys(). This lets early reservation of + * crash kernel memory which has a dependency on arm64_dma_phys_limit. + * Reserving memory early for crash kernel allows linear creation of block + * mappings (greater than page-granularity) for all the memory bank rangs. + * In this scheme a comparatively quicker boot is observed. + * + * If ZONE_DMA configs are defined, crash kernel memory reservation + * is delayed until DMA zone memory range size initilazation performed in + * zone_sizes_init(). The defer is necessary to steer clear of DMA zone + * memory range to avoid overlap allocation. So crash kernel memory boundaries + * are not known when mapping all bank memory ranges, which otherwise means + * not possible to exclude crash kernel range from creating block mappings + * so page-granularity mappings are created for the entire memory range. + * Hence a slightly slower boot is observed. + * + * Note: Page-granularity mapppings are necessary for crash kernel memory + * range for shrinking its size via /sys/kernel/kexec_crash_size interface. */ -phys_addr_t arm64_dma_phys_limit __ro_after_init; +#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32) +phys_addr_t __ro_after_init arm64_dma_phys_limit; +#else +phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1; +#endif #ifdef CONFIG_KEXEC_CORE /* @@ -153,8 +179,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (!arm64_dma_phys_limit) arm64_dma_phys_limit = dma32_phys_limit; #endif - if (!arm64_dma_phys_limit) - arm64_dma_phys_limit = PHYS_MASK + 1; max_zone_pfns[ZONE_NORMAL] = max; free_area_init(max_zone_pfns); @@ -315,6 +339,9 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); + if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) + reserve_crashkernel(); + high_memory = __va(memblock_end_of_DRAM() - 1) + 1; } @@ -361,7 +388,8 @@ void __init bootmem_init(void) * request_standard_resources() depends on crashkernel's memory being * reserved, so do it here. */ - reserve_crashkernel(); + if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)) + reserve_crashkernel(); memblock_dump_all(); } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 49abbf43bf35..0b7d25887ec3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -63,6 +63,7 @@ static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; static DEFINE_SPINLOCK(swapper_pgdir_lock); +static DEFINE_MUTEX(fixmap_lock); void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) { @@ -294,18 +295,6 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, } while (addr = next, addr != end); } -static inline bool use_1G_block(unsigned long addr, unsigned long next, - unsigned long phys) -{ - if (PAGE_SHIFT != 12) - return false; - - if (((addr | next | phys) & ~PUD_MASK) != 0) - return false; - - return true; -} - static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(int), @@ -329,6 +318,12 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, } BUG_ON(p4d_bad(p4d)); + /* + * No need for locking during early boot. And it doesn't work as + * expected with KASLR enabled. + */ + if (system_state != SYSTEM_BOOTING) + mutex_lock(&fixmap_lock); pudp = pud_set_fixmap_offset(p4dp, addr); do { pud_t old_pud = READ_ONCE(*pudp); @@ -338,7 +333,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, /* * For 4K granule only, attempt to put down a 1GB block */ - if (use_1G_block(addr, next, phys) && + if (pud_sect_supported() && + ((addr | next | phys) & ~PUD_MASK) == 0 && (flags & NO_BLOCK_MAPPINGS) == 0) { pud_set_huge(pudp, phys, prot); @@ -359,6 +355,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, } while (pudp++, addr = next, addr != end); pud_clear_fixmap(); + if (system_state != SYSTEM_BOOTING) + mutex_unlock(&fixmap_lock); } static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, @@ -517,7 +515,7 @@ static void __init map_mem(pgd_t *pgdp) */ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); - if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE)) + if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE)) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* @@ -528,6 +526,17 @@ static void __init map_mem(pgd_t *pgdp) */ memblock_mark_nomap(kernel_start, kernel_end - kernel_start); +#ifdef CONFIG_KEXEC_CORE + if (crash_mem_map) { + if (IS_ENABLED(CONFIG_ZONE_DMA) || + IS_ENABLED(CONFIG_ZONE_DMA32)) + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + else if (crashk_res.end) + memblock_mark_nomap(crashk_res.start, + resource_size(&crashk_res)); + } +#endif + /* map all the memory banks */ for_each_mem_range(i, &start, &end) { if (start >= end) @@ -554,6 +563,25 @@ static void __init map_mem(pgd_t *pgdp) __map_memblock(pgdp, kernel_start, kernel_end, PAGE_KERNEL, NO_CONT_MAPPINGS); memblock_clear_nomap(kernel_start, kernel_end - kernel_start); + + /* + * Use page-level mappings here so that we can shrink the region + * in page granularity and put back unused memory to buddy system + * through /sys/kernel/kexec_crash_size interface. + */ +#ifdef CONFIG_KEXEC_CORE + if (crash_mem_map && + !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) { + if (crashk_res.end) { + __map_memblock(pgdp, crashk_res.start, + crashk_res.end + 1, + PAGE_KERNEL, + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); + memblock_clear_nomap(crashk_res.start, + resource_size(&crashk_res)); + } + } +#endif } void mark_rodata_ro(void) diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index 7c4ef56265ee..a9e50e930484 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -12,7 +12,7 @@ static DEFINE_XARRAY(mte_pages); void *mte_allocate_tag_storage(void) { /* tags granule is 16 bytes, 2 tags stored per byte */ - return kmalloc(PAGE_SIZE / 16 / 2, GFP_KERNEL); + return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL); } void mte_free_tag_storage(char *storage) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index d35c90d2e47a..50bbed947bec 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -46,7 +46,7 @@ #endif #ifdef CONFIG_KASAN_HW_TAGS -#define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1 +#define TCR_MTE_FLAGS TCR_TCMA1 | TCR_TBI1 | TCR_TBID1 #else /* * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index cc0cf0f5c7c3..9d9250c7cc72 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -89,9 +89,16 @@ #define A64_STXR(sf, Rt, Rn, Rs) \ A64_LSX(sf, Rt, Rn, Rs, STORE_EX) -/* LSE atomics */ +/* + * LSE atomics + * + * STADD is simply encoded as an alias for LDADD with XZR as + * the destination register. + */ #define A64_STADD(sf, Rn, Rs) \ - aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf)) + aarch64_insn_gen_atomic_ld_op(A64_ZR, Rn, Rs, \ + A64_SIZE(sf), AARCH64_INSN_MEM_ATOMIC_ADD, \ + AARCH64_INSN_MEM_ORDER_NONE) /* Add/subtract (immediate) */ #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \ diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile index 932b4fe5c768..cf1307188150 100644 --- a/arch/arm64/tools/Makefile +++ b/arch/arm64/tools/Makefile @@ -5,18 +5,14 @@ kapi := $(gen)/asm kapi-hdrs-y := $(kapi)/cpucaps.h -targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y)) +targets += $(addprefix ../../../, $(kapi-hdrs-y)) PHONY += kapi -kapi: $(kapi-hdrs-y) $(gen-y) - -# Create output directory if not already present -_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +kapi: $(kapi-hdrs-y) quiet_cmd_gen_cpucaps = GEN $@ - cmd_gen_cpucaps = mkdir -p $(dir $@) && \ - $(AWK) -f $(filter-out $(PHONY),$^) > $@ + cmd_gen_cpucaps = mkdir -p $(dir $@); $(AWK) -f $(real-prereqs) > $@ $(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE $(call if_changed,gen_cpucaps) diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index cea7533cb304..3ed418f70e3b 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -7,7 +7,8 @@ BTI HAS_32BIT_EL0_DO_NOT_USE HAS_32BIT_EL1 HAS_ADDRESS_AUTH -HAS_ADDRESS_AUTH_ARCH +HAS_ADDRESS_AUTH_ARCH_QARMA3 +HAS_ADDRESS_AUTH_ARCH_QARMA5 HAS_ADDRESS_AUTH_IMP_DEF HAS_AMU_EXTN HAS_ARMv8_4_TTL @@ -21,7 +22,8 @@ HAS_E0PD HAS_ECV HAS_EPAN HAS_GENERIC_AUTH -HAS_GENERIC_AUTH_ARCH +HAS_GENERIC_AUTH_ARCH_QARMA3 +HAS_GENERIC_AUTH_ARCH_QARMA5 HAS_GENERIC_AUTH_IMP_DEF HAS_IRQ_PRIO_MASKING HAS_LDAPR diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index a7e01573abd8..e003b2473c64 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -8,6 +8,7 @@ menu "Processor type and features" config IA64 bool + select ARCH_BINFMT_ELF_EXTRA_PHDRS select ARCH_HAS_DMA_MARK_CLEAN select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNLEN_USER diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index 51d20cb37706..1684716f0820 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -55,15 +55,15 @@ struct thread_info { #ifndef ASM_OFFSETS_C /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) -#define alloc_thread_stack_node(tsk, node) \ +#define arch_alloc_thread_stack_node(tsk, node) \ ((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE)) #define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) #else #define current_thread_info() ((struct thread_info *) 0) -#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) +#define arch_alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) #define task_thread_info(tsk) ((struct thread_info *) 0) #endif -#define free_thread_stack(tsk) /* nothing */ +#define arch_free_thread_stack(tsk) /* nothing */ #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index be2dfab48fd4..3137b45750df 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -37,6 +37,7 @@ #include <asm/irq.h> #include <asm/machdep.h> #include <asm/io.h> +#include <asm/config.h> static unsigned long amiga_model; diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c index 581a5f68d102..42a8b8e2b664 100644 --- a/arch/m68k/apollo/config.c +++ b/arch/m68k/apollo/config.c @@ -16,6 +16,7 @@ #include <asm/apollohw.h> #include <asm/irq.h> #include <asm/machdep.h> +#include <asm/config.h> u_long sio01_physaddr; u_long sio23_physaddr; diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c index 261a0f57cc9a..38a7c0578105 100644 --- a/arch/m68k/atari/config.c +++ b/arch/m68k/atari/config.c @@ -46,6 +46,7 @@ #include <asm/machdep.h> #include <asm/hwtest.h> #include <asm/io.h> +#include <asm/config.h> u_long atari_mch_cookie; EXPORT_SYMBOL(atari_mch_cookie); diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c index 0c6feafbbd11..9b060d466e03 100644 --- a/arch/m68k/bvme6000/config.c +++ b/arch/m68k/bvme6000/config.c @@ -36,6 +36,7 @@ #include <asm/traps.h> #include <asm/machdep.h> #include <asm/bvme6000hw.h> +#include <asm/config.h> static void bvme6000_get_model(char *model); extern void bvme6000_sched_init(void); diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index bc9952f8be66..49f301c57df5 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -104,7 +104,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -204,7 +203,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -229,7 +227,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -435,6 +432,7 @@ CONFIG_FB_AMIGA_ECS=y CONFIG_FB_AMIGA_AGA=y CONFIG_FB_FM2=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y CONFIG_LOGO=y CONFIG_SOUND=m CONFIG_DMASOUND_PAULA=m @@ -643,7 +641,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index a77269c6e5ba..405997b61447 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -100,7 +100,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -200,7 +199,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -225,7 +223,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -393,6 +390,7 @@ CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_VGA16 is not set # CONFIG_LOGO_LINUX_CLUT224 is not set @@ -599,7 +597,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 7a74efa6b9a1..eb342a33b73e 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -107,7 +107,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -207,7 +206,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -232,7 +230,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -621,7 +618,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index a5323bf2eb33..e6de6b4dff86 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -97,7 +97,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -197,7 +196,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -222,7 +220,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -592,7 +589,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 5e80aa0869d5..048d9b114eb4 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -99,7 +99,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -199,7 +198,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -224,7 +222,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -395,6 +392,7 @@ CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set @@ -601,7 +599,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index e84326a3f62d..4e5b32ba00df 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -98,7 +98,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -198,7 +197,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -223,7 +221,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -623,7 +620,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 337552f43339..7df61e743591 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -118,7 +118,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -218,7 +217,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -243,7 +241,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -497,6 +494,7 @@ CONFIG_FB_ATARI=y CONFIG_FB_VALKYRIE=y CONFIG_FB_MAC=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y CONFIG_LOGO=y CONFIG_SOUND=m CONFIG_DMASOUND_ATARI=m @@ -708,7 +706,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 7b688f7d272a..80922fe80d9d 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -96,7 +96,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -196,7 +195,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -221,7 +219,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -591,7 +588,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 7c2cb31d63dd..530c4cf7c59b 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -97,7 +97,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -197,7 +196,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -222,7 +220,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -592,7 +589,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index ca43897af26d..d3f371e490ec 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -98,7 +98,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -198,7 +197,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -223,7 +221,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -610,7 +607,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index e3d515f37144..db6769790bdb 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -94,7 +94,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -194,7 +193,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -219,7 +217,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -388,9 +385,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -CONFIG_FB=y -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_LOGO=y CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -593,7 +587,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index d601606c969b..e9c362683666 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -94,7 +94,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -194,7 +193,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -219,7 +217,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -387,9 +384,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -CONFIG_FB=y -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_LOGO=y CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -593,7 +587,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c index ce1eb3d3d55d..2c92843397c3 100644 --- a/arch/m68k/hp300/config.c +++ b/arch/m68k/hp300/config.c @@ -22,6 +22,7 @@ #include <asm/blinken.h> #include <asm/io.h> /* readb() and writeb() */ #include <asm/hp300hw.h> +#include <asm/config.h> #include "time.h" diff --git a/arch/m68k/include/asm/config.h b/arch/m68k/include/asm/config.h new file mode 100644 index 000000000000..e73ffa23c4f5 --- /dev/null +++ b/arch/m68k/include/asm/config.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * This file contains prototypes provided by each m68k machine + * to parse bootinfo data structures and to configure the machine + */ + +#ifndef _M68K_CONFIG_H +#define _M68K_CONFIG_H + +extern int amiga_parse_bootinfo(const struct bi_record *record); +extern int apollo_parse_bootinfo(const struct bi_record *record); +extern int atari_parse_bootinfo(const struct bi_record *record); +extern int bvme6000_parse_bootinfo(const struct bi_record *record); +extern int hp300_parse_bootinfo(const struct bi_record *record); +extern int mac_parse_bootinfo(const struct bi_record *record); +extern int mvme147_parse_bootinfo(const struct bi_record *record); +extern int mvme16x_parse_bootinfo(const struct bi_record *record); +extern int q40_parse_bootinfo(const struct bi_record *record); + +extern void config_amiga(void); +extern void config_apollo(void); +extern void config_atari(void); +extern void config_bvme6000(void); +extern void config_hp300(void); +extern void config_mac(void); +extern void config_mvme147(void); +extern void config_mvme16x(void); +extern void config_q40(void); +extern void config_sun3(void); +extern void config_sun3x(void); + +#endif /* _M68K_CONFIG_H */ diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 49e573b94326..8228275aae3e 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -47,6 +47,7 @@ #endif #include <asm/macintosh.h> #include <asm/natfeat.h> +#include <asm/config.h> #if !FPSTATESIZE || !NR_IRQS #warning No CPU/platform type selected, your kernel will not work! @@ -113,28 +114,6 @@ EXPORT_SYMBOL(isa_type); EXPORT_SYMBOL(isa_sex); #endif -extern int amiga_parse_bootinfo(const struct bi_record *); -extern int atari_parse_bootinfo(const struct bi_record *); -extern int mac_parse_bootinfo(const struct bi_record *); -extern int q40_parse_bootinfo(const struct bi_record *); -extern int bvme6000_parse_bootinfo(const struct bi_record *); -extern int mvme16x_parse_bootinfo(const struct bi_record *); -extern int mvme147_parse_bootinfo(const struct bi_record *); -extern int hp300_parse_bootinfo(const struct bi_record *); -extern int apollo_parse_bootinfo(const struct bi_record *); - -extern void config_amiga(void); -extern void config_atari(void); -extern void config_mac(void); -extern void config_sun3(void); -extern void config_apollo(void); -extern void config_mvme147(void); -extern void config_mvme16x(void); -extern void config_bvme6000(void); -extern void config_hp300(void); -extern void config_q40(void); -extern void config_sun3x(void); - #define MASK_256K 0xfffc0000 extern void paging_init(void); diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 5d16f9b47aa9..65d124ec80bb 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -47,6 +47,7 @@ #include <asm/mac_via.h> #include <asm/mac_oss.h> #include <asm/mac_psc.h> +#include <asm/config.h> /* Mac bootinfo struct */ struct mac_booter_data mac_bi_data; diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 1493cf5eac1e..71aa9f6315dc 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -93,8 +93,6 @@ retry: vma = find_vma(mm, address); if (!vma) goto map_err; - if (vma->vm_flags & VM_IO) - goto acc_err; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index dfd6202fd403..b96ea7c76a19 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -34,6 +34,7 @@ #include <asm/traps.h> #include <asm/machdep.h> #include <asm/mvme147hw.h> +#include <asm/config.h> static void mvme147_get_model(char *model); diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index b4422c2dfbbf..88cbdc10925b 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -37,6 +37,7 @@ #include <asm/traps.h> #include <asm/machdep.h> #include <asm/mvme16xhw.h> +#include <asm/config.h> extern t_bdid mvme_bdid; diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index 5caf1e5be1c2..9237243077ce 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c @@ -34,6 +34,7 @@ #include <asm/traps.h> #include <asm/machdep.h> #include <asm/q40_master.h> +#include <asm/config.h> extern void q40_init_IRQ(void); static void q40_get_model(char *model); diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 59798e43cdb0..da568e981604 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -45,6 +45,8 @@ config MICROBLAZE select SET_FS select ZONE_DMA select TRACE_IRQFLAGS_SUPPORT + select GENERIC_IRQ_MULTI_HANDLER + select HANDLE_DOMAIN_IRQ # Endianness selection choice diff --git a/arch/microblaze/include/asm/irq.h b/arch/microblaze/include/asm/irq.h index 0a28e80bbab0..cb6ab55d1d01 100644 --- a/arch/microblaze/include/asm/irq.h +++ b/arch/microblaze/include/asm/irq.h @@ -11,7 +11,4 @@ struct pt_regs; extern void do_IRQ(struct pt_regs *regs); -/* should be defined in each interrupt controller driver */ -extern unsigned int xintc_get_irq(void); - #endif /* _ASM_MICROBLAZE_IRQ_H */ diff --git a/arch/microblaze/kernel/irq.c b/arch/microblaze/kernel/irq.c index 903dad822fad..1f8cb4c4f74f 100644 --- a/arch/microblaze/kernel/irq.c +++ b/arch/microblaze/kernel/irq.c @@ -20,27 +20,13 @@ #include <linux/irqchip.h> #include <linux/of_irq.h> -static u32 concurrent_irq; - void __irq_entry do_IRQ(struct pt_regs *regs) { - unsigned int irq; struct pt_regs *old_regs = set_irq_regs(regs); trace_hardirqs_off(); irq_enter(); - irq = xintc_get_irq(); -next_irq: - BUG_ON(!irq); - generic_handle_irq(irq); - - irq = xintc_get_irq(); - if (irq != -1U) { - pr_debug("next irq: %d\n", irq); - ++concurrent_irq; - goto next_irq; - } - + handle_arch_irq(regs); irq_exit(); set_irq_regs(old_regs); trace_hardirqs_on(); diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 43c1c880def6..864b4756dcdf 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -10,10 +10,12 @@ config PARISC select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAS_PTE_SPECIAL select ARCH_NO_SG_CHAIN select ARCH_SUPPORTS_HUGETLBFS if PA20 select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_STACKWALK + select ARCH_HAS_DEBUG_VM_PGTABLE select HAVE_RELIABLE_STACKTRACE select DMA_OPS select RTC_CLASS @@ -259,18 +261,6 @@ config PARISC_PAGE_SIZE_64KB endchoice -config PARISC_SELF_EXTRACT - bool "Build kernel as self-extracting executable" - default y - help - Say Y if you want to build the parisc kernel as a kind of - self-extracting executable. - - If you say N here, the kernel will be compressed with gzip - which can be loaded by the palo bootloader directly too. - - If you don't know what to do here, say Y. - config SMP bool "Symmetric multi-processing support" help diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 82d77f4b0d08..2a9387a93592 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -15,12 +15,8 @@ # Mike Shaver, Helge Deller and Martin K. Petersen # -ifdef CONFIG_PARISC_SELF_EXTRACT boot := arch/parisc/boot KBUILD_IMAGE := $(boot)/bzImage -else -KBUILD_IMAGE := vmlinuz -endif NM = sh $(srctree)/arch/parisc/nm CHECKFLAGS += -D__hppa__=1 @@ -44,6 +40,16 @@ endif export LD_BFD +# Set default 32 bits cross compilers for vdso +CC_ARCHES_32 = hppa hppa2.0 hppa1.1 +CC_SUFFIXES = linux linux-gnu unknown-linux-gnu +CROSS32_COMPILE := $(call cc-cross-prefix, \ + $(foreach a,$(CC_ARCHES_32), \ + $(foreach s,$(CC_SUFFIXES),$(a)-$(s)-))) +CROSS32CC := $(CROSS32_COMPILE)gcc +export CROSS32CC + +# Set default cross compiler for kernel build ifdef cross_compiling ifeq ($(CROSS_COMPILE),) CC_SUFFIXES = linux linux-gnu unknown-linux-gnu @@ -155,14 +161,29 @@ Image: vmlinux bzImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -ifdef CONFIG_PARISC_SELF_EXTRACT vmlinuz: bzImage $(OBJCOPY) $(boot)/bzImage $@ -else -vmlinuz: vmlinux - @$(KGZIP) -cf -9 $< > $@ + +ifeq ($(KBUILD_EXTMOD),) +# We need to generate vdso-offsets.h before compiling certain files in kernel/. +# In order to do that, we should use the archprepare target, but we can't since +# asm-offsets.h is included in some files used to generate vdso-offsets.h, and +# asm-offsets.h is built in prepare0, for which archprepare is a dependency. +# Therefore we need to generate the header after prepare0 has been made, hence +# this hack. +prepare: vdso_prepare +vdso_prepare: prepare0 + $(if $(CONFIG_64BIT),$(Q)$(MAKE) \ + $(build)=arch/parisc/kernel/vdso64 include/generated/vdso64-offsets.h) + $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso32 include/generated/vdso32-offsets.h endif +PHONY += vdso_install + +vdso_install: + $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso $@ + $(if $(CONFIG_COMPAT_VDSO), \ + $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso32 $@) install: $(CONFIG_SHELL) $(srctree)/arch/parisc/install.sh \ $(KERNELRELEASE) vmlinux System.map "$(INSTALL_PATH)" diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 6369082c6c74..ea0cb318b13d 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -47,6 +47,12 @@ #define PRIV_USER 3 #define PRIV_KERNEL 0 +/* Space register used inside kernel */ +#define SR_KERNEL 0 +#define SR_TEMP1 1 +#define SR_TEMP2 2 +#define SR_USER 3 + #ifdef __ASSEMBLY__ #ifdef CONFIG_64BIT diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index d53e9e27dba0..5032e758594e 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -39,16 +39,13 @@ extern int icache_stride; extern struct pdc_cache_info cache_info; void parisc_setup_cache_timing(void); -#define pdtlb(addr) asm volatile("pdtlb 0(%%sr1,%0)" \ +#define pdtlb(sr, addr) asm volatile("pdtlb 0(%%sr%0,%1)" \ ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ - : : "r" (addr) : "memory") -#define pitlb(addr) asm volatile("pitlb 0(%%sr1,%0)" \ + : : "i"(sr), "r" (addr) : "memory") +#define pitlb(sr, addr) asm volatile("pitlb 0(%%sr%0,%1)" \ ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ ALTERNATIVE(ALT_COND_NO_SPLIT_TLB, INSN_NOP) \ - : : "r" (addr) : "memory") -#define pdtlb_kernel(addr) asm volatile("pdtlb 0(%0)" \ - ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ - : : "r" (addr) : "memory") + : : "i"(sr), "r" (addr) : "memory") #define asm_io_fdc(addr) asm volatile("fdc %%r0(%0)" \ ALTERNATIVE(ALT_COND_NO_DCACHE, INSN_NOP) \ diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 859b8a34adcf..e8b4a03343d3 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -9,16 +9,11 @@ /* The usual comment is "Caches aren't brain-dead on the <architecture>". * Unfortunately, that doesn't apply to PA-RISC. */ -/* Internal implementation */ -void flush_data_cache_local(void *); /* flushes local data-cache only */ -void flush_instruction_cache_local(void *); /* flushes local code-cache only */ -#ifdef CONFIG_SMP -void flush_data_cache(void); /* flushes data-cache only (all processors) */ -void flush_instruction_cache(void); /* flushes i-cache only (all processors) */ -#else -#define flush_data_cache() flush_data_cache_local(NULL) -#define flush_instruction_cache() flush_instruction_cache_local(NULL) -#endif +#include <linux/jump_label.h> + +DECLARE_STATIC_KEY_TRUE(parisc_has_cache); +DECLARE_STATIC_KEY_TRUE(parisc_has_dcache); +DECLARE_STATIC_KEY_TRUE(parisc_has_icache); #define flush_cache_dup_mm(mm) flush_cache_mm(mm) diff --git a/arch/parisc/include/asm/current.h b/arch/parisc/include/asm/current.h index 568b739e42af..dc7aea07c3f3 100644 --- a/arch/parisc/include/asm/current.h +++ b/arch/parisc/include/asm/current.h @@ -2,14 +2,16 @@ #ifndef _ASM_PARISC_CURRENT_H #define _ASM_PARISC_CURRENT_H -#include <asm/special_insns.h> - #ifndef __ASSEMBLY__ struct task_struct; static __always_inline struct task_struct *get_current(void) { - return (struct task_struct *) mfctl(30); + struct task_struct *ts; + + /* do not use mfctl() macro as it is marked volatile */ + asm( "mfctl %%cr30,%0" : "=r" (ts) ); + return ts; } #define current get_current() diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h index 3bd465a27791..cc426d365892 100644 --- a/arch/parisc/include/asm/elf.h +++ b/arch/parisc/include/asm/elf.h @@ -359,4 +359,19 @@ struct mm_struct; extern unsigned long arch_randomize_brk(struct mm_struct *); #define arch_randomize_brk arch_randomize_brk + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int executable_stack); +#define VDSO_AUX_ENT(a, b) NEW_AUX_ENT(a, b) +#define VDSO_CURRENT_BASE current->mm->context.vdso_base + +#define ARCH_DLINFO \ +do { \ + if (VDSO_CURRENT_BASE) { \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE);\ + } \ +} while (0) + #endif diff --git a/arch/parisc/include/asm/kprobes.h b/arch/parisc/include/asm/kprobes.h index 904034da4974..0a175ac87698 100644 --- a/arch/parisc/include/asm/kprobes.h +++ b/arch/parisc/include/asm/kprobes.h @@ -18,8 +18,9 @@ #include <linux/notifier.h> #define PARISC_KPROBES_BREAK_INSN 0x3ff801f +#define PARISC_KPROBES_BREAK_INSN2 0x3ff801e #define __ARCH_WANT_KPROBES_INSN_SLOT -#define MAX_INSN_SIZE 1 +#define MAX_INSN_SIZE 2 typedef u32 kprobe_opcode_t; struct kprobe; @@ -29,7 +30,7 @@ void arch_remove_kprobe(struct kprobe *p); #define flush_insn_slot(p) \ flush_icache_range((unsigned long)&(p)->ainsn.insn[0], \ (unsigned long)&(p)->ainsn.insn[0] + \ - sizeof(kprobe_opcode_t)) + MAX_INSN_SIZE*sizeof(kprobe_opcode_t)) #define kretprobe_blacklist_size 0 diff --git a/arch/parisc/include/asm/mmu.h b/arch/parisc/include/asm/mmu.h index 3fb70a601d5c..44fd062b62ed 100644 --- a/arch/parisc/include/asm/mmu.h +++ b/arch/parisc/include/asm/mmu.h @@ -2,7 +2,9 @@ #ifndef _PARISC_MMU_H_ #define _PARISC_MMU_H_ -/* On parisc, we store the space id here */ -typedef unsigned long mm_context_t; +typedef struct { + unsigned long space_id; + unsigned long vdso_base; +} mm_context_t; #endif /* _PARISC_MMU_H_ */ diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h index 726257648d9f..c9187fe836a3 100644 --- a/arch/parisc/include/asm/mmu_context.h +++ b/arch/parisc/include/asm/mmu_context.h @@ -20,7 +20,7 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) { BUG_ON(atomic_read(&mm->mm_users) != 1); - mm->context = alloc_sid(); + mm->context.space_id = alloc_sid(); return 0; } @@ -28,22 +28,22 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) static inline void destroy_context(struct mm_struct *mm) { - free_sid(mm->context); - mm->context = 0; + free_sid(mm->context.space_id); + mm->context.space_id = 0; } static inline unsigned long __space_to_prot(mm_context_t context) { #if SPACEID_SHIFT == 0 - return context << 1; + return context.space_id << 1; #else - return context >> (SPACEID_SHIFT - 1); + return context.space_id >> (SPACEID_SHIFT - 1); #endif } static inline void load_context(mm_context_t context) { - mtsp(context, 3); + mtsp(context.space_id, SR_USER); mtctl(__space_to_prot(context), 8); } @@ -89,8 +89,8 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) BUG_ON(next == &init_mm); /* Should never happen */ - if (next->context == 0) - next->context = alloc_sid(); + if (next->context.space_id == 0) + next->context.space_id = alloc_sid(); switch_mm(prev,next,current); } diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 3e7cf882639f..7dff736936d0 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -70,9 +70,9 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) unsigned long flags; purge_tlb_start(flags); - mtsp(mm->context, 1); - pdtlb(addr); - pitlb(addr); + mtsp(mm->context.space_id, SR_TEMP1); + pdtlb(SR_TEMP1, addr); + pitlb(SR_TEMP1, addr); purge_tlb_end(flags); } @@ -219,9 +219,10 @@ extern void __update_cache(pte_t pte); #define _PAGE_PRESENT (1 << xlate_pabit(_PAGE_PRESENT_BIT)) #define _PAGE_HUGE (1 << xlate_pabit(_PAGE_HPAGE_BIT)) #define _PAGE_USER (1 << xlate_pabit(_PAGE_USER_BIT)) +#define _PAGE_SPECIAL (_PAGE_DMB) #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED) -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL) #define _PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_DIRTY | _PAGE_ACCESSED) #define _PAGE_KERNEL_EXEC (_PAGE_KERNEL_RO | _PAGE_EXEC) #define _PAGE_KERNEL_RWX (_PAGE_KERNEL_EXEC | _PAGE_WRITE) @@ -348,6 +349,7 @@ static inline void pud_clear(pud_t *pud) { static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } +static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } @@ -355,6 +357,7 @@ static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_WRITE; ret static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { pte_val(pte) |= _PAGE_SPECIAL; return pte; } /* * Huge pte definitions. diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 3a3d05438408..006364212795 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -236,7 +236,7 @@ on downward growing arches, it looks like this: #define start_thread(regs, new_pc, new_sp) do { \ elf_addr_t *sp = (elf_addr_t *)new_sp; \ - __u32 spaceid = (__u32)current->mm->context; \ + __u32 spaceid = (__u32)current->mm->context.space_id; \ elf_addr_t pc = (elf_addr_t)new_pc | 3; \ elf_caddr_t *argv = (elf_caddr_t *)bprm->exec + 1; \ \ diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h index 2b3010ade00e..bb7fb4153327 100644 --- a/arch/parisc/include/asm/rt_sigframe.h +++ b/arch/parisc/include/asm/rt_sigframe.h @@ -2,16 +2,8 @@ #ifndef _ASM_PARISC_RT_SIGFRAME_H #define _ASM_PARISC_RT_SIGFRAME_H -#define SIGRETURN_TRAMP 4 -#define SIGRESTARTBLOCK_TRAMP 5 -#define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP) - struct rt_sigframe { - /* XXX: Must match trampoline size in arch/parisc/kernel/signal.c - Secondary to that it must protect the ERESTART_RESTARTBLOCK - trampoline we left on the stack (we were bad and didn't - change sp so we could run really fast.) */ - unsigned int tramp[TRAMP_SIZE]; + unsigned int tramp[2]; /* holds original return address */ struct siginfo info; struct ucontext uc; }; diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h index 16ee41e77174..41b3ddbd344c 100644 --- a/arch/parisc/include/asm/special_insns.h +++ b/arch/parisc/include/asm/special_insns.h @@ -55,8 +55,8 @@ static inline void set_eiem(unsigned long val) #define mfsp(reg) ({ \ unsigned long cr; \ __asm__ __volatile__( \ - "mfsp " #reg ",%0" : \ - "=r" (cr) \ + "mfsp %%sr%1,%0" \ + : "=r" (cr) : "i"(reg) \ ); \ cr; \ }) diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h index c5ded01d45be..5ffd7c17f593 100644 --- a/arch/parisc/include/asm/tlbflush.h +++ b/arch/parisc/include/asm/tlbflush.h @@ -17,7 +17,7 @@ int __flush_tlb_range(unsigned long sid, unsigned long start, unsigned long end); #define flush_tlb_range(vma, start, end) \ - __flush_tlb_range((vma)->vm_mm->context, start, end) + __flush_tlb_range((vma)->vm_mm->context.space_id, start, end) #define flush_tlb_kernel_range(start, end) \ __flush_tlb_range(0, start, end) diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h index 34619f010c63..0ccdb738a9a3 100644 --- a/arch/parisc/include/asm/traps.h +++ b/arch/parisc/include/asm/traps.h @@ -18,6 +18,7 @@ unsigned long parisc_acctyp(unsigned long code, unsigned int inst); const char *trap_name(unsigned long code); void do_page_fault(struct pt_regs *regs, unsigned long code, unsigned long address); +int handle_nadtlb_fault(struct pt_regs *regs); #endif #endif diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 123d5f16cd9d..b3eb4541e441 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -79,18 +79,18 @@ struct exception_table_entry { #define __get_user(val, ptr) \ ({ \ - __get_user_internal("%%sr3,", val, ptr); \ + __get_user_internal(SR_USER, val, ptr); \ }) #define __get_user_asm(sr, val, ldx, ptr) \ { \ register long __gu_val; \ \ - __asm__("1: " ldx " 0(" sr "%2),%0\n" \ + __asm__("1: " ldx " 0(%%sr%2,%3),%0\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ : "=r"(__gu_val), "+r"(__gu_err) \ - : "r"(ptr)); \ + : "i"(sr), "r"(ptr)); \ \ (val) = (__force __typeof__(*(ptr))) __gu_val; \ } @@ -100,7 +100,7 @@ struct exception_table_entry { { \ type __z; \ long __err; \ - __err = __get_user_internal("%%sr0,", __z, (type *)(src)); \ + __err = __get_user_internal(SR_KERNEL, __z, (type *)(src)); \ if (unlikely(__err)) \ goto err_label; \ else \ @@ -118,13 +118,13 @@ struct exception_table_entry { } __gu_tmp; \ \ __asm__(" copy %%r0,%R0\n" \ - "1: ldw 0(" sr "%2),%0\n" \ - "2: ldw 4(" sr "%2),%R0\n" \ + "1: ldw 0(%%sr%2,%3),%0\n" \ + "2: ldw 4(%%sr%2,%3),%R0\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ : "=&r"(__gu_tmp.l), "+r"(__gu_err) \ - : "r"(ptr)); \ + : "i"(sr), "r"(ptr)); \ \ (val) = __gu_tmp.t; \ } @@ -151,14 +151,14 @@ struct exception_table_entry { ({ \ __typeof__(&*(ptr)) __ptr = ptr; \ __typeof__(*(__ptr)) __x = (__typeof__(*(__ptr)))(x); \ - __put_user_internal("%%sr3,", __x, __ptr); \ + __put_user_internal(SR_USER, __x, __ptr); \ }) #define __put_kernel_nofault(dst, src, type, err_label) \ { \ type __z = *(type *)(src); \ long __err; \ - __err = __put_user_internal("%%sr0,", __z, (type *)(dst)); \ + __err = __put_user_internal(SR_KERNEL, __z, (type *)(dst)); \ if (unlikely(__err)) \ goto err_label; \ } @@ -178,24 +178,24 @@ struct exception_table_entry { #define __put_user_asm(sr, stx, x, ptr) \ __asm__ __volatile__ ( \ - "1: " stx " %2,0(" sr "%1)\n" \ + "1: " stx " %1,0(%%sr%2,%3)\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ : "+r"(__pu_err) \ - : "r"(ptr), "r"(x)) + : "r"(x), "i"(sr), "r"(ptr)) #if !defined(CONFIG_64BIT) #define __put_user_asm64(sr, __val, ptr) do { \ __asm__ __volatile__ ( \ - "1: stw %2,0(" sr "%1)\n" \ - "2: stw %R2,4(" sr "%1)\n" \ + "1: stw %1,0(%%sr%2,%3)\n" \ + "2: stw %R1,4(%%sr%2,%3)\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ : "+r"(__pu_err) \ - : "r"(ptr), "r"(__val)); \ + : "r"(__val), "i"(sr), "r"(ptr)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index cd438e4150f6..7708a5806f09 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -63,10 +63,6 @@ ); \ __sys_res = (long)__res; \ } \ - if ( (unsigned long)__sys_res >= (unsigned long)-4095 ){ \ - errno = -__sys_res; \ - __sys_res = -1; \ - } \ __sys_res; \ }) diff --git a/arch/parisc/include/asm/vdso.h b/arch/parisc/include/asm/vdso.h new file mode 100644 index 000000000000..ef8206193f82 --- /dev/null +++ b/arch/parisc/include/asm/vdso.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PARISC_VDSO_H__ +#define __PARISC_VDSO_H__ + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_64BIT +#include <generated/vdso64-offsets.h> +#endif +#include <generated/vdso32-offsets.h> + +#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name)) +#define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name)) + +extern struct vdso_data *vdso_data; + +#endif /* __ASSEMBLY __ */ + +/* Default link addresses for the vDSOs */ +#define VDSO_LBASE 0 + +#define VDSO_VERSION_STRING LINUX_5.18 + +#endif /* __PARISC_VDSO_H__ */ diff --git a/arch/parisc/include/uapi/asm/auxvec.h b/arch/parisc/include/uapi/asm/auxvec.h new file mode 100644 index 000000000000..90d2aa699cf3 --- /dev/null +++ b/arch/parisc/include/uapi/asm/auxvec.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_PARISC_AUXVEC_H +#define _UAPI_PARISC_AUXVEC_H + +/* The vDSO location. */ +#define AT_SYSINFO_EHDR 33 + +#endif /* _UAPI_PARISC_AUXVEC_H */ diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index 8fb819bbbb17..d579243edc2f 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -39,3 +39,8 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KEXEC_CORE) += kexec.o relocate_kernel.o obj-$(CONFIG_KEXEC_FILE) += kexec_file.o + +# vdso +obj-y += vdso.o +obj-$(CONFIG_64BIT) += vdso64/ +obj-y += vdso32/ diff --git a/arch/parisc/kernel/alternative.c b/arch/parisc/kernel/alternative.c index fa28c4c9f972..daa1e9047275 100644 --- a/arch/parisc/kernel/alternative.c +++ b/arch/parisc/kernel/alternative.c @@ -8,6 +8,7 @@ #include <asm/processor.h> #include <asm/sections.h> #include <asm/alternative.h> +#include <asm/cacheflush.h> #include <linux/module.h> @@ -107,5 +108,14 @@ void __init apply_alternatives_all(void) apply_alternatives((struct alt_instr *) &__alt_instructions, (struct alt_instr *) &__alt_instructions_end, NULL); + if (cache_info.dc_size == 0 && cache_info.ic_size == 0) { + pr_info("alternatives: optimizing cache-flushes.\n"); + static_branch_disable(&parisc_has_cache); + } + if (cache_info.dc_size == 0) + static_branch_disable(&parisc_has_dcache); + if (cache_info.ic_size == 0) + static_branch_disable(&parisc_has_icache); + set_kernel_text_rw(0); } diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 2a83ef36d216..2673d57eeb00 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -26,7 +26,11 @@ #include <asm/ptrace.h> #include <asm/processor.h> #include <asm/pdc.h> +#include <uapi/asm/sigcontext.h> +#include <asm/ucontext.h> +#include <asm/rt_sigframe.h> #include <linux/uaccess.h> +#include "signal32.h" /* Add FRAME_SIZE to the size x and align it to y. All definitions * that use align_frame will include space for a frame. @@ -218,6 +222,11 @@ int main(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PRE_COUNT, offsetof(struct task_struct, thread_info.preempt_count)); BLANK(); + DEFINE(ASM_SIGFRAME_SIZE, PARISC_RT_SIGFRAME_SIZE); + DEFINE(SIGFRAME_CONTEXT_REGS, offsetof(struct rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE); + DEFINE(ASM_SIGFRAME_SIZE32, PARISC_RT_SIGFRAME_SIZE32); + DEFINE(SIGFRAME_CONTEXT_REGS32, offsetof(struct compat_rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE32); + BLANK(); DEFINE(ICACHE_BASE, offsetof(struct pdc_cache_info, ic_base)); DEFINE(ICACHE_STRIDE, offsetof(struct pdc_cache_info, ic_stride)); DEFINE(ICACHE_COUNT, offsetof(struct pdc_cache_info, ic_count)); diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 94150b91c96f..456e879d34a8 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -38,6 +38,9 @@ EXPORT_SYMBOL(flush_dcache_page_asm); void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr); +/* Internal implementation in arch/parisc/kernel/pacache.S */ +void flush_data_cache_local(void *); /* flushes local data-cache only */ +void flush_instruction_cache_local(void); /* flushes local code-cache only */ /* On some machines (i.e., ones with the Merced bus), there can be * only a single PxTLB broadcast at a time; this must be guaranteed @@ -58,26 +61,35 @@ struct pdc_cache_info cache_info __ro_after_init; static struct pdc_btlb_info btlb_info __ro_after_init; #endif -#ifdef CONFIG_SMP -void -flush_data_cache(void) +DEFINE_STATIC_KEY_TRUE(parisc_has_cache); +DEFINE_STATIC_KEY_TRUE(parisc_has_dcache); +DEFINE_STATIC_KEY_TRUE(parisc_has_icache); + +static void cache_flush_local_cpu(void *dummy) { - on_each_cpu(flush_data_cache_local, NULL, 1); + if (static_branch_likely(&parisc_has_icache)) + flush_instruction_cache_local(); + if (static_branch_likely(&parisc_has_dcache)) + flush_data_cache_local(NULL); } -void -flush_instruction_cache(void) + +void flush_cache_all_local(void) { - on_each_cpu(flush_instruction_cache_local, NULL, 1); + cache_flush_local_cpu(NULL); } -#endif -void -flush_cache_all_local(void) +void flush_cache_all(void) { - flush_instruction_cache_local(NULL); - flush_data_cache_local(NULL); + if (static_branch_likely(&parisc_has_cache)) + on_each_cpu(cache_flush_local_cpu, NULL, 1); } -EXPORT_SYMBOL(flush_cache_all_local); + +static inline void flush_data_cache(void) +{ + if (static_branch_likely(&parisc_has_dcache)) + on_each_cpu(flush_data_cache_local, NULL, 1); +} + /* Virtual address of pfn. */ #define pfn_va(pfn) __va(PFN_PHYS(pfn)) @@ -303,6 +315,8 @@ static inline void __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long physaddr) { + if (!static_branch_likely(&parisc_has_cache)) + return; preempt_disable(); flush_dcache_page_asm(physaddr, vmaddr); if (vma->vm_flags & VM_EXEC) @@ -314,6 +328,8 @@ static inline void __purge_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long physaddr) { + if (!static_branch_likely(&parisc_has_cache)) + return; preempt_disable(); purge_dcache_page_asm(physaddr, vmaddr); if (vma->vm_flags & VM_EXEC) @@ -375,7 +391,6 @@ EXPORT_SYMBOL(flush_dcache_page); /* Defined in arch/parisc/kernel/pacache.S */ EXPORT_SYMBOL(flush_kernel_dcache_range_asm); -EXPORT_SYMBOL(flush_data_cache_local); EXPORT_SYMBOL(flush_kernel_icache_range_asm); #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */ @@ -388,7 +403,7 @@ void __init parisc_setup_cache_timing(void) { unsigned long rangetime, alltime; unsigned long size; - unsigned long threshold; + unsigned long threshold, threshold2; alltime = mfctl(16); flush_data_cache(); @@ -403,8 +418,20 @@ void __init parisc_setup_cache_timing(void) alltime, size, rangetime); threshold = L1_CACHE_ALIGN(size * alltime / rangetime); - if (threshold > cache_info.dc_size) - threshold = cache_info.dc_size; + + /* + * The threshold computed above isn't very reliable since the + * flush times depend greatly on the percentage of dirty lines + * in the flush range. Further, the whole cache time doesn't + * include the time to refill lines that aren't in the mm/vma + * being flushed. By timing glibc build and checks on mako cpus, + * the following formula seems to work reasonably well. The + * value from the timing calculation is too small, and increases + * build and check times by almost a factor two. + */ + threshold2 = cache_info.dc_size * num_online_cpus(); + if (threshold2 > threshold) + threshold = threshold2; if (threshold) parisc_cache_flush_threshold = threshold; printk(KERN_INFO "Cache flush threshold set to %lu KiB\n", @@ -457,7 +484,7 @@ void flush_kernel_dcache_page_addr(void *addr) flush_kernel_dcache_page_asm(addr); purge_tlb_start(flags); - pdtlb_kernel(addr); + pdtlb(SR_KERNEL, addr); purge_tlb_end(flags); } EXPORT_SYMBOL(flush_kernel_dcache_page_addr); @@ -496,25 +523,15 @@ int __flush_tlb_range(unsigned long sid, unsigned long start, but cause a purge request to be broadcast to other TLBs. */ while (start < end) { purge_tlb_start(flags); - mtsp(sid, 1); - pdtlb(start); - pitlb(start); + mtsp(sid, SR_TEMP1); + pdtlb(SR_TEMP1, start); + pitlb(SR_TEMP1, start); purge_tlb_end(flags); start += PAGE_SIZE; } return 0; } -static void cacheflush_h_tmp_function(void *dummy) -{ - flush_cache_all_local(); -} - -void flush_cache_all(void) -{ - on_each_cpu(cacheflush_h_tmp_function, NULL, 1); -} - static inline unsigned long mm_total_size(struct mm_struct *mm) { struct vm_area_struct *vma; @@ -558,15 +575,6 @@ static void flush_cache_pages(struct vm_area_struct *vma, struct mm_struct *mm, } } -static void flush_user_cache_tlb(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - flush_user_dcache_range_asm(start, end); - if (vma->vm_flags & VM_EXEC) - flush_user_icache_range_asm(start, end); - flush_tlb_range(vma, start, end); -} - void flush_cache_mm(struct mm_struct *mm) { struct vm_area_struct *vma; @@ -575,23 +583,14 @@ void flush_cache_mm(struct mm_struct *mm) rp3440, etc. So, avoid it if the mm isn't too big. */ if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && mm_total_size(mm) >= parisc_cache_flush_threshold) { - if (mm->context) + if (mm->context.space_id) flush_tlb_all(); flush_cache_all(); return; } - preempt_disable(); - if (mm->context == mfsp(3)) { - for (vma = mm->mmap; vma; vma = vma->vm_next) - flush_user_cache_tlb(vma, vma->vm_start, vma->vm_end); - preempt_enable(); - return; - } - for (vma = mm->mmap; vma; vma = vma->vm_next) flush_cache_pages(vma, mm, vma->vm_start, vma->vm_end); - preempt_enable(); } void flush_cache_range(struct vm_area_struct *vma, @@ -599,29 +598,21 @@ void flush_cache_range(struct vm_area_struct *vma, { if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && end - start >= parisc_cache_flush_threshold) { - if (vma->vm_mm->context) + if (vma->vm_mm->context.space_id) flush_tlb_range(vma, start, end); flush_cache_all(); return; } - preempt_disable(); - if (vma->vm_mm->context == mfsp(3)) { - flush_user_cache_tlb(vma, start, end); - preempt_enable(); - return; - } - - flush_cache_pages(vma, vma->vm_mm, vma->vm_start, vma->vm_end); - preempt_enable(); + flush_cache_pages(vma, vma->vm_mm, start, end); } void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) { if (pfn_valid(pfn)) { - if (likely(vma->vm_mm->context)) { - flush_tlb_page(vma, vmaddr); + flush_tlb_page(vma, vmaddr); + if (likely(vma->vm_mm->context.space_id)) { __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } else { __purge_cache_page(vma, vmaddr, PFN_PHYS(pfn)); @@ -633,6 +624,7 @@ void flush_kernel_vmap_range(void *vaddr, int size) { unsigned long start = (unsigned long)vaddr; unsigned long end = start + size; + unsigned long flags, physaddr; if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && (unsigned long)size >= parisc_cache_flush_threshold) { @@ -641,8 +633,14 @@ void flush_kernel_vmap_range(void *vaddr, int size) return; } - flush_kernel_dcache_range_asm(start, end); - flush_tlb_kernel_range(start, end); + while (start < end) { + physaddr = lpa(start); + purge_tlb_start(flags); + pdtlb(SR_KERNEL, start); + purge_tlb_end(flags); + flush_dcache_page_asm(physaddr, start); + start += PAGE_SIZE; + } } EXPORT_SYMBOL(flush_kernel_vmap_range); @@ -650,6 +648,7 @@ void invalidate_kernel_vmap_range(void *vaddr, int size) { unsigned long start = (unsigned long)vaddr; unsigned long end = start + size; + unsigned long flags, physaddr; if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && (unsigned long)size >= parisc_cache_flush_threshold) { @@ -658,7 +657,13 @@ void invalidate_kernel_vmap_range(void *vaddr, int size) return; } - purge_kernel_dcache_range_asm(start, end); - flush_tlb_kernel_range(start, end); + while (start < end) { + physaddr = lpa(start); + purge_tlb_start(flags); + pdtlb(SR_KERNEL, start); + purge_tlb_end(flags); + purge_dcache_page_asm(physaddr, start); + start += PAGE_SIZE; + } } EXPORT_SYMBOL(invalidate_kernel_vmap_range); diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 6e9cdb269862..ecf50159359e 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -1288,74 +1288,12 @@ nadtlb_check_alias_20: nadtlb_emulate: /* - * Non access misses can be caused by fdc,fic,pdc,lpa,probe and - * probei instructions. We don't want to fault for these - * instructions (not only does it not make sense, it can cause - * deadlocks, since some flushes are done with the mmap - * semaphore held). If the translation doesn't exist, we can't - * insert a translation, so have to emulate the side effects - * of the instruction. Since we don't insert a translation - * we can get a lot of faults during a flush loop, so it makes - * sense to try to do it here with minimum overhead. We only - * emulate fdc,fic,pdc,probew,prober instructions whose base - * and index registers are not shadowed. We defer everything - * else to the "slow" path. + * Non-access misses can be caused by fdc,fic,pdc,lpa,probe and + * probei instructions. The kernel no longer faults doing flushes. + * Use of lpa and probe instructions is rare. Given the issue + * with shadow registers, we defer everything to the "slow" path. */ - - mfctl %cr19,%r9 /* Get iir */ - - /* PA 2.0 Arch Ref. Book pg 382 has a good description of the insn bits. - Checks for fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw */ - - /* Checks for fdc,fdce,pdc,"fic,4f" only */ - ldi 0x280,%r16 - and %r9,%r16,%r17 - cmpb,<>,n %r16,%r17,nadtlb_probe_check - bb,>=,n %r9,26,nadtlb_nullify /* m bit not set, just nullify */ - BL get_register,%r25 - extrw,u %r9,15,5,%r8 /* Get index register # */ - cmpib,COND(=),n -1,%r1,nadtlb_fault /* have to use slow path */ - copy %r1,%r24 - BL get_register,%r25 - extrw,u %r9,10,5,%r8 /* Get base register # */ - cmpib,COND(=),n -1,%r1,nadtlb_fault /* have to use slow path */ - BL set_register,%r25 - add,l %r1,%r24,%r1 /* doesn't affect c/b bits */ - -nadtlb_nullify: - mfctl %ipsw,%r8 - ldil L%PSW_N,%r9 - or %r8,%r9,%r8 /* Set PSW_N */ - mtctl %r8,%ipsw - - rfir - nop - - /* - When there is no translation for the probe address then we - must nullify the insn and return zero in the target register. - This will indicate to the calling code that it does not have - write/read privileges to this address. - - This should technically work for prober and probew in PA 1.1, - and also probe,r and probe,w in PA 2.0 - - WARNING: USE ONLY NON-SHADOW REGISTERS WITH PROBE INSN! - THE SLOW-PATH EMULATION HAS NOT BEEN WRITTEN YET. - - */ -nadtlb_probe_check: - ldi 0x80,%r16 - and %r9,%r16,%r17 - cmpb,<>,n %r16,%r17,nadtlb_fault /* Must be probe,[rw]*/ - BL get_register,%r25 /* Find the target register */ - extrw,u %r9,31,5,%r8 /* Get target register */ - cmpib,COND(=),n -1,%r1,nadtlb_fault /* have to use slow path */ - BL set_register,%r25 - copy %r0,%r1 /* Write zero to target register */ - b nadtlb_nullify /* Nullify return insn */ - nop - + b,n nadtlb_fault #ifdef CONFIG_64BIT itlb_miss_20w: diff --git a/arch/parisc/kernel/kprobes.c b/arch/parisc/kernel/kprobes.c index e2bdb5a5f93e..3343d2fb7889 100644 --- a/arch/parisc/kernel/kprobes.c +++ b/arch/parisc/kernel/kprobes.c @@ -5,6 +5,7 @@ * PA-RISC kprobes implementation * * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org> + * Copyright (c) 2022 Helge Deller <deller@gmx.de> */ #include <linux/types.h> @@ -25,9 +26,14 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if (!p->ainsn.insn) return -ENOMEM; - memcpy(p->ainsn.insn, p->addr, - MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + /* + * Set up new instructions. Second break instruction will + * trigger call of parisc_kprobe_ss_handler(). + */ p->opcode = *p->addr; + p->ainsn.insn[0] = p->opcode; + p->ainsn.insn[1] = PARISC_KPROBES_BREAK_INSN2; + flush_insn_slot(p); return 0; } @@ -73,9 +79,7 @@ static void __kprobes setup_singlestep(struct kprobe *p, { kcb->iaoq[0] = regs->iaoq[0]; kcb->iaoq[1] = regs->iaoq[1]; - regs->iaoq[0] = (unsigned long)p->ainsn.insn; - mtctl(0, 0); - regs->gr[0] |= PSW_R; + instruction_pointer_set(regs, (unsigned long)p->ainsn.insn); } int __kprobes parisc_kprobe_break_handler(struct pt_regs *regs) @@ -165,9 +169,8 @@ int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs) regs->iaoq[0] = kcb->iaoq[1]; break; default: - regs->iaoq[1] = kcb->iaoq[0]; - regs->iaoq[1] += (regs->iaoq[1] - regs->iaoq[0]) + 4; regs->iaoq[0] = kcb->iaoq[1]; + regs->iaoq[1] = regs->iaoq[0] + 4; break; } kcb->kprobe_status = KPROBE_HIT_SSDONE; @@ -191,14 +194,17 @@ static struct kprobe trampoline_p = { static int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - unsigned long orig_ret_address; - - orig_ret_address = __kretprobe_trampoline_handler(regs, NULL); - instruction_pointer_set(regs, orig_ret_address); + __kretprobe_trampoline_handler(regs, NULL); return 1; } +void arch_kretprobe_fixup_return(struct pt_regs *regs, + kprobe_opcode_t *correct_ret_addr) +{ + regs->gr[2] = (unsigned long)correct_ret_addr; +} + void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 36a57aa38e87..160996f2198e 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -91,7 +91,7 @@ static inline int map_pte_uncached(pte_t * pte, printk(KERN_ERR "map_pte_uncached: page already exists\n"); purge_tlb_start(flags); set_pte(pte, __mk_pte(*paddr_ptr, PAGE_KERNEL_UNC)); - pdtlb_kernel(orig_vaddr); + pdtlb(SR_KERNEL, orig_vaddr); purge_tlb_end(flags); vaddr += PAGE_SIZE; orig_vaddr += PAGE_SIZE; @@ -175,7 +175,7 @@ static inline void unmap_uncached_pte(pmd_t * pmd, unsigned long vaddr, pte_clear(&init_mm, vaddr, pte); purge_tlb_start(flags); - pdtlb_kernel(orig_vaddr); + pdtlb(SR_KERNEL, orig_vaddr); purge_tlb_end(flags); vaddr += PAGE_SIZE; orig_vaddr += PAGE_SIZE; diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 46b1050640b8..24443908f905 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -1,16 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* - * linux/arch/parisc/kernel/signal.c: Architecture-specific signal - * handling support. + * PA-RISC architecture-specific signal handling support. * * Copyright (C) 2000 David Huggins-Daines <dhd@debian.org> * Copyright (C) 2000 Linuxcare, Inc. + * Copyright (C) 2000-2022 Helge Deller <deller@gmx.de> + * Copyright (C) 2022 John David Anglin <dave.anglin@bell.net> * * Based on the ia64, i386, and alpha versions. - * - * Like the IA-64, we are a recent enough port (we are *starting* - * with glibc2.2) that we do not need to support the old non-realtime - * Linux signals. Therefore we don't. */ #include <linux/sched.h> @@ -32,6 +29,7 @@ #include <linux/uaccess.h> #include <asm/cacheflush.h> #include <asm/asm-offsets.h> +#include <asm/vdso.h> #ifdef CONFIG_COMPAT #include "signal32.h" @@ -59,14 +57,6 @@ * Do a signal return - restore sigcontext. */ -/* Trampoline for calling rt_sigreturn() */ -#define INSN_LDI_R25_0 0x34190000 /* ldi 0,%r25 (in_syscall=0) */ -#define INSN_LDI_R25_1 0x34190002 /* ldi 1,%r25 (in_syscall=1) */ -#define INSN_LDI_R20 0x3414015a /* ldi __NR_rt_sigreturn,%r20 */ -#define INSN_BLE_SR2_R0 0xe4008200 /* be,l 0x100(%sr2,%r0),%sr0,%r31 */ -/* For debugging */ -#define INSN_DIE_HORRIBLY 0x68000ccc /* stw %r0,0x666(%sr0,%r0) */ - static long restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) { @@ -77,9 +67,9 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) err |= __copy_from_user(regs->iaoq, sc->sc_iaoq, sizeof(regs->iaoq)); err |= __copy_from_user(regs->iasq, sc->sc_iasq, sizeof(regs->iasq)); err |= __get_user(regs->sar, &sc->sc_sar); - DBG(2,"restore_sigcontext: iaoq is %#lx / %#lx\n", - regs->iaoq[0],regs->iaoq[1]); - DBG(2,"restore_sigcontext: r28 is %ld\n", regs->gr[28]); + DBG(2, "%s: iaoq is %#lx / %#lx\n", + __func__, regs->iaoq[0], regs->iaoq[1]); + DBG(2, "%s: r28 is %ld\n", __func__, regs->gr[28]); return err; } @@ -102,7 +92,7 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall) /* Unwind the user stack to get the rt_sigframe structure. */ frame = (struct rt_sigframe __user *) (usp - sigframe_size); - DBG(2,"sys_rt_sigreturn: frame is %p\n", frame); + DBG(2, "%s: frame is %p pid %d\n", __func__, frame, task_pid_nr(current)); regs->orig_r28 = 1; /* no restarts for sigreturn */ @@ -110,7 +100,6 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall) compat_frame = (struct compat_rt_sigframe __user *)frame; if (is_compat_task()) { - DBG(2,"sys_rt_sigreturn: ELF32 process.\n"); if (get_compat_sigset(&set, &compat_frame->uc.uc_sigmask)) goto give_sigsegv; } else @@ -125,25 +114,25 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall) /* Good thing we saved the old gr[30], eh? */ #ifdef CONFIG_64BIT if (is_compat_task()) { - DBG(1,"sys_rt_sigreturn: compat_frame->uc.uc_mcontext 0x%p\n", - &compat_frame->uc.uc_mcontext); + DBG(1, "%s: compat_frame->uc.uc_mcontext 0x%p\n", + __func__, &compat_frame->uc.uc_mcontext); // FIXME: Load upper half from register file if (restore_sigcontext32(&compat_frame->uc.uc_mcontext, &compat_frame->regs, regs)) goto give_sigsegv; - DBG(1,"sys_rt_sigreturn: usp %#08lx stack 0x%p\n", - usp, &compat_frame->uc.uc_stack); + DBG(1, "%s: usp %#08lx stack 0x%p\n", + __func__, usp, &compat_frame->uc.uc_stack); if (compat_restore_altstack(&compat_frame->uc.uc_stack)) goto give_sigsegv; } else #endif { - DBG(1,"sys_rt_sigreturn: frame->uc.uc_mcontext 0x%p\n", - &frame->uc.uc_mcontext); + DBG(1, "%s: frame->uc.uc_mcontext 0x%p\n", + __func__, &frame->uc.uc_mcontext); if (restore_sigcontext(&frame->uc.uc_mcontext, regs)) goto give_sigsegv; - DBG(1,"sys_rt_sigreturn: usp %#08lx stack 0x%p\n", - usp, &frame->uc.uc_stack); + DBG(1, "%s: usp %#08lx stack 0x%p\n", + __func__, usp, &frame->uc.uc_stack); if (restore_altstack(&frame->uc.uc_stack)) goto give_sigsegv; } @@ -155,14 +144,11 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall) */ if (in_syscall) regs->gr[31] = regs->iaoq[0]; -#if DEBUG_SIG - DBG(1,"sys_rt_sigreturn: returning to %#lx, DUMPING REGS:\n", regs->iaoq[0]); - show_regs(regs); -#endif + return; give_sigsegv: - DBG(1,"sys_rt_sigreturn: Sending SIGSEGV\n"); + DBG(1, "%s: Sending SIGSEGV\n", __func__); force_sig(SIGSEGV); return; } @@ -177,15 +163,15 @@ get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size) /*FIXME: ELF32 vs. ELF64 has different frame_size, but since we don't use the parameter it doesn't matter */ - DBG(1,"get_sigframe: ka = %#lx, sp = %#lx, frame_size = %#lx\n", - (unsigned long)ka, sp, frame_size); + DBG(1, "%s: ka = %#lx, sp = %#lx, frame_size = %zu\n", + __func__, (unsigned long)ka, sp, frame_size); /* Align alternate stack and reserve 64 bytes for the signal handler's frame marker. */ if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && ! sas_ss_flags(sp)) sp = (current->sas_ss_sp + 0x7f) & ~0x3f; /* Stacks grow up! */ - DBG(1,"get_sigframe: Returning sp = %#lx\n", (unsigned long)sp); + DBG(1, "%s: Returning sp = %#lx\n", __func__, (unsigned long)sp); return (void __user *) sp; /* Stacks grow up. Fun. */ } @@ -205,20 +191,20 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, int in_sysc err |= __put_user(regs->gr[31]+4, &sc->sc_iaoq[1]); err |= __put_user(regs->sr[3], &sc->sc_iasq[0]); err |= __put_user(regs->sr[3], &sc->sc_iasq[1]); - DBG(1,"setup_sigcontext: iaoq %#lx / %#lx (in syscall)\n", - regs->gr[31], regs->gr[31]+4); + DBG(1, "%s: iaoq %#lx / %#lx (in syscall)\n", + __func__, regs->gr[31], regs->gr[31]+4); } else { err |= __copy_to_user(sc->sc_iaoq, regs->iaoq, sizeof(regs->iaoq)); err |= __copy_to_user(sc->sc_iasq, regs->iasq, sizeof(regs->iasq)); - DBG(1,"setup_sigcontext: iaoq %#lx / %#lx (not in syscall)\n", - regs->iaoq[0], regs->iaoq[1]); + DBG(1, "%s: iaoq %#lx / %#lx (not in syscall)\n", + __func__, regs->iaoq[0], regs->iaoq[1]); } err |= __put_user(flags, &sc->sc_flags); err |= __copy_to_user(sc->sc_gr, regs->gr, sizeof(regs->gr)); err |= __copy_to_user(sc->sc_fr, regs->fr, sizeof(regs->fr)); err |= __put_user(regs->sar, &sc->sc_sar); - DBG(1,"setup_sigcontext: r28 is %ld\n", regs->gr[28]); + DBG(1, "%s: r28 is %ld\n", __func__, regs->gr[28]); return err; } @@ -230,7 +216,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, struct rt_sigframe __user *frame; unsigned long rp, usp; unsigned long haddr, sigframe_size; - unsigned long start, end; + unsigned long start; int err = 0; #ifdef CONFIG_64BIT struct compat_rt_sigframe __user * compat_frame; @@ -247,8 +233,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, #endif frame = get_sigframe(&ksig->ka, usp, sigframe_size); - DBG(1,"SETUP_RT_FRAME: START\n"); - DBG(1,"setup_rt_frame: frame %p info %p\n", frame, ksig->info); + DBG(1, "%s: frame %p info %p\n", __func__, frame, &ksig->info); start = (unsigned long) frame; if (start >= user_addr_max() - sigframe_size) @@ -259,11 +244,12 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, compat_frame = (struct compat_rt_sigframe __user *)frame; if (is_compat_task()) { - DBG(1,"setup_rt_frame: frame->info = 0x%p\n", &compat_frame->info); + DBG(1, "%s: frame->info = 0x%p\n", __func__, &compat_frame->info); err |= copy_siginfo_to_user32(&compat_frame->info, &ksig->info); err |= __compat_save_altstack( &compat_frame->uc.uc_stack, regs->gr[30]); - DBG(1,"setup_rt_frame: frame->uc = 0x%p\n", &compat_frame->uc); - DBG(1,"setup_rt_frame: frame->uc.uc_mcontext = 0x%p\n", &compat_frame->uc.uc_mcontext); + DBG(1, "%s: frame->uc = 0x%p\n", __func__, &compat_frame->uc); + DBG(1, "%s: frame->uc.uc_mcontext = 0x%p\n", + __func__, &compat_frame->uc.uc_mcontext); err |= setup_sigcontext32(&compat_frame->uc.uc_mcontext, &compat_frame->regs, regs, in_syscall); err |= put_compat_sigset(&compat_frame->uc.uc_sigmask, set, @@ -271,11 +257,12 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, } else #endif { - DBG(1,"setup_rt_frame: frame->info = 0x%p\n", &frame->info); + DBG(1, "%s: frame->info = 0x%p\n", __func__, &frame->info); err |= copy_siginfo_to_user(&frame->info, &ksig->info); err |= __save_altstack(&frame->uc.uc_stack, regs->gr[30]); - DBG(1,"setup_rt_frame: frame->uc = 0x%p\n", &frame->uc); - DBG(1,"setup_rt_frame: frame->uc.uc_mcontext = 0x%p\n", &frame->uc.uc_mcontext); + DBG(1, "%s: frame->uc = 0x%p\n", __func__, &frame->uc); + DBG(1, "%s: frame->uc.uc_mcontext = 0x%p\n", + __func__, &frame->uc.uc_mcontext); err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, in_syscall); /* FIXME: Should probably be converted as well for the compat case */ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); @@ -284,32 +271,15 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, if (err) return -EFAULT; - /* Set up to return from userspace. If provided, use a stub - already in userspace. The first words of tramp are used to - save the previous sigrestartblock trampoline that might be - on the stack. We start the sigreturn trampoline at - SIGRESTARTBLOCK_TRAMP+X. */ - err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0, - &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]); - err |= __put_user(INSN_LDI_R20, - &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]); - err |= __put_user(INSN_BLE_SR2_R0, - &frame->tramp[SIGRESTARTBLOCK_TRAMP+2]); - err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]); - - start = (unsigned long) &frame->tramp[0]; - end = (unsigned long) &frame->tramp[TRAMP_SIZE]; - flush_user_dcache_range_asm(start, end); - flush_user_icache_range_asm(start, end); - - /* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP - * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP - * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP - */ - rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP]; +#ifdef CONFIG_64BIT + if (!is_compat_task()) + rp = VDSO64_SYMBOL(current, sigtramp_rt); + else +#endif + rp = VDSO32_SYMBOL(current, sigtramp_rt); - if (err) - return -EFAULT; + if (in_syscall) + rp += 4*4; /* skip 4 instructions and start at ldi 1,%r25 */ haddr = A(ksig->ka.sa.sa_handler); /* The sa_handler may be a pointer to a function descriptor */ @@ -340,8 +310,8 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, haddr = fdesc.addr; regs->gr[19] = fdesc.gp; - DBG(1,"setup_rt_frame: 64 bit signal, exe=%#lx, r19=%#lx, in_syscall=%d\n", - haddr, regs->gr[19], in_syscall); + DBG(1, "%s: 64 bit signal, exe=%#lx, r19=%#lx, in_syscall=%d\n", + __func__, haddr, regs->gr[19], in_syscall); } #endif @@ -351,7 +321,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, regs->gr[31] = haddr; #ifdef CONFIG_64BIT if (!test_thread_flag(TIF_32BIT)) - sigframe_size |= 1; + sigframe_size |= 1; /* XXX ???? */ #endif } else { unsigned long psw = USER_PSW; @@ -373,11 +343,11 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, } regs->gr[0] = psw; - regs->iaoq[0] = haddr | 3; + regs->iaoq[0] = haddr | PRIV_USER; regs->iaoq[1] = regs->iaoq[0] + 4; } - regs->gr[2] = rp; /* userland return pointer */ + regs->gr[2] = rp; /* userland return pointer */ regs->gr[26] = ksig->sig; /* signal number */ #ifdef CONFIG_64BIT @@ -391,15 +361,15 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, regs->gr[24] = A(&frame->uc); /* ucontext pointer */ } - DBG(1,"setup_rt_frame: making sigreturn frame: %#lx + %#lx = %#lx\n", + DBG(1, "%s: making sigreturn frame: %#lx + %#lx = %#lx\n", __func__, regs->gr[30], sigframe_size, regs->gr[30] + sigframe_size); /* Raise the user stack pointer to make a proper call frame. */ regs->gr[30] = (A(frame) + sigframe_size); - DBG(1,"setup_rt_frame: sig deliver (%s,%d) frame=0x%p sp=%#lx iaoq=%#lx/%#lx rp=%#lx\n", - current->comm, current->pid, frame, regs->gr[30], + DBG(1, "%s: sig deliver (%s,%d) frame=0x%p sp=%#lx iaoq=%#lx/%#lx rp=%#lx\n", + __func__, current->comm, current->pid, frame, regs->gr[30], regs->iaoq[0], regs->iaoq[1], rp); return 0; @@ -415,8 +385,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs, int in_syscall) int ret; sigset_t *oldset = sigmask_to_save(); - DBG(1,"handle_signal: sig=%ld, ka=%p, info=%p, oldset=%p, regs=%p\n", - ksig->sig, ksig->ka, ksig->info, oldset, regs); + DBG(1, "%s: sig=%d, ka=%p, info=%p, oldset=%p, regs=%p\n", + __func__, ksig->sig, &ksig->ka, &ksig->info, oldset, regs); /* Set up the stack frame */ ret = setup_rt_frame(ksig, oldset, regs, in_syscall); @@ -424,8 +394,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs, int in_syscall) signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP) || test_thread_flag(TIF_BLOCKSTEP)); - DBG(1,KERN_DEBUG "do_signal: Exit (success), regs->gr[28] = %ld\n", - regs->gr[28]); + DBG(1, "%s: Exit (success), regs->gr[28] = %ld\n", + __func__, regs->gr[28]); } /* @@ -483,21 +453,27 @@ syscall_restart(struct pt_regs *regs, struct k_sigaction *ka) if (regs->orig_r28) return; regs->orig_r28 = 1; /* no more restarts */ + + DBG(1, "%s: orig_r28 = %ld pid %d r20 %ld\n", + __func__, regs->orig_r28, task_pid_nr(current), regs->gr[20]); + /* Check the return code */ switch (regs->gr[28]) { case -ERESTART_RESTARTBLOCK: case -ERESTARTNOHAND: - DBG(1,"ERESTARTNOHAND: returning -EINTR\n"); + DBG(1, "%s: ERESTARTNOHAND: returning -EINTR\n", __func__); regs->gr[28] = -EINTR; break; case -ERESTARTSYS: if (!(ka->sa.sa_flags & SA_RESTART)) { - DBG(1,"ERESTARTSYS: putting -EINTR\n"); + DBG(1, "%s: ERESTARTSYS: putting -EINTR pid %d\n", + __func__, task_pid_nr(current)); regs->gr[28] = -EINTR; break; } fallthrough; case -ERESTARTNOINTR: + DBG(1, "%s: %ld\n", __func__, regs->gr[28]); check_syscallno_in_delay_branch(regs); break; } @@ -509,50 +485,52 @@ insert_restart_trampoline(struct pt_regs *regs) if (regs->orig_r28) return; regs->orig_r28 = 1; /* no more restarts */ - switch(regs->gr[28]) { + + DBG(2, "%s: gr28 = %ld pid %d\n", + __func__, regs->gr[28], task_pid_nr(current)); + + switch (regs->gr[28]) { case -ERESTART_RESTARTBLOCK: { /* Restart the system call - no handlers present */ unsigned int *usp = (unsigned int *)regs->gr[30]; - unsigned long start = (unsigned long) &usp[2]; - unsigned long end = (unsigned long) &usp[5]; + unsigned long rp; long err = 0; /* check that we don't exceed the stack */ if (A(&usp[0]) >= user_addr_max() - 5 * sizeof(int)) return; - /* Setup a trampoline to restart the syscall - * with __NR_restart_syscall + /* Call trampoline in vdso to restart the syscall + * with __NR_restart_syscall. + * Original return addresses are on stack like this: * * 0: <return address (orig r31)> * 4: <2nd half for 64-bit> - * 8: ldw 0(%sp), %r31 - * 12: be 0x100(%sr2, %r0) - * 16: ldi __NR_restart_syscall, %r20 */ #ifdef CONFIG_64BIT - err |= put_user(regs->gr[31] >> 32, &usp[0]); - err |= put_user(regs->gr[31] & 0xffffffff, &usp[1]); - err |= put_user(0x0fc010df, &usp[2]); -#else - err |= put_user(regs->gr[31], &usp[0]); - err |= put_user(0x0fc0109f, &usp[2]); + if (!is_compat_task()) { + err |= put_user(regs->gr[31] >> 32, &usp[0]); + err |= put_user(regs->gr[31] & 0xffffffff, &usp[1]); + rp = VDSO64_SYMBOL(current, restart_syscall); + } else #endif - err |= put_user(0xe0008200, &usp[3]); - err |= put_user(0x34140000, &usp[4]); - + { + err |= put_user(regs->gr[31], &usp[0]); + rp = VDSO32_SYMBOL(current, restart_syscall); + } WARN_ON(err); - /* flush data/instruction cache for new insns */ - flush_user_dcache_range_asm(start, end); - flush_user_icache_range_asm(start, end); - - regs->gr[31] = regs->gr[30] + 8; + regs->gr[31] = rp; + DBG(1, "%s: ERESTART_RESTARTBLOCK\n", __func__); return; } + case -EINTR: + /* ok, was handled before and should be returned. */ + break; case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: + DBG(1, "%s: Type %ld\n", __func__, regs->gr[28]); check_syscallno_in_delay_branch(regs); return; default: @@ -567,30 +545,35 @@ insert_restart_trampoline(struct pt_regs *regs) * registers). As noted below, the syscall number gets restored for * us due to the magic of delayed branching. */ -asmlinkage void -do_signal(struct pt_regs *regs, long in_syscall) +static void do_signal(struct pt_regs *regs, long in_syscall) { struct ksignal ksig; + int restart_syscall; + bool has_handler; - DBG(1,"\ndo_signal: regs=0x%p, sr7 %#lx, in_syscall=%d\n", - regs, regs->sr[7], in_syscall); + has_handler = get_signal(&ksig); - if (get_signal(&ksig)) { - DBG(3,"do_signal: signr = %d, regs->gr[28] = %ld\n", signr, regs->gr[28]); + restart_syscall = 0; + if (in_syscall) + restart_syscall = 1; + + if (has_handler) { /* Restart a system call if necessary. */ - if (in_syscall) + if (restart_syscall) syscall_restart(regs, &ksig.ka); handle_signal(&ksig, regs, in_syscall); + DBG(1, "%s: Handled signal pid %d\n", + __func__, task_pid_nr(current)); return; } - /* Did we come from a system call? */ - if (in_syscall) + /* Do we need to restart the system call? */ + if (restart_syscall) insert_restart_trampoline(regs); - DBG(1,"do_signal: Exit (not delivered), regs->gr[28] = %ld\n", - regs->gr[28]); + DBG(1, "%s: Exit (not delivered), regs->gr[28] = %ld orig_r28 = %ld pid %d\n", + __func__, regs->gr[28], regs->orig_r28, task_pid_nr(current)); restore_saved_sigmask(); } diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h index f166250f2d06..c03eb1ed4c53 100644 --- a/arch/parisc/kernel/signal32.h +++ b/arch/parisc/kernel/signal32.h @@ -36,21 +36,12 @@ struct compat_regfile { compat_int_t rf_sar; }; -#define COMPAT_SIGRETURN_TRAMP 4 -#define COMPAT_SIGRESTARTBLOCK_TRAMP 5 -#define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \ - COMPAT_SIGRESTARTBLOCK_TRAMP) - struct compat_rt_sigframe { - /* XXX: Must match trampoline size in arch/parisc/kernel/signal.c - Secondary to that it must protect the ERESTART_RESTARTBLOCK - trampoline we left on the stack (we were bad and didn't - change sp so we could run really fast.) */ - compat_uint_t tramp[COMPAT_TRAMP_SIZE]; - compat_siginfo_t info; - struct compat_ucontext uc; - /* Hidden location of truncated registers, *must* be last. */ - struct compat_regfile regs; + unsigned int tramp[2]; /* holds original return address */ + compat_siginfo_t info; + struct compat_ucontext uc; + /* Hidden location of truncated registers, *must* be last. */ + struct compat_regfile regs; }; /* diff --git a/arch/parisc/kernel/topology.c b/arch/parisc/kernel/topology.c index 0a10e4ddc528..e88a6ce7c96d 100644 --- a/arch/parisc/kernel/topology.c +++ b/arch/parisc/kernel/topology.c @@ -101,8 +101,8 @@ void __init store_cpu_topology(unsigned int cpuid) update_siblings_masks(cpuid); - pr_info("CPU%u: thread %d, cpu %d, socket %d\n", - cpuid, cpu_topology[cpuid].thread_id, + pr_info("CPU%u: cpu core %d of socket %d\n", + cpuid, cpu_topology[cpuid].core_id, cpu_topology[cpuid].socket_id); } diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index b6fdebddc8e9..a6e61cf2cad0 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -302,7 +302,10 @@ static void handle_break(struct pt_regs *regs) parisc_kprobe_break_handler(regs); return; } - + if (unlikely(iir == PARISC_KPROBES_BREAK_INSN2)) { + parisc_kprobe_ss_handler(regs); + return; + } #endif #ifdef CONFIG_KGDB @@ -539,11 +542,6 @@ void notrace handle_interruption(int code, struct pt_regs *regs) /* Recovery counter trap */ regs->gr[0] &= ~PSW_R; -#ifdef CONFIG_KPROBES - if (parisc_kprobe_ss_handler(regs)) - return; -#endif - #ifdef CONFIG_KGDB if (kgdb_single_step) { kgdb_handle_exception(0, SIGTRAP, 0, regs); @@ -662,6 +660,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs) by hand. Technically we need to emulate: fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw */ + if (code == 17 && handle_nadtlb_fault(regs)) + return; fault_address = regs->ior; fault_space = regs->isr; break; diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 286cec4d86d7..ed1e88a74dc4 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -3,14 +3,11 @@ * Unaligned memory access handler * * Copyright (C) 2001 Randolph Chung <tausq@debian.org> + * Copyright (C) 2022 Helge Deller <deller@gmx.de> * Significantly tweaked by LaMont Jones <lamont@debian.org> */ -#include <linux/jiffies.h> -#include <linux/kernel.h> -#include <linux/module.h> #include <linux/sched/signal.h> -#include <linux/sched/debug.h> #include <linux/signal.h> #include <linux/ratelimit.h> #include <linux/uaccess.h> @@ -25,18 +22,7 @@ #define DPRINTF(fmt, args...) #endif -#ifdef CONFIG_64BIT -#define RFMT "%016lx" -#else -#define RFMT "%08lx" -#endif - -#define FIXUP_BRANCH(lbl) \ - "\tldil L%%" #lbl ", %%r1\n" \ - "\tldo R%%" #lbl "(%%r1), %%r1\n" \ - "\tbv,n %%r0(%%r1)\n" -/* If you use FIXUP_BRANCH, then you must list this clobber */ -#define FIXUP_BRANCH_CLOBBER "r1" +#define RFMT "%#08lx" /* 1111 1100 0000 0000 0001 0011 1100 0000 */ #define OPCODE1(a,b,c) ((a)<<26|(b)<<12|(c)<<6) @@ -114,37 +100,30 @@ #define IM14(i) IM((i),14) #define ERR_NOTHANDLED -1 -#define ERR_PAGEFAULT -2 int unaligned_enabled __read_mostly = 1; static int emulate_ldh(struct pt_regs *regs, int toreg) { unsigned long saddr = regs->ior; - unsigned long val = 0; - int ret; + unsigned long val = 0, temp1; + ASM_EXCEPTIONTABLE_VAR(ret); DPRINTF("load " RFMT ":" RFMT " to r%d for 2 bytes\n", regs->isr, regs->ior, toreg); __asm__ __volatile__ ( " mtsp %4, %%sr1\n" -"1: ldbs 0(%%sr1,%3), %%r20\n" +"1: ldbs 0(%%sr1,%3), %2\n" "2: ldbs 1(%%sr1,%3), %0\n" -" depw %%r20, 23, 24, %0\n" -" copy %%r0, %1\n" +" depw %2, 23, 24, %0\n" "3: \n" -" .section .fixup,\"ax\"\n" -"4: ldi -2, %1\n" - FIXUP_BRANCH(3b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b, 4b) - ASM_EXCEPTIONTABLE_ENTRY(2b, 4b) - : "=r" (val), "=r" (ret) - : "0" (val), "r" (saddr), "r" (regs->isr) - : "r20", FIXUP_BRANCH_CLOBBER ); + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + : "+r" (val), "+r" (ret), "=&r" (temp1) + : "r" (saddr), "r" (regs->isr) ); - DPRINTF("val = 0x" RFMT "\n", val); + DPRINTF("val = " RFMT "\n", val); if (toreg) regs->gr[toreg] = val; @@ -155,34 +134,28 @@ static int emulate_ldh(struct pt_regs *regs, int toreg) static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) { unsigned long saddr = regs->ior; - unsigned long val = 0; - int ret; + unsigned long val = 0, temp1, temp2; + ASM_EXCEPTIONTABLE_VAR(ret); DPRINTF("load " RFMT ":" RFMT " to r%d for 4 bytes\n", regs->isr, regs->ior, toreg); __asm__ __volatile__ ( -" zdep %3,28,2,%%r19\n" /* r19=(ofs&3)*8 */ -" mtsp %4, %%sr1\n" -" depw %%r0,31,2,%3\n" -"1: ldw 0(%%sr1,%3),%0\n" -"2: ldw 4(%%sr1,%3),%%r20\n" -" subi 32,%%r19,%%r19\n" -" mtctl %%r19,11\n" -" vshd %0,%%r20,%0\n" -" copy %%r0, %1\n" +" zdep %4,28,2,%2\n" /* r19=(ofs&3)*8 */ +" mtsp %5, %%sr1\n" +" depw %%r0,31,2,%4\n" +"1: ldw 0(%%sr1,%4),%0\n" +"2: ldw 4(%%sr1,%4),%3\n" +" subi 32,%4,%2\n" +" mtctl %2,11\n" +" vshd %0,%3,%0\n" "3: \n" -" .section .fixup,\"ax\"\n" -"4: ldi -2, %1\n" - FIXUP_BRANCH(3b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b, 4b) - ASM_EXCEPTIONTABLE_ENTRY(2b, 4b) - : "=r" (val), "=r" (ret) - : "0" (val), "r" (saddr), "r" (regs->isr) - : "r19", "r20", FIXUP_BRANCH_CLOBBER ); + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + : "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2) + : "r" (saddr), "r" (regs->isr) ); - DPRINTF("val = 0x" RFMT "\n", val); + DPRINTF("val = " RFMT "\n", val); if (flop) ((__u32*)(regs->fr))[toreg] = val; @@ -195,16 +168,15 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) { unsigned long saddr = regs->ior; __u64 val = 0; - int ret; + ASM_EXCEPTIONTABLE_VAR(ret); DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n", regs->isr, regs->ior, toreg); -#ifdef CONFIG_PA20 -#ifndef CONFIG_64BIT - if (!flop) - return -1; -#endif + if (!IS_ENABLED(CONFIG_64BIT) && !flop) + return ERR_NOTHANDLED; + +#ifdef CONFIG_64BIT __asm__ __volatile__ ( " depd,z %3,60,3,%%r19\n" /* r19=(ofs&7)*8 */ " mtsp %4, %%sr1\n" @@ -214,44 +186,32 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " subi 64,%%r19,%%r19\n" " mtsar %%r19\n" " shrpd %0,%%r20,%%sar,%0\n" -" copy %%r0, %1\n" "3: \n" -" .section .fixup,\"ax\"\n" -"4: ldi -2, %1\n" - FIXUP_BRANCH(3b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b,4b) - ASM_EXCEPTIONTABLE_ENTRY(2b,4b) - : "=r" (val), "=r" (ret) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + : "=r" (val), "+r" (ret) : "0" (val), "r" (saddr), "r" (regs->isr) - : "r19", "r20", FIXUP_BRANCH_CLOBBER ); + : "r19", "r20" ); #else { - unsigned long valh=0,vall=0; + unsigned long shift, temp1; __asm__ __volatile__ ( -" zdep %5,29,2,%%r19\n" /* r19=(ofs&3)*8 */ -" mtsp %6, %%sr1\n" -" dep %%r0,31,2,%5\n" -"1: ldw 0(%%sr1,%5),%0\n" -"2: ldw 4(%%sr1,%5),%1\n" -"3: ldw 8(%%sr1,%5),%%r20\n" -" subi 32,%%r19,%%r19\n" -" mtsar %%r19\n" -" vshd %0,%1,%0\n" -" vshd %1,%%r20,%1\n" -" copy %%r0, %2\n" +" zdep %2,29,2,%3\n" /* r19=(ofs&3)*8 */ +" mtsp %5, %%sr1\n" +" dep %%r0,31,2,%2\n" +"1: ldw 0(%%sr1,%2),%0\n" +"2: ldw 4(%%sr1,%2),%R0\n" +"3: ldw 8(%%sr1,%2),%4\n" +" subi 32,%3,%3\n" +" mtsar %3\n" +" vshd %0,%R0,%0\n" +" vshd %R0,%4,%R0\n" "4: \n" -" .section .fixup,\"ax\"\n" -"5: ldi -2, %2\n" - FIXUP_BRANCH(4b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b,5b) - ASM_EXCEPTIONTABLE_ENTRY(2b,5b) - ASM_EXCEPTIONTABLE_ENTRY(3b,5b) - : "=r" (valh), "=r" (vall), "=r" (ret) - : "0" (valh), "1" (vall), "r" (saddr), "r" (regs->isr) - : "r19", "r20", FIXUP_BRANCH_CLOBBER ); - val=((__u64)valh<<32)|(__u64)vall; + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b) + : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) + : "r" (regs->isr) ); } #endif @@ -267,31 +227,25 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) static int emulate_sth(struct pt_regs *regs, int frreg) { - unsigned long val = regs->gr[frreg]; - int ret; + unsigned long val = regs->gr[frreg], temp1; + ASM_EXCEPTIONTABLE_VAR(ret); if (!frreg) val = 0; - DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 2 bytes\n", frreg, + DPRINTF("store r%d (" RFMT ") to " RFMT ":" RFMT " for 2 bytes\n", frreg, val, regs->isr, regs->ior); __asm__ __volatile__ ( -" mtsp %3, %%sr1\n" -" extrw,u %1, 23, 8, %%r19\n" -"1: stb %1, 1(%%sr1, %2)\n" -"2: stb %%r19, 0(%%sr1, %2)\n" -" copy %%r0, %0\n" +" mtsp %4, %%sr1\n" +" extrw,u %2, 23, 8, %1\n" +"1: stb %1, 0(%%sr1, %3)\n" +"2: stb %2, 1(%%sr1, %3)\n" "3: \n" -" .section .fixup,\"ax\"\n" -"4: ldi -2, %0\n" - FIXUP_BRANCH(3b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b,4b) - ASM_EXCEPTIONTABLE_ENTRY(2b,4b) - : "=r" (ret) - : "r" (val), "r" (regs->ior), "r" (regs->isr) - : "r19", FIXUP_BRANCH_CLOBBER ); + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + : "+r" (ret), "=&r" (temp1) + : "r" (val), "r" (regs->ior), "r" (regs->isr) ); return ret; } @@ -299,7 +253,7 @@ static int emulate_sth(struct pt_regs *regs, int frreg) static int emulate_stw(struct pt_regs *regs, int frreg, int flop) { unsigned long val; - int ret; + ASM_EXCEPTIONTABLE_VAR(ret); if (flop) val = ((__u32*)(regs->fr))[frreg]; @@ -308,7 +262,7 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) else val = 0; - DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 4 bytes\n", frreg, + DPRINTF("store r%d (" RFMT ") to " RFMT ":" RFMT " for 4 bytes\n", frreg, val, regs->isr, regs->ior); @@ -328,24 +282,19 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) " or %%r1, %%r21, %%r21\n" " stw %%r20,0(%%sr1,%2)\n" " stw %%r21,4(%%sr1,%2)\n" -" copy %%r0, %0\n" "3: \n" -" .section .fixup,\"ax\"\n" -"4: ldi -2, %0\n" - FIXUP_BRANCH(3b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b,4b) - ASM_EXCEPTIONTABLE_ENTRY(2b,4b) - : "=r" (ret) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) - : "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER ); + : "r19", "r20", "r21", "r22", "r1" ); return ret; } static int emulate_std(struct pt_regs *regs, int frreg, int flop) { __u64 val; - int ret; + ASM_EXCEPTIONTABLE_VAR(ret); if (flop) val = regs->fr[frreg]; @@ -357,11 +306,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) DPRINTF("store r%d (0x%016llx) to " RFMT ":" RFMT " for 8 bytes\n", frreg, val, regs->isr, regs->ior); -#ifdef CONFIG_PA20 -#ifndef CONFIG_64BIT - if (!flop) - return -1; -#endif + if (!IS_ENABLED(CONFIG_64BIT) && !flop) + return ERR_NOTHANDLED; + +#ifdef CONFIG_64BIT __asm__ __volatile__ ( " mtsp %3, %%sr1\n" " depd,z %2, 60, 3, %%r19\n" @@ -378,19 +326,14 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) " or %%r1, %%r21, %%r21\n" "3: std %%r20,0(%%sr1,%2)\n" "4: std %%r21,8(%%sr1,%2)\n" -" copy %%r0, %0\n" "5: \n" -" .section .fixup,\"ax\"\n" -"6: ldi -2, %0\n" - FIXUP_BRANCH(5b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b,6b) - ASM_EXCEPTIONTABLE_ENTRY(2b,6b) - ASM_EXCEPTIONTABLE_ENTRY(3b,6b) - ASM_EXCEPTIONTABLE_ENTRY(4b,6b) - : "=r" (ret) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b) + : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) - : "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER ); + : "r19", "r20", "r21", "r22", "r1" ); #else { unsigned long valh=(val>>32),vall=(val&0xffffffffl); @@ -412,20 +355,15 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "3: stw %1,0(%%sr1,%3)\n" "4: stw %%r1,4(%%sr1,%3)\n" "5: stw %2,8(%%sr1,%3)\n" -" copy %%r0, %0\n" "6: \n" -" .section .fixup,\"ax\"\n" -"7: ldi -2, %0\n" - FIXUP_BRANCH(6b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b,7b) - ASM_EXCEPTIONTABLE_ENTRY(2b,7b) - ASM_EXCEPTIONTABLE_ENTRY(3b,7b) - ASM_EXCEPTIONTABLE_ENTRY(4b,7b) - ASM_EXCEPTIONTABLE_ENTRY(5b,7b) - : "=r" (ret) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b) + : "+r" (ret) : "r" (valh), "r" (vall), "r" (regs->ior), "r" (regs->isr) - : "r19", "r20", "r21", "r1", FIXUP_BRANCH_CLOBBER ); + : "r19", "r20", "r21", "r1" ); } #endif @@ -438,7 +376,6 @@ void handle_unaligned(struct pt_regs *regs) unsigned long newbase = R1(regs->iir)?regs->gr[R1(regs->iir)]:0; int modify = 0; int ret = ERR_NOTHANDLED; - register int flop=0; /* true if this is a flop */ __inc_irq_stat(irq_unaligned_count); @@ -450,10 +387,10 @@ void handle_unaligned(struct pt_regs *regs) if (!(current->thread.flags & PARISC_UAC_NOPRINT) && __ratelimit(&ratelimit)) { - char buf[256]; - sprintf(buf, "%s(%d): unaligned access to 0x" RFMT " at ip=0x" RFMT "\n", - current->comm, task_pid_nr(current), regs->ior, regs->iaoq[0]); - printk(KERN_WARNING "%s", buf); + printk(KERN_WARNING "%s(%d): unaligned access to " RFMT + " at ip " RFMT " (iir " RFMT ")\n", + current->comm, task_pid_nr(current), regs->ior, + regs->iaoq[0], regs->iir); #ifdef DEBUG_UNALIGNED show_regs(regs); #endif @@ -547,7 +484,7 @@ void handle_unaligned(struct pt_regs *regs) ret = emulate_stw(regs, R2(regs->iir),0); break; -#ifdef CONFIG_PA20 +#ifdef CONFIG_64BIT case OPCODE_LDD_I: case OPCODE_LDDA_I: case OPCODE_LDD_S: @@ -565,13 +502,11 @@ void handle_unaligned(struct pt_regs *regs) case OPCODE_FLDWS: case OPCODE_FLDWXR: case OPCODE_FLDWSR: - flop=1; ret = emulate_ldw(regs,FR3(regs->iir),1); break; case OPCODE_FLDDX: case OPCODE_FLDDS: - flop=1; ret = emulate_ldd(regs,R3(regs->iir),1); break; @@ -579,13 +514,11 @@ void handle_unaligned(struct pt_regs *regs) case OPCODE_FSTWS: case OPCODE_FSTWXR: case OPCODE_FSTWSR: - flop=1; ret = emulate_stw(regs,FR3(regs->iir),1); break; case OPCODE_FSTDX: case OPCODE_FSTDS: - flop=1; ret = emulate_std(regs,R3(regs->iir),1); break; @@ -599,14 +532,12 @@ void handle_unaligned(struct pt_regs *regs) switch (regs->iir & OPCODE2_MASK) { case OPCODE_FLDD_L: - flop=1; ret = emulate_ldd(regs,R2(regs->iir),1); break; case OPCODE_FSTD_L: - flop=1; ret = emulate_std(regs, R2(regs->iir),1); break; -#ifdef CONFIG_PA20 +#ifdef CONFIG_64BIT case OPCODE_LDD_L: ret = emulate_ldd(regs, R2(regs->iir),0); break; @@ -618,7 +549,6 @@ void handle_unaligned(struct pt_regs *regs) switch (regs->iir & OPCODE3_MASK) { case OPCODE_FLDW_L: - flop=1; ret = emulate_ldw(regs, R2(regs->iir), 1); break; case OPCODE_LDW_M: @@ -626,7 +556,6 @@ void handle_unaligned(struct pt_regs *regs) break; case OPCODE_FSTW_L: - flop=1; ret = emulate_stw(regs, R2(regs->iir),1); break; case OPCODE_STW_M: @@ -673,7 +602,7 @@ void handle_unaligned(struct pt_regs *regs) printk(KERN_CRIT "Unaligned handler failed, ret = %d\n", ret); die_if_kernel("Unaligned data reference", regs, 28); - if (ret == ERR_PAGEFAULT) + if (ret == -EFAULT) { force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)regs->ior); diff --git a/arch/parisc/kernel/vdso.c b/arch/parisc/kernel/vdso.c new file mode 100644 index 000000000000..63dc44c4c246 --- /dev/null +++ b/arch/parisc/kernel/vdso.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Helge Deller <deller@gmx.de> + * + * based on arch/s390/kernel/vdso.c which is + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/elf.h> +#include <linux/timekeeper_internal.h> +#include <linux/compat.h> +#include <linux/nsproxy.h> +#include <linux/time_namespace.h> +#include <linux/random.h> + +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/sections.h> +#include <asm/vdso.h> +#include <asm/cacheflush.h> + +extern char vdso32_start, vdso32_end; +extern char vdso64_start, vdso64_end; + +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *vma) +{ + current->mm->context.vdso_base = vma->vm_start; + return 0; +} + +#ifdef CONFIG_64BIT +static struct vm_special_mapping vdso64_mapping = { + .name = "[vdso]", + .mremap = vdso_mremap, +}; +#endif + +static struct vm_special_mapping vdso32_mapping = { + .name = "[vdso]", + .mremap = vdso_mremap, +}; + +/* + * This is called from binfmt_elf, we create the special vma for the + * vDSO and insert it into the mm struct tree + */ +int arch_setup_additional_pages(struct linux_binprm *bprm, + int executable_stack) +{ + + unsigned long vdso_text_start, vdso_text_len, map_base; + struct vm_special_mapping *vdso_mapping; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + +#ifdef CONFIG_64BIT + if (!is_compat_task()) { + vdso_text_len = &vdso64_end - &vdso64_start; + vdso_mapping = &vdso64_mapping; + } else +#endif + { + vdso_text_len = &vdso32_end - &vdso32_start; + vdso_mapping = &vdso32_mapping; + } + + map_base = mm->mmap_base; + if (current->flags & PF_RANDOMIZE) + map_base -= (get_random_int() & 0x1f) * PAGE_SIZE; + + vdso_text_start = get_unmapped_area(NULL, map_base, vdso_text_len, 0, 0); + + /* VM_MAYWRITE for COW so gdb can set breakpoints */ + vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_mapping); + if (IS_ERR(vma)) { + do_munmap(mm, vdso_text_start, PAGE_SIZE, NULL); + rc = PTR_ERR(vma); + } else { + current->mm->context.vdso_base = vdso_text_start; + rc = 0; + } + + mmap_write_unlock(mm); + return rc; +} + +static struct page ** __init vdso_setup_pages(void *start, void *end) +{ + int pages = (end - start) >> PAGE_SHIFT; + struct page **pagelist; + int i; + + pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL); + if (!pagelist) + panic("%s: Cannot allocate page list for VDSO", __func__); + for (i = 0; i < pages; i++) + pagelist[i] = virt_to_page(start + i * PAGE_SIZE); + return pagelist; +} + +static int __init vdso_init(void) +{ +#ifdef CONFIG_64BIT + vdso64_mapping.pages = vdso_setup_pages(&vdso64_start, &vdso64_end); +#endif + if (IS_ENABLED(CONFIG_COMPAT) || !IS_ENABLED(CONFIG_64BIT)) + vdso32_mapping.pages = vdso_setup_pages(&vdso32_start, &vdso32_end); + return 0; +} +arch_initcall(vdso_init); diff --git a/arch/parisc/kernel/vdso32/Makefile b/arch/parisc/kernel/vdso32/Makefile new file mode 100644 index 000000000000..85b1c6d261d1 --- /dev/null +++ b/arch/parisc/kernel/vdso32/Makefile @@ -0,0 +1,53 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso32 = note.o sigtramp.o restart_syscall.o + +# Build rules + +targets := $(obj-vdso32) vdso32.so +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) + +ccflags-y := -shared -fno-common -fbuiltin -mno-fast-indirect-calls -O2 -mno-long-calls +# -march=1.1 -mschedule=7100LC +ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ + $(call ld-option, -Wl$(comma)--hash-style=sysv) +asflags-y := -D__VDSO32__ -s + +KBUILD_AFLAGS += -DBUILD_VDSO +KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING + +VDSO_LIBGCC := $(shell $(CROSS32CC) -print-libgcc-file-name) + +obj-y += vdso32_wrapper.o +extra-y += vdso32.lds +CPPFLAGS_vdso32.lds += -P -C # -U$(ARCH) + +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so FORCE + +# Force dependency (incbin is bad) +# link rule for the .so file, .lds has to be first +$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) $(obj-cvdso32) $(VDSO_LIBGCC) + $(call if_changed,vdso32ld) + +# assembly rules for the .S files +$(obj-vdso32): %.o: %.S FORCE + $(call if_changed_dep,vdso32as) + +$(obj-cvdso32): %.o: %.c FORCE + $(call if_changed_dep,vdso32cc) + +# actual build commands +quiet_cmd_vdso32ld = VDSO32L $@ + cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso32as = VDSO32A $@ + cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $< +quiet_cmd_vdso32cc = VDSO32C $@ + cmd_vdso32cc = $(CROSS32CC) $(c_flags) -c -fPIC -mno-fast-indirect-calls -o $@ $< + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso32-offsets.h: $(obj)/vdso32.so FORCE + $(call if_changed,vdsosym) diff --git a/arch/parisc/kernel/vdso32/gen_vdso_offsets.sh b/arch/parisc/kernel/vdso32/gen_vdso_offsets.sh new file mode 100755 index 000000000000..da39d6cff7f1 --- /dev/null +++ b/arch/parisc/kernel/vdso32/gen_vdso_offsets.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Inspired by arm64 version. +# + +LC_ALL=C +sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso32_offset_\2\t0x\1/p' diff --git a/arch/parisc/kernel/vdso32/note.S b/arch/parisc/kernel/vdso32/note.S new file mode 100644 index 000000000000..bb350918bebd --- /dev/null +++ b/arch/parisc/kernel/vdso32/note.S @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> + +#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \ + .section name, flags; \ + .balign 4; \ + .long 1f - 0f; /* name length */ \ + .long 3f - 2f; /* data length */ \ + .long type; /* note type */ \ +0: .asciz vendor; /* vendor name */ \ +1: .balign 4; \ +2: + +#define ASM_ELF_NOTE_END \ +3: .balign 4; /* pad out section */ \ + .previous + + ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0) + .long LINUX_VERSION_CODE + ASM_ELF_NOTE_END diff --git a/arch/parisc/kernel/vdso32/restart_syscall.S b/arch/parisc/kernel/vdso32/restart_syscall.S new file mode 100644 index 000000000000..7e82008d7e40 --- /dev/null +++ b/arch/parisc/kernel/vdso32/restart_syscall.S @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Syscall restart trampoline for 32 and 64 bits processes. + * + * Copyright (C) 2018-2022 Helge Deller <deller@gmx.de> + * Copyright (C) 2022 John David Anglin <dave.anglin@bell.net> + */ + +#include <asm/unistd.h> +#include <asm/vdso.h> + +#include <linux/linkage.h> + + .text + +ENTRY_CFI(__kernel_restart_syscall) + /* + * Setup a trampoline to restart the syscall + * with __NR_restart_syscall + */ + + /* load return pointer */ +#if defined(__VDSO64__) + ldd 0(%sp), %r31 +#elif defined(__VDSO32__) + ldw 0(%sp), %r31 +#endif + + be 0x100(%sr2, %r0) + ldi __NR_restart_syscall, %r20 + +ENDPROC_CFI(__kernel_restart_syscall) diff --git a/arch/parisc/kernel/vdso32/sigtramp.S b/arch/parisc/kernel/vdso32/sigtramp.S new file mode 100644 index 000000000000..192da7077869 --- /dev/null +++ b/arch/parisc/kernel/vdso32/sigtramp.S @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Signal trampolines for 32 bit processes. + * + * Copyright (C) 2006 Randolph Chung <tausq@debian.org> + * Copyright (C) 2018-2022 Helge Deller <deller@gmx.de> + * Copyright (C) 2022 John David Anglin <dave.anglin@bell.net> + */ +#include <asm/unistd.h> +#include <linux/linkage.h> +#include <generated/asm-offsets.h> + + .text + +/* Gdb expects the trampoline is on the stack and the pc is offset from + a 64-byte boundary by 0, 4 or 5 instructions. Since the vdso trampoline + is not on the stack, we need a new variant with different offsets and + data to tell gdb where to find the signal context on the stack. + + Here we put the offset to the context data at the start of the trampoline + region and offset the first trampoline by 2 instructions. Please do + not change the trampoline as the code in gdb depends on the following + instruction sequence exactly. + */ + .align 64 + .word SIGFRAME_CONTEXT_REGS32 + +/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from + the return address to get an address in the middle of the presumed + call instruction. Since we don't have a call here, we artifically + extend the range covered by the unwind info by adding a nop before + the real start. + */ + nop + + .globl __kernel_sigtramp_rt + .type __kernel_sigtramp_rt, @function +__kernel_sigtramp_rt: + .proc + .callinfo FRAME=ASM_SIGFRAME_SIZE32,CALLS,SAVE_RP + .entry + +.Lsigrt_start = . - 4 +0: ldi 0, %r25 /* (in_syscall=0) */ + ldi __NR_rt_sigreturn, %r20 + ble 0x100(%sr2, %r0) + nop + +1: ldi 1, %r25 /* (in_syscall=1) */ + ldi __NR_rt_sigreturn, %r20 + ble 0x100(%sr2, %r0) + nop +.Lsigrt_end: + .exit + .procend + .size __kernel_sigtramp_rt,.-__kernel_sigtramp_rt + + + .section .eh_frame,"a",@progbits + +/* This is where the mcontext_t struct can be found on the stack. */ +#define PTREGS SIGFRAME_CONTEXT_REGS32 /* 32-bit process offset is -672 */ + +/* Register REGNO can be found at offset OFS of the mcontext_t structure. */ + .macro rsave regno,ofs + .byte 0x05 /* DW_CFA_offset_extended */ + .uleb128 \regno; /* regno */ + .uleb128 \ofs /* factored offset */ + .endm + +.Lcie: + .long .Lcie_end - .Lcie_start +.Lcie_start: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .stringz "zRS" /* NUL-terminated augmentation string */ + .uleb128 4 /* Code alignment factor */ + .sleb128 4 /* Data alignment factor */ + .byte 89 /* Return address register column, iaoq[0] */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */ + .byte 0x0f /* DW_CFA_def_cfa_expresion */ + .uleb128 9f - 1f /* length */ +1: + .byte 0x8e /* DW_OP_breg30 */ + .sleb128 PTREGS +9: + .balign 4 +.Lcie_end: + + .long .Lfde0_end - .Lfde0_start +.Lfde0_start: + .long .Lfde0_start - .Lcie /* CIE pointer. */ + .long .Lsigrt_start - . /* PC start, length */ + .long .Lsigrt_end - .Lsigrt_start + .uleb128 0 /* Augmentation */ + + /* General registers */ + rsave 1, 2 + rsave 2, 3 + rsave 3, 4 + rsave 4, 5 + rsave 5, 6 + rsave 6, 7 + rsave 7, 8 + rsave 8, 9 + rsave 9, 10 + rsave 10, 11 + rsave 11, 12 + rsave 12, 13 + rsave 13, 14 + rsave 14, 15 + rsave 15, 16 + rsave 16, 17 + rsave 17, 18 + rsave 18, 19 + rsave 19, 20 + rsave 20, 21 + rsave 21, 22 + rsave 22, 23 + rsave 23, 24 + rsave 24, 25 + rsave 25, 26 + rsave 26, 27 + rsave 27, 28 + rsave 28, 29 + rsave 29, 30 + rsave 30, 31 + rsave 31, 32 + + /* Floating-point registers */ + rsave 32, 42 + rsave 33, 43 + rsave 34, 44 + rsave 35, 45 + rsave 36, 46 + rsave 37, 47 + rsave 38, 48 + rsave 39, 49 + rsave 40, 50 + rsave 41, 51 + rsave 42, 52 + rsave 43, 53 + rsave 44, 54 + rsave 45, 55 + rsave 46, 56 + rsave 47, 57 + rsave 48, 58 + rsave 49, 59 + rsave 50, 60 + rsave 51, 61 + rsave 52, 62 + rsave 53, 63 + rsave 54, 64 + rsave 55, 65 + rsave 56, 66 + rsave 57, 67 + rsave 58, 68 + rsave 59, 69 + rsave 60, 70 + rsave 61, 71 + rsave 62, 72 + rsave 63, 73 + rsave 64, 74 + rsave 65, 75 + rsave 66, 76 + rsave 67, 77 + rsave 68, 78 + rsave 69, 79 + rsave 70, 80 + rsave 71, 81 + rsave 72, 82 + rsave 73, 83 + rsave 74, 84 + rsave 75, 85 + rsave 76, 86 + rsave 77, 87 + rsave 78, 88 + rsave 79, 89 + rsave 80, 90 + rsave 81, 91 + rsave 82, 92 + rsave 83, 93 + rsave 84, 94 + rsave 85, 95 + rsave 86, 96 + rsave 87, 97 + + /* SAR register */ + rsave 88, 102 + + /* iaoq[0] return address register */ + rsave 89, 100 + .balign 4 +.Lfde0_end: diff --git a/arch/parisc/kernel/vdso32/vdso32.lds.S b/arch/parisc/kernel/vdso32/vdso32.lds.S new file mode 100644 index 000000000000..d4aff3af5262 --- /dev/null +++ b/arch/parisc/kernel/vdso32/vdso32.lds.S @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This is the infamous ld script for the 32 bits vdso library + */ +#include <asm/vdso.h> +#include <asm/page.h> + +/* Default link addresses for the vDSOs */ +OUTPUT_FORMAT("elf32-hppa-linux") +OUTPUT_ARCH(hppa) +ENTRY(_start) + +SECTIONS +{ + . = VDSO_LBASE + SIZEOF_HEADERS; + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN (16); + .text : + { + *(.text .stub .text.* .gnu.linkonce.t.*) + } + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + /* Other stuff is appended to the text segment: */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + .rodata2 : { *(.data.rel.ro) } + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table) } + .fixup : { *(.fixup) } + + .dynamic : { *(.dynamic) } :text :dynamic + .plt : { *(.plt) } + .got : { *(.got) } + + _end = .; + __end = .; + PROVIDE (end = .); + + + /* Stabs debugging sections are here too + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + + /DISCARD/ : { *(.note.GNU-stack) } + /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) } + /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) } +} + + +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + note PT_NOTE FLAGS(4); /* PF_R */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + __kernel_sigtramp_rt32; + __kernel_restart_syscall32; + local: *; + }; +} diff --git a/arch/parisc/kernel/vdso32/vdso32_wrapper.S b/arch/parisc/kernel/vdso32/vdso32_wrapper.S new file mode 100644 index 000000000000..5ac06093e8ec --- /dev/null +++ b/arch/parisc/kernel/vdso32/vdso32_wrapper.S @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso32_start, vdso32_end + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/parisc/kernel/vdso32/vdso32.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/parisc/kernel/vdso64/Makefile b/arch/parisc/kernel/vdso64/Makefile new file mode 100644 index 000000000000..a30f5ec5eb4b --- /dev/null +++ b/arch/parisc/kernel/vdso64/Makefile @@ -0,0 +1,48 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso64 = note.o sigtramp.o restart_syscall.o + +# Build rules + +targets := $(obj-vdso64) vdso64.so +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) + + +ccflags-y := -shared -fno-common -fno-builtin +ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ + $(call ld-option, -Wl$(comma)--hash-style=sysv) +asflags-y := -D__VDSO64__ -s + +KBUILD_AFLAGS += -DBUILD_VDSO +KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING + +VDSO_LIBGCC := $(shell $(CC) -print-libgcc-file-name) + +obj-y += vdso64_wrapper.o +extra-y += vdso64.lds +CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) + +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so FORCE + +# Force dependency (incbin is bad) +# link rule for the .so file, .lds has to be first +$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64) $(VDSO_LIBGCC) + $(call if_changed,vdso64ld) + +# assembly rules for the .S files +$(obj-vdso64): %.o: %.S FORCE + $(call if_changed_dep,vdso64as) + +# actual build commands +quiet_cmd_vdso64ld = VDSO64L $@ + cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso64-offsets.h: $(obj)/vdso64.so FORCE + $(call if_changed,vdsosym) diff --git a/arch/parisc/kernel/vdso64/gen_vdso_offsets.sh b/arch/parisc/kernel/vdso64/gen_vdso_offsets.sh new file mode 100755 index 000000000000..37f05cb38dad --- /dev/null +++ b/arch/parisc/kernel/vdso64/gen_vdso_offsets.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Inspired by arm64 version. +# + +LC_ALL=C +sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso64_offset_\2\t0x\1/p' diff --git a/arch/parisc/kernel/vdso64/note.S b/arch/parisc/kernel/vdso64/note.S new file mode 100644 index 000000000000..bd1fa23597d6 --- /dev/null +++ b/arch/parisc/kernel/vdso64/note.S @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "../vdso32/note.S" diff --git a/arch/parisc/kernel/vdso64/restart_syscall.S b/arch/parisc/kernel/vdso64/restart_syscall.S new file mode 100644 index 000000000000..83004451f6b1 --- /dev/null +++ b/arch/parisc/kernel/vdso64/restart_syscall.S @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "../vdso32/restart_syscall.S" diff --git a/arch/parisc/kernel/vdso64/sigtramp.S b/arch/parisc/kernel/vdso64/sigtramp.S new file mode 100644 index 000000000000..66a6d2b241e1 --- /dev/null +++ b/arch/parisc/kernel/vdso64/sigtramp.S @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Signal trampolines for 64 bit processes. + * + * Copyright (C) 2006 Randolph Chung <tausq@debian.org> + * Copyright (C) 2018-2022 Helge Deller <deller@gmx.de> + * Copyright (C) 2022 John David Anglin <dave.anglin@bell.net> + */ +#include <asm/unistd.h> +#include <linux/linkage.h> +#include <generated/asm-offsets.h> + + .text + +/* Gdb expects the trampoline is on the stack and the pc is offset from + a 64-byte boundary by 0, 4 or 5 instructions. Since the vdso trampoline + is not on the stack, we need a new variant with different offsets and + data to tell gdb where to find the signal context on the stack. + + Here we put the offset to the context data at the start of the trampoline + region and offset the first trampoline by 2 instructions. Please do + not change the trampoline as the code in gdb depends on the following + instruction sequence exactly. + */ + .align 64 + .word SIGFRAME_CONTEXT_REGS + +/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from + the return address to get an address in the middle of the presumed + call instruction. Since we don't have a call here, we artifically + extend the range covered by the unwind info by adding a nop before + the real start. + */ + nop + + .globl __kernel_sigtramp_rt + .type __kernel_sigtramp_rt, @function +__kernel_sigtramp_rt: + .proc + .callinfo FRAME=ASM_SIGFRAME_SIZE,CALLS,SAVE_RP + .entry + +.Lsigrt_start = . - 4 +0: ldi 0, %r25 /* (in_syscall=0) */ + ldi __NR_rt_sigreturn, %r20 + ble 0x100(%sr2, %r0) + nop + +1: ldi 1, %r25 /* (in_syscall=1) */ + ldi __NR_rt_sigreturn, %r20 + ble 0x100(%sr2, %r0) + nop +.Lsigrt_end: + .exit + .procend + .size __kernel_sigtramp_rt,.-__kernel_sigtramp_rt + + .section .eh_frame,"a",@progbits + +/* This is where the mcontext_t struct can be found on the stack. */ +#define PTREGS SIGFRAME_CONTEXT_REGS /* 64-bit process offset is -720 */ + +/* Register REGNO can be found at offset OFS of the mcontext_t structure. */ + .macro rsave regno,ofs + .byte 0x05 /* DW_CFA_offset_extended */ + .uleb128 \regno; /* regno */ + .uleb128 \ofs /* factored offset */ + .endm + +.Lcie: + .long .Lcie_end - .Lcie_start +.Lcie_start: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .stringz "zRS" /* NUL-terminated augmentation string */ + .uleb128 4 /* Code alignment factor */ + .sleb128 8 /* Data alignment factor */ + .byte 61 /* Return address register column, iaoq[0] */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */ + .byte 0x0f /* DW_CFA_def_cfa_expresion */ + .uleb128 9f - 1f /* length */ +1: + .byte 0x8e /* DW_OP_breg30 */ + .sleb128 PTREGS +9: + .balign 8 +.Lcie_end: + + .long .Lfde0_end - .Lfde0_start +.Lfde0_start: + .long .Lfde0_start - .Lcie /* CIE pointer. */ + .long .Lsigrt_start - . /* PC start, length */ + .long .Lsigrt_end - .Lsigrt_start + .uleb128 0 /* Augmentation */ + + /* General registers */ + rsave 1, 2 + rsave 2, 3 + rsave 3, 4 + rsave 4, 5 + rsave 5, 6 + rsave 6, 7 + rsave 7, 8 + rsave 8, 9 + rsave 9, 10 + rsave 10, 11 + rsave 11, 12 + rsave 12, 13 + rsave 13, 14 + rsave 14, 15 + rsave 15, 16 + rsave 16, 17 + rsave 17, 18 + rsave 18, 19 + rsave 19, 20 + rsave 20, 21 + rsave 21, 22 + rsave 22, 23 + rsave 23, 24 + rsave 24, 25 + rsave 25, 26 + rsave 26, 27 + rsave 27, 28 + rsave 28, 29 + rsave 29, 30 + rsave 30, 31 + rsave 31, 32 + + /* Floating-point registers */ + rsave 32, 36 + rsave 33, 37 + rsave 34, 38 + rsave 35, 39 + rsave 36, 40 + rsave 37, 41 + rsave 38, 42 + rsave 39, 43 + rsave 40, 44 + rsave 41, 45 + rsave 42, 46 + rsave 43, 47 + rsave 44, 48 + rsave 45, 49 + rsave 46, 50 + rsave 47, 51 + rsave 48, 52 + rsave 49, 53 + rsave 50, 54 + rsave 51, 55 + rsave 52, 56 + rsave 53, 57 + rsave 54, 58 + rsave 55, 59 + rsave 56, 60 + rsave 57, 61 + rsave 58, 62 + rsave 59, 63 + + /* SAR register */ + rsave 60, 67 + + /* iaoq[0] return address register */ + rsave 61, 65 + .balign 8 +.Lfde0_end: diff --git a/arch/parisc/kernel/vdso64/vdso64.lds.S b/arch/parisc/kernel/vdso64/vdso64.lds.S new file mode 100644 index 000000000000..de1fb4b19286 --- /dev/null +++ b/arch/parisc/kernel/vdso64/vdso64.lds.S @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This is the infamous ld script for the 64 bits vdso library + */ +#include <asm/vdso.h> + +/* Default link addresses for the vDSOs */ +OUTPUT_FORMAT("elf64-hppa-linux") +OUTPUT_ARCH(hppa:hppa2.0w) +ENTRY(_start) + +SECTIONS +{ + . = VDSO_LBASE + SIZEOF_HEADERS; + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN (16); + .text : + { + *(.text .stub .text.* .gnu.linkonce.t.*) + } + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + /* Other stuff is appended to the text segment: */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table) } + .fixup : { *(.fixup) } + + .dynamic : { *(.dynamic) } :text :dynamic + .plt : { *(.plt) } + .got : { *(.got) } + + _end = .; + __end = .; + PROVIDE (end = .); + + + /* Stabs debugging sections are here too + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + + /DISCARD/ : { *(.note.GNU-stack) } + /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) } + /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) } +} + + +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + note PT_NOTE FLAGS(4); /* PF_R */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + __kernel_sigtramp_rt64; + __kernel_restart_syscall64; + local: *; + }; +} diff --git a/arch/parisc/kernel/vdso64/vdso64_wrapper.S b/arch/parisc/kernel/vdso64/vdso64_wrapper.S new file mode 100644 index 000000000000..66f929482d3d --- /dev/null +++ b/arch/parisc/kernel/vdso64/vdso64_wrapper.S @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso64_start, vdso64_end + .balign PAGE_SIZE +vdso64_start: + .incbin "arch/parisc/kernel/vdso64/vdso64.so" + .balign PAGE_SIZE +vdso64_end: + + .previous diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index ea70a0e08321..5fc0c852c84c 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -13,8 +13,8 @@ #include <linux/compiler.h> #include <linux/uaccess.h> -#define get_user_space() (uaccess_kernel() ? 0 : mfsp(3)) -#define get_kernel_space() (0) +#define get_user_space() mfsp(SR_USER) +#define get_kernel_space() SR_KERNEL /* Returns 0 for success, otherwise, returns number of bytes not transferred. */ extern unsigned long pa_memcpy(void *dst, const void *src, @@ -23,8 +23,8 @@ extern unsigned long pa_memcpy(void *dst, const void *src, unsigned long raw_copy_to_user(void __user *dst, const void *src, unsigned long len) { - mtsp(get_kernel_space(), 1); - mtsp(get_user_space(), 2); + mtsp(get_kernel_space(), SR_TEMP1); + mtsp(get_user_space(), SR_TEMP2); return pa_memcpy((void __force *)dst, src, len); } EXPORT_SYMBOL(raw_copy_to_user); @@ -32,16 +32,16 @@ EXPORT_SYMBOL(raw_copy_to_user); unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long len) { - mtsp(get_user_space(), 1); - mtsp(get_kernel_space(), 2); + mtsp(get_user_space(), SR_TEMP1); + mtsp(get_kernel_space(), SR_TEMP2); return pa_memcpy(dst, (void __force *)src, len); } EXPORT_SYMBOL(raw_copy_from_user); void * memcpy(void * dst,const void *src, size_t count) { - mtsp(get_kernel_space(), 1); - mtsp(get_kernel_space(), 2); + mtsp(get_kernel_space(), SR_TEMP1); + mtsp(get_kernel_space(), SR_TEMP2); pa_memcpy(dst, src, count); return dst; } diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index e9eabf8f14d7..f114e102aaf2 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -425,3 +425,92 @@ out_of_memory: } pagefault_out_of_memory(); } + +/* Handle non-access data TLB miss faults. + * + * For probe instructions, accesses to userspace are considered allowed + * if they lie in a valid VMA and the access type matches. We are not + * allowed to handle MM faults here so there may be situations where an + * actual access would fail even though a probe was successful. + */ +int +handle_nadtlb_fault(struct pt_regs *regs) +{ + unsigned long insn = regs->iir; + int breg, treg, xreg, val = 0; + struct vm_area_struct *vma, *prev_vma; + struct task_struct *tsk; + struct mm_struct *mm; + unsigned long address; + unsigned long acc_type; + + switch (insn & 0x380) { + case 0x280: + /* FDC instruction */ + fallthrough; + case 0x380: + /* PDC and FIC instructions */ + if (printk_ratelimit()) { + pr_warn("BUG: nullifying cache flush/purge instruction\n"); + show_regs(regs); + } + if (insn & 0x20) { + /* Base modification */ + breg = (insn >> 21) & 0x1f; + xreg = (insn >> 16) & 0x1f; + if (breg && xreg) + regs->gr[breg] += regs->gr[xreg]; + } + regs->gr[0] |= PSW_N; + return 1; + + case 0x180: + /* PROBE instruction */ + treg = insn & 0x1f; + if (regs->isr) { + tsk = current; + mm = tsk->mm; + if (mm) { + /* Search for VMA */ + address = regs->ior; + mmap_read_lock(mm); + vma = find_vma_prev(mm, address, &prev_vma); + mmap_read_unlock(mm); + + /* + * Check if access to the VMA is okay. + * We don't allow for stack expansion. + */ + acc_type = (insn & 0x40) ? VM_WRITE : VM_READ; + if (vma + && address >= vma->vm_start + && (vma->vm_flags & acc_type) == acc_type) + val = 1; + } + } + if (treg) + regs->gr[treg] = val; + regs->gr[0] |= PSW_N; + return 1; + + case 0x300: + /* LPA instruction */ + if (insn & 0x20) { + /* Base modification */ + breg = (insn >> 21) & 0x1f; + xreg = (insn >> 16) & 0x1f; + if (breg && xreg) + regs->gr[breg] += regs->gr[xreg]; + } + treg = insn & 0x1f; + if (treg) + regs->gr[treg] = 0; + regs->gr[0] |= PSW_N; + return 1; + + default: + break; + } + + return 0; +} diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index f384cb1a4f7a..5a83da703e87 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += coco/ + obj-y += entry/ obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9f5bd41bf660..a4f6672d0cb5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -120,6 +120,7 @@ config X86 select ARCH_WANTS_NO_INSTR select ARCH_WANT_HUGE_PMD_SHARE select ARCH_WANT_LD_ORPHAN_WARN + select ARCH_WANTS_RT_DELAYED_SIGNALS select ARCH_WANTS_THP_SWAP if X86_64 select ARCH_HAS_PARANOID_L1D_FLUSH select BUILDTIME_TABLE_SORT @@ -1638,7 +1639,7 @@ config ARCH_SPARSEMEM_DEFAULT config ARCH_SELECT_MEMORY_MODEL def_bool y - depends on ARCH_SPARSEMEM_ENABLE + depends on ARCH_SPARSEMEM_ENABLE && ARCH_FLATMEM_ENABLE config ARCH_MEMORY_PROBE bool "Enable sysfs memory/probe interface" diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 659fad53ca82..3b354eb9516d 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -152,14 +152,13 @@ SYM_FUNC_END(startup_32) #ifdef CONFIG_EFI_STUB SYM_FUNC_START(efi32_stub_entry) -SYM_FUNC_START_ALIAS(efi_stub_entry) add $0x4, %esp movl 8(%esp), %esi /* save boot_params pointer */ call efi_main /* efi_main returns the possibly relocated address of startup_32 */ jmp *%eax SYM_FUNC_END(efi32_stub_entry) -SYM_FUNC_END_ALIAS(efi_stub_entry) +SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) #endif .text diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fd9441f40457..dea95301196b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -535,7 +535,6 @@ SYM_CODE_END(startup_64) #ifdef CONFIG_EFI_STUB .org 0x390 SYM_FUNC_START(efi64_stub_entry) -SYM_FUNC_START_ALIAS(efi_stub_entry) and $~0xf, %rsp /* realign the stack */ movq %rdx, %rbx /* save boot_params pointer */ call efi_main @@ -543,7 +542,7 @@ SYM_FUNC_START_ALIAS(efi_stub_entry) leaq rva(startup_64)(%rax), %rax jmp *%rax SYM_FUNC_END(efi64_stub_entry) -SYM_FUNC_END_ALIAS(efi_stub_entry) +SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) #endif .text diff --git a/arch/x86/coco/Makefile b/arch/x86/coco/Makefile new file mode 100644 index 000000000000..c1ead00017a7 --- /dev/null +++ b/arch/x86/coco/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS_REMOVE_core.o = -pg +KASAN_SANITIZE_core.o := n +CFLAGS_core.o += -fno-stack-protector + +obj-y += core.o diff --git a/arch/x86/kernel/cc_platform.c b/arch/x86/coco/core.c index 6a6ffcd978f6..fc1365dd927e 100644 --- a/arch/x86/kernel/cc_platform.c +++ b/arch/x86/coco/core.c @@ -9,18 +9,16 @@ #include <linux/export.h> #include <linux/cc_platform.h> -#include <linux/mem_encrypt.h> -#include <asm/mshyperv.h> +#include <asm/coco.h> #include <asm/processor.h> -static bool __maybe_unused intel_cc_platform_has(enum cc_attr attr) +static enum cc_vendor vendor __ro_after_init; +static u64 cc_mask __ro_after_init; + +static bool intel_cc_platform_has(enum cc_attr attr) { -#ifdef CONFIG_INTEL_TDX_GUEST - return false; -#else return false; -#endif } /* @@ -74,12 +72,46 @@ static bool hyperv_cc_platform_has(enum cc_attr attr) bool cc_platform_has(enum cc_attr attr) { - if (sme_me_mask) + switch (vendor) { + case CC_VENDOR_AMD: return amd_cc_platform_has(attr); - - if (hv_is_isolation_supported()) + case CC_VENDOR_INTEL: + return intel_cc_platform_has(attr); + case CC_VENDOR_HYPERV: return hyperv_cc_platform_has(attr); - - return false; + default: + return false; + } } EXPORT_SYMBOL_GPL(cc_platform_has); + +u64 cc_mkenc(u64 val) +{ + switch (vendor) { + case CC_VENDOR_AMD: + return val | cc_mask; + default: + return val; + } +} + +u64 cc_mkdec(u64 val) +{ + switch (vendor) { + case CC_VENDOR_AMD: + return val & ~cc_mask; + default: + return val; + } +} +EXPORT_SYMBOL_GPL(cc_mkdec); + +__init void cc_set_vendor(enum cc_vendor v) +{ + vendor = v; +} + +__init void cc_set_mask(u64 mask) +{ + cc_mask = mask; +} diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 363699dd7220..837c1e0aa021 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -1751,8 +1751,6 @@ SYM_FUNC_END(aesni_gcm_finalize) #endif - -SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128) SYM_FUNC_START_LOCAL(_key_expansion_256a) pshufd $0b11111111, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 @@ -1764,7 +1762,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a) add $0x10, TKEYP RET SYM_FUNC_END(_key_expansion_256a) -SYM_FUNC_END_ALIAS(_key_expansion_128) +SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a) SYM_FUNC_START_LOCAL(_key_expansion_192a) pshufd $0b01010101, %xmm1, %xmm1 diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h new file mode 100644 index 000000000000..3d98c3a60d34 --- /dev/null +++ b/arch/x86/include/asm/coco.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_COCO_H +#define _ASM_X86_COCO_H + +#include <asm/types.h> + +enum cc_vendor { + CC_VENDOR_NONE, + CC_VENDOR_AMD, + CC_VENDOR_HYPERV, + CC_VENDOR_INTEL, +}; + +void cc_set_vendor(enum cc_vendor v); +void cc_set_mask(u64 mask); + +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM +u64 cc_mkenc(u64 val); +u64 cc_mkdec(u64 val); +#else +static inline u64 cc_mkenc(u64 val) +{ + return val; +} + +static inline u64 cc_mkdec(u64 val) +{ + return val; +} +#endif + +#endif /* _ASM_X86_COCO_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 65d147974f8d..f7436fccc076 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -299,9 +299,6 @@ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ -#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ -#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ -#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ @@ -390,7 +387,10 @@ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ #define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 8f28fafa98b3..1231d63f836d 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -56,8 +56,11 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif -/* Force disable because it's broken beyond repair */ -#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#ifdef CONFIG_INTEL_IOMMU_SVM +# define DISABLE_ENQCMD 0 +#else +# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#endif #ifdef CONFIG_X86_SGX # define DISABLE_SGX 0 diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 05a6ab940f45..1b29f58f730f 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -124,7 +124,7 @@ struct insn { #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ /* VEX bit fields */ -#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ +#define X86_EVEX_M(vex) ((vex) & 0x07) /* EVEX Byte1 */ #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ #define X86_VEX2_M 1 /* VEX2.M always 1 */ #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index a69012e1903f..e1591467668e 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -279,7 +279,7 @@ extern void (*paravirt_iret)(void); #define paravirt_type(op) \ [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ - [paravirt_opptr] "i" (&(pv_ops.op)) + [paravirt_opptr] "m" (pv_ops.op) #define paravirt_clobber(clobber) \ [paravirt_clobber] "i" (clobber) @@ -316,7 +316,7 @@ int paravirt_disable_iospace(void); */ #define PARAVIRT_CALL \ ANNOTATE_RETPOLINE_SAFE \ - "call *%c[paravirt_opptr];" + "call *%[paravirt_opptr];" /* * These macros are intended to wrap calls through one of the paravirt diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 8a9432fb3802..62ab07e24aef 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -15,17 +15,12 @@ cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ : (prot)) -/* - * Macros to add or remove encryption attribute - */ -#define pgprot_encrypted(prot) __pgprot(__sme_set(pgprot_val(prot))) -#define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot))) - #ifndef __ASSEMBLY__ #include <linux/spinlock.h> #include <asm/x86_init.h> #include <asm/pkru.h> #include <asm/fpu/api.h> +#include <asm/coco.h> #include <asm-generic/pgtable_uffd.h> #include <linux/page_table_check.h> @@ -38,6 +33,12 @@ void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, void ptdump_walk_pgd_level_checkwx(void); void ptdump_walk_user_pgd_level_checkwx(void); +/* + * Macros to add or remove encryption attribute + */ +#define pgprot_encrypted(prot) __pgprot(cc_mkenc(pgprot_val(prot))) +#define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot))) + #ifdef CONFIG_DEBUG_WX #define debug_checkwx() ptdump_walk_pgd_level_checkwx() #define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx() diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2c5f12ae7d04..a87e7c33d5ac 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -119,6 +119,8 @@ struct cpuinfo_x86 { int x86_cache_mbm_width_offset; int x86_power; unsigned long loops_per_jiffy; + /* protected processor identification number */ + u64 ppin; /* cpuid returned max cores value: */ u16 x86_max_cores; u16 apicid; diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index ff0f2d90338a..78ca53512486 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -53,7 +53,6 @@ int set_memory_global(unsigned long addr, int numpages); int set_pages_array_uc(struct page **pages, int addrinarray); int set_pages_array_wc(struct page **pages, int addrinarray); -int set_pages_array_wt(struct page **pages, int addrinarray); int set_pages_array_wb(struct page **pages, int addrinarray); /* @@ -84,7 +83,6 @@ int set_pages_rw(struct page *page, int numpages); int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); bool kernel_page_present(struct page *page); -void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc); extern int kernel_set_to_readonly; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index c512c647b4fc..9619385bf749 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -110,6 +110,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); #define topology_logical_die_id(cpu) (cpu_data(cpu).logical_die_id) #define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) +#define topology_ppin(cpu) (cpu_data(cpu).ppin) extern unsigned int __max_die_per_package; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 22b7412c08f6..e9170457697e 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -142,6 +142,21 @@ struct x86_init_acpi { }; /** + * struct x86_guest - Functions used by misc guest incarnations like SEV, TDX, etc. + * + * @enc_status_change_prepare Notify HV before the encryption status of a range is changed + * @enc_status_change_finish Notify HV after the encryption status of a range is changed + * @enc_tlb_flush_required Returns true if a TLB flush is needed before changing page encryption status + * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status + */ +struct x86_guest { + void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); + bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); + bool (*enc_tlb_flush_required)(bool enc); + bool (*enc_cache_flush_required)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ @@ -287,6 +302,7 @@ struct x86_platform_ops { struct x86_legacy_features legacy; void (*set_legacy_features)(void); struct x86_hyper_runtime hyper; + struct x86_guest guest; }; struct x86_apic_ops { diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 6aef9ee28a39..6462e3dd98f4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -21,7 +21,6 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg CFLAGS_REMOVE_head64.o = -pg CFLAGS_REMOVE_sev.o = -pg -CFLAGS_REMOVE_cc_platform.o = -pg endif KASAN_SANITIZE_head$(BITS).o := n @@ -30,7 +29,6 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n KASAN_SANITIZE_stacktrace.o := n KASAN_SANITIZE_paravirt.o := n KASAN_SANITIZE_sev.o := n -KASAN_SANITIZE_cc_platform.o := n # With some compiler versions the generated code results in boot hangs, caused # by several compilation units. To be safe, disable all instrumentation. @@ -49,7 +47,6 @@ endif KCOV_INSTRUMENT := n CFLAGS_head$(BITS).o += -fno-stack-protector -CFLAGS_cc_platform.o += -fno-stack-protector CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace @@ -151,8 +148,6 @@ obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o -obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o - ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4edb6f0f628c..0c0b09796ced 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -394,35 +394,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) per_cpu(cpu_llc_id, cpu) = c->cpu_die_id = c->phys_proc_id; } -static void amd_detect_ppin(struct cpuinfo_x86 *c) -{ - unsigned long long val; - - if (!cpu_has(c, X86_FEATURE_AMD_PPIN)) - return; - - /* When PPIN is defined in CPUID, still need to check PPIN_CTL MSR */ - if (rdmsrl_safe(MSR_AMD_PPIN_CTL, &val)) - goto clear_ppin; - - /* PPIN is locked in disabled mode, clear feature bit */ - if ((val & 3UL) == 1UL) - goto clear_ppin; - - /* If PPIN is disabled, try to enable it */ - if (!(val & 2UL)) { - wrmsrl_safe(MSR_AMD_PPIN_CTL, val | 2UL); - rdmsrl_safe(MSR_AMD_PPIN_CTL, &val); - } - - /* If PPIN_EN bit is 1, return from here; otherwise fall through */ - if (val & 2UL) - return; - -clear_ppin: - clear_cpu_cap(c, X86_FEATURE_AMD_PPIN); -} - u32 amd_get_nodes_per_socket(void) { return nodes_per_socket; @@ -585,6 +556,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) * the SME physical address space reduction value. * If BIOS has not enabled SME then don't advertise the * SME feature (set in scattered.c). + * If the kernel has not enabled SME via any means then + * don't advertise the SME feature. * For SEV: If BIOS has not enabled SEV then don't advertise the * SEV and SEV_ES feature (set in scattered.c). * @@ -607,6 +580,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) if (IS_ENABLED(CONFIG_X86_32)) goto clear_all; + if (!sme_me_mask) + setup_clear_cpu_cap(X86_FEATURE_SME); + rdmsrl(MSR_K7_HWCR, msr); if (!(msr & MSR_K7_HWCR_SMMLOCK)) goto clear_sev; @@ -947,7 +923,6 @@ static void init_amd(struct cpuinfo_x86 *c) amd_detect_cmp(c); amd_get_topology(c); srat_detect_node(c); - amd_detect_ppin(c); init_amd_cacheinfo(c); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7b8382c11788..64deb7727d00 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -88,6 +88,83 @@ EXPORT_SYMBOL_GPL(get_llc_id); /* L2 cache ID of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID; +static struct ppin_info { + int feature; + int msr_ppin_ctl; + int msr_ppin; +} ppin_info[] = { + [X86_VENDOR_INTEL] = { + .feature = X86_FEATURE_INTEL_PPIN, + .msr_ppin_ctl = MSR_PPIN_CTL, + .msr_ppin = MSR_PPIN + }, + [X86_VENDOR_AMD] = { + .feature = X86_FEATURE_AMD_PPIN, + .msr_ppin_ctl = MSR_AMD_PPIN_CTL, + .msr_ppin = MSR_AMD_PPIN + }, +}; + +static const struct x86_cpu_id ppin_cpuids[] = { + X86_MATCH_FEATURE(X86_FEATURE_AMD_PPIN, &ppin_info[X86_VENDOR_AMD]), + X86_MATCH_FEATURE(X86_FEATURE_INTEL_PPIN, &ppin_info[X86_VENDOR_INTEL]), + + /* Legacy models without CPUID enumeration */ + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &ppin_info[X86_VENDOR_INTEL]), + + {} +}; + +static void ppin_init(struct cpuinfo_x86 *c) +{ + const struct x86_cpu_id *id; + unsigned long long val; + struct ppin_info *info; + + id = x86_match_cpu(ppin_cpuids); + if (!id) + return; + + /* + * Testing the presence of the MSR is not enough. Need to check + * that the PPIN_CTL allows reading of the PPIN. + */ + info = (struct ppin_info *)id->driver_data; + + if (rdmsrl_safe(info->msr_ppin_ctl, &val)) + goto clear_ppin; + + if ((val & 3UL) == 1UL) { + /* PPIN locked in disabled mode */ + goto clear_ppin; + } + + /* If PPIN is disabled, try to enable */ + if (!(val & 2UL)) { + wrmsrl_safe(info->msr_ppin_ctl, val | 2UL); + rdmsrl_safe(info->msr_ppin_ctl, &val); + } + + /* Is the enable bit set? */ + if (val & 2UL) { + c->ppin = __rdmsr(info->msr_ppin); + set_cpu_cap(c, info->feature); + return; + } + +clear_ppin: + clear_cpu_cap(c, info->feature); +} + /* correctly size the local cpu masks */ void __init setup_cpu_local_masks(void) { @@ -1655,6 +1732,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[i] |= boot_cpu_data.x86_capability[i]; } + ppin_init(c); + /* Init Machine Check Exception if available. */ mcheck_cpu_init(c); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 9f4b508886dd..1940d305db1c 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -993,6 +993,7 @@ static struct attribute *default_attrs[] = { NULL, /* possibly interrupt_enable if supported, see below */ NULL, }; +ATTRIBUTE_GROUPS(default); #define to_block(k) container_of(k, struct threshold_block, kobj) #define to_attr(a) container_of(a, struct threshold_attr, attr) @@ -1029,7 +1030,7 @@ static void threshold_block_release(struct kobject *kobj); static struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, - .default_attrs = default_attrs, + .default_groups = default_groups, .release = threshold_block_release, }; @@ -1101,10 +1102,10 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb b->threshold_limit = THRESHOLD_MAX; if (b->interrupt_capable) { - threshold_ktype.default_attrs[2] = &interrupt_enable.attr; + default_attrs[2] = &interrupt_enable.attr; b->interrupt_enable = 1; } else { - threshold_ktype.default_attrs[2] = NULL; + default_attrs[2] = NULL; } INIT_LIST_HEAD(&b->miscj); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 5818b837fd4d..4f1e825033ce 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -138,12 +138,7 @@ void mce_setup(struct mce *m) m->socketid = cpu_data(m->extcpu).phys_proc_id; m->apicid = cpu_data(m->extcpu).initial_apicid; m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP); - - if (this_cpu_has(X86_FEATURE_INTEL_PPIN)) - m->ppin = __rdmsr(MSR_PPIN); - else if (this_cpu_has(X86_FEATURE_AMD_PPIN)) - m->ppin = __rdmsr(MSR_AMD_PPIN); - + m->ppin = cpu_data(m->extcpu).ppin; m->microcode = boot_cpu_data.microcode; } diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index baafbb37be67..95275a5e57e0 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -470,47 +470,6 @@ void intel_clear_lmce(void) wrmsrl(MSR_IA32_MCG_EXT_CTL, val); } -static void intel_ppin_init(struct cpuinfo_x86 *c) -{ - unsigned long long val; - - /* - * Even if testing the presence of the MSR would be enough, we don't - * want to risk the situation where other models reuse this MSR for - * other purposes. - */ - switch (c->x86_model) { - case INTEL_FAM6_IVYBRIDGE_X: - case INTEL_FAM6_HASWELL_X: - case INTEL_FAM6_BROADWELL_D: - case INTEL_FAM6_BROADWELL_X: - case INTEL_FAM6_SKYLAKE_X: - case INTEL_FAM6_ICELAKE_X: - case INTEL_FAM6_ICELAKE_D: - case INTEL_FAM6_SAPPHIRERAPIDS_X: - case INTEL_FAM6_XEON_PHI_KNL: - case INTEL_FAM6_XEON_PHI_KNM: - - if (rdmsrl_safe(MSR_PPIN_CTL, &val)) - return; - - if ((val & 3UL) == 1UL) { - /* PPIN locked in disabled mode */ - return; - } - - /* If PPIN is disabled, try to enable */ - if (!(val & 2UL)) { - wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); - rdmsrl_safe(MSR_PPIN_CTL, &val); - } - - /* Is the enable bit set? */ - if (val & 2UL) - set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); - } -} - /* * Enable additional error logs from the integrated * memory controller on processors that support this. @@ -535,7 +494,6 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_cmci(); intel_init_lmce(); - intel_ppin_init(c); intel_imc_init(c); } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 5a99f993e639..e0a572472052 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -33,6 +33,7 @@ #include <asm/nmi.h> #include <clocksource/hyperv_timer.h> #include <asm/numa.h> +#include <asm/coco.h> /* Is Linux running as the root partition? */ bool hv_root_partition; @@ -344,6 +345,11 @@ static void __init ms_hyperv_init_platform(void) */ swiotlb_force = SWIOTLB_FORCE; #endif + /* Isolation VMs are unenlightened SEV-based VMs, thus this check: */ + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { + if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE) + cc_set_vendor(CC_VENDOR_HYPERV); + } } if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) { diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 21d1f062895a..4143b1e4c5c6 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -26,6 +26,7 @@ struct cpuid_bit { static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 8dea01ffc5c1..19821f027cb3 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -612,6 +612,13 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags) fpu_inherit_perms(dst_fpu); fpregs_unlock(); + /* + * Children never inherit PASID state. + * Force it to have its init value: + */ + if (use_xsave()) + dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID; + trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index de563db9cdcd..4f5ecbbaae77 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -126,7 +126,7 @@ static bool __head check_la57_support(unsigned long physaddr) } #endif -static unsigned long sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd) +static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd) { unsigned long vaddr, vaddr_end; int i; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 4bce802d25fb..e73f7df362f5 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -292,7 +292,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", reason, smp_processor_id()); - pr_emerg("Do you have a strange power saving mode enabled?\n"); if (unknown_nmi_panic || panic_on_unrecovered_nmi) nmi_panic(regs, "NMI: Not continuing"); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 81d8ef036637..e131d71b3cae 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -765,8 +765,11 @@ void stop_this_cpu(void *dummy) * without the encryption bit, they don't race each other when flushed * and potentially end up with the wrong entry being committed to * memory. + * + * Test the CPUID bit directly because the machine might've cleared + * X86_FEATURE_SME due to cmdline options. */ - if (boot_cpu_has(X86_FEATURE_SME)) + if (cpuid_eax(0x8000001f) & BIT(0)) native_wbinvd(); for (;;) { /* diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 8143693a7ea6..2e37862e3a8c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -39,6 +39,7 @@ #include <linux/io.h> #include <linux/hardirq.h> #include <linux/atomic.h> +#include <linux/ioasid.h> #include <asm/stacktrace.h> #include <asm/processor.h> @@ -559,6 +560,57 @@ static bool fixup_iopl_exception(struct pt_regs *regs) return true; } +/* + * The unprivileged ENQCMD instruction generates #GPs if the + * IA32_PASID MSR has not been populated. If possible, populate + * the MSR from a PASID previously allocated to the mm. + */ +static bool try_fixup_enqcmd_gp(void) +{ +#ifdef CONFIG_IOMMU_SVA + u32 pasid; + + /* + * MSR_IA32_PASID is managed using XSAVE. Directly + * writing to the MSR is only possible when fpregs + * are valid and the fpstate is not. This is + * guaranteed when handling a userspace exception + * in *before* interrupts are re-enabled. + */ + lockdep_assert_irqs_disabled(); + + /* + * Hardware without ENQCMD will not generate + * #GPs that can be fixed up here. + */ + if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) + return false; + + pasid = current->mm->pasid; + + /* + * If the mm has not been allocated a + * PASID, the #GP can not be fixed up. + */ + if (!pasid_valid(pasid)) + return false; + + /* + * Did this thread already have its PASID activated? + * If so, the #GP must be from something else. + */ + if (current->pasid_activated) + return false; + + wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID); + current->pasid_activated = 1; + + return true; +#else + return false; +#endif +} + DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) { char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; @@ -567,6 +619,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) unsigned long gp_addr; int ret; + if (user_mode(regs) && try_fixup_enqcmd_gp()) + return; + cond_local_irq_enable(regs); if (static_cpu_has(X86_FEATURE_UMIP)) { diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 7d20c1d34a3c..e84ee5cdbd8c 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -129,6 +129,11 @@ struct x86_cpuinit_ops x86_cpuinit = { static void default_nmi_init(void) { }; +static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { } +static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; } +static bool enc_tlb_flush_required_noop(bool enc) { return false; } +static bool enc_cache_flush_required_noop(void) { return false; } + struct x86_platform_ops x86_platform __ro_after_init = { .calibrate_cpu = native_calibrate_cpu_early, .calibrate_tsc = native_calibrate_tsc, @@ -138,9 +143,16 @@ struct x86_platform_ops x86_platform __ro_after_init = { .is_untracked_pat_range = is_ISA_range, .nmi_init = default_nmi_init, .get_nmi_reason = default_get_nmi_reason, - .save_sched_clock_state = tsc_save_sched_clock_state, - .restore_sched_clock_state = tsc_restore_sched_clock_state, + .save_sched_clock_state = tsc_save_sched_clock_state, + .restore_sched_clock_state = tsc_restore_sched_clock_state, .hyper.pin_vcpu = x86_op_int_noop, + + .guest = { + .enc_status_change_prepare = enc_status_change_prepare_noop, + .enc_status_change_finish = enc_status_change_finish_noop, + .enc_tlb_flush_required = enc_tlb_flush_required_noop, + .enc_cache_flush_required = enc_cache_flush_required_noop, + }, }; EXPORT_SYMBOL_GPL(x86_platform); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5719d8cfdbd9..e86d610dc6b7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -429,8 +429,23 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); FOP_END /* Special case for SETcc - 1 instruction per cc */ + +/* + * Depending on .config the SETcc functions look like: + * + * SETcc %al [3 bytes] + * RET [1 byte] + * INT3 [1 byte; CONFIG_SLS] + * + * Which gives possible sizes 4 or 5. When rounded up to the + * next power-of-two alignment they become 4 or 8. + */ +#define SETCC_LENGTH (4 + IS_ENABLED(CONFIG_SLS)) +#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS)) +static_assert(SETCC_LENGTH <= SETCC_ALIGN); + #define FOP_SETCC(op) \ - ".align 4 \n\t" \ + ".align " __stringify(SETCC_ALIGN) " \n\t" \ ".type " #op ", @function \n\t" \ #op ": \n\t" \ #op " %al \n\t" \ @@ -1047,7 +1062,7 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt) static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) { u8 rc; - void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); + void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf); flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; asm("push %[flags]; popf; " CALL_NOSPEC diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 59cf2343f3d9..d0d7b9bc6cad 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -27,8 +27,7 @@ * Output: * rax original destination */ -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK(memcpy) +SYM_FUNC_START(__memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS @@ -40,11 +39,12 @@ SYM_FUNC_START_WEAK(memcpy) movl %edx, %ecx rep movsb RET -SYM_FUNC_END(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) -EXPORT_SYMBOL(memcpy) +SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +EXPORT_SYMBOL(memcpy) + /* * memcpy_erms() - enhanced fast string memcpy. This is faster and * simpler than memcpy. Use memcpy_erms when possible. diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 50ea390df712..d83cba364e31 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -24,7 +24,6 @@ * Output: * rax: dest */ -SYM_FUNC_START_WEAK(memmove) SYM_FUNC_START(__memmove) mov %rdi, %rax @@ -207,6 +206,7 @@ SYM_FUNC_START(__memmove) 13: RET SYM_FUNC_END(__memmove) -SYM_FUNC_END_ALIAS(memmove) EXPORT_SYMBOL(__memmove) + +SYM_FUNC_ALIAS_WEAK(memmove, __memmove) EXPORT_SYMBOL(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index d624f2bc42f1..fc9ffd3ff3b2 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -17,7 +17,6 @@ * * rax original destination */ -SYM_FUNC_START_WEAK(memset) SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended @@ -42,10 +41,11 @@ SYM_FUNC_START(__memset) movq %r9,%rax RET SYM_FUNC_END(__memset) -SYM_FUNC_END_ALIAS(memset) -EXPORT_SYMBOL(memset) EXPORT_SYMBOL(__memset) +SYM_FUNC_ALIAS_WEAK(memset, __memset) +EXPORT_SYMBOL(memset) + /* * ISO C memset - set a memory block to a byte value. This function uses * enhanced rep stosb to override the fast string function. diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index ec31f5b60323..d12d1358f96d 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -690,7 +690,10 @@ AVXcode: 2 45: vpsrlvd/q Vx,Hx,Wx (66),(v) 46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) 47: vpsllvd/q Vx,Hx,Wx (66),(v) -# Skip 0x48-0x4b +# Skip 0x48 +49: TILERELEASE (v1),(000),(11B) | LDTILECFG Mtc (v1)(000) | STTILECFG Mtc (66),(v1),(000) | TILEZERO Vt (F2),(v1),(11B) +# Skip 0x4a +4b: TILELOADD Vt,Wsm (F2),(v1) | TILELOADDT1 Vt,Wsm (66),(v1) | TILESTORED Wsm,Vt (F3),(v) 4c: vrcp14ps/d Vpd,Wpd (66),(ev) 4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) 4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) @@ -705,7 +708,10 @@ AVXcode: 2 59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) 5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) 5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) -# Skip 0x5c-0x61 +5c: TDPBF16PS Vt,Wt,Ht (F3),(v1) +# Skip 0x5d +5e: TDPBSSD Vt,Wt,Ht (F2),(v1) | TDPBSUD Vt,Wt,Ht (F3),(v1) | TDPBUSD Vt,Wt,Ht (66),(v1) | TDPBUUD Vt,Wt,Ht (v1) +# Skip 0x5f-0x61 62: vpexpandb/w Vx,Wx (66),(ev) 63: vpcompressb/w Wx,Vx (66),(ev) 64: vpblendmd/q Vx,Hx,Wx (66),(ev) @@ -822,9 +828,9 @@ AVXcode: 3 05: vpermilpd Vx,Wx,Ib (66),(v) 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) 07: -08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) +08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) | vrndscaleph Vx,Wx,Ib (evo) 09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) -0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) +0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) | vrndscalesh Vx,Hx,Wx,Ib (evo) 0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) 0c: vblendps Vx,Hx,Wx,Ib (66) 0d: vblendpd Vx,Hx,Wx,Ib (66) @@ -846,8 +852,8 @@ AVXcode: 3 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) 23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) 25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) -26: vgetmantps/d Vx,Wx,Ib (66),(ev) -27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) +26: vgetmantps/d Vx,Wx,Ib (66),(ev) | vgetmantph Vx,Wx,Ib (ev) +27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) | vgetmantsh Vx,Hx,Wx,Ib (ev) 30: kshiftrb/w Vk,Uk,Ib (66),(v) 31: kshiftrd/q Vk,Uk,Ib (66),(v) 32: kshiftlb/w Vk,Uk,Ib (66),(v) @@ -871,23 +877,102 @@ AVXcode: 3 51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) 54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) 55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) -56: vreduceps/d Vx,Wx,Ib (66),(ev) -57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) +56: vreduceps/d Vx,Wx,Ib (66),(ev) | vreduceph Vx,Wx,Ib (ev) +57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) | vreducesh Vx,Hx,Wx,Ib (ev) 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) -66: vfpclassps/d Vk,Wx,Ib (66),(ev) -67: vfpclassss/d Vk,Wx,Ib (66),(ev) +66: vfpclassps/d Vk,Wx,Ib (66),(ev) | vfpclassph Vx,Wx,Ib (ev) +67: vfpclassss/d Vk,Wx,Ib (66),(ev) | vfpclasssh Vx,Wx,Ib (ev) 70: vpshldw Vx,Hx,Wx,Ib (66),(ev) 71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev) 72: vpshrdw Vx,Hx,Wx,Ib (66),(ev) 73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev) +c2: vcmpph Vx,Hx,Wx,Ib (ev) | vcmpsh Vx,Hx,Wx,Ib (F3),(ev) cc: sha1rnds4 Vdq,Wdq,Ib ce: vgf2p8affineqb Vx,Wx,Ib (66) cf: vgf2p8affineinvqb Vx,Wx,Ib (66) df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) -f0: RORX Gy,Ey,Ib (F2),(v) +f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B) +EndTable + +Table: EVEX map 5 +Referrer: +AVXcode: 5 +10: vmovsh Vx,Hx,Wx (F3),(ev) | vmovsh Vx,Wx (F3),(ev) +11: vmovsh Wx,Hx,Vx (F3),(ev) | vmovsh Wx,Vx (F3),(ev) +1d: vcvtps2phx Vx,Wx (66),(ev) | vcvtss2sh Vx,Hx,Wx (ev) +2a: vcvtsi2sh Vx,Hx,Wx (F3),(ev) +2c: vcvttsh2si Vx,Wx (F3),(ev) +2d: vcvtsh2si Vx,Wx (F3),(ev) +2e: vucomish Vx,Wx (ev) +2f: vcomish Vx,Wx (ev) +51: vsqrtph Vx,Wx (ev) | vsqrtsh Vx,Hx,Wx (F3),(ev) +58: vaddph Vx,Hx,Wx (ev) | vaddsh Vx,Hx,Wx (F3),(ev) +59: vmulph Vx,Hx,Wx (ev) | vmulsh Vx,Hx,Wx (F3),(ev) +5a: vcvtpd2ph Vx,Wx (66),(ev) | vcvtph2pd Vx,Wx (ev) | vcvtsd2sh Vx,Hx,Wx (F2),(ev) | vcvtsh2sd Vx,Hx,Wx (F3),(ev) +5b: vcvtdq2ph Vx,Wx (ev) | vcvtph2dq Vx,Wx (66),(ev) | vcvtqq2ph Vx,Wx (ev) | vcvttph2dq Vx,Wx (F3),(ev) +5c: vsubph Vx,Hx,Wx (ev) | vsubsh Vx,Hx,Wx (F3),(ev) +5d: vminph Vx,Hx,Wx (ev) | vminsh Vx,Hx,Wx (F3),(ev) +5e: vdivph Vx,Hx,Wx (ev) | vdivsh Vx,Hx,Wx (F3),(ev) +5f: vmaxph Vx,Hx,Wx (ev) | vmaxsh Vx,Hx,Wx (F3),(ev) +6e: vmovw Vx,Wx (66),(ev) +78: vcvttph2udq Vx,Wx (ev) | vcvttph2uqq Vx,Wx (66),(ev) | vcvttsh2usi Vx,Wx (F3),(ev) +79: vcvtph2udq Vx,Wx (ev) | vcvtph2uqq Vx,Wx (66),(ev) | vcvtsh2usi Vx,Wx (F3),(ev) +7a: vcvttph2qq Vx,Wx (66),(ev) | vcvtudq2ph Vx,Wx (F2),(ev) | vcvtuqq2ph Vx,Wx (F2),(ev) +7b: vcvtph2qq Vx,Wx (66),(ev) | vcvtusi2sh Vx,Hx,Wx (F3),(ev) +7c: vcvttph2uw Vx,Wx (ev) | vcvttph2w Vx,Wx (66),(ev) +7d: vcvtph2uw Vx,Wx (ev) | vcvtph2w Vx,Wx (66),(ev) | vcvtuw2ph Vx,Wx (F2),(ev) | vcvtw2ph Vx,Wx (F3),(ev) +7e: vmovw Wx,Vx (66),(ev) +EndTable + +Table: EVEX map 6 +Referrer: +AVXcode: 6 +13: vcvtph2psx Vx,Wx (66),(ev) | vcvtsh2ss Vx,Hx,Wx (ev) +2c: vscalefph Vx,Hx,Wx (66),(ev) +2d: vscalefsh Vx,Hx,Wx (66),(ev) +42: vgetexpph Vx,Wx (66),(ev) +43: vgetexpsh Vx,Hx,Wx (66),(ev) +4c: vrcpph Vx,Wx (66),(ev) +4d: vrcpsh Vx,Hx,Wx (66),(ev) +4e: vrsqrtph Vx,Wx (66),(ev) +4f: vrsqrtsh Vx,Hx,Wx (66),(ev) +56: vfcmaddcph Vx,Hx,Wx (F2),(ev) | vfmaddcph Vx,Hx,Wx (F3),(ev) +57: vfcmaddcsh Vx,Hx,Wx (F2),(ev) | vfmaddcsh Vx,Hx,Wx (F3),(ev) +96: vfmaddsub132ph Vx,Hx,Wx (66),(ev) +97: vfmsubadd132ph Vx,Hx,Wx (66),(ev) +98: vfmadd132ph Vx,Hx,Wx (66),(ev) +99: vfmadd132sh Vx,Hx,Wx (66),(ev) +9a: vfmsub132ph Vx,Hx,Wx (66),(ev) +9b: vfmsub132sh Vx,Hx,Wx (66),(ev) +9c: vfnmadd132ph Vx,Hx,Wx (66),(ev) +9d: vfnmadd132sh Vx,Hx,Wx (66),(ev) +9e: vfnmsub132ph Vx,Hx,Wx (66),(ev) +9f: vfnmsub132sh Vx,Hx,Wx (66),(ev) +a6: vfmaddsub213ph Vx,Hx,Wx (66),(ev) +a7: vfmsubadd213ph Vx,Hx,Wx (66),(ev) +a8: vfmadd213ph Vx,Hx,Wx (66),(ev) +a9: vfmadd213sh Vx,Hx,Wx (66),(ev) +aa: vfmsub213ph Vx,Hx,Wx (66),(ev) +ab: vfmsub213sh Vx,Hx,Wx (66),(ev) +ac: vfnmadd213ph Vx,Hx,Wx (66),(ev) +ad: vfnmadd213sh Vx,Hx,Wx (66),(ev) +ae: vfnmsub213ph Vx,Hx,Wx (66),(ev) +af: vfnmsub213sh Vx,Hx,Wx (66),(ev) +b6: vfmaddsub231ph Vx,Hx,Wx (66),(ev) +b7: vfmsubadd231ph Vx,Hx,Wx (66),(ev) +b8: vfmadd231ph Vx,Hx,Wx (66),(ev) +b9: vfmadd231sh Vx,Hx,Wx (66),(ev) +ba: vfmsub231ph Vx,Hx,Wx (66),(ev) +bb: vfmsub231sh Vx,Hx,Wx (66),(ev) +bc: vfnmadd231ph Vx,Hx,Wx (66),(ev) +bd: vfnmadd231sh Vx,Hx,Wx (66),(ev) +be: vfnmsub231ph Vx,Hx,Wx (66),(ev) +bf: vfnmsub231sh Vx,Hx,Wx (66),(ev) +d6: vfcmulcph Vx,Hx,Wx (F2),(ev) | vfmulcph Vx,Hx,Wx (F3),(ev) +d7: vfcmulcsh Vx,Hx,Wx (F2),(ev) | vfmulcsh Vx,Hx,Wx (F3),(ev) EndTable GrpTable: Grp1 @@ -970,7 +1055,7 @@ GrpTable: Grp7 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) +5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) | CLUI (F3),(110),(11B) | SERIALIZE (000),(11B) | STUI (F3),(111),(11B) | TESTUI (F3)(101)(11B) | UIRET (F3),(100),(11B) | XRESLDTRK (F2),(000),(11B) | XSUSLDTRK (F2),(001),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -987,7 +1072,7 @@ GrpTable: Grp9 3: xrstors 4: xsavec 5: xsaves -6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | SENDUIPI Gq (F3) 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) EndTable diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 2b2d018ea345..6169053c2854 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -177,25 +177,6 @@ void __init sme_map_bootdata(char *real_mode_data) __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true); } -void __init sme_early_init(void) -{ - unsigned int i; - - if (!sme_me_mask) - return; - - early_pmd_flags = __sme_set(early_pmd_flags); - - __supported_pte_mask = __sme_set(__supported_pte_mask); - - /* Update the protection map with memory encryption mask */ - for (i = 0; i < ARRAY_SIZE(protection_map); i++) - protection_map[i] = pgprot_encrypted(protection_map[i]); - - if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) - swiotlb_force = SWIOTLB_FORCE; -} - void __init sev_setup_arch(void) { phys_addr_t total_mem = memblock_phys_mem_size(); @@ -256,7 +237,17 @@ static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot) return pfn; } -void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc) +static bool amd_enc_tlb_flush_required(bool enc) +{ + return true; +} + +static bool amd_enc_cache_flush_required(void) +{ + return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT); +} + +static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) { #ifdef CONFIG_PARAVIRT unsigned long sz = npages << PAGE_SHIFT; @@ -287,6 +278,19 @@ void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc) #endif } +static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) +{ +} + +/* Return true unconditionally: return value doesn't matter for the SEV side */ +static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc) +{ + if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + enc_dec_hypercall(vaddr, npages, enc); + + return true; +} + static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) { pgprot_t old_prot, new_prot; @@ -392,7 +396,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr, ret = 0; - notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); + early_set_mem_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); out: __flush_tlb_all(); return ret; @@ -410,7 +414,31 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) { - notify_range_enc_status_changed(vaddr, npages, enc); + enc_dec_hypercall(vaddr, npages, enc); +} + +void __init sme_early_init(void) +{ + unsigned int i; + + if (!sme_me_mask) + return; + + early_pmd_flags = __sme_set(early_pmd_flags); + + __supported_pte_mask = __sme_set(__supported_pte_mask); + + /* Update the protection map with memory encryption mask */ + for (i = 0; i < ARRAY_SIZE(protection_map); i++) + protection_map[i] = pgprot_encrypted(protection_map[i]); + + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + swiotlb_force = SWIOTLB_FORCE; + + x86_platform.guest.enc_status_change_prepare = amd_enc_status_change_prepare; + x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish; + x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required; + x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required; } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 3f0abb403340..b43bc24d2bb6 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -44,6 +44,7 @@ #include <asm/setup.h> #include <asm/sections.h> #include <asm/cmdline.h> +#include <asm/coco.h> #include "mm_internal.h" @@ -565,8 +566,7 @@ void __init sme_enable(struct boot_params *bp) } else { /* SEV state cannot be controlled by a command line option */ sme_me_mask = me_mask; - physical_mask &= ~sme_me_mask; - return; + goto out; } /* @@ -600,6 +600,10 @@ void __init sme_enable(struct boot_params *bp) sme_me_mask = 0; else sme_me_mask = active_by_default ? me_mask : 0; - - physical_mask &= ~sme_me_mask; +out: + if (sme_me_mask) { + physical_mask &= ~sme_me_mask; + cc_set_vendor(CC_VENDOR_AMD); + cc_set_mask(sme_me_mask); + } } diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index b4072115c8ef..abf5ed76e4b7 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1989,6 +1989,7 @@ int set_memory_global(unsigned long addr, int numpages) */ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) { + pgprot_t empty = __pgprot(0); struct cpa_data cpa; int ret; @@ -1999,18 +2000,20 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) memset(&cpa, 0, sizeof(cpa)); cpa.vaddr = &addr; cpa.numpages = numpages; - cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0); - cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC); + cpa.mask_set = enc ? pgprot_encrypted(empty) : pgprot_decrypted(empty); + cpa.mask_clr = enc ? pgprot_decrypted(empty) : pgprot_encrypted(empty); cpa.pgd = init_mm.pgd; /* Must avoid aliasing mappings in the highmem code */ kmap_flush_unused(); vm_unmap_aliases(); - /* - * Before changing the encryption attribute, we need to flush caches. - */ - cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT)); + /* Flush the caches as needed before changing the encryption attribute. */ + if (x86_platform.guest.enc_tlb_flush_required(enc)) + cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required()); + + /* Notify hypervisor that we are about to set/clr encryption attribute. */ + x86_platform.guest.enc_status_change_prepare(addr, numpages, enc); ret = __change_page_attr_set_clr(&cpa, 1); @@ -2023,11 +2026,11 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) */ cpa_flush(&cpa, 0); - /* - * Notify hypervisor that a given memory range is mapped encrypted - * or decrypted. - */ - notify_range_enc_status_changed(addr, numpages, enc); + /* Notify hypervisor that we have successfully set/clr encryption attribute. */ + if (!ret) { + if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc)) + ret = -EIO; + } return ret; } @@ -2121,12 +2124,6 @@ int set_pages_array_wc(struct page **pages, int numpages) } EXPORT_SYMBOL(set_pages_array_wc); -int set_pages_array_wt(struct page **pages, int numpages) -{ - return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT); -} -EXPORT_SYMBOL_GPL(set_pages_array_wt); - int set_pages_wb(struct page *page, int numpages) { unsigned long addr = (unsigned long)page_address(page); diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 40d6a06e41c8..ead7e5b3a975 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -8,6 +8,7 @@ endmenu config UML_X86 def_bool y + select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32 config 64BIT bool "64-bit kernel" if "$(SUBARCH)" = "x86" diff --git a/block/blk-core.c b/block/blk-core.c index 1039515c99d6..779b4a1f66ac 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -50,6 +50,7 @@ #include "blk-mq-sched.h" #include "blk-pm.h" #include "blk-throttle.h" +#include "blk-rq-qos.h" struct dentry *blk_debugfs_root; @@ -314,6 +315,9 @@ void blk_cleanup_queue(struct request_queue *q) */ blk_freeze_queue(q); + /* cleanup rq qos structures for queue without disk */ + rq_qos_exit(q); + blk_queue_flag_set(QUEUE_FLAG_DEAD, q); blk_sync_queue(q); diff --git a/certs/system_keyring.c b/certs/system_keyring.c index 692365dee2bd..05b66ce9d1c9 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -22,6 +22,9 @@ static struct key *builtin_trusted_keys; #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING static struct key *secondary_trusted_keys; #endif +#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING +static struct key *machine_trusted_keys; +#endif #ifdef CONFIG_INTEGRITY_PLATFORM_KEYRING static struct key *platform_trusted_keys; #endif @@ -86,11 +89,50 @@ static __init struct key_restriction *get_builtin_and_secondary_restriction(void if (!restriction) panic("Can't allocate secondary trusted keyring restriction\n"); - restriction->check = restrict_link_by_builtin_and_secondary_trusted; + if (IS_ENABLED(CONFIG_INTEGRITY_MACHINE_KEYRING)) + restriction->check = restrict_link_by_builtin_secondary_and_machine; + else + restriction->check = restrict_link_by_builtin_and_secondary_trusted; return restriction; } #endif +#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING +void __init set_machine_trusted_keys(struct key *keyring) +{ + machine_trusted_keys = keyring; + + if (key_link(secondary_trusted_keys, machine_trusted_keys) < 0) + panic("Can't link (machine) trusted keyrings\n"); +} + +/** + * restrict_link_by_builtin_secondary_and_machine - Restrict keyring addition. + * @dest_keyring: Keyring being linked to. + * @type: The type of key being added. + * @payload: The payload of the new key. + * @restrict_key: A ring of keys that can be used to vouch for the new cert. + * + * Restrict the addition of keys into a keyring based on the key-to-be-added + * being vouched for by a key in either the built-in, the secondary, or + * the machine keyrings. + */ +int restrict_link_by_builtin_secondary_and_machine( + struct key *dest_keyring, + const struct key_type *type, + const union key_payload *payload, + struct key *restrict_key) +{ + if (machine_trusted_keys && type == &key_type_keyring && + dest_keyring == secondary_trusted_keys && + payload == &machine_trusted_keys->payload) + /* Allow the machine keyring to be added to the secondary */ + return 0; + + return restrict_link_by_builtin_and_secondary_trusted(dest_keyring, type, + payload, restrict_key); +} +#endif /* * Create the trusted keyrings diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig index 1f1f004dc757..460bc5d0a828 100644 --- a/crypto/asymmetric_keys/Kconfig +++ b/crypto/asymmetric_keys/Kconfig @@ -22,18 +22,6 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE appropriate hash algorithms (such as SHA-1) must be available. ENOPKG will be reported if the requisite algorithm is unavailable. -config ASYMMETRIC_TPM_KEY_SUBTYPE - tristate "Asymmetric TPM backed private key subtype" - depends on TCG_TPM - depends on TRUSTED_KEYS - select CRYPTO_HMAC - select CRYPTO_SHA1 - select CRYPTO_HASH_INFO - help - This option provides support for TPM backed private key type handling. - Operations such as sign, verify, encrypt, decrypt are performed by - the TPM after the private key is loaded. - config X509_CERTIFICATE_PARSER tristate "X.509 certificate parser" depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE @@ -54,15 +42,6 @@ config PKCS8_PRIVATE_KEY_PARSER private key data and provides the ability to instantiate a crypto key from that data. -config TPM_KEY_PARSER - tristate "TPM private key parser" - depends on ASYMMETRIC_TPM_KEY_SUBTYPE - select ASN1 - help - This option provides support for parsing TPM format blobs for - private key data and provides the ability to instantiate a crypto key - from that data. - config PKCS7_MESSAGE_PARSER tristate "PKCS#7 message parser" depends on X509_CERTIFICATE_PARSER diff --git a/crypto/asymmetric_keys/Makefile b/crypto/asymmetric_keys/Makefile index 28b91adba2ae..c38424f55b08 100644 --- a/crypto/asymmetric_keys/Makefile +++ b/crypto/asymmetric_keys/Makefile @@ -11,7 +11,6 @@ asymmetric_keys-y := \ signature.o obj-$(CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE) += public_key.o -obj-$(CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE) += asym_tpm.o # # X.509 Certificate handling @@ -75,14 +74,3 @@ verify_signed_pefile-y := \ $(obj)/mscode_parser.o: $(obj)/mscode.asn1.h $(obj)/mscode.asn1.h $(obj)/mscode.asn1.o: $(obj)/mscode.asn1.c $(obj)/mscode.asn1.h - -# -# TPM private key parsing -# -obj-$(CONFIG_TPM_KEY_PARSER) += tpm_key_parser.o -tpm_key_parser-y := \ - tpm.asn1.o \ - tpm_parser.o - -$(obj)/tpm_parser.o: $(obj)/tpm.asn1.h -$(obj)/tpm.asn1.o: $(obj)/tpm.asn1.c $(obj)/tpm.asn1.h diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c deleted file mode 100644 index 0959613560b9..000000000000 --- a/crypto/asymmetric_keys/asym_tpm.c +++ /dev/null @@ -1,957 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define pr_fmt(fmt) "ASYM-TPM: "fmt -#include <linux/slab.h> -#include <linux/module.h> -#include <linux/export.h> -#include <linux/kernel.h> -#include <linux/seq_file.h> -#include <linux/scatterlist.h> -#include <linux/tpm.h> -#include <linux/tpm_command.h> -#include <crypto/akcipher.h> -#include <crypto/hash.h> -#include <crypto/sha1.h> -#include <asm/unaligned.h> -#include <keys/asymmetric-subtype.h> -#include <keys/trusted_tpm.h> -#include <crypto/asym_tpm_subtype.h> -#include <crypto/public_key.h> - -#define TPM_ORD_FLUSHSPECIFIC 186 -#define TPM_ORD_LOADKEY2 65 -#define TPM_ORD_UNBIND 30 -#define TPM_ORD_SIGN 60 - -#define TPM_RT_KEY 0x00000001 - -/* - * Load a TPM key from the blob provided by userspace - */ -static int tpm_loadkey2(struct tpm_buf *tb, - uint32_t keyhandle, unsigned char *keyauth, - const unsigned char *keyblob, int keybloblen, - uint32_t *newhandle) -{ - unsigned char nonceodd[TPM_NONCE_SIZE]; - unsigned char enonce[TPM_NONCE_SIZE]; - unsigned char authdata[SHA1_DIGEST_SIZE]; - uint32_t authhandle = 0; - unsigned char cont = 0; - uint32_t ordinal; - int ret; - - ordinal = htonl(TPM_ORD_LOADKEY2); - - /* session for loading the key */ - ret = oiap(tb, &authhandle, enonce); - if (ret < 0) { - pr_info("oiap failed (%d)\n", ret); - return ret; - } - - /* generate odd nonce */ - ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE); - if (ret < 0) { - pr_info("tpm_get_random failed (%d)\n", ret); - return ret; - } - - /* calculate authorization HMAC value */ - ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce, - nonceodd, cont, sizeof(uint32_t), &ordinal, - keybloblen, keyblob, 0, 0); - if (ret < 0) - return ret; - - /* build the request buffer */ - tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_LOADKEY2); - tpm_buf_append_u32(tb, keyhandle); - tpm_buf_append(tb, keyblob, keybloblen); - tpm_buf_append_u32(tb, authhandle); - tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE); - tpm_buf_append_u8(tb, cont); - tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE); - - ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE); - if (ret < 0) { - pr_info("authhmac failed (%d)\n", ret); - return ret; - } - - ret = TSS_checkhmac1(tb->data, ordinal, nonceodd, keyauth, - SHA1_DIGEST_SIZE, 0, 0); - if (ret < 0) { - pr_info("TSS_checkhmac1 failed (%d)\n", ret); - return ret; - } - - *newhandle = LOAD32(tb->data, TPM_DATA_OFFSET); - return 0; -} - -/* - * Execute the FlushSpecific TPM command - */ -static int tpm_flushspecific(struct tpm_buf *tb, uint32_t handle) -{ - tpm_buf_reset(tb, TPM_TAG_RQU_COMMAND, TPM_ORD_FLUSHSPECIFIC); - tpm_buf_append_u32(tb, handle); - tpm_buf_append_u32(tb, TPM_RT_KEY); - - return trusted_tpm_send(tb->data, MAX_BUF_SIZE); -} - -/* - * Decrypt a blob provided by userspace using a specific key handle. - * The handle is a well known handle or previously loaded by e.g. LoadKey2 - */ -static int tpm_unbind(struct tpm_buf *tb, - uint32_t keyhandle, unsigned char *keyauth, - const unsigned char *blob, uint32_t bloblen, - void *out, uint32_t outlen) -{ - unsigned char nonceodd[TPM_NONCE_SIZE]; - unsigned char enonce[TPM_NONCE_SIZE]; - unsigned char authdata[SHA1_DIGEST_SIZE]; - uint32_t authhandle = 0; - unsigned char cont = 0; - uint32_t ordinal; - uint32_t datalen; - int ret; - - ordinal = htonl(TPM_ORD_UNBIND); - datalen = htonl(bloblen); - - /* session for loading the key */ - ret = oiap(tb, &authhandle, enonce); - if (ret < 0) { - pr_info("oiap failed (%d)\n", ret); - return ret; - } - - /* generate odd nonce */ - ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE); - if (ret < 0) { - pr_info("tpm_get_random failed (%d)\n", ret); - return ret; - } - - /* calculate authorization HMAC value */ - ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce, - nonceodd, cont, sizeof(uint32_t), &ordinal, - sizeof(uint32_t), &datalen, - bloblen, blob, 0, 0); - if (ret < 0) - return ret; - - /* build the request buffer */ - tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_UNBIND); - tpm_buf_append_u32(tb, keyhandle); - tpm_buf_append_u32(tb, bloblen); - tpm_buf_append(tb, blob, bloblen); - tpm_buf_append_u32(tb, authhandle); - tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE); - tpm_buf_append_u8(tb, cont); - tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE); - - ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE); - if (ret < 0) { - pr_info("authhmac failed (%d)\n", ret); - return ret; - } - - datalen = LOAD32(tb->data, TPM_DATA_OFFSET); - - ret = TSS_checkhmac1(tb->data, ordinal, nonceodd, - keyauth, SHA1_DIGEST_SIZE, - sizeof(uint32_t), TPM_DATA_OFFSET, - datalen, TPM_DATA_OFFSET + sizeof(uint32_t), - 0, 0); - if (ret < 0) { - pr_info("TSS_checkhmac1 failed (%d)\n", ret); - return ret; - } - - memcpy(out, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t), - min(outlen, datalen)); - - return datalen; -} - -/* - * Sign a blob provided by userspace (that has had the hash function applied) - * using a specific key handle. The handle is assumed to have been previously - * loaded by e.g. LoadKey2. - * - * Note that the key signature scheme of the used key should be set to - * TPM_SS_RSASSAPKCS1v15_DER. This allows the hashed input to be of any size - * up to key_length_in_bytes - 11 and not be limited to size 20 like the - * TPM_SS_RSASSAPKCS1v15_SHA1 signature scheme. - */ -static int tpm_sign(struct tpm_buf *tb, - uint32_t keyhandle, unsigned char *keyauth, - const unsigned char *blob, uint32_t bloblen, - void *out, uint32_t outlen) -{ - unsigned char nonceodd[TPM_NONCE_SIZE]; - unsigned char enonce[TPM_NONCE_SIZE]; - unsigned char authdata[SHA1_DIGEST_SIZE]; - uint32_t authhandle = 0; - unsigned char cont = 0; - uint32_t ordinal; - uint32_t datalen; - int ret; - - ordinal = htonl(TPM_ORD_SIGN); - datalen = htonl(bloblen); - - /* session for loading the key */ - ret = oiap(tb, &authhandle, enonce); - if (ret < 0) { - pr_info("oiap failed (%d)\n", ret); - return ret; - } - - /* generate odd nonce */ - ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE); - if (ret < 0) { - pr_info("tpm_get_random failed (%d)\n", ret); - return ret; - } - - /* calculate authorization HMAC value */ - ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce, - nonceodd, cont, sizeof(uint32_t), &ordinal, - sizeof(uint32_t), &datalen, - bloblen, blob, 0, 0); - if (ret < 0) - return ret; - - /* build the request buffer */ - tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_SIGN); - tpm_buf_append_u32(tb, keyhandle); - tpm_buf_append_u32(tb, bloblen); - tpm_buf_append(tb, blob, bloblen); - tpm_buf_append_u32(tb, authhandle); - tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE); - tpm_buf_append_u8(tb, cont); - tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE); - - ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE); - if (ret < 0) { - pr_info("authhmac failed (%d)\n", ret); - return ret; - } - - datalen = LOAD32(tb->data, TPM_DATA_OFFSET); - - ret = TSS_checkhmac1(tb->data, ordinal, nonceodd, - keyauth, SHA1_DIGEST_SIZE, - sizeof(uint32_t), TPM_DATA_OFFSET, - datalen, TPM_DATA_OFFSET + sizeof(uint32_t), - 0, 0); - if (ret < 0) { - pr_info("TSS_checkhmac1 failed (%d)\n", ret); - return ret; - } - - memcpy(out, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t), - min(datalen, outlen)); - - return datalen; -} - -/* Room to fit two u32 zeros for algo id and parameters length. */ -#define SETKEY_PARAMS_SIZE (sizeof(u32) * 2) - -/* - * Maximum buffer size for the BER/DER encoded public key. The public key - * is of the form SEQUENCE { INTEGER n, INTEGER e } where n is a maximum 2048 - * bit key and e is usually 65537 - * The encoding overhead is: - * - max 4 bytes for SEQUENCE - * - max 4 bytes for INTEGER n type/length - * - 257 bytes of n - * - max 2 bytes for INTEGER e type/length - * - 3 bytes of e - * - 4+4 of zeros for set_pub_key parameters (SETKEY_PARAMS_SIZE) - */ -#define PUB_KEY_BUF_SIZE (4 + 4 + 257 + 2 + 3 + SETKEY_PARAMS_SIZE) - -/* - * Provide a part of a description of the key for /proc/keys. - */ -static void asym_tpm_describe(const struct key *asymmetric_key, - struct seq_file *m) -{ - struct tpm_key *tk = asymmetric_key->payload.data[asym_crypto]; - - if (!tk) - return; - - seq_printf(m, "TPM1.2/Blob"); -} - -static void asym_tpm_destroy(void *payload0, void *payload3) -{ - struct tpm_key *tk = payload0; - - if (!tk) - return; - - kfree(tk->blob); - tk->blob_len = 0; - - kfree(tk); -} - -/* How many bytes will it take to encode the length */ -static inline uint32_t definite_length(uint32_t len) -{ - if (len <= 127) - return 1; - if (len <= 255) - return 2; - return 3; -} - -static inline uint8_t *encode_tag_length(uint8_t *buf, uint8_t tag, - uint32_t len) -{ - *buf++ = tag; - - if (len <= 127) { - buf[0] = len; - return buf + 1; - } - - if (len <= 255) { - buf[0] = 0x81; - buf[1] = len; - return buf + 2; - } - - buf[0] = 0x82; - put_unaligned_be16(len, buf + 1); - return buf + 3; -} - -static uint32_t derive_pub_key(const void *pub_key, uint32_t len, uint8_t *buf) -{ - uint8_t *cur = buf; - uint32_t n_len = definite_length(len) + 1 + len + 1; - uint32_t e_len = definite_length(3) + 1 + 3; - uint8_t e[3] = { 0x01, 0x00, 0x01 }; - - /* SEQUENCE */ - cur = encode_tag_length(cur, 0x30, n_len + e_len); - /* INTEGER n */ - cur = encode_tag_length(cur, 0x02, len + 1); - cur[0] = 0x00; - memcpy(cur + 1, pub_key, len); - cur += len + 1; - cur = encode_tag_length(cur, 0x02, sizeof(e)); - memcpy(cur, e, sizeof(e)); - cur += sizeof(e); - /* Zero parameters to satisfy set_pub_key ABI. */ - memzero_explicit(cur, SETKEY_PARAMS_SIZE); - - return cur - buf; -} - -/* - * Determine the crypto algorithm name. - */ -static int determine_akcipher(const char *encoding, const char *hash_algo, - char alg_name[CRYPTO_MAX_ALG_NAME]) -{ - if (strcmp(encoding, "pkcs1") == 0) { - if (!hash_algo) { - strcpy(alg_name, "pkcs1pad(rsa)"); - return 0; - } - - if (snprintf(alg_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(rsa,%s)", - hash_algo) >= CRYPTO_MAX_ALG_NAME) - return -EINVAL; - - return 0; - } - - if (strcmp(encoding, "raw") == 0) { - strcpy(alg_name, "rsa"); - return 0; - } - - return -ENOPKG; -} - -/* - * Query information about a key. - */ -static int tpm_key_query(const struct kernel_pkey_params *params, - struct kernel_pkey_query *info) -{ - struct tpm_key *tk = params->key->payload.data[asym_crypto]; - int ret; - char alg_name[CRYPTO_MAX_ALG_NAME]; - struct crypto_akcipher *tfm; - uint8_t der_pub_key[PUB_KEY_BUF_SIZE]; - uint32_t der_pub_key_len; - int len; - - /* TPM only works on private keys, public keys still done in software */ - ret = determine_akcipher(params->encoding, params->hash_algo, alg_name); - if (ret < 0) - return ret; - - tfm = crypto_alloc_akcipher(alg_name, 0, 0); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len, - der_pub_key); - - ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len); - if (ret < 0) - goto error_free_tfm; - - len = crypto_akcipher_maxsize(tfm); - - info->key_size = tk->key_len; - info->max_data_size = tk->key_len / 8; - info->max_sig_size = len; - info->max_enc_size = len; - info->max_dec_size = tk->key_len / 8; - - info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT | - KEYCTL_SUPPORTS_DECRYPT | - KEYCTL_SUPPORTS_VERIFY | - KEYCTL_SUPPORTS_SIGN; - - ret = 0; -error_free_tfm: - crypto_free_akcipher(tfm); - pr_devel("<==%s() = %d\n", __func__, ret); - return ret; -} - -/* - * Encryption operation is performed with the public key. Hence it is done - * in software - */ -static int tpm_key_encrypt(struct tpm_key *tk, - struct kernel_pkey_params *params, - const void *in, void *out) -{ - char alg_name[CRYPTO_MAX_ALG_NAME]; - struct crypto_akcipher *tfm; - struct akcipher_request *req; - struct crypto_wait cwait; - struct scatterlist in_sg, out_sg; - uint8_t der_pub_key[PUB_KEY_BUF_SIZE]; - uint32_t der_pub_key_len; - int ret; - - pr_devel("==>%s()\n", __func__); - - ret = determine_akcipher(params->encoding, params->hash_algo, alg_name); - if (ret < 0) - return ret; - - tfm = crypto_alloc_akcipher(alg_name, 0, 0); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len, - der_pub_key); - - ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len); - if (ret < 0) - goto error_free_tfm; - - ret = -ENOMEM; - req = akcipher_request_alloc(tfm, GFP_KERNEL); - if (!req) - goto error_free_tfm; - - sg_init_one(&in_sg, in, params->in_len); - sg_init_one(&out_sg, out, params->out_len); - akcipher_request_set_crypt(req, &in_sg, &out_sg, params->in_len, - params->out_len); - crypto_init_wait(&cwait); - akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | - CRYPTO_TFM_REQ_MAY_SLEEP, - crypto_req_done, &cwait); - - ret = crypto_akcipher_encrypt(req); - ret = crypto_wait_req(ret, &cwait); - - if (ret == 0) - ret = req->dst_len; - - akcipher_request_free(req); -error_free_tfm: - crypto_free_akcipher(tfm); - pr_devel("<==%s() = %d\n", __func__, ret); - return ret; -} - -/* - * Decryption operation is performed with the private key in the TPM. - */ -static int tpm_key_decrypt(struct tpm_key *tk, - struct kernel_pkey_params *params, - const void *in, void *out) -{ - struct tpm_buf tb; - uint32_t keyhandle; - uint8_t srkauth[SHA1_DIGEST_SIZE]; - uint8_t keyauth[SHA1_DIGEST_SIZE]; - int r; - - pr_devel("==>%s()\n", __func__); - - if (params->hash_algo) - return -ENOPKG; - - if (strcmp(params->encoding, "pkcs1")) - return -ENOPKG; - - r = tpm_buf_init(&tb, 0, 0); - if (r) - return r; - - /* TODO: Handle a non-all zero SRK authorization */ - memset(srkauth, 0, sizeof(srkauth)); - - r = tpm_loadkey2(&tb, SRKHANDLE, srkauth, - tk->blob, tk->blob_len, &keyhandle); - if (r < 0) { - pr_devel("loadkey2 failed (%d)\n", r); - goto error; - } - - /* TODO: Handle a non-all zero key authorization */ - memset(keyauth, 0, sizeof(keyauth)); - - r = tpm_unbind(&tb, keyhandle, keyauth, - in, params->in_len, out, params->out_len); - if (r < 0) - pr_devel("tpm_unbind failed (%d)\n", r); - - if (tpm_flushspecific(&tb, keyhandle) < 0) - pr_devel("flushspecific failed (%d)\n", r); - -error: - tpm_buf_destroy(&tb); - pr_devel("<==%s() = %d\n", __func__, r); - return r; -} - -/* - * Hash algorithm OIDs plus ASN.1 DER wrappings [RFC4880 sec 5.2.2]. - */ -static const u8 digest_info_md5[] = { - 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08, - 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x02, 0x05, /* OID */ - 0x05, 0x00, 0x04, 0x10 -}; - -static const u8 digest_info_sha1[] = { - 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, - 0x2b, 0x0e, 0x03, 0x02, 0x1a, - 0x05, 0x00, 0x04, 0x14 -}; - -static const u8 digest_info_rmd160[] = { - 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, - 0x2b, 0x24, 0x03, 0x02, 0x01, - 0x05, 0x00, 0x04, 0x14 -}; - -static const u8 digest_info_sha224[] = { - 0x30, 0x2d, 0x30, 0x0d, 0x06, 0x09, - 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04, - 0x05, 0x00, 0x04, 0x1c -}; - -static const u8 digest_info_sha256[] = { - 0x30, 0x31, 0x30, 0x0d, 0x06, 0x09, - 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01, - 0x05, 0x00, 0x04, 0x20 -}; - -static const u8 digest_info_sha384[] = { - 0x30, 0x41, 0x30, 0x0d, 0x06, 0x09, - 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02, - 0x05, 0x00, 0x04, 0x30 -}; - -static const u8 digest_info_sha512[] = { - 0x30, 0x51, 0x30, 0x0d, 0x06, 0x09, - 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03, - 0x05, 0x00, 0x04, 0x40 -}; - -static const struct asn1_template { - const char *name; - const u8 *data; - size_t size; -} asn1_templates[] = { -#define _(X) { #X, digest_info_##X, sizeof(digest_info_##X) } - _(md5), - _(sha1), - _(rmd160), - _(sha256), - _(sha384), - _(sha512), - _(sha224), - { NULL } -#undef _ -}; - -static const struct asn1_template *lookup_asn1(const char *name) -{ - const struct asn1_template *p; - - for (p = asn1_templates; p->name; p++) - if (strcmp(name, p->name) == 0) - return p; - return NULL; -} - -/* - * Sign operation is performed with the private key in the TPM. - */ -static int tpm_key_sign(struct tpm_key *tk, - struct kernel_pkey_params *params, - const void *in, void *out) -{ - struct tpm_buf tb; - uint32_t keyhandle; - uint8_t srkauth[SHA1_DIGEST_SIZE]; - uint8_t keyauth[SHA1_DIGEST_SIZE]; - void *asn1_wrapped = NULL; - uint32_t in_len = params->in_len; - int r; - - pr_devel("==>%s()\n", __func__); - - if (strcmp(params->encoding, "pkcs1")) - return -ENOPKG; - - if (params->hash_algo) { - const struct asn1_template *asn1 = - lookup_asn1(params->hash_algo); - - if (!asn1) - return -ENOPKG; - - /* request enough space for the ASN.1 template + input hash */ - asn1_wrapped = kzalloc(in_len + asn1->size, GFP_KERNEL); - if (!asn1_wrapped) - return -ENOMEM; - - /* Copy ASN.1 template, then the input */ - memcpy(asn1_wrapped, asn1->data, asn1->size); - memcpy(asn1_wrapped + asn1->size, in, in_len); - - in = asn1_wrapped; - in_len += asn1->size; - } - - if (in_len > tk->key_len / 8 - 11) { - r = -EOVERFLOW; - goto error_free_asn1_wrapped; - } - - r = tpm_buf_init(&tb, 0, 0); - if (r) - goto error_free_asn1_wrapped; - - /* TODO: Handle a non-all zero SRK authorization */ - memset(srkauth, 0, sizeof(srkauth)); - - r = tpm_loadkey2(&tb, SRKHANDLE, srkauth, - tk->blob, tk->blob_len, &keyhandle); - if (r < 0) { - pr_devel("loadkey2 failed (%d)\n", r); - goto error_free_tb; - } - - /* TODO: Handle a non-all zero key authorization */ - memset(keyauth, 0, sizeof(keyauth)); - - r = tpm_sign(&tb, keyhandle, keyauth, in, in_len, out, params->out_len); - if (r < 0) - pr_devel("tpm_sign failed (%d)\n", r); - - if (tpm_flushspecific(&tb, keyhandle) < 0) - pr_devel("flushspecific failed (%d)\n", r); - -error_free_tb: - tpm_buf_destroy(&tb); -error_free_asn1_wrapped: - kfree(asn1_wrapped); - pr_devel("<==%s() = %d\n", __func__, r); - return r; -} - -/* - * Do encryption, decryption and signing ops. - */ -static int tpm_key_eds_op(struct kernel_pkey_params *params, - const void *in, void *out) -{ - struct tpm_key *tk = params->key->payload.data[asym_crypto]; - int ret = -EOPNOTSUPP; - - /* Perform the encryption calculation. */ - switch (params->op) { - case kernel_pkey_encrypt: - ret = tpm_key_encrypt(tk, params, in, out); - break; - case kernel_pkey_decrypt: - ret = tpm_key_decrypt(tk, params, in, out); - break; - case kernel_pkey_sign: - ret = tpm_key_sign(tk, params, in, out); - break; - default: - BUG(); - } - - return ret; -} - -/* - * Verify a signature using a public key. - */ -static int tpm_key_verify_signature(const struct key *key, - const struct public_key_signature *sig) -{ - const struct tpm_key *tk = key->payload.data[asym_crypto]; - struct crypto_wait cwait; - struct crypto_akcipher *tfm; - struct akcipher_request *req; - struct scatterlist src_sg[2]; - char alg_name[CRYPTO_MAX_ALG_NAME]; - uint8_t der_pub_key[PUB_KEY_BUF_SIZE]; - uint32_t der_pub_key_len; - int ret; - - pr_devel("==>%s()\n", __func__); - - BUG_ON(!tk); - BUG_ON(!sig); - BUG_ON(!sig->s); - - if (!sig->digest) - return -ENOPKG; - - ret = determine_akcipher(sig->encoding, sig->hash_algo, alg_name); - if (ret < 0) - return ret; - - tfm = crypto_alloc_akcipher(alg_name, 0, 0); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len, - der_pub_key); - - ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len); - if (ret < 0) - goto error_free_tfm; - - ret = -ENOMEM; - req = akcipher_request_alloc(tfm, GFP_KERNEL); - if (!req) - goto error_free_tfm; - - sg_init_table(src_sg, 2); - sg_set_buf(&src_sg[0], sig->s, sig->s_size); - sg_set_buf(&src_sg[1], sig->digest, sig->digest_size); - akcipher_request_set_crypt(req, src_sg, NULL, sig->s_size, - sig->digest_size); - crypto_init_wait(&cwait); - akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | - CRYPTO_TFM_REQ_MAY_SLEEP, - crypto_req_done, &cwait); - ret = crypto_wait_req(crypto_akcipher_verify(req), &cwait); - - akcipher_request_free(req); -error_free_tfm: - crypto_free_akcipher(tfm); - pr_devel("<==%s() = %d\n", __func__, ret); - if (WARN_ON_ONCE(ret > 0)) - ret = -EINVAL; - return ret; -} - -/* - * Parse enough information out of TPM_KEY structure: - * TPM_STRUCT_VER -> 4 bytes - * TPM_KEY_USAGE -> 2 bytes - * TPM_KEY_FLAGS -> 4 bytes - * TPM_AUTH_DATA_USAGE -> 1 byte - * TPM_KEY_PARMS -> variable - * UINT32 PCRInfoSize -> 4 bytes - * BYTE* -> PCRInfoSize bytes - * TPM_STORE_PUBKEY - * UINT32 encDataSize; - * BYTE* -> encDataSize; - * - * TPM_KEY_PARMS: - * TPM_ALGORITHM_ID -> 4 bytes - * TPM_ENC_SCHEME -> 2 bytes - * TPM_SIG_SCHEME -> 2 bytes - * UINT32 parmSize -> 4 bytes - * BYTE* -> variable - */ -static int extract_key_parameters(struct tpm_key *tk) -{ - const void *cur = tk->blob; - uint32_t len = tk->blob_len; - const void *pub_key; - uint32_t sz; - uint32_t key_len; - - if (len < 11) - return -EBADMSG; - - /* Ensure this is a legacy key */ - if (get_unaligned_be16(cur + 4) != 0x0015) - return -EBADMSG; - - /* Skip to TPM_KEY_PARMS */ - cur += 11; - len -= 11; - - if (len < 12) - return -EBADMSG; - - /* Make sure this is an RSA key */ - if (get_unaligned_be32(cur) != 0x00000001) - return -EBADMSG; - - /* Make sure this is TPM_ES_RSAESPKCSv15 encoding scheme */ - if (get_unaligned_be16(cur + 4) != 0x0002) - return -EBADMSG; - - /* Make sure this is TPM_SS_RSASSAPKCS1v15_DER signature scheme */ - if (get_unaligned_be16(cur + 6) != 0x0003) - return -EBADMSG; - - sz = get_unaligned_be32(cur + 8); - if (len < sz + 12) - return -EBADMSG; - - /* Move to TPM_RSA_KEY_PARMS */ - len -= 12; - cur += 12; - - /* Grab the RSA key length */ - key_len = get_unaligned_be32(cur); - - switch (key_len) { - case 512: - case 1024: - case 1536: - case 2048: - break; - default: - return -EINVAL; - } - - /* Move just past TPM_KEY_PARMS */ - cur += sz; - len -= sz; - - if (len < 4) - return -EBADMSG; - - sz = get_unaligned_be32(cur); - if (len < 4 + sz) - return -EBADMSG; - - /* Move to TPM_STORE_PUBKEY */ - cur += 4 + sz; - len -= 4 + sz; - - /* Grab the size of the public key, it should jive with the key size */ - sz = get_unaligned_be32(cur); - if (sz > 256) - return -EINVAL; - - pub_key = cur + 4; - - tk->key_len = key_len; - tk->pub_key = pub_key; - tk->pub_key_len = sz; - - return 0; -} - -/* Given the blob, parse it and load it into the TPM */ -struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len) -{ - int r; - struct tpm_key *tk; - - r = tpm_is_tpm2(NULL); - if (r < 0) - goto error; - - /* We don't support TPM2 yet */ - if (r > 0) { - r = -ENODEV; - goto error; - } - - r = -ENOMEM; - tk = kzalloc(sizeof(struct tpm_key), GFP_KERNEL); - if (!tk) - goto error; - - tk->blob = kmemdup(blob, blob_len, GFP_KERNEL); - if (!tk->blob) - goto error_memdup; - - tk->blob_len = blob_len; - - r = extract_key_parameters(tk); - if (r < 0) - goto error_extract; - - return tk; - -error_extract: - kfree(tk->blob); - tk->blob_len = 0; -error_memdup: - kfree(tk); -error: - return ERR_PTR(r); -} -EXPORT_SYMBOL_GPL(tpm_key_create); - -/* - * TPM-based asymmetric key subtype - */ -struct asymmetric_key_subtype asym_tpm_subtype = { - .owner = THIS_MODULE, - .name = "asym_tpm", - .name_len = sizeof("asym_tpm") - 1, - .describe = asym_tpm_describe, - .destroy = asym_tpm_destroy, - .query = tpm_key_query, - .eds_op = tpm_key_eds_op, - .verify_signature = tpm_key_verify_signature, -}; -EXPORT_SYMBOL_GPL(asym_tpm_subtype); - -MODULE_DESCRIPTION("TPM based asymmetric key subtype"); -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL v2"); diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c index 0b4d07aa8811..f6321c785714 100644 --- a/crypto/asymmetric_keys/pkcs7_verify.c +++ b/crypto/asymmetric_keys/pkcs7_verify.c @@ -174,12 +174,6 @@ static int pkcs7_find_key(struct pkcs7_message *pkcs7, pr_devel("Sig %u: Found cert serial match X.509[%u]\n", sinfo->index, certix); - if (strcmp(x509->pub->pkey_algo, sinfo->sig->pkey_algo) != 0) { - pr_warn("Sig %u: X.509 algo and PKCS#7 sig algo don't match\n", - sinfo->index); - continue; - } - sinfo->signer = x509; return 0; } @@ -226,9 +220,6 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7, return 0; } - if (x509->unsupported_key) - goto unsupported_crypto_in_x509; - pr_debug("- issuer %s\n", x509->issuer); sig = x509->sig; if (sig->auth_ids[0]) @@ -245,7 +236,7 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7, * authority. */ if (x509->unsupported_sig) - goto unsupported_crypto_in_x509; + goto unsupported_sig_in_x509; x509->signer = x509; pr_debug("- self-signed\n"); return 0; @@ -309,7 +300,7 @@ static int pkcs7_verify_sig_chain(struct pkcs7_message *pkcs7, might_sleep(); } -unsupported_crypto_in_x509: +unsupported_sig_in_x509: /* Just prune the certificate chain at this point if we lack some * crypto module to go further. Note, however, we don't want to set * sinfo->unsupported_crypto as the signed info block may still be diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index 4fefb219bfdc..7c9e6be35c30 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -60,39 +60,83 @@ static void public_key_destroy(void *payload0, void *payload3) } /* - * Determine the crypto algorithm name. + * Given a public_key, and an encoding and hash_algo to be used for signing + * and/or verification with that key, determine the name of the corresponding + * akcipher algorithm. Also check that encoding and hash_algo are allowed. */ -static -int software_key_determine_akcipher(const char *encoding, - const char *hash_algo, - const struct public_key *pkey, - char alg_name[CRYPTO_MAX_ALG_NAME]) +static int +software_key_determine_akcipher(const struct public_key *pkey, + const char *encoding, const char *hash_algo, + char alg_name[CRYPTO_MAX_ALG_NAME]) { int n; - if (strcmp(encoding, "pkcs1") == 0) { - /* The data wangled by the RSA algorithm is typically padded - * and encoded in some manner, such as EMSA-PKCS1-1_5 [RFC3447 - * sec 8.2]. + if (!encoding) + return -EINVAL; + + if (strcmp(pkey->pkey_algo, "rsa") == 0) { + /* + * RSA signatures usually use EMSA-PKCS1-1_5 [RFC3447 sec 8.2]. + */ + if (strcmp(encoding, "pkcs1") == 0) { + if (!hash_algo) + n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, + "pkcs1pad(%s)", + pkey->pkey_algo); + else + n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, + "pkcs1pad(%s,%s)", + pkey->pkey_algo, hash_algo); + return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0; + } + if (strcmp(encoding, "raw") != 0) + return -EINVAL; + /* + * Raw RSA cannot differentiate between different hash + * algorithms. + */ + if (hash_algo) + return -EINVAL; + } else if (strncmp(pkey->pkey_algo, "ecdsa", 5) == 0) { + if (strcmp(encoding, "x962") != 0) + return -EINVAL; + /* + * ECDSA signatures are taken over a raw hash, so they don't + * differentiate between different hash algorithms. That means + * that the verifier should hard-code a specific hash algorithm. + * Unfortunately, in practice ECDSA is used with multiple SHAs, + * so we have to allow all of them and not just one. */ if (!hash_algo) - n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, - "pkcs1pad(%s)", - pkey->pkey_algo); - else - n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, - "pkcs1pad(%s,%s)", - pkey->pkey_algo, hash_algo); - return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0; - } - - if (strcmp(encoding, "raw") == 0 || - strcmp(encoding, "x962") == 0) { - strcpy(alg_name, pkey->pkey_algo); - return 0; + return -EINVAL; + if (strcmp(hash_algo, "sha1") != 0 && + strcmp(hash_algo, "sha224") != 0 && + strcmp(hash_algo, "sha256") != 0 && + strcmp(hash_algo, "sha384") != 0 && + strcmp(hash_algo, "sha512") != 0) + return -EINVAL; + } else if (strcmp(pkey->pkey_algo, "sm2") == 0) { + if (strcmp(encoding, "raw") != 0) + return -EINVAL; + if (!hash_algo) + return -EINVAL; + if (strcmp(hash_algo, "sm3") != 0) + return -EINVAL; + } else if (strcmp(pkey->pkey_algo, "ecrdsa") == 0) { + if (strcmp(encoding, "raw") != 0) + return -EINVAL; + if (!hash_algo) + return -EINVAL; + if (strcmp(hash_algo, "streebog256") != 0 && + strcmp(hash_algo, "streebog512") != 0) + return -EINVAL; + } else { + /* Unknown public key algorithm */ + return -ENOPKG; } - - return -ENOPKG; + if (strscpy(alg_name, pkey->pkey_algo, CRYPTO_MAX_ALG_NAME) < 0) + return -EINVAL; + return 0; } static u8 *pkey_pack_u32(u8 *dst, u32 val) @@ -113,9 +157,8 @@ static int software_key_query(const struct kernel_pkey_params *params, u8 *key, *ptr; int ret, len; - ret = software_key_determine_akcipher(params->encoding, - params->hash_algo, - pkey, alg_name); + ret = software_key_determine_akcipher(pkey, params->encoding, + params->hash_algo, alg_name); if (ret < 0) return ret; @@ -179,9 +222,8 @@ static int software_key_eds_op(struct kernel_pkey_params *params, pr_devel("==>%s()\n", __func__); - ret = software_key_determine_akcipher(params->encoding, - params->hash_algo, - pkey, alg_name); + ret = software_key_determine_akcipher(pkey, params->encoding, + params->hash_algo, alg_name); if (ret < 0) return ret; @@ -325,9 +367,23 @@ int public_key_verify_signature(const struct public_key *pkey, BUG_ON(!sig); BUG_ON(!sig->s); - ret = software_key_determine_akcipher(sig->encoding, - sig->hash_algo, - pkey, alg_name); + /* + * If the signature specifies a public key algorithm, it *must* match + * the key's actual public key algorithm. + * + * Small exception: ECDSA signatures don't specify the curve, but ECDSA + * keys do. So the strings can mismatch slightly in that case: + * "ecdsa-nist-*" for the key, but "ecdsa" for the signature. + */ + if (sig->pkey_algo) { + if (strcmp(pkey->pkey_algo, sig->pkey_algo) != 0 && + (strncmp(pkey->pkey_algo, "ecdsa-", 6) != 0 || + strcmp(sig->pkey_algo, "ecdsa") != 0)) + return -EKEYREJECTED; + } + + ret = software_key_determine_akcipher(pkey, sig->encoding, + sig->hash_algo, alg_name); if (ret < 0) return ret; diff --git a/crypto/asymmetric_keys/tpm.asn1 b/crypto/asymmetric_keys/tpm.asn1 deleted file mode 100644 index d7f194232f30..000000000000 --- a/crypto/asymmetric_keys/tpm.asn1 +++ /dev/null @@ -1,5 +0,0 @@ --- --- Unencryted TPM Blob. For details of the format, see: --- http://david.woodhou.se/draft-woodhouse-cert-best-practice.html#I-D.mavrogiannopoulos-tpmuri --- -PrivateKeyInfo ::= OCTET STRING ({ tpm_note_key }) diff --git a/crypto/asymmetric_keys/tpm_parser.c b/crypto/asymmetric_keys/tpm_parser.c deleted file mode 100644 index 96405d8dcd98..000000000000 --- a/crypto/asymmetric_keys/tpm_parser.c +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define pr_fmt(fmt) "TPM-PARSER: "fmt -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/export.h> -#include <linux/slab.h> -#include <linux/err.h> -#include <keys/asymmetric-subtype.h> -#include <keys/asymmetric-parser.h> -#include <crypto/asym_tpm_subtype.h> -#include "tpm.asn1.h" - -struct tpm_parse_context { - const void *blob; - u32 blob_len; -}; - -/* - * Note the key data of the ASN.1 blob. - */ -int tpm_note_key(void *context, size_t hdrlen, - unsigned char tag, - const void *value, size_t vlen) -{ - struct tpm_parse_context *ctx = context; - - ctx->blob = value; - ctx->blob_len = vlen; - - return 0; -} - -/* - * Parse a TPM-encrypted private key blob. - */ -static struct tpm_key *tpm_parse(const void *data, size_t datalen) -{ - struct tpm_parse_context ctx; - long ret; - - memset(&ctx, 0, sizeof(ctx)); - - /* Attempt to decode the private key */ - ret = asn1_ber_decoder(&tpm_decoder, &ctx, data, datalen); - if (ret < 0) - goto error; - - return tpm_key_create(ctx.blob, ctx.blob_len); - -error: - return ERR_PTR(ret); -} -/* - * Attempt to parse a data blob for a key as a TPM private key blob. - */ -static int tpm_key_preparse(struct key_preparsed_payload *prep) -{ - struct tpm_key *tk; - - /* - * TPM 1.2 keys are max 2048 bits long, so assume the blob is no - * more than 4x that - */ - if (prep->datalen > 256 * 4) - return -EMSGSIZE; - - tk = tpm_parse(prep->data, prep->datalen); - - if (IS_ERR(tk)) - return PTR_ERR(tk); - - /* We're pinning the module by being linked against it */ - __module_get(asym_tpm_subtype.owner); - prep->payload.data[asym_subtype] = &asym_tpm_subtype; - prep->payload.data[asym_key_ids] = NULL; - prep->payload.data[asym_crypto] = tk; - prep->payload.data[asym_auth] = NULL; - prep->quotalen = 100; - return 0; -} - -static struct asymmetric_key_parser tpm_key_parser = { - .owner = THIS_MODULE, - .name = "tpm_parser", - .parse = tpm_key_preparse, -}; - -static int __init tpm_key_init(void) -{ - return register_asymmetric_key_parser(&tpm_key_parser); -} - -static void __exit tpm_key_exit(void) -{ - unregister_asymmetric_key_parser(&tpm_key_parser); -} - -module_init(tpm_key_init); -module_exit(tpm_key_exit); - -MODULE_DESCRIPTION("TPM private key-blob parser"); -MODULE_LICENSE("GPL v2"); diff --git a/crypto/asymmetric_keys/x509.asn1 b/crypto/asymmetric_keys/x509.asn1 index 5c9f4e4a5231..92d59c32f96a 100644 --- a/crypto/asymmetric_keys/x509.asn1 +++ b/crypto/asymmetric_keys/x509.asn1 @@ -7,7 +7,7 @@ Certificate ::= SEQUENCE { TBSCertificate ::= SEQUENCE { version [ 0 ] Version DEFAULT, serialNumber CertificateSerialNumber ({ x509_note_serial }), - signature AlgorithmIdentifier ({ x509_note_pkey_algo }), + signature AlgorithmIdentifier ({ x509_note_sig_algo }), issuer Name ({ x509_note_issuer }), validity Validity, subject Name ({ x509_note_subject }), diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index 083405eb80c3..2899ed80bb18 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -19,15 +19,13 @@ struct x509_parse_context { struct x509_certificate *cert; /* Certificate being constructed */ unsigned long data; /* Start of data */ - const void *cert_start; /* Start of cert content */ const void *key; /* Key data */ size_t key_size; /* Size of key data */ const void *params; /* Key parameters */ size_t params_size; /* Size of key parameters */ - enum OID key_algo; /* Public key algorithm */ + enum OID key_algo; /* Algorithm used by the cert's key */ enum OID last_oid; /* Last OID encountered */ - enum OID algo_oid; /* Algorithm OID */ - unsigned char nr_mpi; /* Number of MPIs stored */ + enum OID sig_algo; /* Algorithm used to sign the cert */ u8 o_size; /* Size of organizationName (O) */ u8 cn_size; /* Size of commonName (CN) */ u8 email_size; /* Size of emailAddress */ @@ -187,11 +185,10 @@ int x509_note_tbs_certificate(void *context, size_t hdrlen, } /* - * Record the public key algorithm + * Record the algorithm that was used to sign this certificate. */ -int x509_note_pkey_algo(void *context, size_t hdrlen, - unsigned char tag, - const void *value, size_t vlen) +int x509_note_sig_algo(void *context, size_t hdrlen, unsigned char tag, + const void *value, size_t vlen) { struct x509_parse_context *ctx = context; @@ -263,22 +260,22 @@ int x509_note_pkey_algo(void *context, size_t hdrlen, rsa_pkcs1: ctx->cert->sig->pkey_algo = "rsa"; ctx->cert->sig->encoding = "pkcs1"; - ctx->algo_oid = ctx->last_oid; + ctx->sig_algo = ctx->last_oid; return 0; ecrdsa: ctx->cert->sig->pkey_algo = "ecrdsa"; ctx->cert->sig->encoding = "raw"; - ctx->algo_oid = ctx->last_oid; + ctx->sig_algo = ctx->last_oid; return 0; sm2: ctx->cert->sig->pkey_algo = "sm2"; ctx->cert->sig->encoding = "raw"; - ctx->algo_oid = ctx->last_oid; + ctx->sig_algo = ctx->last_oid; return 0; ecdsa: ctx->cert->sig->pkey_algo = "ecdsa"; ctx->cert->sig->encoding = "x962"; - ctx->algo_oid = ctx->last_oid; + ctx->sig_algo = ctx->last_oid; return 0; } @@ -291,11 +288,16 @@ int x509_note_signature(void *context, size_t hdrlen, { struct x509_parse_context *ctx = context; - pr_debug("Signature type: %u size %zu\n", ctx->last_oid, vlen); + pr_debug("Signature: alg=%u, size=%zu\n", ctx->last_oid, vlen); - if (ctx->last_oid != ctx->algo_oid) { - pr_warn("Got cert with pkey (%u) and sig (%u) algorithm OIDs\n", - ctx->algo_oid, ctx->last_oid); + /* + * In X.509 certificates, the signature's algorithm is stored in two + * places: inside the TBSCertificate (the data that is signed), and + * alongside the signature. These *must* match. + */ + if (ctx->last_oid != ctx->sig_algo) { + pr_warn("signatureAlgorithm (%u) differs from tbsCertificate.signature (%u)\n", + ctx->last_oid, ctx->sig_algo); return -EINVAL; } diff --git a/crypto/asymmetric_keys/x509_parser.h b/crypto/asymmetric_keys/x509_parser.h index c233f136fb35..da854c94f111 100644 --- a/crypto/asymmetric_keys/x509_parser.h +++ b/crypto/asymmetric_keys/x509_parser.h @@ -36,7 +36,6 @@ struct x509_certificate { bool seen; /* Infinite recursion prevention */ bool verified; bool self_signed; /* T if self-signed (check unsupported_sig too) */ - bool unsupported_key; /* T if key uses unsupported crypto */ bool unsupported_sig; /* T if signature uses unsupported crypto */ bool blacklisted; }; diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index fe14cae115b5..91a4ad50dea2 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -33,18 +33,6 @@ int x509_get_sig_params(struct x509_certificate *cert) sig->data = cert->tbs; sig->data_size = cert->tbs_size; - if (!cert->pub->pkey_algo) - cert->unsupported_key = true; - - if (!sig->pkey_algo) - cert->unsupported_sig = true; - - /* We check the hash if we can - even if we can't then verify it */ - if (!sig->hash_algo) { - cert->unsupported_sig = true; - return 0; - } - sig->s = kmemdup(cert->raw_sig, cert->raw_sig_size, GFP_KERNEL); if (!sig->s) return -ENOMEM; @@ -128,12 +116,6 @@ int x509_check_for_self_signed(struct x509_certificate *cert) goto out; } - ret = -EKEYREJECTED; - if (strcmp(cert->pub->pkey_algo, cert->sig->pkey_algo) != 0 && - (strncmp(cert->pub->pkey_algo, "ecdsa-", 6) != 0 || - strcmp(cert->sig->pkey_algo, "ecdsa") != 0)) - goto out; - ret = public_key_verify_signature(cert->pub, cert->sig); if (ret < 0) { if (ret == -ENOPKG) { @@ -173,12 +155,6 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) pr_devel("Cert Issuer: %s\n", cert->issuer); pr_devel("Cert Subject: %s\n", cert->subject); - - if (cert->unsupported_key) { - ret = -ENOPKG; - goto error_free_cert; - } - pr_devel("Cert Key Algo: %s\n", cert->pub->pkey_algo); pr_devel("Cert Valid period: %lld-%lld\n", cert->valid_from, cert->valid_to); diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index e1a5eca3ae3c..d3bd14aaabf6 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -370,6 +370,7 @@ int amba_driver_register(struct amba_driver *drv) return driver_register(&drv->drv); } +EXPORT_SYMBOL(amba_driver_register); /** * amba_driver_unregister - remove an AMBA device driver @@ -383,7 +384,7 @@ void amba_driver_unregister(struct amba_driver *drv) { driver_unregister(&drv->drv); } - +EXPORT_SYMBOL(amba_driver_unregister); static void amba_device_release(struct device *dev) { @@ -642,6 +643,7 @@ int amba_device_register(struct amba_device *dev, struct resource *parent) return amba_device_add(dev, parent); } +EXPORT_SYMBOL(amba_device_register); /** * amba_device_put - put an AMBA device @@ -668,66 +670,7 @@ void amba_device_unregister(struct amba_device *dev) { device_unregister(&dev->dev); } - - -struct find_data { - struct amba_device *dev; - struct device *parent; - const char *busid; - unsigned int id; - unsigned int mask; -}; - -static int amba_find_match(struct device *dev, void *data) -{ - struct find_data *d = data; - struct amba_device *pcdev = to_amba_device(dev); - int r; - - r = (pcdev->periphid & d->mask) == d->id; - if (d->parent) - r &= d->parent == dev->parent; - if (d->busid) - r &= strcmp(dev_name(dev), d->busid) == 0; - - if (r) { - get_device(dev); - d->dev = pcdev; - } - - return r; -} - -/** - * amba_find_device - locate an AMBA device given a bus id - * @busid: bus id for device (or NULL) - * @parent: parent device (or NULL) - * @id: peripheral ID (or 0) - * @mask: peripheral ID mask (or 0) - * - * Return the AMBA device corresponding to the supplied parameters. - * If no device matches, returns NULL. - * - * NOTE: When a valid device is found, its refcount is - * incremented, and must be decremented before the returned - * reference. - */ -struct amba_device * -amba_find_device(const char *busid, struct device *parent, unsigned int id, - unsigned int mask) -{ - struct find_data data; - - data.dev = NULL; - data.parent = parent; - data.busid = busid; - data.id = id; - data.mask = mask; - - bus_for_each_dev(&amba_bustype, NULL, &data, amba_find_match); - - return data.dev; -} +EXPORT_SYMBOL(amba_device_unregister); /** * amba_request_regions - request all mem regions associated with device @@ -749,6 +692,7 @@ int amba_request_regions(struct amba_device *dev, const char *name) return ret; } +EXPORT_SYMBOL(amba_request_regions); /** * amba_release_regions - release mem regions associated with device @@ -763,11 +707,4 @@ void amba_release_regions(struct amba_device *dev) size = resource_size(&dev->res); release_mem_region(dev->res.start, size); } - -EXPORT_SYMBOL(amba_driver_register); -EXPORT_SYMBOL(amba_driver_unregister); -EXPORT_SYMBOL(amba_device_register); -EXPORT_SYMBOL(amba_device_unregister); -EXPORT_SYMBOL(amba_find_device); -EXPORT_SYMBOL(amba_request_regions); EXPORT_SYMBOL(amba_release_regions); diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 422753d52244..a31ffe16e626 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -1112,6 +1112,8 @@ DPRINTK("iovcnt = %d\n",skb_shinfo(skb)->nr_frags); skb_data3 = skb->data[3]; paddr = dma_map_single(&eni_dev->pci_dev->dev,skb->data,skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(&eni_dev->pci_dev->dev, paddr)) + return enq_next; ENI_PRV_PADDR(skb) = paddr; /* prepare DMA queue entries */ j = 0; diff --git a/drivers/base/topology.c b/drivers/base/topology.c index fc24e89f9592..e9d1efcda89b 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -14,11 +14,11 @@ #include <linux/hardirq.h> #include <linux/topology.h> -#define define_id_show_func(name) \ +#define define_id_show_func(name, fmt) \ static ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ - return sysfs_emit(buf, "%d\n", topology_##name(dev->id)); \ + return sysfs_emit(buf, fmt "\n", topology_##name(dev->id)); \ } #define define_siblings_read_func(name, mask) \ @@ -42,22 +42,25 @@ static ssize_t name##_list_read(struct file *file, struct kobject *kobj, \ off, count); \ } -define_id_show_func(physical_package_id); +define_id_show_func(physical_package_id, "%d"); static DEVICE_ATTR_RO(physical_package_id); #ifdef TOPOLOGY_DIE_SYSFS -define_id_show_func(die_id); +define_id_show_func(die_id, "%d"); static DEVICE_ATTR_RO(die_id); #endif #ifdef TOPOLOGY_CLUSTER_SYSFS -define_id_show_func(cluster_id); +define_id_show_func(cluster_id, "%d"); static DEVICE_ATTR_RO(cluster_id); #endif -define_id_show_func(core_id); +define_id_show_func(core_id, "%d"); static DEVICE_ATTR_RO(core_id); +define_id_show_func(ppin, "0x%llx"); +static DEVICE_ATTR_ADMIN_RO(ppin); + define_siblings_read_func(thread_siblings, sibling_cpumask); static BIN_ATTR_RO(thread_siblings, 0); static BIN_ATTR_RO(thread_siblings_list, 0); @@ -87,7 +90,7 @@ static BIN_ATTR_RO(package_cpus, 0); static BIN_ATTR_RO(package_cpus_list, 0); #ifdef TOPOLOGY_BOOK_SYSFS -define_id_show_func(book_id); +define_id_show_func(book_id, "%d"); static DEVICE_ATTR_RO(book_id); define_siblings_read_func(book_siblings, book_cpumask); static BIN_ATTR_RO(book_siblings, 0); @@ -95,7 +98,7 @@ static BIN_ATTR_RO(book_siblings_list, 0); #endif #ifdef TOPOLOGY_DRAWER_SYSFS -define_id_show_func(drawer_id); +define_id_show_func(drawer_id, "%d"); static DEVICE_ATTR_RO(drawer_id); define_siblings_read_func(drawer_siblings, drawer_cpumask); static BIN_ATTR_RO(drawer_siblings, 0); @@ -145,6 +148,7 @@ static struct attribute *default_attrs[] = { #ifdef TOPOLOGY_DRAWER_SYSFS &dev_attr_drawer_id.attr, #endif + &dev_attr_ppin.attr, NULL }; diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index b009e7479b70..783d65fc71f0 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -274,14 +274,6 @@ static void tpm_dev_release(struct device *dev) kfree(chip); } -static void tpm_devs_release(struct device *dev) -{ - struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs); - - /* release the master device reference */ - put_device(&chip->dev); -} - /** * tpm_class_shutdown() - prepare the TPM device for loss of power. * @dev: device to which the chip is associated. @@ -344,7 +336,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev, chip->dev_num = rc; device_initialize(&chip->dev); - device_initialize(&chip->devs); chip->dev.class = tpm_class; chip->dev.class->shutdown_pre = tpm_class_shutdown; @@ -352,39 +343,20 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev, chip->dev.parent = pdev; chip->dev.groups = chip->groups; - chip->devs.parent = pdev; - chip->devs.class = tpmrm_class; - chip->devs.release = tpm_devs_release; - /* get extra reference on main device to hold on - * behalf of devs. This holds the chip structure - * while cdevs is in use. The corresponding put - * is in the tpm_devs_release (TPM2 only) - */ - if (chip->flags & TPM_CHIP_FLAG_TPM2) - get_device(&chip->dev); - if (chip->dev_num == 0) chip->dev.devt = MKDEV(MISC_MAJOR, TPM_MINOR); else chip->dev.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num); - chip->devs.devt = - MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES); - rc = dev_set_name(&chip->dev, "tpm%d", chip->dev_num); if (rc) goto out; - rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num); - if (rc) - goto out; if (!pdev) chip->flags |= TPM_CHIP_FLAG_VIRTUAL; cdev_init(&chip->cdev, &tpm_fops); - cdev_init(&chip->cdevs, &tpmrm_fops); chip->cdev.owner = THIS_MODULE; - chip->cdevs.owner = THIS_MODULE; rc = tpm2_init_space(&chip->work_space, TPM2_SPACE_BUFFER_SIZE); if (rc) { @@ -396,7 +368,6 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev, return chip; out: - put_device(&chip->devs); put_device(&chip->dev); return ERR_PTR(rc); } @@ -445,14 +416,9 @@ static int tpm_add_char_device(struct tpm_chip *chip) } if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip)) { - rc = cdev_device_add(&chip->cdevs, &chip->devs); - if (rc) { - dev_err(&chip->devs, - "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n", - dev_name(&chip->devs), MAJOR(chip->devs.devt), - MINOR(chip->devs.devt), rc); - return rc; - } + rc = tpm_devs_add(chip); + if (rc) + goto err_del_cdev; } /* Make the chip available. */ @@ -460,6 +426,10 @@ static int tpm_add_char_device(struct tpm_chip *chip) idr_replace(&dev_nums_idr, chip, chip->dev_num); mutex_unlock(&idr_lock); + return 0; + +err_del_cdev: + cdev_device_del(&chip->cdev, &chip->dev); return rc; } @@ -654,7 +624,7 @@ void tpm_chip_unregister(struct tpm_chip *chip) hwrng_unregister(&chip->hwrng); tpm_bios_log_teardown(chip); if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip)) - cdev_device_del(&chip->cdevs, &chip->devs); + tpm_devs_remove(chip); tpm_del_char_device(chip); } EXPORT_SYMBOL_GPL(tpm_chip_unregister); diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c index c08cbb306636..dc4c0a0a5129 100644 --- a/drivers/char/tpm/tpm-dev-common.c +++ b/drivers/char/tpm/tpm-dev-common.c @@ -69,7 +69,13 @@ static void tpm_dev_async_work(struct work_struct *work) ret = tpm_dev_transmit(priv->chip, priv->space, priv->data_buffer, sizeof(priv->data_buffer)); tpm_put_ops(priv->chip); - if (ret > 0) { + + /* + * If ret is > 0 then tpm_dev_transmit returned the size of the + * response. If ret is < 0 then tpm_dev_transmit failed and + * returned an error code. + */ + if (ret != 0) { priv->response_length = ret; mod_timer(&priv->user_read_timer, jiffies + (120 * HZ)); } diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 283f78211c3a..2163c6ee0d36 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -234,6 +234,8 @@ int tpm2_prepare_space(struct tpm_chip *chip, struct tpm_space *space, u8 *cmd, size_t cmdsiz); int tpm2_commit_space(struct tpm_chip *chip, struct tpm_space *space, void *buf, size_t *bufsiz); +int tpm_devs_add(struct tpm_chip *chip); +void tpm_devs_remove(struct tpm_chip *chip); void tpm_bios_log_setup(struct tpm_chip *chip); void tpm_bios_log_teardown(struct tpm_chip *chip); diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c index 97e916856cf3..ffb35f0154c1 100644 --- a/drivers/char/tpm/tpm2-space.c +++ b/drivers/char/tpm/tpm2-space.c @@ -58,12 +58,12 @@ int tpm2_init_space(struct tpm_space *space, unsigned int buf_size) void tpm2_del_space(struct tpm_chip *chip, struct tpm_space *space) { - mutex_lock(&chip->tpm_mutex); - if (!tpm_chip_start(chip)) { + + if (tpm_try_get_ops(chip) == 0) { tpm2_flush_sessions(chip, space); - tpm_chip_stop(chip); + tpm_put_ops(chip); } - mutex_unlock(&chip->tpm_mutex); + kfree(space->context_buf); kfree(space->session_buf); } @@ -574,3 +574,68 @@ out: dev_err(&chip->dev, "%s: error %d\n", __func__, rc); return rc; } + +/* + * Put the reference to the main device. + */ +static void tpm_devs_release(struct device *dev) +{ + struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs); + + /* release the master device reference */ + put_device(&chip->dev); +} + +/* + * Remove the device file for exposed TPM spaces and release the device + * reference. This may also release the reference to the master device. + */ +void tpm_devs_remove(struct tpm_chip *chip) +{ + cdev_device_del(&chip->cdevs, &chip->devs); + put_device(&chip->devs); +} + +/* + * Add a device file to expose TPM spaces. Also take a reference to the + * main device. + */ +int tpm_devs_add(struct tpm_chip *chip) +{ + int rc; + + device_initialize(&chip->devs); + chip->devs.parent = chip->dev.parent; + chip->devs.class = tpmrm_class; + + /* + * Get extra reference on main device to hold on behalf of devs. + * This holds the chip structure while cdevs is in use. The + * corresponding put is in the tpm_devs_release. + */ + get_device(&chip->dev); + chip->devs.release = tpm_devs_release; + chip->devs.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num + TPM_NUM_DEVICES); + cdev_init(&chip->cdevs, &tpmrm_fops); + chip->cdevs.owner = THIS_MODULE; + + rc = dev_set_name(&chip->devs, "tpmrm%d", chip->dev_num); + if (rc) + goto err_put_devs; + + rc = cdev_device_add(&chip->cdevs, &chip->devs); + if (rc) { + dev_err(&chip->devs, + "unable to cdev_device_add() %s, major %d, minor %d, err=%d\n", + dev_name(&chip->devs), MAJOR(chip->devs.devt), + MINOR(chip->devs.devt), rc); + goto err_put_devs; + } + + return 0; + +err_put_devs: + put_device(&chip->devs); + + return rc; +} diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index da5b30771418..f53e0cf1ec7e 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -126,16 +126,16 @@ static void vtpm_cancel(struct tpm_chip *chip) notify_remote_via_evtchn(priv->evtchn); } -static unsigned int shr_data_offset(struct vtpm_shared_page *shr) +static size_t shr_data_offset(struct vtpm_shared_page *shr) { - return sizeof(*shr) + sizeof(u32) * shr->nr_extra_pages; + return struct_size(shr, extra_pages, shr->nr_extra_pages); } static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) { struct tpm_private *priv = dev_get_drvdata(&chip->dev); struct vtpm_shared_page *shr = priv->shr; - unsigned int offset = shr_data_offset(shr); + size_t offset = shr_data_offset(shr); u32 ordinal; unsigned long duration; @@ -177,7 +177,7 @@ static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count) { struct tpm_private *priv = dev_get_drvdata(&chip->dev); struct vtpm_shared_page *shr = priv->shr; - unsigned int offset = shr_data_offset(shr); + size_t offset = shr_data_offset(shr); size_t length = shr->length; if (shr->state == VTPM_STATE_IDLE) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index cfb8ea0df3b1..1ea556e75494 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -713,7 +713,6 @@ config INGENIC_OST config MICROCHIP_PIT64B bool "Microchip PIT64B support" depends on OF || COMPILE_TEST - select CLKSRC_MMIO select TIMER_OF help This option enables Microchip PIT64B timer for Atmel diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 1ecd52f903b8..9ab8221ee3c6 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -880,10 +880,19 @@ static void __arch_timer_setup(unsigned type, clockevents_config_and_register(clk, arch_timer_rate, 0xf, max_delta); } -static void arch_timer_evtstrm_enable(int divider) +static void arch_timer_evtstrm_enable(unsigned int divider) { u32 cntkctl = arch_timer_get_cntkctl(); +#ifdef CONFIG_ARM64 + /* ECV is likely to require a large divider. Use the EVNTIS flag. */ + if (cpus_have_const_cap(ARM64_HAS_ECV) && divider > 15) { + cntkctl |= ARCH_TIMER_EVT_INTERVAL_SCALE; + divider -= 8; + } +#endif + + divider = min(divider, 15U); cntkctl &= ~ARCH_TIMER_EVT_TRIGGER_MASK; /* Set the divider and enable virtual event stream */ cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT) @@ -912,7 +921,7 @@ static void arch_timer_configure_evtstream(void) lsb++; /* enable event stream */ - arch_timer_evtstrm_enable(max(0, min(lsb, 15))); + arch_timer_evtstrm_enable(max(0, lsb)); } static void arch_counter_set_user_access(void) diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 6db3d5511b0f..f29c812b70c9 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -60,27 +60,18 @@ #define MCT_CLKEVENTS_RATING 350 #endif +/* There are four Global timers starting with 0 offset */ +#define MCT_G0_IRQ 0 +/* Local timers count starts after global timer count */ +#define MCT_L0_IRQ 4 +/* Max number of IRQ as per DT binding document */ +#define MCT_NR_IRQS 20 + enum { MCT_INT_SPI, MCT_INT_PPI }; -enum { - MCT_G0_IRQ, - MCT_G1_IRQ, - MCT_G2_IRQ, - MCT_G3_IRQ, - MCT_L0_IRQ, - MCT_L1_IRQ, - MCT_L2_IRQ, - MCT_L3_IRQ, - MCT_L4_IRQ, - MCT_L5_IRQ, - MCT_L6_IRQ, - MCT_L7_IRQ, - MCT_NR_IRQS, -}; - static void __iomem *reg_base; static unsigned long clk_rate; static unsigned int mct_int_type; @@ -89,7 +80,11 @@ static int mct_irqs[MCT_NR_IRQS]; struct mct_clock_event_device { struct clock_event_device evt; unsigned long base; - char name[10]; + /** + * The length of the name must be adjusted if number of + * local timer interrupts grow over two digits + */ + char name[11]; }; static void exynos4_mct_write(unsigned int value, unsigned long offset) @@ -541,6 +536,11 @@ static int __init exynos4_timer_interrupts(struct device_node *np, * irqs are specified. */ nr_irqs = of_irq_count(np); + if (nr_irqs > ARRAY_SIZE(mct_irqs)) { + pr_err("exynos-mct: too many (%d) interrupts configured in DT\n", + nr_irqs); + nr_irqs = ARRAY_SIZE(mct_irqs); + } for (i = MCT_L0_IRQ; i < nr_irqs; i++) mct_irqs[i] = irq_of_parse_and_map(np, i); @@ -553,11 +553,14 @@ static int __init exynos4_timer_interrupts(struct device_node *np, mct_irqs[MCT_L0_IRQ], err); } else { for_each_possible_cpu(cpu) { - int mct_irq = mct_irqs[MCT_L0_IRQ + cpu]; + int mct_irq; struct mct_clock_event_device *pcpu_mevt = per_cpu_ptr(&percpu_mct_tick, cpu); pcpu_mevt->evt.irq = -1; + if (MCT_L0_IRQ + cpu >= ARRAY_SIZE(mct_irqs)) + break; + mct_irq = mct_irqs[MCT_L0_IRQ + cpu]; irq_set_status_flags(mct_irq, IRQ_NOAUTOEN); if (request_irq(mct_irq, diff --git a/drivers/clocksource/timer-imx-sysctr.c b/drivers/clocksource/timer-imx-sysctr.c index 55a8e198d2a1..523e37662a6e 100644 --- a/drivers/clocksource/timer-imx-sysctr.c +++ b/drivers/clocksource/timer-imx-sysctr.c @@ -110,7 +110,7 @@ static struct timer_of to_sysctr = { }, .of_irq = { .handler = sysctr_timer_interrupt, - .flags = IRQF_TIMER | IRQF_IRQPOLL, + .flags = IRQF_TIMER, }, .of_clk = { .name = "per", diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 2cdc077a39f5..bd64a8a8427f 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -32,8 +32,8 @@ #define TPM_C0SC_CHF_MASK (0x1 << 7) #define TPM_C0V 0x24 -static int counter_width; -static void __iomem *timer_base; +static int counter_width __ro_after_init; +static void __iomem *timer_base __ro_after_init; static inline void tpm_timer_disable(void) { @@ -73,12 +73,12 @@ static unsigned long tpm_read_current_timer(void) { return tpm_read_counter(); } -#endif static u64 notrace tpm_read_sched_clock(void) { return tpm_read_counter(); } +#endif static int tpm_set_next_event(unsigned long delta, struct clock_event_device *evt) @@ -127,9 +127,9 @@ static irqreturn_t tpm_timer_interrupt(int irq, void *dev_id) static struct timer_of to_tpm = { .flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK, .clkevt = { - .name = "i.MX7ULP TPM Timer", + .name = "i.MX TPM Timer", .rating = 200, - .features = CLOCK_EVT_FEAT_ONESHOT, + .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ, .set_state_shutdown = tpm_set_state_shutdown, .set_state_oneshot = tpm_set_state_oneshot, .set_next_event = tpm_set_next_event, @@ -137,7 +137,7 @@ static struct timer_of to_tpm = { }, .of_irq = { .handler = tpm_timer_interrupt, - .flags = IRQF_TIMER | IRQF_IRQPOLL, + .flags = IRQF_TIMER, }, .of_clk = { .name = "per", @@ -150,10 +150,10 @@ static int __init tpm_clocksource_init(void) tpm_delay_timer.read_current_timer = &tpm_read_current_timer; tpm_delay_timer.freq = timer_of_rate(&to_tpm) >> 3; register_current_timer_delay(&tpm_delay_timer); -#endif sched_clock_register(tpm_read_sched_clock, counter_width, timer_of_rate(&to_tpm) >> 3); +#endif return clocksource_mmio_init(timer_base + TPM_CNT, "imx-tpm", diff --git a/drivers/clocksource/timer-microchip-pit64b.c b/drivers/clocksource/timer-microchip-pit64b.c index cfa4ec7ef396..abce83d2f00b 100644 --- a/drivers/clocksource/timer-microchip-pit64b.c +++ b/drivers/clocksource/timer-microchip-pit64b.c @@ -42,8 +42,7 @@ #define MCHP_PIT64B_LSBMASK GENMASK_ULL(31, 0) #define MCHP_PIT64B_PRES_TO_MODE(p) (MCHP_PIT64B_MR_PRES & ((p) << 8)) #define MCHP_PIT64B_MODE_TO_PRES(m) ((MCHP_PIT64B_MR_PRES & (m)) >> 8) -#define MCHP_PIT64B_DEF_CS_FREQ 5000000UL /* 5 MHz */ -#define MCHP_PIT64B_DEF_CE_FREQ 32768 /* 32 KHz */ +#define MCHP_PIT64B_DEF_FREQ 5000000UL /* 5 MHz */ #define MCHP_PIT64B_NAME "pit64b" @@ -165,7 +164,7 @@ static u64 mchp_pit64b_clksrc_read(struct clocksource *cs) return mchp_pit64b_cnt_read(mchp_pit64b_cs_base); } -static u64 mchp_pit64b_sched_read_clk(void) +static u64 notrace mchp_pit64b_sched_read_clk(void) { return mchp_pit64b_cnt_read(mchp_pit64b_cs_base); } @@ -418,7 +417,6 @@ static int __init mchp_pit64b_init_clkevt(struct mchp_pit64b_timer *timer, static int __init mchp_pit64b_dt_init_timer(struct device_node *node, bool clkevt) { - u32 freq = clkevt ? MCHP_PIT64B_DEF_CE_FREQ : MCHP_PIT64B_DEF_CS_FREQ; struct mchp_pit64b_timer timer; unsigned long clk_rate; u32 irq = 0; @@ -446,7 +444,7 @@ static int __init mchp_pit64b_dt_init_timer(struct device_node *node, } /* Initialize mode (prescaler + SGCK bit). To be used at runtime. */ - ret = mchp_pit64b_init_mode(&timer, freq); + ret = mchp_pit64b_init_mode(&timer, MCHP_PIT64B_DEF_FREQ); if (ret) goto irq_unmap; diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index 529cc6a51cdb..c3f54d9912be 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -157,9 +157,9 @@ static __init int timer_of_base_init(struct device_node *np, of_base->base = of_base->name ? of_io_request_and_map(np, of_base->index, of_base->name) : of_iomap(np, of_base->index); - if (IS_ERR(of_base->base)) { - pr_err("Failed to iomap (%s)\n", of_base->name); - return PTR_ERR(of_base->base); + if (IS_ERR_OR_NULL(of_base->base)) { + pr_err("Failed to iomap (%s:%s)\n", np->name, of_base->name); + return of_base->base ? PTR_ERR(of_base->base) : -ENOMEM; } return 0; diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c index 1fccb457fcc5..2737407ff069 100644 --- a/drivers/clocksource/timer-ti-dm-systimer.c +++ b/drivers/clocksource/timer-ti-dm-systimer.c @@ -694,9 +694,9 @@ static int __init dmtimer_percpu_quirk_init(struct device_node *np, u32 pa) return 0; } - if (pa == 0x48034000) /* dra7 dmtimer3 */ + if (pa == 0x4882c000) /* dra7 dmtimer15 */ return dmtimer_percpu_timer_init(np, 0); - else if (pa == 0x48036000) /* dra7 dmtimer4 */ + else if (pa == 0x4882e000) /* dra7 dmtimer16 */ return dmtimer_percpu_timer_init(np, 1); return 0; diff --git a/drivers/counter/counter-sysfs.c b/drivers/counter/counter-sysfs.c index 7cc4d1d523ea..04eac41dad33 100644 --- a/drivers/counter/counter-sysfs.c +++ b/drivers/counter/counter-sysfs.c @@ -19,6 +19,11 @@ #include "counter-sysfs.h" +static inline struct counter_device *counter_from_dev(struct device *dev) +{ + return container_of(dev, struct counter_device, dev); +} + /** * struct counter_attribute - Counter sysfs attribute * @dev_attr: device attribute for sysfs @@ -90,7 +95,7 @@ static ssize_t counter_comp_u8_show(struct device *dev, struct device_attribute *attr, char *buf) { const struct counter_attribute *const a = to_counter_attribute(attr); - struct counter_device *const counter = dev_get_drvdata(dev); + struct counter_device *const counter = counter_from_dev(dev); int err; u8 data = 0; @@ -122,7 +127,7 @@ static ssize_t counter_comp_u8_store(struct device *dev, const char *buf, size_t len) { const struct counter_attribute *const a = to_counter_attribute(attr); - struct counter_device *const counter = dev_get_drvdata(dev); + struct counter_device *const counter = counter_from_dev(dev); int err; bool bool_data = 0; u8 data = 0; @@ -158,7 +163,7 @@ static ssize_t counter_comp_u32_show(struct device *dev, struct device_attribute *attr, char *buf) { const struct counter_attribute *const a = to_counter_attribute(attr); - struct counter_device *const counter = dev_get_drvdata(dev); + struct counter_device *const counter = counter_from_dev(dev); const struct counter_available *const avail = a->comp.priv; int err; u32 data = 0; @@ -221,7 +226,7 @@ static ssize_t counter_comp_u32_store(struct device *dev, const char *buf, size_t len) { const struct counter_attribute *const a = to_counter_attribute(attr); - struct counter_device *const counter = dev_get_drvdata(dev); + struct counter_device *const counter = counter_from_dev(dev); struct counter_count *const count = a->parent; struct counter_synapse *const synapse = a->comp.priv; const struct counter_available *const avail = a->comp.priv; @@ -281,7 +286,7 @@ static ssize_t counter_comp_u64_show(struct device *dev, struct device_attribute *attr, char *buf) { const struct counter_attribute *const a = to_counter_attribute(attr); - struct counter_device *const counter = dev_get_drvdata(dev); + struct counter_device *const counter = counter_from_dev(dev); int err; u64 data = 0; @@ -309,7 +314,7 @@ static ssize_t counter_comp_u64_store(struct device *dev, const char *buf, size_t len) { const struct counter_attribute *const a = to_counter_attribute(attr); - struct counter_device *const counter = dev_get_drvdata(dev); + struct counter_device *const counter = counter_from_dev(dev); int err; u64 data = 0; diff --git a/drivers/crypto/qcom-rng.c b/drivers/crypto/qcom-rng.c index 99ba8d51d102..11f30fd48c14 100644 --- a/drivers/crypto/qcom-rng.c +++ b/drivers/crypto/qcom-rng.c @@ -8,6 +8,7 @@ #include <linux/clk.h> #include <linux/crypto.h> #include <linux/io.h> +#include <linux/iopoll.h> #include <linux/module.h> #include <linux/of.h> #include <linux/platform_device.h> @@ -43,16 +44,19 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max) { unsigned int currsize = 0; u32 val; + int ret; /* read random data from hardware */ do { - val = readl_relaxed(rng->base + PRNG_STATUS); - if (!(val & PRNG_STATUS_DATA_AVAIL)) - break; + ret = readl_poll_timeout(rng->base + PRNG_STATUS, val, + val & PRNG_STATUS_DATA_AVAIL, + 200, 10000); + if (ret) + return ret; val = readl_relaxed(rng->base + PRNG_DATA_OUT); if (!val) - break; + return -EINVAL; if ((max - currsize) >= WORD_SZ) { memcpy(data, &val, WORD_SZ); @@ -61,11 +65,10 @@ static int qcom_rng_read(struct qcom_rng *rng, u8 *data, unsigned int max) } else { /* copy only remaining bytes */ memcpy(data, &val, max - currsize); - break; } } while (currsize < max); - return currsize; + return 0; } static int qcom_rng_generate(struct crypto_rng *tfm, @@ -87,7 +90,7 @@ static int qcom_rng_generate(struct crypto_rng *tfm, mutex_unlock(&rng->lock); clk_disable_unprepare(rng->clk); - return 0; + return ret; } static int qcom_rng_seed(struct crypto_rng *tfm, const u8 *seed, diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 110de8a60058..858400e42ec0 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2968,7 +2968,7 @@ static int __maybe_unused pl330_suspend(struct device *dev) struct amba_device *pcdev = to_amba_device(dev); pm_runtime_force_suspend(dev); - amba_pclk_unprepare(pcdev); + clk_unprepare(pcdev->pclk); return 0; } @@ -2978,7 +2978,7 @@ static int __maybe_unused pl330_resume(struct device *dev) struct amba_device *pcdev = to_amba_device(dev); int ret; - ret = amba_pclk_prepare(pcdev); + ret = clk_prepare(pcdev->pclk); if (ret) return ret; diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 5dd29789f97d..e7e8e624a436 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1083,8 +1083,46 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) #ifdef CONFIG_EDAC_ALTERA_SDRAM +/* + * A legacy U-Boot bug only enabled memory mapped access to the ECC Enable + * register if ECC is enabled. Linux checks the ECC Enable register to + * determine ECC status. + * Use an SMC call (which always works) to determine ECC enablement. + */ +static int altr_s10_sdram_check_ecc_deps(struct altr_edac_device_dev *device) +{ + const struct edac_device_prv_data *prv = device->data; + unsigned long sdram_ecc_addr; + struct arm_smccc_res result; + struct device_node *np; + phys_addr_t sdram_addr; + u32 read_reg; + int ret; + + np = of_find_compatible_node(NULL, NULL, "altr,sdr-ctl"); + if (!np) + goto sdram_err; + + sdram_addr = of_translate_address(np, of_get_address(np, 0, + NULL, NULL)); + of_node_put(np); + sdram_ecc_addr = (unsigned long)sdram_addr + prv->ecc_en_ofst; + arm_smccc_smc(INTEL_SIP_SMC_REG_READ, sdram_ecc_addr, + 0, 0, 0, 0, 0, 0, &result); + read_reg = (unsigned int)result.a1; + ret = (int)result.a0; + if (!ret && (read_reg & prv->ecc_enable_mask)) + return 0; + +sdram_err: + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: No ECC present or ECC disabled.\n", + device->edac_dev_name); + return -ENODEV; +} + static const struct edac_device_prv_data s10_sdramecc_data = { - .setup = altr_check_ecc_deps, + .setup = altr_s10_sdram_check_ecc_deps, .ce_clear_mask = ALTR_S10_ECC_SERRPENA, .ue_clear_mask = ALTR_S10_ECC_DERRPENA, .ecc_enable_mask = ALTR_S10_ECC_EN, diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index fba609ada0e6..812baa48b290 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -15,6 +15,21 @@ static struct msr __percpu *msrs; static struct amd64_family_type *fam_type; +static inline u32 get_umc_reg(u32 reg) +{ + if (!fam_type->flags.zn_regs_v2) + return reg; + + switch (reg) { + case UMCCH_ADDR_CFG: return UMCCH_ADDR_CFG_DDR5; + case UMCCH_ADDR_MASK_SEC: return UMCCH_ADDR_MASK_SEC_DDR5; + case UMCCH_DIMM_CFG: return UMCCH_DIMM_CFG_DDR5; + } + + WARN_ONCE(1, "%s: unknown register 0x%x", __func__, reg); + return 0; +} + /* Per-node stuff */ static struct ecc_settings **ecc_stngs; @@ -1429,8 +1444,10 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) edac_dbg(1, "UMC%d x16 DIMMs present: %s\n", i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no"); - if (pvt->dram_type == MEM_LRDDR4) { - amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp); + if (umc->dram_type == MEM_LRDDR4 || umc->dram_type == MEM_LRDDR5) { + amd_smn_read(pvt->mc_node_id, + umc_base + get_umc_reg(UMCCH_ADDR_CFG), + &tmp); edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n", i, 1 << ((tmp >> 4) & 0x3)); } @@ -1505,7 +1522,7 @@ static void prep_chip_selects(struct amd64_pvt *pvt) for_each_umc(umc) { pvt->csels[umc].b_cnt = 4; - pvt->csels[umc].m_cnt = 2; + pvt->csels[umc].m_cnt = fam_type->flags.zn_regs_v2 ? 4 : 2; } } else { @@ -1545,7 +1562,7 @@ static void read_umc_base_mask(struct amd64_pvt *pvt) } umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; - umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC; + umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(UMCCH_ADDR_MASK_SEC); for_each_chip_select_mask(cs, umc, pvt) { mask = &pvt->csels[umc].csmasks[cs]; @@ -1616,19 +1633,49 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) } } +static void determine_memory_type_df(struct amd64_pvt *pvt) +{ + struct amd64_umc *umc; + u32 i; + + for_each_umc(i) { + umc = &pvt->umc[i]; + + if (!(umc->sdp_ctrl & UMC_SDP_INIT)) { + umc->dram_type = MEM_EMPTY; + continue; + } + + /* + * Check if the system supports the "DDR Type" field in UMC Config + * and has DDR5 DIMMs in use. + */ + if (fam_type->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) { + if (umc->dimm_cfg & BIT(5)) + umc->dram_type = MEM_LRDDR5; + else if (umc->dimm_cfg & BIT(4)) + umc->dram_type = MEM_RDDR5; + else + umc->dram_type = MEM_DDR5; + } else { + if (umc->dimm_cfg & BIT(5)) + umc->dram_type = MEM_LRDDR4; + else if (umc->dimm_cfg & BIT(4)) + umc->dram_type = MEM_RDDR4; + else + umc->dram_type = MEM_DDR4; + } + + edac_dbg(1, " UMC%d DIMM type: %s\n", i, edac_mem_types[umc->dram_type]); + } +} + static void determine_memory_type(struct amd64_pvt *pvt) { u32 dram_ctrl, dcsm; - if (pvt->umc) { - if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) - pvt->dram_type = MEM_LRDDR4; - else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) - pvt->dram_type = MEM_RDDR4; - else - pvt->dram_type = MEM_DDR4; - return; - } + if (pvt->umc) + return determine_memory_type_df(pvt); switch (pvt->fam) { case 0xf: @@ -2149,6 +2196,7 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, { u32 addr_mask_orig, addr_mask_deinterleaved; u32 msb, weight, num_zero_bits; + int cs_mask_nr = csrow_nr; int dimm, size = 0; /* No Chip Selects are enabled. */ @@ -2164,17 +2212,33 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, return size; /* - * There is one mask per DIMM, and two Chip Selects per DIMM. - * CS0 and CS1 -> DIMM0 - * CS2 and CS3 -> DIMM1 + * Family 17h introduced systems with one mask per DIMM, + * and two Chip Selects per DIMM. + * + * CS0 and CS1 -> MASK0 / DIMM0 + * CS2 and CS3 -> MASK1 / DIMM1 + * + * Family 19h Model 10h introduced systems with one mask per Chip Select, + * and two Chip Selects per DIMM. + * + * CS0 -> MASK0 -> DIMM0 + * CS1 -> MASK1 -> DIMM0 + * CS2 -> MASK2 -> DIMM1 + * CS3 -> MASK3 -> DIMM1 + * + * Keep the mask number equal to the Chip Select number for newer systems, + * and shift the mask number for older systems. */ dimm = csrow_nr >> 1; + if (!fam_type->flags.zn_regs_v2) + cs_mask_nr >>= 1; + /* Asymmetric dual-rank DIMM support. */ if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) - addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm]; + addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr]; else - addr_mask_orig = pvt->csels[umc].csmasks[dimm]; + addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr]; /* * The number of zero bits in the mask is equal to the number of bits @@ -2930,6 +2994,7 @@ static struct amd64_family_type family_types[] = { .f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0, .f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6, .max_mcs = 12, + .flags.zn_regs_v2 = 1, .ops = { .early_channel_count = f17_early_channel_count, .dbam_to_cs = f17_addr_mask_to_cs_size, @@ -3368,7 +3433,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt) umc_base = get_umc_base(i); umc = &pvt->umc[i]; - amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg); + amd_smn_read(nid, umc_base + get_umc_reg(UMCCH_DIMM_CFG), &umc->dimm_cfg); amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg); amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl); amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl); @@ -3452,7 +3517,9 @@ skip: read_dct_base_mask(pvt); determine_memory_type(pvt); - edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); + + if (!pvt->umc) + edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); determine_ecc_sym_sz(pvt); } @@ -3548,7 +3615,7 @@ static int init_csrows_df(struct mem_ctl_info *mci) pvt->mc_node_id, cs); dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs); - dimm->mtype = pvt->dram_type; + dimm->mtype = pvt->umc[umc].dram_type; dimm->edac_mode = edac_mode; dimm->dtype = dev_type; dimm->grain = 64; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 352bda9803f6..38e5ad95d010 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -273,8 +273,11 @@ #define UMCCH_BASE_ADDR_SEC 0x10 #define UMCCH_ADDR_MASK 0x20 #define UMCCH_ADDR_MASK_SEC 0x28 +#define UMCCH_ADDR_MASK_SEC_DDR5 0x30 #define UMCCH_ADDR_CFG 0x30 +#define UMCCH_ADDR_CFG_DDR5 0x40 #define UMCCH_DIMM_CFG 0x80 +#define UMCCH_DIMM_CFG_DDR5 0x90 #define UMCCH_UMC_CFG 0x100 #define UMCCH_SDP_CTRL 0x104 #define UMCCH_ECC_CTRL 0x14C @@ -344,6 +347,9 @@ struct amd64_umc { u32 sdp_ctrl; /* SDP Control reg */ u32 ecc_ctrl; /* DRAM ECC Control reg */ u32 umc_cap_hi; /* Capabilities High reg */ + + /* cache the dram_type */ + enum mem_type dram_type; }; struct amd64_pvt { @@ -391,7 +397,12 @@ struct amd64_pvt { /* place to store error injection parameters prior to issue */ struct error_injection injection; - /* cache the dram_type */ + /* + * cache the dram_type + * + * NOTE: Don't use this for Family 17h and later. + * Use dram_type in struct amd64_umc instead. + */ enum mem_type dram_type; struct amd64_umc *umc; /* UMC registers */ @@ -480,11 +491,22 @@ struct low_ops { unsigned cs_mode, int cs_mask_nr); }; +struct amd64_family_flags { + /* + * Indicates that the system supports the new register offsets, etc. + * first introduced with Family 19h Model 10h. + */ + __u64 zn_regs_v2 : 1, + + __reserved : 63; +}; + struct amd64_family_type { const char *ctl_name; u16 f0_id, f1_id, f2_id, f6_id; /* Maximum number of memory controllers per die/node. */ u8 max_mcs; + struct amd64_family_flags flags; struct low_ops ops; }; diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c index 5e7593753799..9a61d92bdf42 100644 --- a/drivers/edac/edac_device_sysfs.c +++ b/drivers/edac/edac_device_sysfs.c @@ -163,13 +163,14 @@ CTL_INFO_ATTR(poll_msec, S_IRUGO | S_IWUSR, edac_device_ctl_poll_msec_show, edac_device_ctl_poll_msec_store); /* Base Attributes of the EDAC_DEVICE ECC object */ -static struct ctl_info_attribute *device_ctrl_attr[] = { - &attr_ctl_info_panic_on_ue, - &attr_ctl_info_log_ue, - &attr_ctl_info_log_ce, - &attr_ctl_info_poll_msec, +static struct attribute *device_ctrl_attrs[] = { + &attr_ctl_info_panic_on_ue.attr, + &attr_ctl_info_log_ue.attr, + &attr_ctl_info_log_ce.attr, + &attr_ctl_info_poll_msec.attr, NULL, }; +ATTRIBUTE_GROUPS(device_ctrl); /* * edac_device_ctrl_master_release @@ -217,7 +218,7 @@ static void edac_device_ctrl_master_release(struct kobject *kobj) static struct kobj_type ktype_device_ctrl = { .release = edac_device_ctrl_master_release, .sysfs_ops = &device_ctl_info_ops, - .default_attrs = (struct attribute **)device_ctrl_attr, + .default_groups = device_ctrl_groups, }; /* @@ -389,17 +390,18 @@ INSTANCE_ATTR(ce_count, S_IRUGO, instance_ce_count_show, NULL); INSTANCE_ATTR(ue_count, S_IRUGO, instance_ue_count_show, NULL); /* list of edac_dev 'instance' attributes */ -static struct instance_attribute *device_instance_attr[] = { - &attr_instance_ce_count, - &attr_instance_ue_count, +static struct attribute *device_instance_attrs[] = { + &attr_instance_ce_count.attr, + &attr_instance_ue_count.attr, NULL, }; +ATTRIBUTE_GROUPS(device_instance); /* The 'ktype' for each edac_dev 'instance' */ static struct kobj_type ktype_instance_ctrl = { .release = edac_device_ctrl_instance_release, .sysfs_ops = &device_instance_ops, - .default_attrs = (struct attribute **)device_instance_attr, + .default_groups = device_instance_groups, }; /* edac_dev -> instance -> block information */ @@ -487,17 +489,18 @@ BLOCK_ATTR(ce_count, S_IRUGO, block_ce_count_show, NULL); BLOCK_ATTR(ue_count, S_IRUGO, block_ue_count_show, NULL); /* list of edac_dev 'block' attributes */ -static struct edac_dev_sysfs_block_attribute *device_block_attr[] = { - &attr_block_ce_count, - &attr_block_ue_count, +static struct attribute *device_block_attrs[] = { + &attr_block_ce_count.attr, + &attr_block_ue_count.attr, NULL, }; +ATTRIBUTE_GROUPS(device_block); /* The 'ktype' for each edac_dev 'block' */ static struct kobj_type ktype_block_ctrl = { .release = edac_device_ctrl_block_release, .sysfs_ops = &device_block_ops, - .default_attrs = (struct attribute **)device_block_attr, + .default_groups = device_block_groups, }; /* block ctor/dtor code */ diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index f5677d81bd2d..d2715774af6f 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -213,12 +213,12 @@ void *edac_align_ptr(void **p, unsigned int size, int n_elems) else if (size > sizeof(char)) align = sizeof(short); else - return (char *)ptr; + return ptr; r = (unsigned long)ptr % align; if (r == 0) - return (char *)ptr; + return ptr; *p += align - r; diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 53042af7262e..888d5728ecef 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -135,17 +135,18 @@ INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL); INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL); /* pci instance attributes */ -static struct instance_attribute *pci_instance_attr[] = { - &attr_instance_pe_count, - &attr_instance_npe_count, +static struct attribute *pci_instance_attrs[] = { + &attr_instance_pe_count.attr, + &attr_instance_npe_count.attr, NULL }; +ATTRIBUTE_GROUPS(pci_instance); /* the ktype for a pci instance */ static struct kobj_type ktype_pci_instance = { .release = edac_pci_instance_release, .sysfs_ops = &pci_instance_ops, - .default_attrs = (struct attribute **)pci_instance_attr, + .default_groups = pci_instance_groups, }; /* @@ -292,15 +293,16 @@ EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL); EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL); /* Base Attributes of the memory ECC object */ -static struct edac_pci_dev_attribute *edac_pci_attr[] = { - &edac_pci_attr_check_pci_errors, - &edac_pci_attr_edac_pci_log_pe, - &edac_pci_attr_edac_pci_log_npe, - &edac_pci_attr_edac_pci_panic_on_pe, - &edac_pci_attr_pci_parity_count, - &edac_pci_attr_pci_nonparity_count, +static struct attribute *edac_pci_attrs[] = { + &edac_pci_attr_check_pci_errors.attr, + &edac_pci_attr_edac_pci_log_pe.attr, + &edac_pci_attr_edac_pci_log_npe.attr, + &edac_pci_attr_edac_pci_panic_on_pe.attr, + &edac_pci_attr_pci_parity_count.attr, + &edac_pci_attr_pci_nonparity_count.attr, NULL, }; +ATTRIBUTE_GROUPS(edac_pci); /* * edac_pci_release_main_kobj @@ -327,7 +329,7 @@ static void edac_pci_release_main_kobj(struct kobject *kobj) static struct kobj_type ktype_edac_pci_main_kobj = { .release = edac_pci_release_main_kobj, .sysfs_ops = &edac_pci_sysfs_ops, - .default_attrs = (struct attribute **)edac_pci_attr, + .default_groups = edac_pci_groups, }; /** diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c index 4c3201e290e2..ea84108035eb 100644 --- a/drivers/firmware/efi/apple-properties.c +++ b/drivers/firmware/efi/apple-properties.c @@ -24,7 +24,7 @@ static bool dump_properties __initdata; static int __init dump_properties_enable(char *arg) { dump_properties = true; - return 0; + return 1; } __setup("dump_apple_properties", dump_properties_enable); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 7de3f5b6e8d0..5502e176d51b 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -212,7 +212,7 @@ static int __init efivar_ssdt_setup(char *str) memcpy(efivar_ssdt, str, strlen(str)); else pr_warn("efivar_ssdt: name too long: %s\n", str); - return 0; + return 1; } __setup("efivar_ssdt=", efivar_ssdt_setup); diff --git a/drivers/firmware/efi/mokvar-table.c b/drivers/firmware/efi/mokvar-table.c index 38722d2009e2..5ed0602c2f75 100644 --- a/drivers/firmware/efi/mokvar-table.c +++ b/drivers/firmware/efi/mokvar-table.c @@ -359,4 +359,4 @@ static int __init efi_mokvar_sysfs_init(void) } return err; } -device_initcall(efi_mokvar_sysfs_init); +fs_initcall(efi_mokvar_sysfs_init); diff --git a/drivers/gpio/gpio-mt7621.c b/drivers/gpio/gpio-mt7621.c index ccaad1cb3c2e..d8a26e503ca5 100644 --- a/drivers/gpio/gpio-mt7621.c +++ b/drivers/gpio/gpio-mt7621.c @@ -239,7 +239,6 @@ mediatek_gpio_bank_probe(struct device *dev, int bank) rg->chip.offset = bank * MTK_BANK_WIDTH; rg->irq_chip.name = dev_name(dev); - rg->irq_chip.parent_device = dev; rg->irq_chip.irq_unmask = mediatek_gpio_irq_unmask; rg->irq_chip.irq_mask = mediatek_gpio_irq_mask; rg->irq_chip.irq_mask_ack = mediatek_gpio_irq_mask; diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index e099c39e0355..80ddc43fd875 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -986,7 +986,8 @@ static void omap_gpio_mod_init(struct gpio_bank *bank) writel_relaxed(0, base + bank->regs->ctrl); } -static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc) +static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc, + struct device *pm_dev) { struct gpio_irq_chip *irq; static int gpio; @@ -1052,6 +1053,7 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc) if (ret) return dev_err_probe(bank->chip.parent, ret, "Could not register gpio chip\n"); + irq_domain_set_pm_device(bank->chip.irq.domain, pm_dev); ret = devm_request_irq(bank->chip.parent, bank->irq, omap_gpio_irq_handler, 0, dev_name(bank->chip.parent), bank); @@ -1402,7 +1404,6 @@ static int omap_gpio_probe(struct platform_device *pdev) irqc->irq_bus_sync_unlock = gpio_irq_bus_sync_unlock, irqc->name = dev_name(&pdev->dev); irqc->flags = IRQCHIP_MASK_ON_SUSPEND; - irqc->parent_device = dev; bank->irq = platform_get_irq(pdev, 0); if (bank->irq <= 0) { @@ -1466,7 +1467,7 @@ static int omap_gpio_probe(struct platform_device *pdev) omap_gpio_mod_init(bank); - ret = omap_gpio_chip_init(bank, irqc); + ret = omap_gpio_chip_init(bank, irqc, dev); if (ret) { pm_runtime_put_sync(dev); pm_runtime_disable(dev); diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index bd2e16d6e21c..3a76538f27fa 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -530,7 +530,6 @@ static int gpio_rcar_probe(struct platform_device *pdev) irq_chip = &p->irq_chip; irq_chip->name = "gpio-rcar"; - irq_chip->parent_device = dev; irq_chip->irq_mask = gpio_rcar_irq_disable; irq_chip->irq_unmask = gpio_rcar_irq_enable; irq_chip->irq_set_type = gpio_rcar_irq_set_type; @@ -552,6 +551,7 @@ static int gpio_rcar_probe(struct platform_device *pdev) goto err0; } + irq_domain_set_pm_device(gpio_chip->irq.domain, dev); ret = devm_request_irq(dev, p->irq_parent, gpio_rcar_irq_handler, IRQF_SHARED, name, p); if (ret) { diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c index 5b103221b58d..fa4bc7481f9a 100644 --- a/drivers/gpio/gpio-tqmx86.c +++ b/drivers/gpio/gpio-tqmx86.c @@ -281,7 +281,6 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) u8 irq_status; irq_chip->name = chip->label; - irq_chip->parent_device = &pdev->dev; irq_chip->irq_mask = tqmx86_gpio_irq_mask; irq_chip->irq_unmask = tqmx86_gpio_irq_unmask; irq_chip->irq_set_type = tqmx86_gpio_irq_set_type; @@ -316,6 +315,8 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) goto out_pm_dis; } + irq_domain_set_pm_device(girq->domain, dev); + dev_info(dev, "GPIO functionality initialized with %d pins\n", chip->ngpio); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index defb7c464b87..6630d92e30ad 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1701,6 +1701,11 @@ static inline void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gc) */ int gpiochip_generic_request(struct gpio_chip *gc, unsigned int offset) { +#ifdef CONFIG_PINCTRL + if (list_empty(&gc->gpiodev->pin_ranges)) + return 0; +#endif + return pinctrl_gpio_request(gc->gpiodev->base + offset); } EXPORT_SYMBOL_GPL(gpiochip_generic_request); @@ -1712,6 +1717,11 @@ EXPORT_SYMBOL_GPL(gpiochip_generic_request); */ void gpiochip_generic_free(struct gpio_chip *gc, unsigned int offset) { +#ifdef CONFIG_PINCTRL + if (list_empty(&gc->gpiodev->pin_ranges)) + return; +#endif + pinctrl_gpio_free(gc->gpiodev->base + offset); } EXPORT_SYMBOL_GPL(gpiochip_generic_free); diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 61db5a66b493..44ad70939663 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -8,7 +8,6 @@ config DRM_BRIDGE config DRM_PANEL_BRIDGE def_bool y depends on DRM_BRIDGE - depends on DRM_KMS_HELPER select DRM_PANEL help DRM bridge wrapper of DRM panels @@ -30,6 +29,7 @@ config DRM_CDNS_DSI config DRM_CHIPONE_ICN6211 tristate "Chipone ICN6211 MIPI-DSI/RGB Converter bridge" depends on OF + select DRM_KMS_HELPER select DRM_MIPI_DSI select DRM_PANEL_BRIDGE help diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index a8aba0141ce7..06cb1a59b9bc 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -217,14 +217,6 @@ static int imx_pd_bridge_atomic_check(struct drm_bridge *bridge, if (!imx_pd_format_supported(bus_fmt)) return -EINVAL; - if (bus_flags & - ~(DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_DE_HIGH | - DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE | - DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE)) { - dev_warn(imxpd->dev, "invalid bus_flags (%x)\n", bus_flags); - return -EINVAL; - } - bridge_state->output_bus_cfg.flags = bus_flags; bridge_state->input_bus_cfg.flags = bus_flags; imx_crtc_state->bus_flags = bus_flags; diff --git a/drivers/gpu/drm/mgag200/mgag200_pll.c b/drivers/gpu/drm/mgag200/mgag200_pll.c index e9ae22b4f813..52be08b744ad 100644 --- a/drivers/gpu/drm/mgag200/mgag200_pll.c +++ b/drivers/gpu/drm/mgag200/mgag200_pll.c @@ -404,9 +404,9 @@ mgag200_pixpll_update_g200wb(struct mgag200_pll *pixpll, const struct mgag200_pl udelay(50); /* program pixel pll register */ - WREG_DAC(MGA1064_PIX_PLLC_N, xpixpllcn); - WREG_DAC(MGA1064_PIX_PLLC_M, xpixpllcm); - WREG_DAC(MGA1064_PIX_PLLC_P, xpixpllcp); + WREG_DAC(MGA1064_WB_PIX_PLLC_N, xpixpllcn); + WREG_DAC(MGA1064_WB_PIX_PLLC_M, xpixpllcm); + WREG_DAC(MGA1064_WB_PIX_PLLC_P, xpixpllcp); udelay(50); diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index 0aec5a10b064..9989a316fe88 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -107,6 +107,7 @@ config DRM_PANEL_EDP select VIDEOMODE_HELPERS select DRM_DP_AUX_BUS select DRM_DP_HELPER + select DRM_KMS_HELPER help DRM panel driver for dumb eDP panels that need at most a regulator and a GPIO to be powered up. Optionally a backlight can be attached so diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 3c08f9827acf..b42c1d816e79 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2017,7 +2017,7 @@ static const struct display_timing innolux_g070y2_l01_timing = { static const struct panel_desc innolux_g070y2_l01 = { .timings = &innolux_g070y2_l01_timing, .num_timings = 1, - .bpc = 6, + .bpc = 8, .size = { .width = 152, .height = 91, diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c index fcb1b646436a..1581f6ef0927 100644 --- a/drivers/input/tablet/aiptek.c +++ b/drivers/input/tablet/aiptek.c @@ -1787,15 +1787,13 @@ aiptek_probe(struct usb_interface *intf, const struct usb_device_id *id) input_set_abs_params(inputdev, ABS_TILT_Y, AIPTEK_TILT_MIN, AIPTEK_TILT_MAX, 0, 0); input_set_abs_params(inputdev, ABS_WHEEL, AIPTEK_WHEEL_MIN, AIPTEK_WHEEL_MAX - 1, 0, 0); - /* Verify that a device really has an endpoint */ - if (intf->cur_altsetting->desc.bNumEndpoints < 1) { + err = usb_find_common_endpoints(intf->cur_altsetting, + NULL, NULL, &endpoint, NULL); + if (err) { dev_err(&intf->dev, - "interface has %d endpoints, but must have minimum 1\n", - intf->cur_altsetting->desc.bNumEndpoints); - err = -EINVAL; + "interface has no int in endpoints, but must have minimum 1\n"); goto fail3; } - endpoint = &intf->cur_altsetting->endpoint[0].desc; /* Go set up our URB, which is called when the tablet receives * input. diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c index 129ebc810de8..8bd03278ad9a 100644 --- a/drivers/input/touchscreen/zinitix.c +++ b/drivers/input/touchscreen/zinitix.c @@ -135,7 +135,7 @@ struct point_coord { struct touch_event { __le16 status; - u8 finger_cnt; + u8 finger_mask; u8 time_stamp; struct point_coord point_coord[MAX_SUPPORTED_FINGER_NUM]; }; @@ -322,11 +322,32 @@ static int zinitix_send_power_on_sequence(struct bt541_ts_data *bt541) static void zinitix_report_finger(struct bt541_ts_data *bt541, int slot, const struct point_coord *p) { + u16 x, y; + + if (unlikely(!(p->sub_status & + (SUB_BIT_UP | SUB_BIT_DOWN | SUB_BIT_MOVE)))) { + dev_dbg(&bt541->client->dev, "unknown finger event %#02x\n", + p->sub_status); + return; + } + + x = le16_to_cpu(p->x); + y = le16_to_cpu(p->y); + input_mt_slot(bt541->input_dev, slot); - input_mt_report_slot_state(bt541->input_dev, MT_TOOL_FINGER, true); - touchscreen_report_pos(bt541->input_dev, &bt541->prop, - le16_to_cpu(p->x), le16_to_cpu(p->y), true); - input_report_abs(bt541->input_dev, ABS_MT_TOUCH_MAJOR, p->width); + if (input_mt_report_slot_state(bt541->input_dev, MT_TOOL_FINGER, + !(p->sub_status & SUB_BIT_UP))) { + touchscreen_report_pos(bt541->input_dev, + &bt541->prop, x, y, true); + input_report_abs(bt541->input_dev, + ABS_MT_TOUCH_MAJOR, p->width); + dev_dbg(&bt541->client->dev, "finger %d %s (%u, %u)\n", + slot, p->sub_status & SUB_BIT_DOWN ? "down" : "move", + x, y); + } else { + dev_dbg(&bt541->client->dev, "finger %d up (%u, %u)\n", + slot, x, y); + } } static irqreturn_t zinitix_ts_irq_handler(int irq, void *bt541_handler) @@ -334,6 +355,7 @@ static irqreturn_t zinitix_ts_irq_handler(int irq, void *bt541_handler) struct bt541_ts_data *bt541 = bt541_handler; struct i2c_client *client = bt541->client; struct touch_event touch_event; + unsigned long finger_mask; int error; int i; @@ -346,10 +368,14 @@ static irqreturn_t zinitix_ts_irq_handler(int irq, void *bt541_handler) goto out; } - for (i = 0; i < MAX_SUPPORTED_FINGER_NUM; i++) - if (touch_event.point_coord[i].sub_status & SUB_BIT_EXIST) - zinitix_report_finger(bt541, i, - &touch_event.point_coord[i]); + finger_mask = touch_event.finger_mask; + for_each_set_bit(i, &finger_mask, MAX_SUPPORTED_FINGER_NUM) { + const struct point_coord *p = &touch_event.point_coord[i]; + + /* Only process contacts that are actually reported */ + if (p->sub_status & SUB_BIT_EXIST) + zinitix_report_finger(bt541, i, p); + } input_mt_sync_frame(bt541->input_dev); input_sync(bt541->input_dev); diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 3eb68fa1b8cc..c79a0df090c0 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -144,8 +144,8 @@ config IOMMU_DMA select IRQ_MSI_IOMMU select NEED_SG_DMA_LENGTH -# Shared Virtual Addressing library -config IOMMU_SVA_LIB +# Shared Virtual Addressing +config IOMMU_SVA bool select IOASID @@ -379,7 +379,7 @@ config ARM_SMMU_V3 config ARM_SMMU_V3_SVA bool "Shared Virtual Addressing support for the ARM SMMUv3" depends on ARM_SMMU_V3 - select IOMMU_SVA_LIB + select IOMMU_SVA select MMU_NOTIFIER help Support for sharing process address spaces with devices using the diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index bc7f730edbb0..44475a9b3eea 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -27,6 +27,6 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o -obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o +obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o obj-$(CONFIG_APPLE_DART) += apple-dart.o diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index a737ba5f727e..22ddd05bbdcd 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -340,14 +340,12 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm); if (IS_ERR(bond->smmu_mn)) { ret = PTR_ERR(bond->smmu_mn); - goto err_free_pasid; + goto err_free_bond; } list_add(&bond->list, &master->bonds); return &bond->sva; -err_free_pasid: - iommu_sva_free_pasid(mm); err_free_bond: kfree(bond); return ERR_PTR(ret); @@ -377,7 +375,6 @@ void arm_smmu_sva_unbind(struct iommu_sva *handle) if (refcount_dec_and_test(&bond->refs)) { list_del(&bond->list); arm_smmu_mmu_notifier_put(bond->smmu_mn); - iommu_sva_free_pasid(bond->mm); kfree(bond); } mutex_unlock(&sva_lock); diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 247d0f2d5fdf..39a06d245f12 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -52,7 +52,7 @@ config INTEL_IOMMU_SVM select PCI_PRI select MMU_NOTIFIER select IOASID - select IOMMU_SVA_LIB + select IOMMU_SVA help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 5b196cfe9ed2..1ce1741a7fa4 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4781,7 +4781,7 @@ attach_failed: link_failed: spin_unlock_irqrestore(&device_domain_lock, flags); if (list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); + ioasid_free(domain->default_pasid); return ret; } @@ -4811,7 +4811,7 @@ static void aux_domain_remove_dev(struct dmar_domain *domain, spin_unlock_irqrestore(&device_domain_lock, flags); if (list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); + ioasid_free(domain->default_pasid); } static int prepare_domain_attach_device(struct iommu_domain *domain, diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 5b5d69b04fcc..51ac2096b3da 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -514,11 +514,6 @@ static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); } -static void intel_svm_free_pasid(struct mm_struct *mm) -{ - iommu_sva_free_pasid(mm); -} - static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, struct mm_struct *mm, @@ -662,8 +657,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree(svm); } } - /* Drop a PASID reference and free it if no reference. */ - intel_svm_free_pasid(mm); } out: return ret; @@ -1047,8 +1040,6 @@ struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void } sva = intel_svm_bind_mm(iommu, dev, mm, flags); - if (IS_ERR_OR_NULL(sva)) - intel_svm_free_pasid(mm); mutex_unlock(&pasid_mutex); return sva; diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c index 06fee7416816..a786c034907c 100644 --- a/drivers/iommu/ioasid.c +++ b/drivers/iommu/ioasid.c @@ -2,7 +2,7 @@ /* * I/O Address Space ID allocator. There is one global IOASID space, split into * subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and - * free IOASIDs with ioasid_alloc and ioasid_put. + * free IOASIDs with ioasid_alloc() and ioasid_free(). */ #include <linux/ioasid.h> #include <linux/module.h> @@ -15,7 +15,6 @@ struct ioasid_data { struct ioasid_set *set; void *private; struct rcu_head rcu; - refcount_t refs; }; /* @@ -315,7 +314,6 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, data->set = set; data->private = private; - refcount_set(&data->refs, 1); /* * Custom allocator needs allocator data to perform platform specific @@ -348,35 +346,11 @@ exit_free: EXPORT_SYMBOL_GPL(ioasid_alloc); /** - * ioasid_get - obtain a reference to the IOASID - * @ioasid: the ID to get - */ -void ioasid_get(ioasid_t ioasid) -{ - struct ioasid_data *ioasid_data; - - spin_lock(&ioasid_allocator_lock); - ioasid_data = xa_load(&active_allocator->xa, ioasid); - if (ioasid_data) - refcount_inc(&ioasid_data->refs); - else - WARN_ON(1); - spin_unlock(&ioasid_allocator_lock); -} -EXPORT_SYMBOL_GPL(ioasid_get); - -/** - * ioasid_put - Release a reference to an ioasid + * ioasid_free - Free an ioasid * @ioasid: the ID to remove - * - * Put a reference to the IOASID, free it when the number of references drops to - * zero. - * - * Return: %true if the IOASID was freed, %false otherwise. */ -bool ioasid_put(ioasid_t ioasid) +void ioasid_free(ioasid_t ioasid) { - bool free = false; struct ioasid_data *ioasid_data; spin_lock(&ioasid_allocator_lock); @@ -386,10 +360,6 @@ bool ioasid_put(ioasid_t ioasid) goto exit_unlock; } - free = refcount_dec_and_test(&ioasid_data->refs); - if (!free) - goto exit_unlock; - active_allocator->ops->free(ioasid, active_allocator->ops->pdata); /* Custom allocator needs additional steps to free the xa element */ if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) { @@ -399,9 +369,8 @@ bool ioasid_put(ioasid_t ioasid) exit_unlock: spin_unlock(&ioasid_allocator_lock); - return free; } -EXPORT_SYMBOL_GPL(ioasid_put); +EXPORT_SYMBOL_GPL(ioasid_free); /** * ioasid_find - Find IOASID data diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c index bd41405d34e9..106506143896 100644 --- a/drivers/iommu/iommu-sva-lib.c +++ b/drivers/iommu/iommu-sva-lib.c @@ -18,8 +18,7 @@ static DECLARE_IOASID_SET(iommu_sva_pasid); * * Try to allocate a PASID for this mm, or take a reference to the existing one * provided it fits within the [@min, @max] range. On success the PASID is - * available in mm->pasid, and must be released with iommu_sva_free_pasid(). - * @min must be greater than 0, because 0 indicates an unused mm->pasid. + * available in mm->pasid and will be available for the lifetime of the mm. * * Returns 0 on success and < 0 on error. */ @@ -33,38 +32,24 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) return -EINVAL; mutex_lock(&iommu_sva_lock); - if (mm->pasid) { - if (mm->pasid >= min && mm->pasid <= max) - ioasid_get(mm->pasid); - else + /* Is a PASID already associated with this mm? */ + if (pasid_valid(mm->pasid)) { + if (mm->pasid < min || mm->pasid >= max) ret = -EOVERFLOW; - } else { - pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); - if (pasid == INVALID_IOASID) - ret = -ENOMEM; - else - mm->pasid = pasid; + goto out; } + + pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); + if (!pasid_valid(pasid)) + ret = -ENOMEM; + else + mm_pasid_set(mm, pasid); +out: mutex_unlock(&iommu_sva_lock); return ret; } EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid); -/** - * iommu_sva_free_pasid - Release the mm's PASID - * @mm: the mm - * - * Drop one reference to a PASID allocated with iommu_sva_alloc_pasid() - */ -void iommu_sva_free_pasid(struct mm_struct *mm) -{ - mutex_lock(&iommu_sva_lock); - if (ioasid_put(mm->pasid)) - mm->pasid = 0; - mutex_unlock(&iommu_sva_lock); -} -EXPORT_SYMBOL_GPL(iommu_sva_free_pasid); - /* ioasid_find getter() requires a void * argument */ static bool __mmget_not_zero(void *mm) { diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h index 031155010ca8..8909ea1094e3 100644 --- a/drivers/iommu/iommu-sva-lib.h +++ b/drivers/iommu/iommu-sva-lib.h @@ -9,7 +9,6 @@ #include <linux/mm_types.h> int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max); -void iommu_sva_free_pasid(struct mm_struct *mm); struct mm_struct *iommu_sva_find(ioasid_t pasid); /* I/O Page fault */ @@ -17,7 +16,7 @@ struct device; struct iommu_fault; struct iopf_queue; -#ifdef CONFIG_IOMMU_SVA_LIB +#ifdef CONFIG_IOMMU_SVA int iommu_queue_iopf(struct iommu_fault *fault, void *cookie); int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev); @@ -28,7 +27,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); -#else /* CONFIG_IOMMU_SVA_LIB */ +#else /* CONFIG_IOMMU_SVA */ static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) { return -ENODEV; @@ -64,5 +63,5 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue) { return -ENODEV; } -#endif /* CONFIG_IOMMU_SVA_LIB */ +#endif /* CONFIG_IOMMU_SVA */ #endif /* _IOMMU_SVA_LIB_H */ diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 7038957f4a77..680d2fcf2686 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -430,6 +430,14 @@ config QCOM_PDC Power Domain Controller driver to manage and configure wakeup IRQs for Qualcomm Technologies Inc (QTI) mobile chips. +config QCOM_MPM + tristate "QCOM MPM" + depends on ARCH_QCOM + select IRQ_DOMAIN_HIERARCHY + help + MSM Power Manager driver to manage and configure wakeup + IRQs for Qualcomm Technologies Inc (QTI) mobile chips. + config CSKY_MPINTC bool depends on CSKY diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index c1f611cbfbf8..1f8990f812f1 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -94,6 +94,7 @@ obj-$(CONFIG_MESON_IRQ_GPIO) += irq-meson-gpio.o obj-$(CONFIG_GOLDFISH_PIC) += irq-goldfish-pic.o obj-$(CONFIG_NDS32) += irq-ativic32.o obj-$(CONFIG_QCOM_PDC) += qcom-pdc.o +obj-$(CONFIG_QCOM_MPM) += irq-qcom-mpm.o obj-$(CONFIG_CSKY_MPINTC) += irq-csky-mpintc.o obj-$(CONFIG_CSKY_APB_INTC) += irq-csky-apb-intc.o obj-$(CONFIG_RISCV_INTC) += irq-riscv-intc.o diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 38091ebb9403..12dd48727a15 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -24,7 +24,7 @@ * - Default "this CPU" register view and explicit per-CPU views * * In addition, this driver also handles FIQs, as these are routed to the same - * IRQ vector. These are used for Fast IPIs (TODO), the ARMv8 timer IRQs, and + * IRQ vector. These are used for Fast IPIs, the ARMv8 timer IRQs, and * performance counters (TODO). * * Implementation notes: @@ -52,9 +52,12 @@ #include <linux/irqchip.h> #include <linux/irqchip/arm-vgic-info.h> #include <linux/irqdomain.h> +#include <linux/jump_label.h> #include <linux/limits.h> #include <linux/of_address.h> #include <linux/slab.h> +#include <asm/apple_m1_pmu.h> +#include <asm/cputype.h> #include <asm/exception.h> #include <asm/sysreg.h> #include <asm/virt.h> @@ -62,20 +65,22 @@ #include <dt-bindings/interrupt-controller/apple-aic.h> /* - * AIC registers (MMIO) + * AIC v1 registers (MMIO) */ #define AIC_INFO 0x0004 -#define AIC_INFO_NR_HW GENMASK(15, 0) +#define AIC_INFO_NR_IRQ GENMASK(15, 0) #define AIC_CONFIG 0x0010 #define AIC_WHOAMI 0x2000 #define AIC_EVENT 0x2004 -#define AIC_EVENT_TYPE GENMASK(31, 16) +#define AIC_EVENT_DIE GENMASK(31, 24) +#define AIC_EVENT_TYPE GENMASK(23, 16) #define AIC_EVENT_NUM GENMASK(15, 0) -#define AIC_EVENT_TYPE_HW 1 +#define AIC_EVENT_TYPE_FIQ 0 /* Software use */ +#define AIC_EVENT_TYPE_IRQ 1 #define AIC_EVENT_TYPE_IPI 4 #define AIC_EVENT_IPI_OTHER 1 #define AIC_EVENT_IPI_SELF 2 @@ -91,34 +96,73 @@ #define AIC_IPI_SELF BIT(31) #define AIC_TARGET_CPU 0x3000 -#define AIC_SW_SET 0x4000 -#define AIC_SW_CLR 0x4080 -#define AIC_MASK_SET 0x4100 -#define AIC_MASK_CLR 0x4180 #define AIC_CPU_IPI_SET(cpu) (0x5008 + ((cpu) << 7)) #define AIC_CPU_IPI_CLR(cpu) (0x500c + ((cpu) << 7)) #define AIC_CPU_IPI_MASK_SET(cpu) (0x5024 + ((cpu) << 7)) #define AIC_CPU_IPI_MASK_CLR(cpu) (0x5028 + ((cpu) << 7)) +#define AIC_MAX_IRQ 0x400 + +/* + * AIC v2 registers (MMIO) + */ + +#define AIC2_VERSION 0x0000 +#define AIC2_VERSION_VER GENMASK(7, 0) + +#define AIC2_INFO1 0x0004 +#define AIC2_INFO1_NR_IRQ GENMASK(15, 0) +#define AIC2_INFO1_LAST_DIE GENMASK(27, 24) + +#define AIC2_INFO2 0x0008 + +#define AIC2_INFO3 0x000c +#define AIC2_INFO3_MAX_IRQ GENMASK(15, 0) +#define AIC2_INFO3_MAX_DIE GENMASK(27, 24) + +#define AIC2_RESET 0x0010 +#define AIC2_RESET_RESET BIT(0) + +#define AIC2_CONFIG 0x0014 +#define AIC2_CONFIG_ENABLE BIT(0) +#define AIC2_CONFIG_PREFER_PCPU BIT(28) + +#define AIC2_TIMEOUT 0x0028 +#define AIC2_CLUSTER_PRIO 0x0030 +#define AIC2_DELAY_GROUPS 0x0100 + +#define AIC2_IRQ_CFG 0x2000 + +/* + * AIC2 registers are laid out like this, starting at AIC2_IRQ_CFG: + * + * Repeat for each die: + * IRQ_CFG: u32 * MAX_IRQS + * SW_SET: u32 * (MAX_IRQS / 32) + * SW_CLR: u32 * (MAX_IRQS / 32) + * MASK_SET: u32 * (MAX_IRQS / 32) + * MASK_CLR: u32 * (MAX_IRQS / 32) + * HW_STATE: u32 * (MAX_IRQS / 32) + * + * This is followed by a set of event registers, each 16K page aligned. + * The first one is the AP event register we will use. Unfortunately, + * the actual implemented die count is not specified anywhere in the + * capability registers, so we have to explicitly specify the event + * register as a second reg entry in the device tree to remain + * forward-compatible. + */ + +#define AIC2_IRQ_CFG_TARGET GENMASK(3, 0) +#define AIC2_IRQ_CFG_DELAY_IDX GENMASK(7, 5) + #define MASK_REG(x) (4 * ((x) >> 5)) #define MASK_BIT(x) BIT((x) & GENMASK(4, 0)) /* * IMP-DEF sysregs that control FIQ sources - * Note: sysreg-based IPIs are not supported yet. */ -/* Core PMC control register */ -#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0) -#define PMCR0_IMODE GENMASK(10, 8) -#define PMCR0_IMODE_OFF 0 -#define PMCR0_IMODE_PMI 1 -#define PMCR0_IMODE_AIC 2 -#define PMCR0_IMODE_HALT 3 -#define PMCR0_IMODE_FIQ 4 -#define PMCR0_IACT BIT(11) - /* IPI request registers */ #define SYS_IMP_APL_IPI_RR_LOCAL_EL1 sys_reg(3, 5, 15, 0, 0) #define SYS_IMP_APL_IPI_RR_GLOBAL_EL1 sys_reg(3, 5, 15, 0, 1) @@ -155,7 +199,18 @@ #define SYS_IMP_APL_UPMSR_EL1 sys_reg(3, 7, 15, 6, 4) #define UPMSR_IACT BIT(0) -#define AIC_NR_FIQ 4 +/* MPIDR fields */ +#define MPIDR_CPU(x) MPIDR_AFFINITY_LEVEL(x, 0) +#define MPIDR_CLUSTER(x) MPIDR_AFFINITY_LEVEL(x, 1) + +#define AIC_IRQ_HWIRQ(die, irq) (FIELD_PREP(AIC_EVENT_DIE, die) | \ + FIELD_PREP(AIC_EVENT_TYPE, AIC_EVENT_TYPE_IRQ) | \ + FIELD_PREP(AIC_EVENT_NUM, irq)) +#define AIC_FIQ_HWIRQ(x) (FIELD_PREP(AIC_EVENT_TYPE, AIC_EVENT_TYPE_FIQ) | \ + FIELD_PREP(AIC_EVENT_NUM, x)) +#define AIC_HWIRQ_IRQ(x) FIELD_GET(AIC_EVENT_NUM, x) +#define AIC_HWIRQ_DIE(x) FIELD_GET(AIC_EVENT_DIE, x) +#define AIC_NR_FIQ 6 #define AIC_NR_SWIPI 32 /* @@ -173,11 +228,81 @@ #define AIC_TMR_EL02_PHYS AIC_TMR_GUEST_PHYS #define AIC_TMR_EL02_VIRT AIC_TMR_GUEST_VIRT +DEFINE_STATIC_KEY_TRUE(use_fast_ipi); + +struct aic_info { + int version; + + /* Register offsets */ + u32 event; + u32 target_cpu; + u32 irq_cfg; + u32 sw_set; + u32 sw_clr; + u32 mask_set; + u32 mask_clr; + + u32 die_stride; + + /* Features */ + bool fast_ipi; +}; + +static const struct aic_info aic1_info = { + .version = 1, + + .event = AIC_EVENT, + .target_cpu = AIC_TARGET_CPU, +}; + +static const struct aic_info aic1_fipi_info = { + .version = 1, + + .event = AIC_EVENT, + .target_cpu = AIC_TARGET_CPU, + + .fast_ipi = true, +}; + +static const struct aic_info aic2_info = { + .version = 2, + + .irq_cfg = AIC2_IRQ_CFG, + + .fast_ipi = true, +}; + +static const struct of_device_id aic_info_match[] = { + { + .compatible = "apple,t8103-aic", + .data = &aic1_fipi_info, + }, + { + .compatible = "apple,aic", + .data = &aic1_info, + }, + { + .compatible = "apple,aic2", + .data = &aic2_info, + }, + {} +}; + struct aic_irq_chip { void __iomem *base; + void __iomem *event; struct irq_domain *hw_domain; struct irq_domain *ipi_domain; - int nr_hw; + struct { + cpumask_t aff; + } *fiq_aff[AIC_NR_FIQ]; + + int nr_irq; + int max_irq; + int nr_die; + int max_die; + + struct aic_info info; }; static DEFINE_PER_CPU(uint32_t, aic_fiq_unmasked); @@ -205,18 +330,24 @@ static void aic_ic_write(struct aic_irq_chip *ic, u32 reg, u32 val) static void aic_irq_mask(struct irq_data *d) { + irq_hw_number_t hwirq = irqd_to_hwirq(d); struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d); - aic_ic_write(ic, AIC_MASK_SET + MASK_REG(irqd_to_hwirq(d)), - MASK_BIT(irqd_to_hwirq(d))); + u32 off = AIC_HWIRQ_DIE(hwirq) * ic->info.die_stride; + u32 irq = AIC_HWIRQ_IRQ(hwirq); + + aic_ic_write(ic, ic->info.mask_set + off + MASK_REG(irq), MASK_BIT(irq)); } static void aic_irq_unmask(struct irq_data *d) { + irq_hw_number_t hwirq = irqd_to_hwirq(d); struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d); - aic_ic_write(ic, AIC_MASK_CLR + MASK_REG(d->hwirq), - MASK_BIT(irqd_to_hwirq(d))); + u32 off = AIC_HWIRQ_DIE(hwirq) * ic->info.die_stride; + u32 irq = AIC_HWIRQ_IRQ(hwirq); + + aic_ic_write(ic, ic->info.mask_clr + off + MASK_REG(irq), MASK_BIT(irq)); } static void aic_irq_eoi(struct irq_data *d) @@ -239,12 +370,12 @@ static void __exception_irq_entry aic_handle_irq(struct pt_regs *regs) * We cannot use a relaxed read here, as reads from DMA buffers * need to be ordered after the IRQ fires. */ - event = readl(ic->base + AIC_EVENT); + event = readl(ic->event + ic->info.event); type = FIELD_GET(AIC_EVENT_TYPE, event); irq = FIELD_GET(AIC_EVENT_NUM, event); - if (type == AIC_EVENT_TYPE_HW) - generic_handle_domain_irq(aic_irqc->hw_domain, irq); + if (type == AIC_EVENT_TYPE_IRQ) + generic_handle_domain_irq(aic_irqc->hw_domain, event); else if (type == AIC_EVENT_TYPE_IPI && irq == 1) aic_handle_ipi(regs); else if (event != 0) @@ -271,12 +402,14 @@ static int aic_irq_set_affinity(struct irq_data *d, struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d); int cpu; + BUG_ON(!ic->info.target_cpu); + if (force) cpu = cpumask_first(mask_val); else cpu = cpumask_any_and(mask_val, cpu_online_mask); - aic_ic_write(ic, AIC_TARGET_CPU + hwirq * 4, BIT(cpu)); + aic_ic_write(ic, ic->info.target_cpu + AIC_HWIRQ_IRQ(hwirq) * 4, BIT(cpu)); irq_data_update_effective_affinity(d, cpumask_of(cpu)); return IRQ_SET_MASK_OK; @@ -300,15 +433,21 @@ static struct irq_chip aic_chip = { .irq_set_type = aic_irq_set_type, }; +static struct irq_chip aic2_chip = { + .name = "AIC2", + .irq_mask = aic_irq_mask, + .irq_unmask = aic_irq_unmask, + .irq_eoi = aic_irq_eoi, + .irq_set_type = aic_irq_set_type, +}; + /* * FIQ irqchip */ static unsigned long aic_fiq_get_idx(struct irq_data *d) { - struct aic_irq_chip *ic = irq_data_get_irq_chip_data(d); - - return irqd_to_hwirq(d) - ic->nr_hw; + return AIC_HWIRQ_IRQ(irqd_to_hwirq(d)); } static void aic_fiq_set_mask(struct irq_data *d) @@ -386,17 +525,21 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs) */ if (read_sysreg_s(SYS_IMP_APL_IPI_SR_EL1) & IPI_SR_PENDING) { - pr_err_ratelimited("Fast IPI fired. Acking.\n"); - write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1); + if (static_branch_likely(&use_fast_ipi)) { + aic_handle_ipi(regs); + } else { + pr_err_ratelimited("Fast IPI fired. Acking.\n"); + write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1); + } } if (TIMER_FIRING(read_sysreg(cntp_ctl_el0))) generic_handle_domain_irq(aic_irqc->hw_domain, - aic_irqc->nr_hw + AIC_TMR_EL0_PHYS); + AIC_FIQ_HWIRQ(AIC_TMR_EL0_PHYS)); if (TIMER_FIRING(read_sysreg(cntv_ctl_el0))) generic_handle_domain_irq(aic_irqc->hw_domain, - aic_irqc->nr_hw + AIC_TMR_EL0_VIRT); + AIC_FIQ_HWIRQ(AIC_TMR_EL0_VIRT)); if (is_kernel_in_hyp_mode()) { uint64_t enabled = read_sysreg_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2); @@ -404,24 +547,23 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs) if ((enabled & VM_TMR_FIQ_ENABLE_P) && TIMER_FIRING(read_sysreg_s(SYS_CNTP_CTL_EL02))) generic_handle_domain_irq(aic_irqc->hw_domain, - aic_irqc->nr_hw + AIC_TMR_EL02_PHYS); + AIC_FIQ_HWIRQ(AIC_TMR_EL02_PHYS)); if ((enabled & VM_TMR_FIQ_ENABLE_V) && TIMER_FIRING(read_sysreg_s(SYS_CNTV_CTL_EL02))) generic_handle_domain_irq(aic_irqc->hw_domain, - aic_irqc->nr_hw + AIC_TMR_EL02_VIRT); + AIC_FIQ_HWIRQ(AIC_TMR_EL02_VIRT)); } - if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) == - (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) { - /* - * Not supported yet, let's figure out how to handle this when - * we implement these proprietary performance counters. For now, - * just mask it and move on. - */ - pr_err_ratelimited("PMC FIQ fired. Masking.\n"); - sysreg_clear_set_s(SYS_IMP_APL_PMCR0_EL1, PMCR0_IMODE | PMCR0_IACT, - FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF)); + if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & PMCR0_IACT) { + int irq; + if (cpumask_test_cpu(smp_processor_id(), + &aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff)) + irq = AIC_CPU_PMU_P; + else + irq = AIC_CPU_PMU_E; + generic_handle_domain_irq(aic_irqc->hw_domain, + AIC_FIQ_HWIRQ(irq)); } if (FIELD_GET(UPMCR0_IMODE, read_sysreg_s(SYS_IMP_APL_UPMCR0_EL1)) == UPMCR0_IMODE_FIQ && @@ -455,13 +597,29 @@ static int aic_irq_domain_map(struct irq_domain *id, unsigned int irq, irq_hw_number_t hw) { struct aic_irq_chip *ic = id->host_data; + u32 type = FIELD_GET(AIC_EVENT_TYPE, hw); + struct irq_chip *chip = &aic_chip; - if (hw < ic->nr_hw) { - irq_domain_set_info(id, irq, hw, &aic_chip, id->host_data, + if (ic->info.version == 2) + chip = &aic2_chip; + + if (type == AIC_EVENT_TYPE_IRQ) { + irq_domain_set_info(id, irq, hw, chip, id->host_data, handle_fasteoi_irq, NULL, NULL); irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq))); } else { - irq_set_percpu_devid(irq); + int fiq = FIELD_GET(AIC_EVENT_NUM, hw); + + switch (fiq) { + case AIC_CPU_PMU_P: + case AIC_CPU_PMU_E: + irq_set_percpu_devid_partition(irq, &ic->fiq_aff[fiq]->aff); + break; + default: + irq_set_percpu_devid(irq); + break; + } + irq_domain_set_info(id, irq, hw, &fiq_chip, id->host_data, handle_percpu_devid_irq, NULL, NULL); } @@ -475,32 +633,46 @@ static int aic_irq_domain_translate(struct irq_domain *id, unsigned int *type) { struct aic_irq_chip *ic = id->host_data; + u32 *args; + u32 die = 0; - if (fwspec->param_count != 3 || !is_of_node(fwspec->fwnode)) + if (fwspec->param_count < 3 || fwspec->param_count > 4 || + !is_of_node(fwspec->fwnode)) return -EINVAL; + args = &fwspec->param[1]; + + if (fwspec->param_count == 4) { + die = args[0]; + args++; + } + switch (fwspec->param[0]) { case AIC_IRQ: - if (fwspec->param[1] >= ic->nr_hw) + if (die >= ic->nr_die) return -EINVAL; - *hwirq = fwspec->param[1]; + if (args[0] >= ic->nr_irq) + return -EINVAL; + *hwirq = AIC_IRQ_HWIRQ(die, args[0]); break; case AIC_FIQ: - if (fwspec->param[1] >= AIC_NR_FIQ) + if (die != 0) + return -EINVAL; + if (args[0] >= AIC_NR_FIQ) return -EINVAL; - *hwirq = ic->nr_hw + fwspec->param[1]; + *hwirq = AIC_FIQ_HWIRQ(args[0]); /* * In EL1 the non-redirected registers are the guest's, * not EL2's, so remap the hwirqs to match. */ if (!is_kernel_in_hyp_mode()) { - switch (fwspec->param[1]) { + switch (args[0]) { case AIC_TMR_GUEST_PHYS: - *hwirq = ic->nr_hw + AIC_TMR_EL0_PHYS; + *hwirq = AIC_FIQ_HWIRQ(AIC_TMR_EL0_PHYS); break; case AIC_TMR_GUEST_VIRT: - *hwirq = ic->nr_hw + AIC_TMR_EL0_VIRT; + *hwirq = AIC_FIQ_HWIRQ(AIC_TMR_EL0_VIRT); break; case AIC_TMR_HV_PHYS: case AIC_TMR_HV_VIRT: @@ -514,7 +686,7 @@ static int aic_irq_domain_translate(struct irq_domain *id, return -EINVAL; } - *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; + *type = args[1] & IRQ_TYPE_SENSE_MASK; return 0; } @@ -563,6 +735,22 @@ static const struct irq_domain_ops aic_irq_domain_ops = { * IPI irqchip */ +static void aic_ipi_send_fast(int cpu) +{ + u64 mpidr = cpu_logical_map(cpu); + u64 my_mpidr = read_cpuid_mpidr(); + u64 cluster = MPIDR_CLUSTER(mpidr); + u64 idx = MPIDR_CPU(mpidr); + + if (MPIDR_CLUSTER(my_mpidr) == cluster) + write_sysreg_s(FIELD_PREP(IPI_RR_CPU, idx), + SYS_IMP_APL_IPI_RR_LOCAL_EL1); + else + write_sysreg_s(FIELD_PREP(IPI_RR_CPU, idx) | FIELD_PREP(IPI_RR_CLUSTER, cluster), + SYS_IMP_APL_IPI_RR_GLOBAL_EL1); + isb(); +} + static void aic_ipi_mask(struct irq_data *d) { u32 irq_bit = BIT(irqd_to_hwirq(d)); @@ -588,8 +776,12 @@ static void aic_ipi_unmask(struct irq_data *d) * If a pending vIPI was unmasked, raise a HW IPI to ourselves. * No barriers needed here since this is a self-IPI. */ - if (atomic_read(this_cpu_ptr(&aic_vipi_flag)) & irq_bit) - aic_ic_write(ic, AIC_IPI_SEND, AIC_IPI_SEND_CPU(smp_processor_id())); + if (atomic_read(this_cpu_ptr(&aic_vipi_flag)) & irq_bit) { + if (static_branch_likely(&use_fast_ipi)) + aic_ipi_send_fast(smp_processor_id()); + else + aic_ic_write(ic, AIC_IPI_SEND, AIC_IPI_SEND_CPU(smp_processor_id())); + } } static void aic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask) @@ -617,8 +809,12 @@ static void aic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask) smp_mb__after_atomic(); if (!(pending & irq_bit) && - (atomic_read(per_cpu_ptr(&aic_vipi_enable, cpu)) & irq_bit)) - send |= AIC_IPI_SEND_CPU(cpu); + (atomic_read(per_cpu_ptr(&aic_vipi_enable, cpu)) & irq_bit)) { + if (static_branch_likely(&use_fast_ipi)) + aic_ipi_send_fast(cpu); + else + send |= AIC_IPI_SEND_CPU(cpu); + } } /* @@ -650,8 +846,16 @@ static void aic_handle_ipi(struct pt_regs *regs) /* * Ack the IPI. We need to order this after the AIC event read, but * that is enforced by normal MMIO ordering guarantees. + * + * For the Fast IPI case, this needs to be ordered before the vIPI + * handling below, so we need to isb(); */ - aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_OTHER); + if (static_branch_likely(&use_fast_ipi)) { + write_sysreg_s(IPI_SR_PENDING, SYS_IMP_APL_IPI_SR_EL1); + isb(); + } else { + aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_OTHER); + } /* * The mask read does not need to be ordered. Only we can change @@ -679,7 +883,8 @@ static void aic_handle_ipi(struct pt_regs *regs) * No ordering needed here; at worst this just changes the timing of * when the next IPI will be delivered. */ - aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER); + if (!static_branch_likely(&use_fast_ipi)) + aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER); } static int aic_ipi_alloc(struct irq_domain *d, unsigned int virq, @@ -766,20 +971,27 @@ static int aic_init_cpu(unsigned int cpu) /* Commit all of the above */ isb(); - /* - * Make sure the kernel's idea of logical CPU order is the same as AIC's - * If we ever end up with a mismatch here, we will have to introduce - * a mapping table similar to what other irqchip drivers do. - */ - WARN_ON(aic_ic_read(aic_irqc, AIC_WHOAMI) != smp_processor_id()); + if (aic_irqc->info.version == 1) { + /* + * Make sure the kernel's idea of logical CPU order is the same as AIC's + * If we ever end up with a mismatch here, we will have to introduce + * a mapping table similar to what other irqchip drivers do. + */ + WARN_ON(aic_ic_read(aic_irqc, AIC_WHOAMI) != smp_processor_id()); - /* - * Always keep IPIs unmasked at the hardware level (except auto-masking - * by AIC during processing). We manage masks at the vIPI level. - */ - aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_SELF | AIC_IPI_OTHER); - aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF); - aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER); + /* + * Always keep IPIs unmasked at the hardware level (except auto-masking + * by AIC during processing). We manage masks at the vIPI level. + * These registers only exist on AICv1, AICv2 always uses fast IPIs. + */ + aic_ic_write(aic_irqc, AIC_IPI_ACK, AIC_IPI_SELF | AIC_IPI_OTHER); + if (static_branch_likely(&use_fast_ipi)) { + aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF | AIC_IPI_OTHER); + } else { + aic_ic_write(aic_irqc, AIC_IPI_MASK_SET, AIC_IPI_SELF); + aic_ic_write(aic_irqc, AIC_IPI_MASK_CLR, AIC_IPI_OTHER); + } + } /* Initialize the local mask state */ __this_cpu_write(aic_fiq_unmasked, 0); @@ -793,68 +1005,193 @@ static struct gic_kvm_info vgic_info __initdata = { .no_hw_deactivation = true, }; +static void build_fiq_affinity(struct aic_irq_chip *ic, struct device_node *aff) +{ + int i, n; + u32 fiq; + + if (of_property_read_u32(aff, "apple,fiq-index", &fiq) || + WARN_ON(fiq >= AIC_NR_FIQ) || ic->fiq_aff[fiq]) + return; + + n = of_property_count_elems_of_size(aff, "cpus", sizeof(u32)); + if (WARN_ON(n < 0)) + return; + + ic->fiq_aff[fiq] = kzalloc(sizeof(*ic->fiq_aff[fiq]), GFP_KERNEL); + if (!ic->fiq_aff[fiq]) + return; + + for (i = 0; i < n; i++) { + struct device_node *cpu_node; + u32 cpu_phandle; + int cpu; + + if (of_property_read_u32_index(aff, "cpus", i, &cpu_phandle)) + continue; + + cpu_node = of_find_node_by_phandle(cpu_phandle); + if (WARN_ON(!cpu_node)) + continue; + + cpu = of_cpu_node_to_id(cpu_node); + if (WARN_ON(cpu < 0)) + continue; + + cpumask_set_cpu(cpu, &ic->fiq_aff[fiq]->aff); + } +} + static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent) { - int i; + int i, die; + u32 off, start_off; void __iomem *regs; - u32 info; struct aic_irq_chip *irqc; + struct device_node *affs; + const struct of_device_id *match; regs = of_iomap(node, 0); if (WARN_ON(!regs)) return -EIO; irqc = kzalloc(sizeof(*irqc), GFP_KERNEL); - if (!irqc) + if (!irqc) { + iounmap(regs); return -ENOMEM; + } - aic_irqc = irqc; irqc->base = regs; - info = aic_ic_read(irqc, AIC_INFO); - irqc->nr_hw = FIELD_GET(AIC_INFO_NR_HW, info); + match = of_match_node(aic_info_match, node); + if (!match) + goto err_unmap; - irqc->hw_domain = irq_domain_create_linear(of_node_to_fwnode(node), - irqc->nr_hw + AIC_NR_FIQ, - &aic_irq_domain_ops, irqc); - if (WARN_ON(!irqc->hw_domain)) { - iounmap(irqc->base); - kfree(irqc); - return -ENODEV; + irqc->info = *(struct aic_info *)match->data; + + aic_irqc = irqc; + + switch (irqc->info.version) { + case 1: { + u32 info; + + info = aic_ic_read(irqc, AIC_INFO); + irqc->nr_irq = FIELD_GET(AIC_INFO_NR_IRQ, info); + irqc->max_irq = AIC_MAX_IRQ; + irqc->nr_die = irqc->max_die = 1; + + off = start_off = irqc->info.target_cpu; + off += sizeof(u32) * irqc->max_irq; /* TARGET_CPU */ + + irqc->event = irqc->base; + + break; } + case 2: { + u32 info1, info3; + + info1 = aic_ic_read(irqc, AIC2_INFO1); + info3 = aic_ic_read(irqc, AIC2_INFO3); + + irqc->nr_irq = FIELD_GET(AIC2_INFO1_NR_IRQ, info1); + irqc->max_irq = FIELD_GET(AIC2_INFO3_MAX_IRQ, info3); + irqc->nr_die = FIELD_GET(AIC2_INFO1_LAST_DIE, info1) + 1; + irqc->max_die = FIELD_GET(AIC2_INFO3_MAX_DIE, info3); + + off = start_off = irqc->info.irq_cfg; + off += sizeof(u32) * irqc->max_irq; /* IRQ_CFG */ + + irqc->event = of_iomap(node, 1); + if (WARN_ON(!irqc->event)) + goto err_unmap; + + break; + } + } + + irqc->info.sw_set = off; + off += sizeof(u32) * (irqc->max_irq >> 5); /* SW_SET */ + irqc->info.sw_clr = off; + off += sizeof(u32) * (irqc->max_irq >> 5); /* SW_CLR */ + irqc->info.mask_set = off; + off += sizeof(u32) * (irqc->max_irq >> 5); /* MASK_SET */ + irqc->info.mask_clr = off; + off += sizeof(u32) * (irqc->max_irq >> 5); /* MASK_CLR */ + off += sizeof(u32) * (irqc->max_irq >> 5); /* HW_STATE */ + + if (irqc->info.fast_ipi) + static_branch_enable(&use_fast_ipi); + else + static_branch_disable(&use_fast_ipi); + + irqc->info.die_stride = off - start_off; + + irqc->hw_domain = irq_domain_create_tree(of_node_to_fwnode(node), + &aic_irq_domain_ops, irqc); + if (WARN_ON(!irqc->hw_domain)) + goto err_unmap; irq_domain_update_bus_token(irqc->hw_domain, DOMAIN_BUS_WIRED); - if (aic_init_smp(irqc, node)) { - irq_domain_remove(irqc->hw_domain); - iounmap(irqc->base); - kfree(irqc); - return -ENODEV; + if (aic_init_smp(irqc, node)) + goto err_remove_domain; + + affs = of_get_child_by_name(node, "affinities"); + if (affs) { + struct device_node *chld; + + for_each_child_of_node(affs, chld) + build_fiq_affinity(irqc, chld); } set_handle_irq(aic_handle_irq); set_handle_fiq(aic_handle_fiq); - for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++) - aic_ic_write(irqc, AIC_MASK_SET + i * 4, U32_MAX); - for (i = 0; i < BITS_TO_U32(irqc->nr_hw); i++) - aic_ic_write(irqc, AIC_SW_CLR + i * 4, U32_MAX); - for (i = 0; i < irqc->nr_hw; i++) - aic_ic_write(irqc, AIC_TARGET_CPU + i * 4, 1); + off = 0; + for (die = 0; die < irqc->nr_die; die++) { + for (i = 0; i < BITS_TO_U32(irqc->nr_irq); i++) + aic_ic_write(irqc, irqc->info.mask_set + off + i * 4, U32_MAX); + for (i = 0; i < BITS_TO_U32(irqc->nr_irq); i++) + aic_ic_write(irqc, irqc->info.sw_clr + off + i * 4, U32_MAX); + if (irqc->info.target_cpu) + for (i = 0; i < irqc->nr_irq; i++) + aic_ic_write(irqc, irqc->info.target_cpu + off + i * 4, 1); + off += irqc->info.die_stride; + } + + if (irqc->info.version == 2) { + u32 config = aic_ic_read(irqc, AIC2_CONFIG); + + config |= AIC2_CONFIG_ENABLE; + aic_ic_write(irqc, AIC2_CONFIG, config); + } if (!is_kernel_in_hyp_mode()) pr_info("Kernel running in EL1, mapping interrupts"); + if (static_branch_likely(&use_fast_ipi)) + pr_info("Using Fast IPIs"); + cpuhp_setup_state(CPUHP_AP_IRQ_APPLE_AIC_STARTING, "irqchip/apple-aic/ipi:starting", aic_init_cpu, NULL); vgic_set_kvm_info(&vgic_info); - pr_info("Initialized with %d IRQs, %d FIQs, %d vIPIs\n", - irqc->nr_hw, AIC_NR_FIQ, AIC_NR_SWIPI); + pr_info("Initialized with %d/%d IRQs * %d/%d die(s), %d FIQs, %d vIPIs", + irqc->nr_irq, irqc->max_irq, irqc->nr_die, irqc->max_die, AIC_NR_FIQ, AIC_NR_SWIPI); return 0; + +err_remove_domain: + irq_domain_remove(irqc->hw_domain); +err_unmap: + if (irqc->event && irqc->event != irqc->base) + iounmap(irqc->event); + iounmap(irqc->base); + kfree(irqc); + return -ENODEV; } -IRQCHIP_DECLARE(apple_m1_aic, "apple,aic", aic_of_ic_init); +IRQCHIP_DECLARE(apple_aic, "apple,aic", aic_of_ic_init); +IRQCHIP_DECLARE(apple_aic2, "apple,aic2", aic_of_ic_init); diff --git a/drivers/irqchip/irq-ftintc010.c b/drivers/irqchip/irq-ftintc010.c index 5cc268880f8e..46a3aa60e50e 100644 --- a/drivers/irqchip/irq-ftintc010.c +++ b/drivers/irqchip/irq-ftintc010.c @@ -11,7 +11,6 @@ #include <linux/irq.h> #include <linux/io.h> #include <linux/irqchip.h> -#include <linux/irqchip/versatile-fpga.h> #include <linux/irqdomain.h> #include <linux/module.h> #include <linux/of.h> diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 5e935d97207d..0efe1a9a9f3b 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1211,7 +1211,7 @@ static void gic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask) * Ensure that stores to Normal memory are visible to the * other CPUs before issuing the IPI. */ - wmb(); + dsb(ishst); for_each_cpu(cpu, mask) { u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu)); diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index b8bb46c65a97..58ba835bee1f 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -34,6 +34,7 @@ #include <linux/irqdomain.h> #include <linux/interrupt.h> #include <linux/percpu.h> +#include <linux/seq_file.h> #include <linux/slab.h> #include <linux/irqchip.h> #include <linux/irqchip/chained_irq.h> @@ -66,7 +67,6 @@ union gic_base { }; struct gic_chip_data { - struct irq_chip chip; union gic_base dist_base; union gic_base cpu_base; void __iomem *raw_dist_base; @@ -397,18 +397,15 @@ static void gic_handle_cascade_irq(struct irq_desc *desc) chained_irq_exit(chip, desc); } -static const struct irq_chip gic_chip = { - .irq_mask = gic_mask_irq, - .irq_unmask = gic_unmask_irq, - .irq_eoi = gic_eoi_irq, - .irq_set_type = gic_set_type, - .irq_retrigger = gic_retrigger, - .irq_get_irqchip_state = gic_irq_get_irqchip_state, - .irq_set_irqchip_state = gic_irq_set_irqchip_state, - .flags = IRQCHIP_SET_TYPE_MASKED | - IRQCHIP_SKIP_SET_WAKE | - IRQCHIP_MASK_ON_SUSPEND, -}; +static void gic_irq_print_chip(struct irq_data *d, struct seq_file *p) +{ + struct gic_chip_data *gic = irq_data_get_irq_chip_data(d); + + if (gic->domain->dev) + seq_printf(p, gic->domain->dev->of_node->name); + else + seq_printf(p, "GIC-%d", (int)(gic - &gic_data[0])); +} void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq) { @@ -799,8 +796,12 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) { void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + gic_irq(d); + struct gic_chip_data *gic = irq_data_get_irq_chip_data(d); unsigned int cpu; + if (unlikely(gic != &gic_data[0])) + return -EINVAL; + if (!force) cpu = cpumask_any_and(mask_val, cpu_online_mask); else @@ -880,6 +881,39 @@ static __init void gic_smp_init(void) #define gic_ipi_send_mask NULL #endif +static const struct irq_chip gic_chip = { + .irq_mask = gic_mask_irq, + .irq_unmask = gic_unmask_irq, + .irq_eoi = gic_eoi_irq, + .irq_set_type = gic_set_type, + .irq_retrigger = gic_retrigger, + .irq_set_affinity = gic_set_affinity, + .ipi_send_mask = gic_ipi_send_mask, + .irq_get_irqchip_state = gic_irq_get_irqchip_state, + .irq_set_irqchip_state = gic_irq_set_irqchip_state, + .irq_print_chip = gic_irq_print_chip, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_MASK_ON_SUSPEND, +}; + +static const struct irq_chip gic_chip_mode1 = { + .name = "GICv2", + .irq_mask = gic_eoimode1_mask_irq, + .irq_unmask = gic_unmask_irq, + .irq_eoi = gic_eoimode1_eoi_irq, + .irq_set_type = gic_set_type, + .irq_retrigger = gic_retrigger, + .irq_set_affinity = gic_set_affinity, + .ipi_send_mask = gic_ipi_send_mask, + .irq_get_irqchip_state = gic_irq_get_irqchip_state, + .irq_set_irqchip_state = gic_irq_set_irqchip_state, + .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_MASK_ON_SUSPEND, +}; + #ifdef CONFIG_BL_SWITCHER /* * gic_send_sgi - send a SGI directly to given CPU interface number @@ -1024,15 +1058,19 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, { struct gic_chip_data *gic = d->host_data; struct irq_data *irqd = irq_desc_get_irq_data(irq_to_desc(irq)); + const struct irq_chip *chip; + + chip = (static_branch_likely(&supports_deactivate_key) && + gic == &gic_data[0]) ? &gic_chip_mode1 : &gic_chip; switch (hw) { case 0 ... 31: irq_set_percpu_devid(irq); - irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data, + irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_percpu_devid_irq, NULL, NULL); break; default: - irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data, + irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_fasteoi_irq, NULL, NULL); irq_set_probe(irq); irqd_set_single_target(irqd); @@ -1127,26 +1165,6 @@ static const struct irq_domain_ops gic_irq_domain_ops = { .unmap = gic_irq_domain_unmap, }; -static void gic_init_chip(struct gic_chip_data *gic, struct device *dev, - const char *name, bool use_eoimode1) -{ - /* Initialize irq_chip */ - gic->chip = gic_chip; - gic->chip.name = name; - gic->chip.parent_device = dev; - - if (use_eoimode1) { - gic->chip.irq_mask = gic_eoimode1_mask_irq; - gic->chip.irq_eoi = gic_eoimode1_eoi_irq; - gic->chip.irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity; - } - - if (gic == &gic_data[0]) { - gic->chip.irq_set_affinity = gic_set_affinity; - gic->chip.ipi_send_mask = gic_ipi_send_mask; - } -} - static int gic_init_bases(struct gic_chip_data *gic, struct fwnode_handle *handle) { @@ -1246,7 +1264,6 @@ error: static int __init __gic_init_bases(struct gic_chip_data *gic, struct fwnode_handle *handle) { - char *name; int i, ret; if (WARN_ON(!gic || gic->domain)) @@ -1266,18 +1283,8 @@ static int __init __gic_init_bases(struct gic_chip_data *gic, pr_info("GIC: Using split EOI/Deactivate mode\n"); } - if (static_branch_likely(&supports_deactivate_key) && gic == &gic_data[0]) { - name = kasprintf(GFP_KERNEL, "GICv2"); - gic_init_chip(gic, NULL, name, true); - } else { - name = kasprintf(GFP_KERNEL, "GIC-%d", (int)(gic-&gic_data[0])); - gic_init_chip(gic, NULL, name, false); - } - ret = gic_init_bases(gic, handle); - if (ret) - kfree(name); - else if (gic == &gic_data[0]) + if (gic == &gic_data[0]) gic_smp_init(); return ret; @@ -1460,8 +1467,6 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq) if (!*gic) return -ENOMEM; - gic_init_chip(*gic, dev, dev->of_node->name, false); - ret = gic_of_setup(*gic, dev->of_node); if (ret) return ret; @@ -1472,6 +1477,7 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq) return ret; } + irq_domain_set_pm_device((*gic)->domain, dev); irq_set_chained_handler_and_data(irq, gic_handle_cascade_irq, *gic); return 0; diff --git a/drivers/irqchip/irq-imx-intmux.c b/drivers/irqchip/irq-imx-intmux.c index e86ff743e98c..80aaea82468a 100644 --- a/drivers/irqchip/irq-imx-intmux.c +++ b/drivers/irqchip/irq-imx-intmux.c @@ -61,7 +61,6 @@ #define CHAN_MAX_NUM 0x8 struct intmux_irqchip_data { - struct irq_chip chip; u32 saved_reg; int chanidx; int irq; @@ -114,7 +113,7 @@ static void imx_intmux_irq_unmask(struct irq_data *d) raw_spin_unlock_irqrestore(&data->lock, flags); } -static struct irq_chip imx_intmux_irq_chip = { +static struct irq_chip imx_intmux_irq_chip __ro_after_init = { .name = "intmux", .irq_mask = imx_intmux_irq_mask, .irq_unmask = imx_intmux_irq_unmask, @@ -126,7 +125,7 @@ static int imx_intmux_irq_map(struct irq_domain *h, unsigned int irq, struct intmux_irqchip_data *data = h->host_data; irq_set_chip_data(irq, data); - irq_set_chip_and_handler(irq, &data->chip, handle_level_irq); + irq_set_chip_and_handler(irq, &imx_intmux_irq_chip, handle_level_irq); return 0; } @@ -241,8 +240,6 @@ static int imx_intmux_probe(struct platform_device *pdev) } for (i = 0; i < channum; i++) { - data->irqchip_data[i].chip = imx_intmux_irq_chip; - data->irqchip_data[i].chip.parent_device = &pdev->dev; data->irqchip_data[i].chanidx = i; data->irqchip_data[i].irq = irq_of_parse_and_map(np, i); @@ -260,6 +257,7 @@ static int imx_intmux_probe(struct platform_device *pdev) goto out; } data->irqchip_data[i].domain = domain; + irq_domain_set_pm_device(domain, &pdev->dev); /* disable all interrupt sources of this channel firstly */ writel_relaxed(0, data->regs + CHANIER(i)); diff --git a/drivers/irqchip/irq-lpc32xx.c b/drivers/irqchip/irq-lpc32xx.c index a29357f39450..4d70a857133f 100644 --- a/drivers/irqchip/irq-lpc32xx.c +++ b/drivers/irqchip/irq-lpc32xx.c @@ -11,6 +11,7 @@ #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/of_platform.h> +#include <linux/seq_file.h> #include <linux/slab.h> #include <asm/exception.h> @@ -25,8 +26,8 @@ struct lpc32xx_irq_chip { void __iomem *base; + phys_addr_t addr; struct irq_domain *domain; - struct irq_chip chip; }; static struct lpc32xx_irq_chip *lpc32xx_mic_irqc; @@ -118,6 +119,24 @@ static int lpc32xx_irq_set_type(struct irq_data *d, unsigned int type) return 0; } +static void lpc32xx_irq_print_chip(struct irq_data *d, struct seq_file *p) +{ + struct lpc32xx_irq_chip *ic = irq_data_get_irq_chip_data(d); + + if (ic == lpc32xx_mic_irqc) + seq_printf(p, "%08x.mic", ic->addr); + else + seq_printf(p, "%08x.sic", ic->addr); +} + +static const struct irq_chip lpc32xx_chip = { + .irq_ack = lpc32xx_irq_ack, + .irq_mask = lpc32xx_irq_mask, + .irq_unmask = lpc32xx_irq_unmask, + .irq_set_type = lpc32xx_irq_set_type, + .irq_print_chip = lpc32xx_irq_print_chip, +}; + static void __exception_irq_entry lpc32xx_handle_irq(struct pt_regs *regs) { struct lpc32xx_irq_chip *ic = lpc32xx_mic_irqc; @@ -153,7 +172,7 @@ static int lpc32xx_irq_domain_map(struct irq_domain *id, unsigned int virq, struct lpc32xx_irq_chip *ic = id->host_data; irq_set_chip_data(virq, ic); - irq_set_chip_and_handler(virq, &ic->chip, handle_level_irq); + irq_set_chip_and_handler(virq, &lpc32xx_chip, handle_level_irq); irq_set_status_flags(virq, IRQ_LEVEL); irq_set_noprobe(virq); @@ -183,6 +202,7 @@ static int __init lpc32xx_of_ic_init(struct device_node *node, if (!irqc) return -ENOMEM; + irqc->addr = addr; irqc->base = of_iomap(node, 0); if (!irqc->base) { pr_err("%pOF: unable to map registers\n", node); @@ -190,21 +210,11 @@ static int __init lpc32xx_of_ic_init(struct device_node *node, return -EINVAL; } - irqc->chip.irq_ack = lpc32xx_irq_ack; - irqc->chip.irq_mask = lpc32xx_irq_mask; - irqc->chip.irq_unmask = lpc32xx_irq_unmask; - irqc->chip.irq_set_type = lpc32xx_irq_set_type; - if (is_mic) - irqc->chip.name = kasprintf(GFP_KERNEL, "%08x.mic", addr); - else - irqc->chip.name = kasprintf(GFP_KERNEL, "%08x.sic", addr); - irqc->domain = irq_domain_add_linear(node, NR_LPC32XX_IC_IRQS, &lpc32xx_irq_domain_ops, irqc); if (!irqc->domain) { pr_err("unable to add irq domain\n"); iounmap(irqc->base); - kfree(irqc->chip.name); kfree(irqc); return -ENODEV; } diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c index d90ff0b92480..2aaa9aad3e87 100644 --- a/drivers/irqchip/irq-meson-gpio.c +++ b/drivers/irqchip/irq-meson-gpio.c @@ -16,7 +16,7 @@ #include <linux/of.h> #include <linux/of_address.h> -#define NUM_CHANNEL 8 +#define MAX_NUM_CHANNEL 64 #define MAX_INPUT_MUX 256 #define REG_EDGE_POL 0x00 @@ -26,6 +26,8 @@ /* use for A1 like chips */ #define REG_PIN_A1_SEL 0x04 +/* Used for s4 chips */ +#define REG_EDGE_POL_S4 0x1c /* * Note: The S905X3 datasheet reports that BOTH_EDGE is controlled by @@ -51,15 +53,22 @@ static void meson_a1_gpio_irq_sel_pin(struct meson_gpio_irq_controller *ctl, unsigned int channel, unsigned long hwirq); static void meson_a1_gpio_irq_init(struct meson_gpio_irq_controller *ctl); +static int meson8_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl, + unsigned int type, u32 *channel_hwirq); +static int meson_s4_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl, + unsigned int type, u32 *channel_hwirq); struct irq_ctl_ops { void (*gpio_irq_sel_pin)(struct meson_gpio_irq_controller *ctl, unsigned int channel, unsigned long hwirq); void (*gpio_irq_init)(struct meson_gpio_irq_controller *ctl); + int (*gpio_irq_set_type)(struct meson_gpio_irq_controller *ctl, + unsigned int type, u32 *channel_hwirq); }; struct meson_gpio_irq_params { unsigned int nr_hwirq; + unsigned int nr_channels; bool support_edge_both; unsigned int edge_both_offset; unsigned int edge_single_offset; @@ -68,28 +77,44 @@ struct meson_gpio_irq_params { struct irq_ctl_ops ops; }; -#define INIT_MESON_COMMON(irqs, init, sel) \ +#define INIT_MESON_COMMON(irqs, init, sel, type) \ .nr_hwirq = irqs, \ .ops = { \ .gpio_irq_init = init, \ .gpio_irq_sel_pin = sel, \ + .gpio_irq_set_type = type, \ }, #define INIT_MESON8_COMMON_DATA(irqs) \ INIT_MESON_COMMON(irqs, meson_gpio_irq_init_dummy, \ - meson8_gpio_irq_sel_pin) \ + meson8_gpio_irq_sel_pin, \ + meson8_gpio_irq_set_type) \ .edge_single_offset = 0, \ .pol_low_offset = 16, \ .pin_sel_mask = 0xff, \ + .nr_channels = 8, \ #define INIT_MESON_A1_COMMON_DATA(irqs) \ INIT_MESON_COMMON(irqs, meson_a1_gpio_irq_init, \ - meson_a1_gpio_irq_sel_pin) \ + meson_a1_gpio_irq_sel_pin, \ + meson8_gpio_irq_set_type) \ .support_edge_both = true, \ .edge_both_offset = 16, \ .edge_single_offset = 8, \ .pol_low_offset = 0, \ .pin_sel_mask = 0x7f, \ + .nr_channels = 8, \ + +#define INIT_MESON_S4_COMMON_DATA(irqs) \ + INIT_MESON_COMMON(irqs, meson_a1_gpio_irq_init, \ + meson_a1_gpio_irq_sel_pin, \ + meson_s4_gpio_irq_set_type) \ + .support_edge_both = true, \ + .edge_both_offset = 0, \ + .edge_single_offset = 12, \ + .pol_low_offset = 0, \ + .pin_sel_mask = 0xff, \ + .nr_channels = 12, \ static const struct meson_gpio_irq_params meson8_params = { INIT_MESON8_COMMON_DATA(134) @@ -121,6 +146,10 @@ static const struct meson_gpio_irq_params a1_params = { INIT_MESON_A1_COMMON_DATA(62) }; +static const struct meson_gpio_irq_params s4_params = { + INIT_MESON_S4_COMMON_DATA(82) +}; + static const struct of_device_id meson_irq_gpio_matches[] = { { .compatible = "amlogic,meson8-gpio-intc", .data = &meson8_params }, { .compatible = "amlogic,meson8b-gpio-intc", .data = &meson8b_params }, @@ -130,14 +159,15 @@ static const struct of_device_id meson_irq_gpio_matches[] = { { .compatible = "amlogic,meson-g12a-gpio-intc", .data = &axg_params }, { .compatible = "amlogic,meson-sm1-gpio-intc", .data = &sm1_params }, { .compatible = "amlogic,meson-a1-gpio-intc", .data = &a1_params }, + { .compatible = "amlogic,meson-s4-gpio-intc", .data = &s4_params }, { } }; struct meson_gpio_irq_controller { const struct meson_gpio_irq_params *params; void __iomem *base; - u32 channel_irqs[NUM_CHANNEL]; - DECLARE_BITMAP(channel_map, NUM_CHANNEL); + u32 channel_irqs[MAX_NUM_CHANNEL]; + DECLARE_BITMAP(channel_map, MAX_NUM_CHANNEL); spinlock_t lock; }; @@ -207,8 +237,8 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl, spin_lock_irqsave(&ctl->lock, flags); /* Find a free channel */ - idx = find_first_zero_bit(ctl->channel_map, NUM_CHANNEL); - if (idx >= NUM_CHANNEL) { + idx = find_first_zero_bit(ctl->channel_map, ctl->params->nr_channels); + if (idx >= ctl->params->nr_channels) { spin_unlock_irqrestore(&ctl->lock, flags); pr_err("No channel available\n"); return -ENOSPC; @@ -256,9 +286,8 @@ meson_gpio_irq_release_channel(struct meson_gpio_irq_controller *ctl, clear_bit(idx, ctl->channel_map); } -static int meson_gpio_irq_type_setup(struct meson_gpio_irq_controller *ctl, - unsigned int type, - u32 *channel_hwirq) +static int meson8_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl, + unsigned int type, u32 *channel_hwirq) { u32 val = 0; unsigned int idx; @@ -299,6 +328,51 @@ static int meson_gpio_irq_type_setup(struct meson_gpio_irq_controller *ctl, return 0; } +/* + * gpio irq relative registers for s4 + * -PADCTRL_GPIO_IRQ_CTRL0 + * bit[31]: enable/disable all the irq lines + * bit[12-23]: single edge trigger + * bit[0-11]: polarity trigger + * + * -PADCTRL_GPIO_IRQ_CTRL[X] + * bit[0-16]: 7 bits to choose gpio source for irq line 2*[X] - 2 + * bit[16-22]:7 bits to choose gpio source for irq line 2*[X] - 1 + * where X = 1-6 + * + * -PADCTRL_GPIO_IRQ_CTRL[7] + * bit[0-11]: both edge trigger + */ +static int meson_s4_gpio_irq_set_type(struct meson_gpio_irq_controller *ctl, + unsigned int type, u32 *channel_hwirq) +{ + u32 val = 0; + unsigned int idx; + + idx = meson_gpio_irq_get_channel_idx(ctl, channel_hwirq); + + type &= IRQ_TYPE_SENSE_MASK; + + meson_gpio_irq_update_bits(ctl, REG_EDGE_POL_S4, BIT(idx), 0); + + if (type == IRQ_TYPE_EDGE_BOTH) { + val |= BIT(ctl->params->edge_both_offset + idx); + meson_gpio_irq_update_bits(ctl, REG_EDGE_POL_S4, + BIT(ctl->params->edge_both_offset + idx), val); + return 0; + } + + if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_EDGE_FALLING)) + val |= BIT(ctl->params->pol_low_offset + idx); + + if (type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING)) + val |= BIT(ctl->params->edge_single_offset + idx); + + meson_gpio_irq_update_bits(ctl, REG_EDGE_POL, + BIT(idx) | BIT(12 + idx), val); + return 0; +}; + static unsigned int meson_gpio_irq_type_output(unsigned int type) { unsigned int sense = type & IRQ_TYPE_SENSE_MASK; @@ -323,7 +397,7 @@ static int meson_gpio_irq_set_type(struct irq_data *data, unsigned int type) u32 *channel_hwirq = irq_data_get_irq_chip_data(data); int ret; - ret = meson_gpio_irq_type_setup(ctl, type, channel_hwirq); + ret = ctl->params->ops.gpio_irq_set_type(ctl, type, channel_hwirq); if (ret) return ret; @@ -450,10 +524,10 @@ static int meson_gpio_irq_parse_dt(struct device_node *node, struct meson_gpio_i ret = of_property_read_variable_u32_array(node, "amlogic,channel-interrupts", ctl->channel_irqs, - NUM_CHANNEL, - NUM_CHANNEL); + ctl->params->nr_channels, + ctl->params->nr_channels); if (ret < 0) { - pr_err("can't get %d channel interrupts\n", NUM_CHANNEL); + pr_err("can't get %d channel interrupts\n", ctl->params->nr_channels); return ret; } @@ -507,7 +581,7 @@ static int meson_gpio_irq_of_init(struct device_node *node, struct device_node * } pr_info("%d to %d gpio interrupt mux initialized\n", - ctl->params->nr_hwirq, NUM_CHANNEL); + ctl->params->nr_hwirq, ctl->params->nr_channels); return 0; diff --git a/drivers/irqchip/irq-mvebu-pic.c b/drivers/irqchip/irq-mvebu-pic.c index 870f9866b8da..ef3d3646ccc2 100644 --- a/drivers/irqchip/irq-mvebu-pic.c +++ b/drivers/irqchip/irq-mvebu-pic.c @@ -18,6 +18,7 @@ #include <linux/module.h> #include <linux/of_irq.h> #include <linux/platform_device.h> +#include <linux/seq_file.h> #define PIC_CAUSE 0x0 #define PIC_MASK 0x4 @@ -29,7 +30,7 @@ struct mvebu_pic { void __iomem *base; u32 parent_irq; struct irq_domain *domain; - struct irq_chip irq_chip; + struct platform_device *pdev; }; static void mvebu_pic_reset(struct mvebu_pic *pic) @@ -66,6 +67,20 @@ static void mvebu_pic_unmask_irq(struct irq_data *d) writel(reg, pic->base + PIC_MASK); } +static void mvebu_pic_print_chip(struct irq_data *d, struct seq_file *p) +{ + struct mvebu_pic *pic = irq_data_get_irq_chip_data(d); + + seq_printf(p, dev_name(&pic->pdev->dev)); +} + +static const struct irq_chip mvebu_pic_chip = { + .irq_mask = mvebu_pic_mask_irq, + .irq_unmask = mvebu_pic_unmask_irq, + .irq_eoi = mvebu_pic_eoi_irq, + .irq_print_chip = mvebu_pic_print_chip, +}; + static int mvebu_pic_irq_map(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq) { @@ -73,8 +88,7 @@ static int mvebu_pic_irq_map(struct irq_domain *domain, unsigned int virq, irq_set_percpu_devid(virq); irq_set_chip_data(virq, pic); - irq_set_chip_and_handler(virq, &pic->irq_chip, - handle_percpu_devid_irq); + irq_set_chip_and_handler(virq, &mvebu_pic_chip, handle_percpu_devid_irq); irq_set_status_flags(virq, IRQ_LEVEL); irq_set_probe(virq); @@ -120,22 +134,16 @@ static int mvebu_pic_probe(struct platform_device *pdev) { struct device_node *node = pdev->dev.of_node; struct mvebu_pic *pic; - struct irq_chip *irq_chip; pic = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_pic), GFP_KERNEL); if (!pic) return -ENOMEM; + pic->pdev = pdev; pic->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(pic->base)) return PTR_ERR(pic->base); - irq_chip = &pic->irq_chip; - irq_chip->name = dev_name(&pdev->dev); - irq_chip->irq_mask = mvebu_pic_mask_irq; - irq_chip->irq_unmask = mvebu_pic_unmask_irq; - irq_chip->irq_eoi = mvebu_pic_eoi_irq; - pic->parent_irq = irq_of_parse_and_map(node, 0); if (pic->parent_irq <= 0) { dev_err(&pdev->dev, "Failed to parse parent interrupt\n"); diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c index ba4759b3e269..94230306e0ee 100644 --- a/drivers/irqchip/irq-nvic.c +++ b/drivers/irqchip/irq-nvic.c @@ -107,6 +107,7 @@ static int __init nvic_of_init(struct device_node *node, if (!nvic_irq_domain) { pr_warn("Failed to allocate irq domain\n"); + iounmap(nvic_base); return -ENOMEM; } @@ -116,6 +117,7 @@ static int __init nvic_of_init(struct device_node *node, if (ret) { pr_warn("Failed to allocate irq chips\n"); irq_domain_remove(nvic_irq_domain); + iounmap(nvic_base); return ret; } diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c new file mode 100644 index 000000000000..eea5a753618c --- /dev/null +++ b/drivers/irqchip/irq-qcom-mpm.c @@ -0,0 +1,461 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, Linaro Limited + * Copyright (c) 2010-2020, The Linux Foundation. All rights reserved. + */ + +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/irqchip.h> +#include <linux/irqdomain.h> +#include <linux/mailbox_client.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/pm_domain.h> +#include <linux/slab.h> +#include <linux/soc/qcom/irq.h> +#include <linux/spinlock.h> + +/* + * This is the driver for Qualcomm MPM (MSM Power Manager) interrupt controller, + * which is commonly found on Qualcomm SoCs built on the RPM architecture. + * Sitting in always-on domain, MPM monitors the wakeup interrupts when SoC is + * asleep, and wakes up the AP when one of those interrupts occurs. This driver + * doesn't directly access physical MPM registers though. Instead, the access + * is bridged via a piece of internal memory (SRAM) that is accessible to both + * AP and RPM. This piece of memory is called 'vMPM' in the driver. + * + * When SoC is awake, the vMPM is owned by AP and the register setup by this + * driver all happens on vMPM. When AP is about to get power collapsed, the + * driver sends a mailbox notification to RPM, which will take over the vMPM + * ownership and dump vMPM into physical MPM registers. On wakeup, AP is woken + * up by a MPM pin/interrupt, and RPM will copy STATUS registers into vMPM. + * Then AP start owning vMPM again. + * + * vMPM register map: + * + * 31 0 + * +--------------------------------+ + * | TIMER0 | 0x00 + * +--------------------------------+ + * | TIMER1 | 0x04 + * +--------------------------------+ + * | ENABLE0 | 0x08 + * +--------------------------------+ + * | ... | ... + * +--------------------------------+ + * | ENABLEn | + * +--------------------------------+ + * | FALLING_EDGE0 | + * +--------------------------------+ + * | ... | + * +--------------------------------+ + * | STATUSn | + * +--------------------------------+ + * + * n = DIV_ROUND_UP(pin_cnt, 32) + * + */ + +#define MPM_REG_ENABLE 0 +#define MPM_REG_FALLING_EDGE 1 +#define MPM_REG_RISING_EDGE 2 +#define MPM_REG_POLARITY 3 +#define MPM_REG_STATUS 4 + +/* MPM pin map to GIC hwirq */ +struct mpm_gic_map { + int pin; + irq_hw_number_t hwirq; +}; + +struct qcom_mpm_priv { + void __iomem *base; + raw_spinlock_t lock; + struct mbox_client mbox_client; + struct mbox_chan *mbox_chan; + struct mpm_gic_map *maps; + unsigned int map_cnt; + unsigned int reg_stride; + struct irq_domain *domain; + struct generic_pm_domain genpd; +}; + +static u32 qcom_mpm_read(struct qcom_mpm_priv *priv, unsigned int reg, + unsigned int index) +{ + unsigned int offset = (reg * priv->reg_stride + index + 2) * 4; + + return readl_relaxed(priv->base + offset); +} + +static void qcom_mpm_write(struct qcom_mpm_priv *priv, unsigned int reg, + unsigned int index, u32 val) +{ + unsigned int offset = (reg * priv->reg_stride + index + 2) * 4; + + writel_relaxed(val, priv->base + offset); + + /* Ensure the write is completed */ + wmb(); +} + +static void qcom_mpm_enable_irq(struct irq_data *d, bool en) +{ + struct qcom_mpm_priv *priv = d->chip_data; + int pin = d->hwirq; + unsigned int index = pin / 32; + unsigned int shift = pin % 32; + unsigned long flags, val; + + raw_spin_lock_irqsave(&priv->lock, flags); + + val = qcom_mpm_read(priv, MPM_REG_ENABLE, index); + __assign_bit(shift, &val, en); + qcom_mpm_write(priv, MPM_REG_ENABLE, index, val); + + raw_spin_unlock_irqrestore(&priv->lock, flags); +} + +static void qcom_mpm_mask(struct irq_data *d) +{ + qcom_mpm_enable_irq(d, false); + + if (d->parent_data) + irq_chip_mask_parent(d); +} + +static void qcom_mpm_unmask(struct irq_data *d) +{ + qcom_mpm_enable_irq(d, true); + + if (d->parent_data) + irq_chip_unmask_parent(d); +} + +static void mpm_set_type(struct qcom_mpm_priv *priv, bool set, unsigned int reg, + unsigned int index, unsigned int shift) +{ + unsigned long flags, val; + + raw_spin_lock_irqsave(&priv->lock, flags); + + val = qcom_mpm_read(priv, reg, index); + __assign_bit(shift, &val, set); + qcom_mpm_write(priv, reg, index, val); + + raw_spin_unlock_irqrestore(&priv->lock, flags); +} + +static int qcom_mpm_set_type(struct irq_data *d, unsigned int type) +{ + struct qcom_mpm_priv *priv = d->chip_data; + int pin = d->hwirq; + unsigned int index = pin / 32; + unsigned int shift = pin % 32; + + if (type & IRQ_TYPE_EDGE_RISING) + mpm_set_type(priv, true, MPM_REG_RISING_EDGE, index, shift); + else + mpm_set_type(priv, false, MPM_REG_RISING_EDGE, index, shift); + + if (type & IRQ_TYPE_EDGE_FALLING) + mpm_set_type(priv, true, MPM_REG_FALLING_EDGE, index, shift); + else + mpm_set_type(priv, false, MPM_REG_FALLING_EDGE, index, shift); + + if (type & IRQ_TYPE_LEVEL_HIGH) + mpm_set_type(priv, true, MPM_REG_POLARITY, index, shift); + else + mpm_set_type(priv, false, MPM_REG_POLARITY, index, shift); + + if (!d->parent_data) + return 0; + + if (type & IRQ_TYPE_EDGE_BOTH) + type = IRQ_TYPE_EDGE_RISING; + + if (type & IRQ_TYPE_LEVEL_MASK) + type = IRQ_TYPE_LEVEL_HIGH; + + return irq_chip_set_type_parent(d, type); +} + +static struct irq_chip qcom_mpm_chip = { + .name = "mpm", + .irq_eoi = irq_chip_eoi_parent, + .irq_mask = qcom_mpm_mask, + .irq_unmask = qcom_mpm_unmask, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_type = qcom_mpm_set_type, + .irq_set_affinity = irq_chip_set_affinity_parent, + .flags = IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_SKIP_SET_WAKE, +}; + +static struct mpm_gic_map *get_mpm_gic_map(struct qcom_mpm_priv *priv, int pin) +{ + struct mpm_gic_map *maps = priv->maps; + int i; + + for (i = 0; i < priv->map_cnt; i++) { + if (maps[i].pin == pin) + return &maps[i]; + } + + return NULL; +} + +static int qcom_mpm_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *data) +{ + struct qcom_mpm_priv *priv = domain->host_data; + struct irq_fwspec *fwspec = data; + struct irq_fwspec parent_fwspec; + struct mpm_gic_map *map; + irq_hw_number_t pin; + unsigned int type; + int ret; + + ret = irq_domain_translate_twocell(domain, fwspec, &pin, &type); + if (ret) + return ret; + + ret = irq_domain_set_hwirq_and_chip(domain, virq, pin, + &qcom_mpm_chip, priv); + if (ret) + return ret; + + map = get_mpm_gic_map(priv, pin); + if (map == NULL) + return irq_domain_disconnect_hierarchy(domain->parent, virq); + + if (type & IRQ_TYPE_EDGE_BOTH) + type = IRQ_TYPE_EDGE_RISING; + + if (type & IRQ_TYPE_LEVEL_MASK) + type = IRQ_TYPE_LEVEL_HIGH; + + parent_fwspec.fwnode = domain->parent->fwnode; + parent_fwspec.param_count = 3; + parent_fwspec.param[0] = 0; + parent_fwspec.param[1] = map->hwirq; + parent_fwspec.param[2] = type; + + return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, + &parent_fwspec); +} + +static const struct irq_domain_ops qcom_mpm_ops = { + .alloc = qcom_mpm_alloc, + .free = irq_domain_free_irqs_common, + .translate = irq_domain_translate_twocell, +}; + +/* Triggered by RPM when system resumes from deep sleep */ +static irqreturn_t qcom_mpm_handler(int irq, void *dev_id) +{ + struct qcom_mpm_priv *priv = dev_id; + unsigned long enable, pending; + irqreturn_t ret = IRQ_NONE; + unsigned long flags; + int i, j; + + for (i = 0; i < priv->reg_stride; i++) { + raw_spin_lock_irqsave(&priv->lock, flags); + enable = qcom_mpm_read(priv, MPM_REG_ENABLE, i); + pending = qcom_mpm_read(priv, MPM_REG_STATUS, i); + pending &= enable; + raw_spin_unlock_irqrestore(&priv->lock, flags); + + for_each_set_bit(j, &pending, 32) { + unsigned int pin = 32 * i + j; + struct irq_desc *desc = irq_resolve_mapping(priv->domain, pin); + struct irq_data *d = &desc->irq_data; + + if (!irqd_is_level_type(d)) + irq_set_irqchip_state(d->irq, + IRQCHIP_STATE_PENDING, true); + ret = IRQ_HANDLED; + } + } + + return ret; +} + +static int mpm_pd_power_off(struct generic_pm_domain *genpd) +{ + struct qcom_mpm_priv *priv = container_of(genpd, struct qcom_mpm_priv, + genpd); + int i, ret; + + for (i = 0; i < priv->reg_stride; i++) + qcom_mpm_write(priv, MPM_REG_STATUS, i, 0); + + /* Notify RPM to write vMPM into HW */ + ret = mbox_send_message(priv->mbox_chan, NULL); + if (ret < 0) + return ret; + + return 0; +} + +static bool gic_hwirq_is_mapped(struct mpm_gic_map *maps, int cnt, u32 hwirq) +{ + int i; + + for (i = 0; i < cnt; i++) + if (maps[i].hwirq == hwirq) + return true; + + return false; +} + +static int qcom_mpm_init(struct device_node *np, struct device_node *parent) +{ + struct platform_device *pdev = of_find_device_by_node(np); + struct device *dev = &pdev->dev; + struct irq_domain *parent_domain; + struct generic_pm_domain *genpd; + struct qcom_mpm_priv *priv; + unsigned int pin_cnt; + int i, irq; + int ret; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + ret = of_property_read_u32(np, "qcom,mpm-pin-count", &pin_cnt); + if (ret) { + dev_err(dev, "failed to read qcom,mpm-pin-count: %d\n", ret); + return ret; + } + + priv->reg_stride = DIV_ROUND_UP(pin_cnt, 32); + + ret = of_property_count_u32_elems(np, "qcom,mpm-pin-map"); + if (ret < 0) { + dev_err(dev, "failed to read qcom,mpm-pin-map: %d\n", ret); + return ret; + } + + if (ret % 2) { + dev_err(dev, "invalid qcom,mpm-pin-map\n"); + return -EINVAL; + } + + priv->map_cnt = ret / 2; + priv->maps = devm_kcalloc(dev, priv->map_cnt, sizeof(*priv->maps), + GFP_KERNEL); + if (!priv->maps) + return -ENOMEM; + + for (i = 0; i < priv->map_cnt; i++) { + u32 pin, hwirq; + + of_property_read_u32_index(np, "qcom,mpm-pin-map", i * 2, &pin); + of_property_read_u32_index(np, "qcom,mpm-pin-map", i * 2 + 1, &hwirq); + + if (gic_hwirq_is_mapped(priv->maps, i, hwirq)) { + dev_warn(dev, "failed to map pin %d as GIC hwirq %d is already mapped\n", + pin, hwirq); + continue; + } + + priv->maps[i].pin = pin; + priv->maps[i].hwirq = hwirq; + } + + raw_spin_lock_init(&priv->lock); + + priv->base = devm_platform_ioremap_resource(pdev, 0); + if (!priv->base) + return PTR_ERR(priv->base); + + for (i = 0; i < priv->reg_stride; i++) { + qcom_mpm_write(priv, MPM_REG_ENABLE, i, 0); + qcom_mpm_write(priv, MPM_REG_FALLING_EDGE, i, 0); + qcom_mpm_write(priv, MPM_REG_RISING_EDGE, i, 0); + qcom_mpm_write(priv, MPM_REG_POLARITY, i, 0); + qcom_mpm_write(priv, MPM_REG_STATUS, i, 0); + } + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + genpd = &priv->genpd; + genpd->flags = GENPD_FLAG_IRQ_SAFE; + genpd->power_off = mpm_pd_power_off; + + genpd->name = devm_kasprintf(dev, GFP_KERNEL, "%s", dev_name(dev)); + if (!genpd->name) + return -ENOMEM; + + ret = pm_genpd_init(genpd, NULL, false); + if (ret) { + dev_err(dev, "failed to init genpd: %d\n", ret); + return ret; + } + + ret = of_genpd_add_provider_simple(np, genpd); + if (ret) { + dev_err(dev, "failed to add genpd provider: %d\n", ret); + goto remove_genpd; + } + + priv->mbox_client.dev = dev; + priv->mbox_chan = mbox_request_channel(&priv->mbox_client, 0); + if (IS_ERR(priv->mbox_chan)) { + ret = PTR_ERR(priv->mbox_chan); + dev_err(dev, "failed to acquire IPC channel: %d\n", ret); + return ret; + } + + parent_domain = irq_find_host(parent); + if (!parent_domain) { + dev_err(dev, "failed to find MPM parent domain\n"); + ret = -ENXIO; + goto free_mbox; + } + + priv->domain = irq_domain_create_hierarchy(parent_domain, + IRQ_DOMAIN_FLAG_QCOM_MPM_WAKEUP, pin_cnt, + of_node_to_fwnode(np), &qcom_mpm_ops, priv); + if (!priv->domain) { + dev_err(dev, "failed to create MPM domain\n"); + ret = -ENOMEM; + goto free_mbox; + } + + irq_domain_update_bus_token(priv->domain, DOMAIN_BUS_WAKEUP); + + ret = devm_request_irq(dev, irq, qcom_mpm_handler, IRQF_NO_SUSPEND, + "qcom_mpm", priv); + if (ret) { + dev_err(dev, "failed to request irq: %d\n", ret); + goto remove_domain; + } + + return 0; + +remove_domain: + irq_domain_remove(priv->domain); +free_mbox: + mbox_free_channel(priv->mbox_chan); +remove_genpd: + pm_genpd_remove(genpd); + return ret; +} + +IRQCHIP_PLATFORM_DRIVER_BEGIN(qcom_mpm) +IRQCHIP_MATCH("qcom,mpm", qcom_mpm_init) +IRQCHIP_PLATFORM_DRIVER_END(qcom_mpm) +MODULE_DESCRIPTION("Qualcomm Technologies, Inc. MSM Power Manager"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c index 37f9a4499fdb..e83756aca14e 100644 --- a/drivers/irqchip/irq-renesas-intc-irqpin.c +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -508,7 +508,6 @@ static int intc_irqpin_probe(struct platform_device *pdev) irq_chip = &p->irq_chip; irq_chip->name = "intc-irqpin"; - irq_chip->parent_device = dev; irq_chip->irq_mask = disable_fn; irq_chip->irq_unmask = enable_fn; irq_chip->irq_set_type = intc_irqpin_irq_set_type; @@ -523,6 +522,8 @@ static int intc_irqpin_probe(struct platform_device *pdev) goto err0; } + irq_domain_set_pm_device(p->irq_domain, dev); + if (p->shared_irqs) { /* request one shared interrupt */ if (devm_request_irq(dev, p->irq[0].requested_irq, diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c index 909325f88239..1ee5e9941f67 100644 --- a/drivers/irqchip/irq-renesas-irqc.c +++ b/drivers/irqchip/irq-renesas-irqc.c @@ -188,13 +188,14 @@ static int irqc_probe(struct platform_device *pdev) p->gc->reg_base = p->cpu_int_base; p->gc->chip_types[0].regs.enable = IRQC_EN_SET; p->gc->chip_types[0].regs.disable = IRQC_EN_STS; - p->gc->chip_types[0].chip.parent_device = dev; p->gc->chip_types[0].chip.irq_mask = irq_gc_mask_disable_reg; p->gc->chip_types[0].chip.irq_unmask = irq_gc_unmask_enable_reg; p->gc->chip_types[0].chip.irq_set_type = irqc_irq_set_type; p->gc->chip_types[0].chip.irq_set_wake = irqc_irq_set_wake; p->gc->chip_types[0].chip.flags = IRQCHIP_MASK_ON_SUSPEND; + irq_domain_set_pm_device(p->irq_domain, dev); + /* request interrupts one by one */ for (k = 0; k < p->number_of_irqs; k++) { if (devm_request_irq(dev, p->irq[k].requested_irq, diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 09cc98266d30..bb87e4c3b88e 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -44,8 +44,8 @@ * Each hart context has a vector of interrupt enable bits associated with it. * There's one bit for each interrupt source. */ -#define ENABLE_BASE 0x2000 -#define ENABLE_PER_HART 0x80 +#define CONTEXT_ENABLE_BASE 0x2000 +#define CONTEXT_ENABLE_SIZE 0x80 /* * Each hart context has a set of control registers associated with it. Right @@ -53,7 +53,7 @@ * take an interrupt, and a register to claim interrupts. */ #define CONTEXT_BASE 0x200000 -#define CONTEXT_PER_HART 0x1000 +#define CONTEXT_SIZE 0x1000 #define CONTEXT_THRESHOLD 0x00 #define CONTEXT_CLAIM 0x04 @@ -81,17 +81,21 @@ static int plic_parent_irq __ro_after_init; static bool plic_cpuhp_setup_done __ro_after_init; static DEFINE_PER_CPU(struct plic_handler, plic_handlers); -static inline void plic_toggle(struct plic_handler *handler, - int hwirq, int enable) +static void __plic_toggle(void __iomem *enable_base, int hwirq, int enable) { - u32 __iomem *reg = handler->enable_base + (hwirq / 32) * sizeof(u32); + u32 __iomem *reg = enable_base + (hwirq / 32) * sizeof(u32); u32 hwirq_mask = 1 << (hwirq % 32); - raw_spin_lock(&handler->enable_lock); if (enable) writel(readl(reg) | hwirq_mask, reg); else writel(readl(reg) & ~hwirq_mask, reg); +} + +static void plic_toggle(struct plic_handler *handler, int hwirq, int enable) +{ + raw_spin_lock(&handler->enable_lock); + __plic_toggle(handler->enable_base, hwirq, enable); raw_spin_unlock(&handler->enable_lock); } @@ -324,8 +328,18 @@ static int __init plic_init(struct device_node *node, * Skip contexts other than external interrupts for our * privilege level. */ - if (parent.args[0] != RV_IRQ_EXT) + if (parent.args[0] != RV_IRQ_EXT) { + /* Disable S-mode enable bits if running in M-mode. */ + if (IS_ENABLED(CONFIG_RISCV_M_MODE)) { + void __iomem *enable_base = priv->regs + + CONTEXT_ENABLE_BASE + + i * CONTEXT_ENABLE_SIZE; + + for (hwirq = 1; hwirq <= nr_irqs; hwirq++) + __plic_toggle(enable_base, hwirq, 0); + } continue; + } hartid = riscv_of_parent_hartid(parent.np); if (hartid < 0) { @@ -361,11 +375,11 @@ static int __init plic_init(struct device_node *node, cpumask_set_cpu(cpu, &priv->lmask); handler->present = true; - handler->hart_base = - priv->regs + CONTEXT_BASE + i * CONTEXT_PER_HART; + handler->hart_base = priv->regs + CONTEXT_BASE + + i * CONTEXT_SIZE; raw_spin_lock_init(&handler->enable_lock); - handler->enable_base = - priv->regs + ENABLE_BASE + i * ENABLE_PER_HART; + handler->enable_base = priv->regs + CONTEXT_ENABLE_BASE + + i * CONTEXT_ENABLE_SIZE; handler->priv = priv; done: for (hwirq = 1; hwirq <= nr_irqs; hwirq++) diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c index b7cb2da71888..9d18f47040eb 100644 --- a/drivers/irqchip/irq-stm32-exti.c +++ b/drivers/irqchip/irq-stm32-exti.c @@ -214,6 +214,48 @@ static const struct stm32_desc_irq stm32mp1_desc_irq[] = { { .exti = 73, .irq_parent = 129, .chip = &stm32_exti_h_chip }, }; +static const struct stm32_desc_irq stm32mp13_desc_irq[] = { + { .exti = 0, .irq_parent = 6, .chip = &stm32_exti_h_chip }, + { .exti = 1, .irq_parent = 7, .chip = &stm32_exti_h_chip }, + { .exti = 2, .irq_parent = 8, .chip = &stm32_exti_h_chip }, + { .exti = 3, .irq_parent = 9, .chip = &stm32_exti_h_chip }, + { .exti = 4, .irq_parent = 10, .chip = &stm32_exti_h_chip }, + { .exti = 5, .irq_parent = 24, .chip = &stm32_exti_h_chip }, + { .exti = 6, .irq_parent = 65, .chip = &stm32_exti_h_chip }, + { .exti = 7, .irq_parent = 66, .chip = &stm32_exti_h_chip }, + { .exti = 8, .irq_parent = 67, .chip = &stm32_exti_h_chip }, + { .exti = 9, .irq_parent = 68, .chip = &stm32_exti_h_chip }, + { .exti = 10, .irq_parent = 41, .chip = &stm32_exti_h_chip }, + { .exti = 11, .irq_parent = 43, .chip = &stm32_exti_h_chip }, + { .exti = 12, .irq_parent = 77, .chip = &stm32_exti_h_chip }, + { .exti = 13, .irq_parent = 78, .chip = &stm32_exti_h_chip }, + { .exti = 14, .irq_parent = 106, .chip = &stm32_exti_h_chip }, + { .exti = 15, .irq_parent = 109, .chip = &stm32_exti_h_chip }, + { .exti = 16, .irq_parent = 1, .chip = &stm32_exti_h_chip }, + { .exti = 19, .irq_parent = 3, .chip = &stm32_exti_h_chip_direct }, + { .exti = 21, .irq_parent = 32, .chip = &stm32_exti_h_chip_direct }, + { .exti = 22, .irq_parent = 34, .chip = &stm32_exti_h_chip_direct }, + { .exti = 23, .irq_parent = 73, .chip = &stm32_exti_h_chip_direct }, + { .exti = 24, .irq_parent = 93, .chip = &stm32_exti_h_chip_direct }, + { .exti = 25, .irq_parent = 114, .chip = &stm32_exti_h_chip_direct }, + { .exti = 26, .irq_parent = 38, .chip = &stm32_exti_h_chip_direct }, + { .exti = 27, .irq_parent = 39, .chip = &stm32_exti_h_chip_direct }, + { .exti = 28, .irq_parent = 40, .chip = &stm32_exti_h_chip_direct }, + { .exti = 29, .irq_parent = 72, .chip = &stm32_exti_h_chip_direct }, + { .exti = 30, .irq_parent = 53, .chip = &stm32_exti_h_chip_direct }, + { .exti = 31, .irq_parent = 54, .chip = &stm32_exti_h_chip_direct }, + { .exti = 32, .irq_parent = 83, .chip = &stm32_exti_h_chip_direct }, + { .exti = 33, .irq_parent = 84, .chip = &stm32_exti_h_chip_direct }, + { .exti = 44, .irq_parent = 96, .chip = &stm32_exti_h_chip_direct }, + { .exti = 47, .irq_parent = 92, .chip = &stm32_exti_h_chip_direct }, + { .exti = 48, .irq_parent = 116, .chip = &stm32_exti_h_chip_direct }, + { .exti = 50, .irq_parent = 117, .chip = &stm32_exti_h_chip_direct }, + { .exti = 52, .irq_parent = 118, .chip = &stm32_exti_h_chip_direct }, + { .exti = 53, .irq_parent = 119, .chip = &stm32_exti_h_chip_direct }, + { .exti = 68, .irq_parent = 63, .chip = &stm32_exti_h_chip_direct }, + { .exti = 70, .irq_parent = 98, .chip = &stm32_exti_h_chip_direct }, +}; + static const struct stm32_exti_drv_data stm32mp1_drv_data = { .exti_banks = stm32mp1_exti_banks, .bank_nr = ARRAY_SIZE(stm32mp1_exti_banks), @@ -221,6 +263,13 @@ static const struct stm32_exti_drv_data stm32mp1_drv_data = { .irq_nr = ARRAY_SIZE(stm32mp1_desc_irq), }; +static const struct stm32_exti_drv_data stm32mp13_drv_data = { + .exti_banks = stm32mp1_exti_banks, + .bank_nr = ARRAY_SIZE(stm32mp1_exti_banks), + .desc_irqs = stm32mp13_desc_irq, + .irq_nr = ARRAY_SIZE(stm32mp13_desc_irq), +}; + static const struct stm32_desc_irq *stm32_exti_get_desc(const struct stm32_exti_drv_data *drv_data, irq_hw_number_t hwirq) @@ -922,6 +971,7 @@ static int stm32_exti_probe(struct platform_device *pdev) /* platform driver only for MP1 */ static const struct of_device_id stm32_exti_ids[] = { { .compatible = "st,stm32mp1-exti", .data = &stm32mp1_drv_data}, + { .compatible = "st,stm32mp13-exti", .data = &stm32mp13_drv_data}, {}, }; MODULE_DEVICE_TABLE(of, stm32_exti_ids); diff --git a/drivers/irqchip/irq-ts4800.c b/drivers/irqchip/irq-ts4800.c index f032db23b30f..b2d61d4f6fe6 100644 --- a/drivers/irqchip/irq-ts4800.c +++ b/drivers/irqchip/irq-ts4800.c @@ -19,14 +19,15 @@ #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/platform_device.h> +#include <linux/seq_file.h> #define IRQ_MASK 0x4 #define IRQ_STATUS 0x8 struct ts4800_irq_data { void __iomem *base; + struct platform_device *pdev; struct irq_domain *domain; - struct irq_chip irq_chip; }; static void ts4800_irq_mask(struct irq_data *d) @@ -47,12 +48,25 @@ static void ts4800_irq_unmask(struct irq_data *d) writew(reg & ~mask, data->base + IRQ_MASK); } +static void ts4800_irq_print_chip(struct irq_data *d, struct seq_file *p) +{ + struct ts4800_irq_data *data = irq_data_get_irq_chip_data(d); + + seq_printf(p, "%s", dev_name(&data->pdev->dev)); +} + +static const struct irq_chip ts4800_chip = { + .irq_mask = ts4800_irq_mask, + .irq_unmask = ts4800_irq_unmask, + .irq_print_chip = ts4800_irq_print_chip, +}; + static int ts4800_irqdomain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) { struct ts4800_irq_data *data = d->host_data; - irq_set_chip_and_handler(irq, &data->irq_chip, handle_simple_irq); + irq_set_chip_and_handler(irq, &ts4800_chip, handle_simple_irq); irq_set_chip_data(irq, data); irq_set_noprobe(irq); @@ -92,13 +106,13 @@ static int ts4800_ic_probe(struct platform_device *pdev) { struct device_node *node = pdev->dev.of_node; struct ts4800_irq_data *data; - struct irq_chip *irq_chip; int parent_irq; data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; + data->pdev = pdev; data->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(data->base)) return PTR_ERR(data->base); @@ -111,11 +125,6 @@ static int ts4800_ic_probe(struct platform_device *pdev) return -EINVAL; } - irq_chip = &data->irq_chip; - irq_chip->name = dev_name(&pdev->dev); - irq_chip->irq_mask = ts4800_irq_mask; - irq_chip->irq_unmask = ts4800_irq_unmask; - data->domain = irq_domain_add_linear(node, 8, &ts4800_ic_ops, data); if (!data->domain) { dev_err(&pdev->dev, "cannot add IRQ domain\n"); diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c index f2757b6aecc8..ba543ed9c154 100644 --- a/drivers/irqchip/irq-versatile-fpga.c +++ b/drivers/irqchip/irq-versatile-fpga.c @@ -7,12 +7,12 @@ #include <linux/io.h> #include <linux/irqchip.h> #include <linux/irqchip/chained_irq.h> -#include <linux/irqchip/versatile-fpga.h> #include <linux/irqdomain.h> #include <linux/module.h> #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> +#include <linux/seq_file.h> #include <asm/exception.h> #include <asm/mach/irq.h> @@ -34,14 +34,12 @@ /** * struct fpga_irq_data - irq data container for the FPGA IRQ controller * @base: memory offset in virtual memory - * @chip: chip container for this instance * @domain: IRQ domain for this instance * @valid: mask for valid IRQs on this controller * @used_irqs: number of active IRQs on this controller */ struct fpga_irq_data { void __iomem *base; - struct irq_chip chip; u32 valid; struct irq_domain *domain; u8 used_irqs; @@ -67,6 +65,20 @@ static void fpga_irq_unmask(struct irq_data *d) writel(mask, f->base + IRQ_ENABLE_SET); } +static void fpga_irq_print_chip(struct irq_data *d, struct seq_file *p) +{ + struct fpga_irq_data *f = irq_data_get_irq_chip_data(d); + + seq_printf(p, irq_domain_get_of_node(f->domain)->name); +} + +static const struct irq_chip fpga_chip = { + .irq_ack = fpga_irq_mask, + .irq_mask = fpga_irq_mask, + .irq_unmask = fpga_irq_unmask, + .irq_print_chip = fpga_irq_print_chip, +}; + static void fpga_irq_handle(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); @@ -116,7 +128,7 @@ static int handle_one_fpga(struct fpga_irq_data *f, struct pt_regs *regs) * Keep iterating over all registered FPGA IRQ controllers until there are * no pending interrupts. */ -asmlinkage void __exception_irq_entry fpga_handle_irq(struct pt_regs *regs) +static asmlinkage void __exception_irq_entry fpga_handle_irq(struct pt_regs *regs) { int i, handled; @@ -135,8 +147,7 @@ static int fpga_irqdomain_map(struct irq_domain *d, unsigned int irq, if (!(f->valid & BIT(hwirq))) return -EPERM; irq_set_chip_data(irq, f); - irq_set_chip_and_handler(irq, &f->chip, - handle_level_irq); + irq_set_chip_and_handler(irq, &fpga_chip, handle_level_irq); irq_set_probe(irq); return 0; } @@ -146,8 +157,8 @@ static const struct irq_domain_ops fpga_irqdomain_ops = { .xlate = irq_domain_xlate_onetwocell, }; -void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start, - int parent_irq, u32 valid, struct device_node *node) +static void __init fpga_irq_init(void __iomem *base, int parent_irq, + u32 valid, struct device_node *node) { struct fpga_irq_data *f; int i; @@ -158,10 +169,6 @@ void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start, } f = &fpga_irq_devices[fpga_irq_id]; f->base = base; - f->chip.name = name; - f->chip.irq_ack = fpga_irq_mask; - f->chip.irq_mask = fpga_irq_mask; - f->chip.irq_unmask = fpga_irq_unmask; f->valid = valid; if (parent_irq != -1) { @@ -169,20 +176,19 @@ void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start, f); } - /* This will also allocate irq descriptors */ - f->domain = irq_domain_add_simple(node, fls(valid), irq_start, + f->domain = irq_domain_add_linear(node, fls(valid), &fpga_irqdomain_ops, f); /* This will allocate all valid descriptors in the linear case */ for (i = 0; i < fls(valid); i++) if (valid & BIT(i)) { - if (!irq_start) - irq_create_mapping(f->domain, i); + /* Is this still required? */ + irq_create_mapping(f->domain, i); f->used_irqs++; } pr_info("FPGA IRQ chip %d \"%s\" @ %p, %u irqs", - fpga_irq_id, name, base, f->used_irqs); + fpga_irq_id, node->name, base, f->used_irqs); if (parent_irq != -1) pr_cont(", parent IRQ: %d\n", parent_irq); else @@ -192,8 +198,8 @@ void __init fpga_irq_init(void __iomem *base, const char *name, int irq_start, } #ifdef CONFIG_OF -int __init fpga_irq_of_init(struct device_node *node, - struct device_node *parent) +static int __init fpga_irq_of_init(struct device_node *node, + struct device_node *parent) { void __iomem *base; u32 clear_mask; @@ -222,7 +228,7 @@ int __init fpga_irq_of_init(struct device_node *node, parent_irq = -1; } - fpga_irq_init(base, node->name, 0, parent_irq, valid_mask, node); + fpga_irq_init(base, parent_irq, valid_mask, node); /* * On Versatile AB/PB, some secondary interrupts have a direct diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c index 356a59755d63..238d3d344949 100644 --- a/drivers/irqchip/irq-xilinx-intc.c +++ b/drivers/irqchip/irq-xilinx-intc.c @@ -32,6 +32,8 @@ #define MER_ME (1<<0) #define MER_HIE (1<<1) +#define SPURIOUS_IRQ (-1U) + static DEFINE_STATIC_KEY_FALSE(xintc_is_be); struct xintc_irq_chip { @@ -110,20 +112,6 @@ static struct irq_chip intc_dev = { .irq_mask_ack = intc_mask_ack, }; -unsigned int xintc_get_irq(void) -{ - unsigned int irq = -1; - u32 hwirq; - - hwirq = xintc_read(primary_intc, IVR); - if (hwirq != -1U) - irq = irq_find_mapping(primary_intc->root_domain, hwirq); - - pr_debug("irq-xilinx: hwirq=%d, irq=%d\n", hwirq, irq); - - return irq; -} - static int xintc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { struct xintc_irq_chip *irqc = d->host_data; @@ -164,6 +152,19 @@ static void xil_intc_irq_handler(struct irq_desc *desc) chained_irq_exit(chip, desc); } +static void xil_intc_handle_irq(struct pt_regs *regs) +{ + u32 hwirq; + + do { + hwirq = xintc_read(primary_intc, IVR); + if (unlikely(hwirq == SPURIOUS_IRQ)) + break; + + generic_handle_domain_irq(primary_intc->root_domain, hwirq); + } while (true); +} + static int __init xilinx_intc_of_init(struct device_node *intc, struct device_node *parent) { @@ -233,6 +234,7 @@ static int __init xilinx_intc_of_init(struct device_node *intc, } else { primary_intc = irqc; irq_set_default_host(primary_intc->root_domain); + set_handle_irq(xil_intc_handle_irq); } return 0; diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index 173e6520e06e..d96916cf6a41 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -21,23 +21,19 @@ #include <linux/slab.h> #include <linux/types.h> -#define PDC_MAX_IRQS 168 #define PDC_MAX_GPIO_IRQS 256 -#define CLEAR_INTR(reg, intr) (reg & ~(1 << intr)) -#define ENABLE_INTR(reg, intr) (reg | (1 << intr)) - #define IRQ_ENABLE_BANK 0x10 #define IRQ_i_CFG 0x110 -#define PDC_NO_PARENT_IRQ ~0UL - struct pdc_pin_region { u32 pin_base; u32 parent_base; u32 cnt; }; +#define pin_to_hwirq(r, p) ((r)->parent_base + (p) - (r)->pin_base) + static DEFINE_RAW_SPINLOCK(pdc_lock); static void __iomem *pdc_base; static struct pdc_pin_region *pdc_region; @@ -56,17 +52,18 @@ static u32 pdc_reg_read(int reg, u32 i) static void pdc_enable_intr(struct irq_data *d, bool on) { int pin_out = d->hwirq; + unsigned long enable; + unsigned long flags; u32 index, mask; - u32 enable; index = pin_out / 32; mask = pin_out % 32; - raw_spin_lock(&pdc_lock); + raw_spin_lock_irqsave(&pdc_lock, flags); enable = pdc_reg_read(IRQ_ENABLE_BANK, index); - enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask); + __assign_bit(mask, &enable, on); pdc_reg_write(IRQ_ENABLE_BANK, index, enable); - raw_spin_unlock(&pdc_lock); + raw_spin_unlock_irqrestore(&pdc_lock, flags); } static void qcom_pdc_gic_disable(struct irq_data *d) @@ -186,34 +183,17 @@ static struct irq_chip qcom_pdc_gic_chip = { .irq_set_affinity = irq_chip_set_affinity_parent, }; -static irq_hw_number_t get_parent_hwirq(int pin) +static struct pdc_pin_region *get_pin_region(int pin) { int i; - struct pdc_pin_region *region; for (i = 0; i < pdc_region_cnt; i++) { - region = &pdc_region[i]; - if (pin >= region->pin_base && - pin < region->pin_base + region->cnt) - return (region->parent_base + pin - region->pin_base); + if (pin >= pdc_region[i].pin_base && + pin < pdc_region[i].pin_base + pdc_region[i].cnt) + return &pdc_region[i]; } - return PDC_NO_PARENT_IRQ; -} - -static int qcom_pdc_translate(struct irq_domain *d, struct irq_fwspec *fwspec, - unsigned long *hwirq, unsigned int *type) -{ - if (is_of_node(fwspec->fwnode)) { - if (fwspec->param_count != 2) - return -EINVAL; - - *hwirq = fwspec->param[0]; - *type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK; - return 0; - } - - return -EINVAL; + return NULL; } static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq, @@ -221,55 +201,12 @@ static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq, { struct irq_fwspec *fwspec = data; struct irq_fwspec parent_fwspec; - irq_hw_number_t hwirq, parent_hwirq; - unsigned int type; - int ret; - - ret = qcom_pdc_translate(domain, fwspec, &hwirq, &type); - if (ret) - return ret; - - ret = irq_domain_set_hwirq_and_chip(domain, virq, hwirq, - &qcom_pdc_gic_chip, NULL); - if (ret) - return ret; - - parent_hwirq = get_parent_hwirq(hwirq); - if (parent_hwirq == PDC_NO_PARENT_IRQ) - return irq_domain_disconnect_hierarchy(domain->parent, virq); - - if (type & IRQ_TYPE_EDGE_BOTH) - type = IRQ_TYPE_EDGE_RISING; - - if (type & IRQ_TYPE_LEVEL_MASK) - type = IRQ_TYPE_LEVEL_HIGH; - - parent_fwspec.fwnode = domain->parent->fwnode; - parent_fwspec.param_count = 3; - parent_fwspec.param[0] = 0; - parent_fwspec.param[1] = parent_hwirq; - parent_fwspec.param[2] = type; - - return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, - &parent_fwspec); -} - -static const struct irq_domain_ops qcom_pdc_ops = { - .translate = qcom_pdc_translate, - .alloc = qcom_pdc_alloc, - .free = irq_domain_free_irqs_common, -}; - -static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs, void *data) -{ - struct irq_fwspec *fwspec = data; - struct irq_fwspec parent_fwspec; - irq_hw_number_t hwirq, parent_hwirq; + struct pdc_pin_region *region; + irq_hw_number_t hwirq; unsigned int type; int ret; - ret = qcom_pdc_translate(domain, fwspec, &hwirq, &type); + ret = irq_domain_translate_twocell(domain, fwspec, &hwirq, &type); if (ret) return ret; @@ -281,8 +218,8 @@ static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq, if (ret) return ret; - parent_hwirq = get_parent_hwirq(hwirq); - if (parent_hwirq == PDC_NO_PARENT_IRQ) + region = get_pin_region(hwirq); + if (!region) return irq_domain_disconnect_hierarchy(domain->parent, virq); if (type & IRQ_TYPE_EDGE_BOTH) @@ -294,23 +231,16 @@ static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq, parent_fwspec.fwnode = domain->parent->fwnode; parent_fwspec.param_count = 3; parent_fwspec.param[0] = 0; - parent_fwspec.param[1] = parent_hwirq; + parent_fwspec.param[1] = pin_to_hwirq(region, hwirq); parent_fwspec.param[2] = type; return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &parent_fwspec); } -static int qcom_pdc_gpio_domain_select(struct irq_domain *d, - struct irq_fwspec *fwspec, - enum irq_domain_bus_token bus_token) -{ - return bus_token == DOMAIN_BUS_WAKEUP; -} - -static const struct irq_domain_ops qcom_pdc_gpio_ops = { - .select = qcom_pdc_gpio_domain_select, - .alloc = qcom_pdc_gpio_alloc, +static const struct irq_domain_ops qcom_pdc_ops = { + .translate = irq_domain_translate_twocell, + .alloc = qcom_pdc_alloc, .free = irq_domain_free_irqs_common, }; @@ -361,7 +291,7 @@ static int pdc_setup_pin_mapping(struct device_node *np) static int qcom_pdc_init(struct device_node *node, struct device_node *parent) { - struct irq_domain *parent_domain, *pdc_domain, *pdc_gpio_domain; + struct irq_domain *parent_domain, *pdc_domain; int ret; pdc_base = of_iomap(node, 0); @@ -383,32 +313,21 @@ static int qcom_pdc_init(struct device_node *node, struct device_node *parent) goto fail; } - pdc_domain = irq_domain_create_hierarchy(parent_domain, 0, PDC_MAX_IRQS, - of_fwnode_handle(node), - &qcom_pdc_ops, NULL); - if (!pdc_domain) { - pr_err("%pOF: GIC domain add failed\n", node); - ret = -ENOMEM; - goto fail; - } - - pdc_gpio_domain = irq_domain_create_hierarchy(parent_domain, + pdc_domain = irq_domain_create_hierarchy(parent_domain, IRQ_DOMAIN_FLAG_QCOM_PDC_WAKEUP, PDC_MAX_GPIO_IRQS, of_fwnode_handle(node), - &qcom_pdc_gpio_ops, NULL); - if (!pdc_gpio_domain) { - pr_err("%pOF: PDC domain add failed for GPIO domain\n", node); + &qcom_pdc_ops, NULL); + if (!pdc_domain) { + pr_err("%pOF: PDC domain add failed\n", node); ret = -ENOMEM; - goto remove; + goto fail; } - irq_domain_update_bus_token(pdc_gpio_domain, DOMAIN_BUS_WAKEUP); + irq_domain_update_bus_token(pdc_domain, DOMAIN_BUS_WAKEUP); return 0; -remove: - irq_domain_remove(pdc_domain); fail: kfree(pdc_region); iounmap(pdc_base); diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c index 70fa18b04ad2..b14d3f98e1eb 100644 --- a/drivers/mfd/ezx-pcap.c +++ b/drivers/mfd/ezx-pcap.c @@ -193,13 +193,11 @@ static void pcap_isr_work(struct work_struct *work) ezx_pcap_write(pcap, PCAP_REG_MSR, isr | msr); ezx_pcap_write(pcap, PCAP_REG_ISR, isr); - local_irq_disable(); service = isr & ~msr; for (irq = pcap->irq_base; service; service >>= 1, irq++) { if (service & 1) - generic_handle_irq(irq); + generic_handle_irq_safe(irq); } - local_irq_enable(); ezx_pcap_write(pcap, PCAP_REG_MSR, pcap->msr); } while (gpio_get_value(pdata->gpio)); } diff --git a/drivers/misc/hi6421v600-irq.c b/drivers/misc/hi6421v600-irq.c index 1c763796cf1f..caa3de37698b 100644 --- a/drivers/misc/hi6421v600-irq.c +++ b/drivers/misc/hi6421v600-irq.c @@ -117,8 +117,8 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv) * If both powerkey down and up IRQs are received, * handle them at the right order */ - generic_handle_irq(priv->irqs[POWERKEY_DOWN]); - generic_handle_irq(priv->irqs[POWERKEY_UP]); + generic_handle_irq_safe(priv->irqs[POWERKEY_DOWN]); + generic_handle_irq_safe(priv->irqs[POWERKEY_UP]); pending &= ~HISI_IRQ_POWERKEY_UP_DOWN; } @@ -126,7 +126,7 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv) continue; for_each_set_bit(offset, &pending, BITS_PER_BYTE) { - generic_handle_irq(priv->irqs[offset + i * BITS_PER_BYTE]); + generic_handle_irq_safe(priv->irqs[offset + i * BITS_PER_BYTE]); } } diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c index 866767b70d65..b0a7dee27ffc 100644 --- a/drivers/net/dsa/microchip/ksz8795_spi.c +++ b/drivers/net/dsa/microchip/ksz8795_spi.c @@ -124,12 +124,23 @@ static const struct of_device_id ksz8795_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, ksz8795_dt_ids); +static const struct spi_device_id ksz8795_spi_ids[] = { + { "ksz8765" }, + { "ksz8794" }, + { "ksz8795" }, + { "ksz8863" }, + { "ksz8873" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, ksz8795_spi_ids); + static struct spi_driver ksz8795_spi_driver = { .driver = { .name = "ksz8795-switch", .owner = THIS_MODULE, .of_match_table = of_match_ptr(ksz8795_dt_ids), }, + .id_table = ksz8795_spi_ids, .probe = ksz8795_spi_probe, .remove = ksz8795_spi_remove, .shutdown = ksz8795_spi_shutdown, diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index e3cb0e6c9f6f..43addeabfc25 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -98,12 +98,24 @@ static const struct of_device_id ksz9477_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, ksz9477_dt_ids); +static const struct spi_device_id ksz9477_spi_ids[] = { + { "ksz9477" }, + { "ksz9897" }, + { "ksz9893" }, + { "ksz9563" }, + { "ksz8563" }, + { "ksz9567" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, ksz9477_spi_ids); + static struct spi_driver ksz9477_spi_driver = { .driver = { .name = "ksz9477-switch", .owner = THIS_MODULE, .of_match_table = of_match_ptr(ksz9477_dt_ids), }, + .id_table = ksz9477_spi_ids, .probe = ksz9477_spi_probe, .remove = ksz9477_spi_remove, .shutdown = ksz9477_spi_shutdown, diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 4ad3fc72e74e..a89b93cb4e26 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1181,8 +1181,11 @@ static int alx_change_mtu(struct net_device *netdev, int mtu) alx->hw.mtu = mtu; alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE); netdev_update_features(netdev); - if (netif_running(netdev)) + if (netif_running(netdev)) { + mutex_lock(&alx->mtx); alx_reinit(alx); + mutex_unlock(&alx->mtx); + } return 0; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index a19dd6797070..2209d99b3404 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -2533,6 +2533,4 @@ void bnx2x_register_phc(struct bnx2x *bp); * Meant for implicit re-load flows. */ int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp); -int bnx2x_init_firmware(struct bnx2x *bp); -void bnx2x_release_firmware(struct bnx2x *bp); #endif /* bnx2x.h */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 8d36ebbf08e1..5729a5ab059d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2364,24 +2364,30 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 load_code, bool print_err) /* is another pf loaded on this engine? */ if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP && load_code != FW_MSG_CODE_DRV_LOAD_COMMON) { - /* build my FW version dword */ - u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) + - (bp->fw_rev << 16) + (bp->fw_eng << 24); + u8 loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng; + u32 loaded_fw; /* read loaded FW from chip */ - u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM); + loaded_fw = REG_RD(bp, XSEM_REG_PRAM); - DP(BNX2X_MSG_SP, "loaded fw %x, my fw %x\n", - loaded_fw, my_fw); + loaded_fw_major = loaded_fw & 0xff; + loaded_fw_minor = (loaded_fw >> 8) & 0xff; + loaded_fw_rev = (loaded_fw >> 16) & 0xff; + loaded_fw_eng = (loaded_fw >> 24) & 0xff; + + DP(BNX2X_MSG_SP, "loaded fw 0x%x major 0x%x minor 0x%x rev 0x%x eng 0x%x\n", + loaded_fw, loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng); /* abort nic load if version mismatch */ - if (my_fw != loaded_fw) { + if (loaded_fw_major != BCM_5710_FW_MAJOR_VERSION || + loaded_fw_minor != BCM_5710_FW_MINOR_VERSION || + loaded_fw_eng != BCM_5710_FW_ENGINEERING_VERSION || + loaded_fw_rev < BCM_5710_FW_REVISION_VERSION_V15) { if (print_err) - BNX2X_ERR("bnx2x with FW %x was already loaded which mismatches my %x FW. Aborting\n", - loaded_fw, my_fw); + BNX2X_ERR("loaded FW incompatible. Aborting\n"); else - BNX2X_DEV_INFO("bnx2x with FW %x was already loaded which mismatches my %x FW, possibly due to MF UNDI\n", - loaded_fw, my_fw); + BNX2X_DEV_INFO("loaded FW incompatible, possibly due to MF UNDI\n"); + return -EBUSY; } } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index eedb48d945ed..c19b072f3a23 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12319,15 +12319,6 @@ static int bnx2x_init_bp(struct bnx2x *bp) bnx2x_read_fwinfo(bp); - if (IS_PF(bp)) { - rc = bnx2x_init_firmware(bp); - - if (rc) { - bnx2x_free_mem_bp(bp); - return rc; - } - } - func = BP_FUNC(bp); /* need to reset chip if undi was active */ @@ -12340,7 +12331,6 @@ static int bnx2x_init_bp(struct bnx2x *bp) rc = bnx2x_prev_unload(bp); if (rc) { - bnx2x_release_firmware(bp); bnx2x_free_mem_bp(bp); return rc; } @@ -13409,7 +13399,7 @@ do { \ (u8 *)bp->arr, len); \ } while (0) -int bnx2x_init_firmware(struct bnx2x *bp) +static int bnx2x_init_firmware(struct bnx2x *bp) { const char *fw_file_name, *fw_file_name_v15; struct bnx2x_fw_file_hdr *fw_hdr; @@ -13509,7 +13499,7 @@ request_firmware_exit: return rc; } -void bnx2x_release_firmware(struct bnx2x *bp) +static void bnx2x_release_firmware(struct bnx2x *bp) { kfree(bp->init_ops_offsets); kfree(bp->init_ops); @@ -14026,7 +14016,6 @@ static int bnx2x_init_one(struct pci_dev *pdev, return 0; init_one_freemem: - bnx2x_release_firmware(bp); bnx2x_free_mem_bp(bp); init_one_exit: diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 87f1056e29ff..2da804f84b48 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2287,8 +2287,10 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, dma_length_status = status->length_status; if (dev->features & NETIF_F_RXCSUM) { rx_csum = (__force __be16)(status->rx_csum & 0xffff); - skb->csum = (__force __wsum)ntohs(rx_csum); - skb->ip_summed = CHECKSUM_COMPLETE; + if (rx_csum) { + skb->csum = (__force __wsum)ntohs(rx_csum); + skb->ip_summed = CHECKSUM_COMPLETE; + } } /* DMA flags and length are still valid no matter how diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 8e644e9ed8da..0e178a0a59c5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2541,6 +2541,13 @@ restart_watchdog: queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2); } +/** + * iavf_disable_vf - disable VF + * @adapter: board private structure + * + * Set communication failed flag and free all resources. + * NOTE: This function is expected to be called with crit_lock being held. + **/ static void iavf_disable_vf(struct iavf_adapter *adapter) { struct iavf_mac_filter *f, *ftmp; @@ -2595,7 +2602,6 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE); iavf_shutdown_adminq(&adapter->hw); adapter->netdev->flags &= ~IFF_UP; - mutex_unlock(&adapter->crit_lock); adapter->flags &= ~IAVF_FLAG_RESET_PENDING; iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); @@ -4614,6 +4620,13 @@ static void iavf_remove(struct pci_dev *pdev) struct iavf_hw *hw = &adapter->hw; int err; + /* When reboot/shutdown is in progress no need to do anything + * as the adapter is already REMOVE state that was set during + * iavf_shutdown() callback. + */ + if (adapter->state == __IAVF_REMOVE) + return; + set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section); /* Wait until port initialization is complete. * There are flows where register/unregister netdev may race. diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 493942e910be..b7e8744b0c0a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4880,7 +4880,6 @@ static void ice_remove(struct pci_dev *pdev) ice_devlink_unregister_params(pf); set_bit(ICE_DOWN, pf->state); - mutex_destroy(&(&pf->hw)->fdir_fltr_lock); ice_deinit_lag(pf); if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) ice_ptp_release(pf); @@ -4888,6 +4887,7 @@ static void ice_remove(struct pci_dev *pdev) ice_remove_arfs(pf); ice_setup_mc_magic_wake(pf); ice_vsi_release_all(pf); + mutex_destroy(&(&pf->hw)->fdir_fltr_lock); ice_set_wake(pf); ice_free_irq_msix_misc(pf); ice_for_each_vsi(pf, i) { @@ -5962,8 +5962,9 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, u64 pkts = 0, bytes = 0; ring = READ_ONCE(rings[i]); - if (ring) - ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); + if (!ring) + continue; + ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); vsi_stats->tx_packets += pkts; vsi_stats->tx_bytes += bytes; vsi->tx_restart += ring->tx_stats.restart_q; diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 949858891973..fdb4d7e7296c 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -60,6 +60,12 @@ static int ocelot_chain_to_block(int chain, bool ingress) */ static int ocelot_chain_to_lookup(int chain) { + /* Backwards compatibility with older, single-chain tc-flower + * offload support in Ocelot + */ + if (chain == 0) + return 0; + return (chain / VCAP_LOOKUP) % 10; } @@ -68,7 +74,15 @@ static int ocelot_chain_to_lookup(int chain) */ static int ocelot_chain_to_pag(int chain) { - int lookup = ocelot_chain_to_lookup(chain); + int lookup; + + /* Backwards compatibility with older, single-chain tc-flower + * offload support in Ocelot + */ + if (chain == 0) + return 0; + + lookup = ocelot_chain_to_lookup(chain); /* calculate PAG value as chain index relative to the first PAG */ return chain - VCAP_IS2_CHAIN(lookup, 0); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 3646469433b1..fde1c492ca02 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1587,6 +1587,9 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, pcpu_sum = kvmalloc_array(num_possible_cpus(), sizeof(struct netvsc_ethtool_pcpu_stats), GFP_KERNEL); + if (!pcpu_sum) + return; + netvsc_get_pcpu_stats(dev, pcpu_sum); for_each_present_cpu(cpu) { struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu]; diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c index 7d2abaf2b2c9..64fb76c1e395 100644 --- a/drivers/net/mdio/mdio-mscc-miim.c +++ b/drivers/net/mdio/mdio-mscc-miim.c @@ -187,6 +187,13 @@ static const struct regmap_config mscc_miim_regmap_config = { .reg_stride = 4, }; +static const struct regmap_config mscc_miim_phy_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .name = "phy", +}; + int mscc_miim_setup(struct device *dev, struct mii_bus **pbus, const char *name, struct regmap *mii_regmap, int status_offset) { @@ -250,7 +257,7 @@ static int mscc_miim_probe(struct platform_device *pdev) } phy_regmap = devm_regmap_init_mmio(&pdev->dev, phy_regs, - &mscc_miim_regmap_config); + &mscc_miim_phy_regmap_config); if (IS_ERR(phy_regmap)) { dev_err(&pdev->dev, "Unable to create phy register regmap\n"); return PTR_ERR(phy_regmap); diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 2429db614b59..2702faf7b0f6 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1687,8 +1687,8 @@ static int marvell_suspend(struct phy_device *phydev) int err; /* Suspend the fiber mode first */ - if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, - phydev->supported)) { + if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + phydev->supported)) { err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE); if (err < 0) goto error; @@ -1722,8 +1722,8 @@ static int marvell_resume(struct phy_device *phydev) int err; /* Resume the fiber mode first */ - if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, - phydev->supported)) { + if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + phydev->supported)) { err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE); if (err < 0) goto error; diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index ebfeeb3c67c1..7e3017e7a1c0 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -2685,3 +2685,6 @@ MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl); MODULE_DESCRIPTION("Microsemi VSC85xx PHY driver"); MODULE_AUTHOR("Nagaraju Lakkaraju"); MODULE_LICENSE("Dual MIT/GPL"); + +MODULE_FIRMWARE(MSCC_VSC8584_REVB_INT8051_FW); +MODULE_FIRMWARE(MSCC_VSC8574_REVB_INT8051_FW); diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index b8e20a3f2b84..415f16662f88 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1537,11 +1537,8 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb) netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata); lan78xx_defer_kevent(dev, EVENT_LINK_RESET); - if (dev->domain_data.phyirq > 0) { - local_irq_disable(); - generic_handle_irq(dev->domain_data.phyirq); - local_irq_enable(); - } + if (dev->domain_data.phyirq > 0) + generic_handle_irq_safe(dev->domain_data.phyirq); } else { netdev_warn(dev->net, "unexpected interrupt: 0x%08x\n", intdata); diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 62c453a21e49..7c1c2658cb5f 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -2611,36 +2611,9 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) ath10k_mac_handle_beacon(ar, skb); if (ieee80211_is_beacon(hdr->frame_control) || - ieee80211_is_probe_resp(hdr->frame_control)) { - struct ieee80211_mgmt *mgmt = (void *)skb->data; - enum cfg80211_bss_frame_type ftype; - u8 *ies; - int ies_ch; - + ieee80211_is_probe_resp(hdr->frame_control)) status->boottime_ns = ktime_get_boottime_ns(); - if (!ar->scan_channel) - goto drop; - - ies = mgmt->u.beacon.variable; - - if (ieee80211_is_beacon(mgmt->frame_control)) - ftype = CFG80211_BSS_FTYPE_BEACON; - else - ftype = CFG80211_BSS_FTYPE_PRESP; - - ies_ch = cfg80211_get_ies_channel_number(mgmt->u.beacon.variable, - skb_tail_pointer(skb) - ies, - sband->band, ftype); - - if (ies_ch > 0 && ies_ch != channel) { - ath10k_dbg(ar, ATH10K_DBG_MGMT, - "channel mismatched ds channel %d scan channel %d\n", - ies_ch, channel); - goto drop; - } - } - ath10k_dbg(ar, ATH10K_DBG_MGMT, "event mgmt rx skb %pK len %d ftype %02x stype %02x\n", skb, skb->len, @@ -2654,10 +2627,6 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) ieee80211_rx_ni(ar->hw, skb); return 0; - -drop: - dev_kfree_skb(skb); - return 0; } static int freq_to_idx(struct ath10k *ar, int freq) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 091a0ca16361..496d775c6770 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1233,44 +1233,6 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, } CONFIGFS_ATTR(nvmet_subsys_, attr_model); -static ssize_t nvmet_subsys_attr_discovery_nqn_show(struct config_item *item, - char *page) -{ - return snprintf(page, PAGE_SIZE, "%s\n", - nvmet_disc_subsys->subsysnqn); -} - -static ssize_t nvmet_subsys_attr_discovery_nqn_store(struct config_item *item, - const char *page, size_t count) -{ - struct nvmet_subsys *subsys = to_subsys(item); - char *subsysnqn; - int len; - - len = strcspn(page, "\n"); - if (!len) - return -EINVAL; - - subsysnqn = kmemdup_nul(page, len, GFP_KERNEL); - if (!subsysnqn) - return -ENOMEM; - - /* - * The discovery NQN must be different from subsystem NQN. - */ - if (!strcmp(subsysnqn, subsys->subsysnqn)) { - kfree(subsysnqn); - return -EBUSY; - } - down_write(&nvmet_config_sem); - kfree(nvmet_disc_subsys->subsysnqn); - nvmet_disc_subsys->subsysnqn = subsysnqn; - up_write(&nvmet_config_sem); - - return count; -} -CONFIGFS_ATTR(nvmet_subsys_, attr_discovery_nqn); - #ifdef CONFIG_BLK_DEV_INTEGRITY static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item, char *page) @@ -1300,7 +1262,6 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_cntlid_min, &nvmet_subsys_attr_attr_cntlid_max, &nvmet_subsys_attr_attr_model, - &nvmet_subsys_attr_attr_discovery_nqn, #ifdef CONFIG_BLK_DEV_INTEGRITY &nvmet_subsys_attr_attr_pi_enable, #endif diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 5119c687de68..626caf6f1e4b 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1493,8 +1493,7 @@ static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port, if (!port) return NULL; - if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn) || - !strcmp(nvmet_disc_subsys->subsysnqn, subsysnqn)) { + if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) { if (!kref_get_unless_zero(&nvmet_disc_subsys->ref)) return NULL; return nvmet_disc_subsys; diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c index 854d95163112..a2c3c207a04b 100644 --- a/drivers/pci/controller/pcie-apple.c +++ b/drivers/pci/controller/pcie-apple.c @@ -219,7 +219,7 @@ static int apple_msi_domain_alloc(struct irq_domain *domain, unsigned int virq, if (hwirq < 0) return -ENOSPC; - fwspec.param[1] += hwirq; + fwspec.param[fwspec.param_count - 2] += hwirq; ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &fwspec); if (ret) diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index e1a0c44bc686..d05ca6ebbb9d 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -141,11 +141,25 @@ config ARM_DMC620_PMU config MARVELL_CN10K_TAD_PMU tristate "Marvell CN10K LLC-TAD PMU" - depends on ARM64 || (COMPILE_TEST && 64BIT) + depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT) help Provides support for Last-Level cache Tag-and-data Units (LLC-TAD) performance monitors on CN10K family silicons. +config APPLE_M1_CPU_PMU + bool "Apple M1 CPU PMU support" + depends on ARM_PMU && ARCH_APPLE + help + Provides support for the non-architectural CPU PMUs present on + the Apple M1 SoCs and derivatives. + source "drivers/perf/hisilicon/Kconfig" +config MARVELL_CN10K_DDR_PMU + tristate "Enable MARVELL CN10K DRAM Subsystem(DSS) PMU Support" + depends on ARM64 || (COMPILE_TEST && 64BIT) + help + Enable perf support for Marvell DDR Performance monitoring + event on CN10K platform. + endmenu diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 2db5418d5b0a..4f43080ec54e 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -15,3 +15,5 @@ obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o +obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o +obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c new file mode 100644 index 000000000000..979a7c2b4f56 --- /dev/null +++ b/drivers/perf/apple_m1_cpu_pmu.c @@ -0,0 +1,584 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CPU PMU driver for the Apple M1 and derivatives + * + * Copyright (C) 2021 Google LLC + * + * Author: Marc Zyngier <maz@kernel.org> + * + * Most of the information used in this driver was provided by the + * Asahi Linux project. The rest was experimentally discovered. + */ + +#include <linux/of.h> +#include <linux/perf/arm_pmu.h> +#include <linux/platform_device.h> + +#include <asm/apple_m1_pmu.h> +#include <asm/irq_regs.h> +#include <asm/perf_event.h> + +#define M1_PMU_NR_COUNTERS 10 + +#define M1_PMU_CFG_EVENT GENMASK(7, 0) + +#define ANY_BUT_0_1 GENMASK(9, 2) +#define ONLY_2_TO_7 GENMASK(7, 2) +#define ONLY_2_4_6 (BIT(2) | BIT(4) | BIT(6)) +#define ONLY_5_6_7 (BIT(5) | BIT(6) | BIT(7)) + +/* + * Description of the events we actually know about, as well as those with + * a specific counter affinity. Yes, this is a grand total of two known + * counters, and the rest is anybody's guess. + * + * Not all counters can count all events. Counters #0 and #1 are wired to + * count cycles and instructions respectively, and some events have + * bizarre mappings (every other counter, or even *one* counter). These + * restrictions equally apply to both P and E cores. + * + * It is worth noting that the PMUs attached to P and E cores are likely + * to be different because the underlying uarches are different. At the + * moment, we don't really need to distinguish between the two because we + * know next to nothing about the events themselves, and we already have + * per cpu-type PMU abstractions. + * + * If we eventually find out that the events are different across + * implementations, we'll have to introduce per cpu-type tables. + */ +enum m1_pmu_events { + M1_PMU_PERFCTR_UNKNOWN_01 = 0x01, + M1_PMU_PERFCTR_CPU_CYCLES = 0x02, + M1_PMU_PERFCTR_INSTRUCTIONS = 0x8c, + M1_PMU_PERFCTR_UNKNOWN_8d = 0x8d, + M1_PMU_PERFCTR_UNKNOWN_8e = 0x8e, + M1_PMU_PERFCTR_UNKNOWN_8f = 0x8f, + M1_PMU_PERFCTR_UNKNOWN_90 = 0x90, + M1_PMU_PERFCTR_UNKNOWN_93 = 0x93, + M1_PMU_PERFCTR_UNKNOWN_94 = 0x94, + M1_PMU_PERFCTR_UNKNOWN_95 = 0x95, + M1_PMU_PERFCTR_UNKNOWN_96 = 0x96, + M1_PMU_PERFCTR_UNKNOWN_97 = 0x97, + M1_PMU_PERFCTR_UNKNOWN_98 = 0x98, + M1_PMU_PERFCTR_UNKNOWN_99 = 0x99, + M1_PMU_PERFCTR_UNKNOWN_9a = 0x9a, + M1_PMU_PERFCTR_UNKNOWN_9b = 0x9b, + M1_PMU_PERFCTR_UNKNOWN_9c = 0x9c, + M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f, + M1_PMU_PERFCTR_UNKNOWN_bf = 0xbf, + M1_PMU_PERFCTR_UNKNOWN_c0 = 0xc0, + M1_PMU_PERFCTR_UNKNOWN_c1 = 0xc1, + M1_PMU_PERFCTR_UNKNOWN_c4 = 0xc4, + M1_PMU_PERFCTR_UNKNOWN_c5 = 0xc5, + M1_PMU_PERFCTR_UNKNOWN_c6 = 0xc6, + M1_PMU_PERFCTR_UNKNOWN_c8 = 0xc8, + M1_PMU_PERFCTR_UNKNOWN_ca = 0xca, + M1_PMU_PERFCTR_UNKNOWN_cb = 0xcb, + M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5, + M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6, + M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7, + M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8, + M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd, + M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT, + + /* + * From this point onwards, these are not actual HW events, + * but attributes that get stored in hw->config_base. + */ + M1_PMU_CFG_COUNT_USER = BIT(8), + M1_PMU_CFG_COUNT_KERNEL = BIT(9), +}; + +/* + * Per-event affinity table. Most events can be installed on counter + * 2-9, but there are a number of exceptions. Note that this table + * has been created experimentally, and I wouldn't be surprised if more + * counters had strange affinities. + */ +static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = { + [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1, + [M1_PMU_PERFCTR_UNKNOWN_01] = BIT(7), + [M1_PMU_PERFCTR_CPU_CYCLES] = ANY_BUT_0_1 | BIT(0), + [M1_PMU_PERFCTR_INSTRUCTIONS] = BIT(7) | BIT(1), + [M1_PMU_PERFCTR_UNKNOWN_8d] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_8e] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_8f] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_90] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_93] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_94] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_95] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_96] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_97] = BIT(7), + [M1_PMU_PERFCTR_UNKNOWN_98] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_99] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_9a] = BIT(7), + [M1_PMU_PERFCTR_UNKNOWN_9b] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_9c] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7), + [M1_PMU_PERFCTR_UNKNOWN_bf] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c0] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c1] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c4] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c5] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c6] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c8] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_ca] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_cb] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7, + [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6, +}; + +static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INSTRUCTIONS, + /* No idea about the rest yet */ +}; + +/* sysfs definitions */ +static ssize_t m1_pmu_events_sysfs_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + + return sprintf(page, "event=0x%04llx\n", pmu_attr->id); +} + +#define M1_PMU_EVENT_ATTR(name, config) \ + PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config) + +static struct attribute *m1_pmu_event_attrs[] = { + M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES), + M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS), + NULL, +}; + +static const struct attribute_group m1_pmu_events_attr_group = { + .name = "events", + .attrs = m1_pmu_event_attrs, +}; + +PMU_FORMAT_ATTR(event, "config:0-7"); + +static struct attribute *m1_pmu_format_attrs[] = { + &format_attr_event.attr, + NULL, +}; + +static const struct attribute_group m1_pmu_format_attr_group = { + .name = "format", + .attrs = m1_pmu_format_attrs, +}; + +/* Low level accessors. No synchronisation. */ +#define PMU_READ_COUNTER(_idx) \ + case _idx: return read_sysreg_s(SYS_IMP_APL_PMC## _idx ##_EL1) + +#define PMU_WRITE_COUNTER(_val, _idx) \ + case _idx: \ + write_sysreg_s(_val, SYS_IMP_APL_PMC## _idx ##_EL1); \ + return + +static u64 m1_pmu_read_hw_counter(unsigned int index) +{ + switch (index) { + PMU_READ_COUNTER(0); + PMU_READ_COUNTER(1); + PMU_READ_COUNTER(2); + PMU_READ_COUNTER(3); + PMU_READ_COUNTER(4); + PMU_READ_COUNTER(5); + PMU_READ_COUNTER(6); + PMU_READ_COUNTER(7); + PMU_READ_COUNTER(8); + PMU_READ_COUNTER(9); + } + + BUG(); +} + +static void m1_pmu_write_hw_counter(u64 val, unsigned int index) +{ + switch (index) { + PMU_WRITE_COUNTER(val, 0); + PMU_WRITE_COUNTER(val, 1); + PMU_WRITE_COUNTER(val, 2); + PMU_WRITE_COUNTER(val, 3); + PMU_WRITE_COUNTER(val, 4); + PMU_WRITE_COUNTER(val, 5); + PMU_WRITE_COUNTER(val, 6); + PMU_WRITE_COUNTER(val, 7); + PMU_WRITE_COUNTER(val, 8); + PMU_WRITE_COUNTER(val, 9); + } + + BUG(); +} + +#define get_bit_offset(index, mask) (__ffs(mask) + (index)) + +static void __m1_pmu_enable_counter(unsigned int index, bool en) +{ + u64 val, bit; + + switch (index) { + case 0 ... 7: + bit = BIT(get_bit_offset(index, PMCR0_CNT_ENABLE_0_7)); + break; + case 8 ... 9: + bit = BIT(get_bit_offset(index - 8, PMCR0_CNT_ENABLE_8_9)); + break; + default: + BUG(); + } + + val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1); + + if (en) + val |= bit; + else + val &= ~bit; + + write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1); +} + +static void m1_pmu_enable_counter(unsigned int index) +{ + __m1_pmu_enable_counter(index, true); +} + +static void m1_pmu_disable_counter(unsigned int index) +{ + __m1_pmu_enable_counter(index, false); +} + +static void __m1_pmu_enable_counter_interrupt(unsigned int index, bool en) +{ + u64 val, bit; + + switch (index) { + case 0 ... 7: + bit = BIT(get_bit_offset(index, PMCR0_PMI_ENABLE_0_7)); + break; + case 8 ... 9: + bit = BIT(get_bit_offset(index - 8, PMCR0_PMI_ENABLE_8_9)); + break; + default: + BUG(); + } + + val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1); + + if (en) + val |= bit; + else + val &= ~bit; + + write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1); +} + +static void m1_pmu_enable_counter_interrupt(unsigned int index) +{ + __m1_pmu_enable_counter_interrupt(index, true); +} + +static void m1_pmu_disable_counter_interrupt(unsigned int index) +{ + __m1_pmu_enable_counter_interrupt(index, false); +} + +static void m1_pmu_configure_counter(unsigned int index, u8 event, + bool user, bool kernel) +{ + u64 val, user_bit, kernel_bit; + int shift; + + switch (index) { + case 0 ... 7: + user_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL0_0_7)); + kernel_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL1_0_7)); + break; + case 8 ... 9: + user_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL0_8_9)); + kernel_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL1_8_9)); + break; + default: + BUG(); + } + + val = read_sysreg_s(SYS_IMP_APL_PMCR1_EL1); + + if (user) + val |= user_bit; + else + val &= ~user_bit; + + if (kernel) + val |= kernel_bit; + else + val &= ~kernel_bit; + + write_sysreg_s(val, SYS_IMP_APL_PMCR1_EL1); + + /* + * Counters 0 and 1 have fixed events. For anything else, + * place the event at the expected location in the relevant + * register (PMESR0 holds the event configuration for counters + * 2-5, resp. PMESR1 for counters 6-9). + */ + switch (index) { + case 0 ... 1: + break; + case 2 ... 5: + shift = (index - 2) * 8; + val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1); + val &= ~((u64)0xff << shift); + val |= (u64)event << shift; + write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1); + break; + case 6 ... 9: + shift = (index - 6) * 8; + val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1); + val &= ~((u64)0xff << shift); + val |= (u64)event << shift; + write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1); + break; + } +} + +/* arm_pmu backend */ +static void m1_pmu_enable_event(struct perf_event *event) +{ + bool user, kernel; + u8 evt; + + evt = event->hw.config_base & M1_PMU_CFG_EVENT; + user = event->hw.config_base & M1_PMU_CFG_COUNT_USER; + kernel = event->hw.config_base & M1_PMU_CFG_COUNT_KERNEL; + + m1_pmu_disable_counter_interrupt(event->hw.idx); + m1_pmu_disable_counter(event->hw.idx); + isb(); + + m1_pmu_configure_counter(event->hw.idx, evt, user, kernel); + m1_pmu_enable_counter(event->hw.idx); + m1_pmu_enable_counter_interrupt(event->hw.idx); + isb(); +} + +static void m1_pmu_disable_event(struct perf_event *event) +{ + m1_pmu_disable_counter_interrupt(event->hw.idx); + m1_pmu_disable_counter(event->hw.idx); + isb(); +} + +static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu) +{ + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); + struct pt_regs *regs; + u64 overflow, state; + int idx; + + overflow = read_sysreg_s(SYS_IMP_APL_PMSR_EL1); + if (!overflow) { + /* Spurious interrupt? */ + state = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1); + state &= ~PMCR0_IACT; + write_sysreg_s(state, SYS_IMP_APL_PMCR0_EL1); + isb(); + return IRQ_NONE; + } + + cpu_pmu->stop(cpu_pmu); + + regs = get_irq_regs(); + + for (idx = 0; idx < cpu_pmu->num_events; idx++) { + struct perf_event *event = cpuc->events[idx]; + struct perf_sample_data data; + + if (!event) + continue; + + armpmu_event_update(event); + perf_sample_data_init(&data, 0, event->hw.last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + m1_pmu_disable_event(event); + } + + cpu_pmu->start(cpu_pmu); + + return IRQ_HANDLED; +} + +static u64 m1_pmu_read_counter(struct perf_event *event) +{ + return m1_pmu_read_hw_counter(event->hw.idx); +} + +static void m1_pmu_write_counter(struct perf_event *event, u64 value) +{ + m1_pmu_write_hw_counter(value, event->hw.idx); + isb(); +} + +static int m1_pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + unsigned long evtype = event->hw.config_base & M1_PMU_CFG_EVENT; + unsigned long affinity = m1_pmu_event_affinity[evtype]; + int idx; + + /* + * Place the event on the first free counter that can count + * this event. + * + * We could do a better job if we had a view of all the events + * counting on the PMU at any given time, and by placing the + * most constraining events first. + */ + for_each_set_bit(idx, &affinity, M1_PMU_NR_COUNTERS) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + + return -EAGAIN; +} + +static void m1_pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + +static void __m1_pmu_set_mode(u8 mode) +{ + u64 val; + + val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1); + val &= ~(PMCR0_IMODE | PMCR0_IACT); + val |= FIELD_PREP(PMCR0_IMODE, mode); + write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1); + isb(); +} + +static void m1_pmu_start(struct arm_pmu *cpu_pmu) +{ + __m1_pmu_set_mode(PMCR0_IMODE_FIQ); +} + +static void m1_pmu_stop(struct arm_pmu *cpu_pmu) +{ + __m1_pmu_set_mode(PMCR0_IMODE_OFF); +} + +static int m1_pmu_map_event(struct perf_event *event) +{ + /* + * Although the counters are 48bit wide, bit 47 is what + * triggers the overflow interrupt. Advertise the counters + * being 47bit wide to mimick the behaviour of the ARM PMU. + */ + event->hw.flags |= ARMPMU_EVT_47BIT; + return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT); +} + +static void m1_pmu_reset(void *info) +{ + int i; + + __m1_pmu_set_mode(PMCR0_IMODE_OFF); + + for (i = 0; i < M1_PMU_NR_COUNTERS; i++) { + m1_pmu_disable_counter(i); + m1_pmu_disable_counter_interrupt(i); + m1_pmu_write_hw_counter(0, i); + } + + isb(); +} + +static int m1_pmu_set_event_filter(struct hw_perf_event *event, + struct perf_event_attr *attr) +{ + unsigned long config_base = 0; + + if (!attr->exclude_guest) + return -EINVAL; + if (!attr->exclude_kernel) + config_base |= M1_PMU_CFG_COUNT_KERNEL; + if (!attr->exclude_user) + config_base |= M1_PMU_CFG_COUNT_USER; + + event->config_base = config_base; + + return 0; +} + +static int m1_pmu_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->handle_irq = m1_pmu_handle_irq; + cpu_pmu->enable = m1_pmu_enable_event; + cpu_pmu->disable = m1_pmu_disable_event; + cpu_pmu->read_counter = m1_pmu_read_counter; + cpu_pmu->write_counter = m1_pmu_write_counter; + cpu_pmu->get_event_idx = m1_pmu_get_event_idx; + cpu_pmu->clear_event_idx = m1_pmu_clear_event_idx; + cpu_pmu->start = m1_pmu_start; + cpu_pmu->stop = m1_pmu_stop; + cpu_pmu->map_event = m1_pmu_map_event; + cpu_pmu->reset = m1_pmu_reset; + cpu_pmu->set_event_filter = m1_pmu_set_event_filter; + + cpu_pmu->num_events = M1_PMU_NR_COUNTERS; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &m1_pmu_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &m1_pmu_format_attr_group; + return 0; +} + +/* Device driver gunk */ +static int m1_pmu_ice_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "apple_icestorm_pmu"; + return m1_pmu_init(cpu_pmu); +} + +static int m1_pmu_fire_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "apple_firestorm_pmu"; + return m1_pmu_init(cpu_pmu); +} + +static const struct of_device_id m1_pmu_of_device_ids[] = { + { .compatible = "apple,icestorm-pmu", .data = m1_pmu_ice_init, }, + { .compatible = "apple,firestorm-pmu", .data = m1_pmu_fire_init, }, + { }, +}; +MODULE_DEVICE_TABLE(of, m1_pmu_of_device_ids); + +static int m1_pmu_device_probe(struct platform_device *pdev) +{ + return arm_pmu_device_probe(pdev, m1_pmu_of_device_ids, NULL); +} + +static struct platform_driver m1_pmu_driver = { + .driver = { + .name = "apple-m1-cpu-pmu", + .of_match_table = m1_pmu_of_device_ids, + .suppress_bind_attrs = true, + }, + .probe = m1_pmu_device_probe, +}; + +module_platform_driver(m1_pmu_driver); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 54aca3a62814..96e09fa40909 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1096,7 +1096,7 @@ static void cci_pmu_enable(struct pmu *pmu) { struct cci_pmu *cci_pmu = to_cci_pmu(pmu); struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; - int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs); + bool enabled = !bitmap_empty(hw_events->used_mask, cci_pmu->num_cntrs); unsigned long flags; if (!enabled) diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index a96c31604545..40b352e8aa7f 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1460,8 +1460,7 @@ static irqreturn_t arm_ccn_irq_handler(int irq, void *dev_id) static int arm_ccn_probe(struct platform_device *pdev) { struct arm_ccn *ccn; - struct resource *res; - unsigned int irq; + int irq; int err; ccn = devm_kzalloc(&pdev->dev, sizeof(*ccn), GFP_KERNEL); @@ -1474,10 +1473,9 @@ static int arm_ccn_probe(struct platform_device *pdev) if (IS_ERR(ccn->base)) return PTR_ERR(ccn->base); - res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!res) - return -EINVAL; - irq = res->start; + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; /* Check if we can use the interrupt */ writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__DISABLE, diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 0e48adce57ef..9c1d82be7a2f 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -71,9 +71,11 @@ #define CMN_DTM_WPn(n) (0x1A0 + (n) * 0x18) #define CMN_DTM_WPn_CONFIG(n) (CMN_DTM_WPn(n) + 0x00) #define CMN_DTM_WPn_CONFIG_WP_DEV_SEL2 GENMASK_ULL(18,17) -#define CMN_DTM_WPn_CONFIG_WP_COMBINE BIT(6) -#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE BIT(5) -#define CMN_DTM_WPn_CONFIG_WP_GRP BIT(4) +#define CMN_DTM_WPn_CONFIG_WP_COMBINE BIT(9) +#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE BIT(8) +#define CMN600_WPn_CONFIG_WP_COMBINE BIT(6) +#define CMN600_WPn_CONFIG_WP_EXCLUSIVE BIT(5) +#define CMN_DTM_WPn_CONFIG_WP_GRP GENMASK_ULL(5, 4) #define CMN_DTM_WPn_CONFIG_WP_CHN_SEL GENMASK_ULL(3, 1) #define CMN_DTM_WPn_CONFIG_WP_DEV_SEL BIT(0) #define CMN_DTM_WPn_VAL(n) (CMN_DTM_WPn(n) + 0x08) @@ -155,6 +157,7 @@ #define CMN_CONFIG_WP_COMBINE GENMASK_ULL(27, 24) #define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48) #define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51) +/* Note that we don't yet support the tertiary match group on newer IPs */ #define CMN_CONFIG_WP_GRP BIT_ULL(56) #define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(57) #define CMN_CONFIG1_WP_VAL GENMASK_ULL(63, 0) @@ -353,7 +356,7 @@ static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, return NULL; } -struct dentry *arm_cmn_debugfs; +static struct dentry *arm_cmn_debugfs; #ifdef CONFIG_DEBUG_FS static const char *arm_cmn_device_type(u8 type) @@ -595,6 +598,9 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, if ((intf & 4) && !(cmn->ports_used & BIT(intf & 3))) return 0; + if (chan == 4 && cmn->model == CMN600) + return 0; + if ((chan == 5 && cmn->rsp_vc_num < 2) || (chan == 6 && cmn->dat_vc_num < 2)) return 0; @@ -905,15 +911,18 @@ static u32 arm_cmn_wp_config(struct perf_event *event) u32 grp = CMN_EVENT_WP_GRP(event); u32 exc = CMN_EVENT_WP_EXCLUSIVE(event); u32 combine = CMN_EVENT_WP_COMBINE(event); + bool is_cmn600 = to_cmn(event->pmu)->model == CMN600; config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) | - FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE, exc) | FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL2, dev >> 1); + if (exc) + config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_EXCLUSIVE : + CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE; if (combine && !grp) - config |= CMN_DTM_WPn_CONFIG_WP_COMBINE; - + config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_COMBINE : + CMN_DTM_WPn_CONFIG_WP_COMBINE; return config; } diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 295cc7952d0e..9694370651fa 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -109,6 +109,8 @@ static inline u64 arm_pmu_event_max_period(struct perf_event *event) { if (event->hw.flags & ARMPMU_EVT_64BIT) return GENMASK_ULL(63, 0); + else if (event->hw.flags & ARMPMU_EVT_47BIT) + return GENMASK_ULL(46, 0); else return GENMASK_ULL(31, 0); } @@ -524,7 +526,7 @@ static void armpmu_enable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); - int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events); /* For task-bound events we may be called on other CPUs */ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) @@ -785,7 +787,7 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd, { struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb); struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); - int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + bool enabled = !bitmap_empty(hw_events->used_mask, armpmu->num_events); if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) return NOTIFY_DONE; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index a738aeab5c04..358e4e284a62 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -393,7 +393,7 @@ EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read); void hisi_uncore_pmu_enable(struct pmu *pmu) { struct hisi_pmu *hisi_pmu = to_hisi_pmu(pmu); - int enabled = bitmap_weight(hisi_pmu->pmu_events.used_mask, + bool enabled = !bitmap_empty(hisi_pmu->pmu_events.used_mask, hisi_pmu->num_counters); if (!enabled) diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c new file mode 100644 index 000000000000..665b382a0ee3 --- /dev/null +++ b/drivers/perf/marvell_cn10k_ddr_pmu.c @@ -0,0 +1,758 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell CN10K DRAM Subsystem (DSS) Performance Monitor Driver + * + * Copyright (C) 2021 Marvell. + */ + +#include <linux/init.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/perf_event.h> +#include <linux/hrtimer.h> + +/* Performance Counters Operating Mode Control Registers */ +#define DDRC_PERF_CNT_OP_MODE_CTRL 0x8020 +#define OP_MODE_CTRL_VAL_MANNUAL 0x1 + +/* Performance Counters Start Operation Control Registers */ +#define DDRC_PERF_CNT_START_OP_CTRL 0x8028 +#define START_OP_CTRL_VAL_START 0x1ULL +#define START_OP_CTRL_VAL_ACTIVE 0x2 + +/* Performance Counters End Operation Control Registers */ +#define DDRC_PERF_CNT_END_OP_CTRL 0x8030 +#define END_OP_CTRL_VAL_END 0x1ULL + +/* Performance Counters End Status Registers */ +#define DDRC_PERF_CNT_END_STATUS 0x8038 +#define END_STATUS_VAL_END_TIMER_MODE_END 0x1 + +/* Performance Counters Configuration Registers */ +#define DDRC_PERF_CFG_BASE 0x8040 + +/* 8 Generic event counter + 2 fixed event counters */ +#define DDRC_PERF_NUM_GEN_COUNTERS 8 +#define DDRC_PERF_NUM_FIX_COUNTERS 2 +#define DDRC_PERF_READ_COUNTER_IDX DDRC_PERF_NUM_GEN_COUNTERS +#define DDRC_PERF_WRITE_COUNTER_IDX (DDRC_PERF_NUM_GEN_COUNTERS + 1) +#define DDRC_PERF_NUM_COUNTERS (DDRC_PERF_NUM_GEN_COUNTERS + \ + DDRC_PERF_NUM_FIX_COUNTERS) + +/* Generic event counter registers */ +#define DDRC_PERF_CFG(n) (DDRC_PERF_CFG_BASE + 8 * (n)) +#define EVENT_ENABLE BIT_ULL(63) + +/* Two dedicated event counters for DDR reads and writes */ +#define EVENT_DDR_READS 101 +#define EVENT_DDR_WRITES 100 + +/* + * programmable events IDs in programmable event counters. + * DO NOT change these event-id numbers, they are used to + * program event bitmap in h/w. + */ +#define EVENT_OP_IS_ZQLATCH 55 +#define EVENT_OP_IS_ZQSTART 54 +#define EVENT_OP_IS_TCR_MRR 53 +#define EVENT_OP_IS_DQSOSC_MRR 52 +#define EVENT_OP_IS_DQSOSC_MPC 51 +#define EVENT_VISIBLE_WIN_LIMIT_REACHED_WR 50 +#define EVENT_VISIBLE_WIN_LIMIT_REACHED_RD 49 +#define EVENT_BSM_STARVATION 48 +#define EVENT_BSM_ALLOC 47 +#define EVENT_LPR_REQ_WITH_NOCREDIT 46 +#define EVENT_HPR_REQ_WITH_NOCREDIT 45 +#define EVENT_OP_IS_ZQCS 44 +#define EVENT_OP_IS_ZQCL 43 +#define EVENT_OP_IS_LOAD_MODE 42 +#define EVENT_OP_IS_SPEC_REF 41 +#define EVENT_OP_IS_CRIT_REF 40 +#define EVENT_OP_IS_REFRESH 39 +#define EVENT_OP_IS_ENTER_MPSM 35 +#define EVENT_OP_IS_ENTER_POWERDOWN 31 +#define EVENT_OP_IS_ENTER_SELFREF 27 +#define EVENT_WAW_HAZARD 26 +#define EVENT_RAW_HAZARD 25 +#define EVENT_WAR_HAZARD 24 +#define EVENT_WRITE_COMBINE 23 +#define EVENT_RDWR_TRANSITIONS 22 +#define EVENT_PRECHARGE_FOR_OTHER 21 +#define EVENT_PRECHARGE_FOR_RDWR 20 +#define EVENT_OP_IS_PRECHARGE 19 +#define EVENT_OP_IS_MWR 18 +#define EVENT_OP_IS_WR 17 +#define EVENT_OP_IS_RD 16 +#define EVENT_OP_IS_RD_ACTIVATE 15 +#define EVENT_OP_IS_RD_OR_WR 14 +#define EVENT_OP_IS_ACTIVATE 13 +#define EVENT_WR_XACT_WHEN_CRITICAL 12 +#define EVENT_LPR_XACT_WHEN_CRITICAL 11 +#define EVENT_HPR_XACT_WHEN_CRITICAL 10 +#define EVENT_DFI_RD_DATA_CYCLES 9 +#define EVENT_DFI_WR_DATA_CYCLES 8 +#define EVENT_ACT_BYPASS 7 +#define EVENT_READ_BYPASS 6 +#define EVENT_HIF_HI_PRI_RD 5 +#define EVENT_HIF_RMW 4 +#define EVENT_HIF_RD 3 +#define EVENT_HIF_WR 2 +#define EVENT_HIF_RD_OR_WR 1 + +/* Event counter value registers */ +#define DDRC_PERF_CNT_VALUE_BASE 0x8080 +#define DDRC_PERF_CNT_VALUE(n) (DDRC_PERF_CNT_VALUE_BASE + 8 * (n)) + +/* Fixed event counter enable/disable register */ +#define DDRC_PERF_CNT_FREERUN_EN 0x80C0 +#define DDRC_PERF_FREERUN_WRITE_EN 0x1 +#define DDRC_PERF_FREERUN_READ_EN 0x2 + +/* Fixed event counter control register */ +#define DDRC_PERF_CNT_FREERUN_CTRL 0x80C8 +#define DDRC_FREERUN_WRITE_CNT_CLR 0x1 +#define DDRC_FREERUN_READ_CNT_CLR 0x2 + +/* Fixed event counter value register */ +#define DDRC_PERF_CNT_VALUE_WR_OP 0x80D0 +#define DDRC_PERF_CNT_VALUE_RD_OP 0x80D8 +#define DDRC_PERF_CNT_VALUE_OVERFLOW BIT_ULL(48) +#define DDRC_PERF_CNT_MAX_VALUE GENMASK_ULL(48, 0) + +struct cn10k_ddr_pmu { + struct pmu pmu; + void __iomem *base; + unsigned int cpu; + struct device *dev; + int active_events; + struct perf_event *events[DDRC_PERF_NUM_COUNTERS]; + struct hrtimer hrtimer; + struct hlist_node node; +}; + +#define to_cn10k_ddr_pmu(p) container_of(p, struct cn10k_ddr_pmu, pmu) + +static ssize_t cn10k_ddr_pmu_event_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); + +} + +#define CN10K_DDR_PMU_EVENT_ATTR(_name, _id) \ + PMU_EVENT_ATTR_ID(_name, cn10k_ddr_pmu_event_show, _id) + +static struct attribute *cn10k_ddr_perf_events_attrs[] = { + CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_or_wr_access, EVENT_HIF_RD_OR_WR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_wr_access, EVENT_HIF_WR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rd_access, EVENT_HIF_RD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_rmw_access, EVENT_HIF_RMW), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hif_pri_rdaccess, EVENT_HIF_HI_PRI_RD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_rd_bypass_access, EVENT_READ_BYPASS), + CN10K_DDR_PMU_EVENT_ATTR(ddr_act_bypass_access, EVENT_ACT_BYPASS), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dif_wr_data_access, EVENT_DFI_WR_DATA_CYCLES), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dif_rd_data_access, EVENT_DFI_RD_DATA_CYCLES), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hpri_sched_rd_crit_access, + EVENT_HPR_XACT_WHEN_CRITICAL), + CN10K_DDR_PMU_EVENT_ATTR(ddr_lpri_sched_rd_crit_access, + EVENT_LPR_XACT_WHEN_CRITICAL), + CN10K_DDR_PMU_EVENT_ATTR(ddr_wr_trxn_crit_access, + EVENT_WR_XACT_WHEN_CRITICAL), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_active_access, EVENT_OP_IS_ACTIVATE), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_or_wr_access, EVENT_OP_IS_RD_OR_WR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_rd_active_access, EVENT_OP_IS_RD_ACTIVATE), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_read, EVENT_OP_IS_RD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_write, EVENT_OP_IS_WR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_mwr, EVENT_OP_IS_MWR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge, EVENT_OP_IS_PRECHARGE), + CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_rdwr, EVENT_PRECHARGE_FOR_RDWR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_precharge_for_other, + EVENT_PRECHARGE_FOR_OTHER), + CN10K_DDR_PMU_EVENT_ATTR(ddr_rdwr_transitions, EVENT_RDWR_TRANSITIONS), + CN10K_DDR_PMU_EVENT_ATTR(ddr_write_combine, EVENT_WRITE_COMBINE), + CN10K_DDR_PMU_EVENT_ATTR(ddr_war_hazard, EVENT_WAR_HAZARD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_raw_hazard, EVENT_RAW_HAZARD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_waw_hazard, EVENT_WAW_HAZARD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_selfref, EVENT_OP_IS_ENTER_SELFREF), + CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_powerdown, EVENT_OP_IS_ENTER_POWERDOWN), + CN10K_DDR_PMU_EVENT_ATTR(ddr_enter_mpsm, EVENT_OP_IS_ENTER_MPSM), + CN10K_DDR_PMU_EVENT_ATTR(ddr_refresh, EVENT_OP_IS_REFRESH), + CN10K_DDR_PMU_EVENT_ATTR(ddr_crit_ref, EVENT_OP_IS_CRIT_REF), + CN10K_DDR_PMU_EVENT_ATTR(ddr_spec_ref, EVENT_OP_IS_SPEC_REF), + CN10K_DDR_PMU_EVENT_ATTR(ddr_load_mode, EVENT_OP_IS_LOAD_MODE), + CN10K_DDR_PMU_EVENT_ATTR(ddr_zqcl, EVENT_OP_IS_ZQCL), + CN10K_DDR_PMU_EVENT_ATTR(ddr_cam_wr_access, EVENT_OP_IS_ZQCS), + CN10K_DDR_PMU_EVENT_ATTR(ddr_hpr_req_with_nocredit, + EVENT_HPR_REQ_WITH_NOCREDIT), + CN10K_DDR_PMU_EVENT_ATTR(ddr_lpr_req_with_nocredit, + EVENT_LPR_REQ_WITH_NOCREDIT), + CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_alloc, EVENT_BSM_ALLOC), + CN10K_DDR_PMU_EVENT_ATTR(ddr_bsm_starvation, EVENT_BSM_STARVATION), + CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_rd, + EVENT_VISIBLE_WIN_LIMIT_REACHED_RD), + CN10K_DDR_PMU_EVENT_ATTR(ddr_win_limit_reached_wr, + EVENT_VISIBLE_WIN_LIMIT_REACHED_WR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mpc, EVENT_OP_IS_DQSOSC_MPC), + CN10K_DDR_PMU_EVENT_ATTR(ddr_dqsosc_mrr, EVENT_OP_IS_DQSOSC_MRR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_tcr_mrr, EVENT_OP_IS_TCR_MRR), + CN10K_DDR_PMU_EVENT_ATTR(ddr_zqstart, EVENT_OP_IS_ZQSTART), + CN10K_DDR_PMU_EVENT_ATTR(ddr_zqlatch, EVENT_OP_IS_ZQLATCH), + /* Free run event counters */ + CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_reads, EVENT_DDR_READS), + CN10K_DDR_PMU_EVENT_ATTR(ddr_ddr_writes, EVENT_DDR_WRITES), + NULL +}; + +static struct attribute_group cn10k_ddr_perf_events_attr_group = { + .name = "events", + .attrs = cn10k_ddr_perf_events_attrs, +}; + +PMU_FORMAT_ATTR(event, "config:0-8"); + +static struct attribute *cn10k_ddr_perf_format_attrs[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cn10k_ddr_perf_format_attr_group = { + .name = "format", + .attrs = cn10k_ddr_perf_format_attrs, +}; + +static ssize_t cn10k_ddr_perf_cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct cn10k_ddr_pmu *pmu = dev_get_drvdata(dev); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu)); +} + +static struct device_attribute cn10k_ddr_perf_cpumask_attr = + __ATTR(cpumask, 0444, cn10k_ddr_perf_cpumask_show, NULL); + +static struct attribute *cn10k_ddr_perf_cpumask_attrs[] = { + &cn10k_ddr_perf_cpumask_attr.attr, + NULL, +}; + +static struct attribute_group cn10k_ddr_perf_cpumask_attr_group = { + .attrs = cn10k_ddr_perf_cpumask_attrs, +}; + +static const struct attribute_group *cn10k_attr_groups[] = { + &cn10k_ddr_perf_events_attr_group, + &cn10k_ddr_perf_format_attr_group, + &cn10k_ddr_perf_cpumask_attr_group, + NULL, +}; + +/* Default poll timeout is 100 sec, which is very sufficient for + * 48 bit counter incremented max at 5.6 GT/s, which may take many + * hours to overflow. + */ +static unsigned long cn10k_ddr_pmu_poll_period_sec = 100; +module_param_named(poll_period_sec, cn10k_ddr_pmu_poll_period_sec, ulong, 0644); + +static ktime_t cn10k_ddr_pmu_timer_period(void) +{ + return ms_to_ktime((u64)cn10k_ddr_pmu_poll_period_sec * USEC_PER_SEC); +} + +static int ddr_perf_get_event_bitmap(int eventid, u64 *event_bitmap) +{ + switch (eventid) { + case EVENT_HIF_RD_OR_WR ... EVENT_WAW_HAZARD: + case EVENT_OP_IS_REFRESH ... EVENT_OP_IS_ZQLATCH: + *event_bitmap = (1ULL << (eventid - 1)); + break; + case EVENT_OP_IS_ENTER_SELFREF: + case EVENT_OP_IS_ENTER_POWERDOWN: + case EVENT_OP_IS_ENTER_MPSM: + *event_bitmap = (0xFULL << (eventid - 1)); + break; + default: + pr_err("%s Invalid eventid %d\n", __func__, eventid); + return -EINVAL; + } + + return 0; +} + +static int cn10k_ddr_perf_alloc_counter(struct cn10k_ddr_pmu *pmu, + struct perf_event *event) +{ + u8 config = event->attr.config; + int i; + + /* DDR read free-run counter index */ + if (config == EVENT_DDR_READS) { + pmu->events[DDRC_PERF_READ_COUNTER_IDX] = event; + return DDRC_PERF_READ_COUNTER_IDX; + } + + /* DDR write free-run counter index */ + if (config == EVENT_DDR_WRITES) { + pmu->events[DDRC_PERF_WRITE_COUNTER_IDX] = event; + return DDRC_PERF_WRITE_COUNTER_IDX; + } + + /* Allocate DDR generic counters */ + for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) { + if (pmu->events[i] == NULL) { + pmu->events[i] = event; + return i; + } + } + + return -ENOENT; +} + +static void cn10k_ddr_perf_free_counter(struct cn10k_ddr_pmu *pmu, int counter) +{ + pmu->events[counter] = NULL; +} + +static int cn10k_ddr_perf_event_init(struct perf_event *event) +{ + struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (is_sampling_event(event)) { + dev_info(pmu->dev, "Sampling not supported!\n"); + return -EOPNOTSUPP; + } + + if (event->cpu < 0) { + dev_warn(pmu->dev, "Can't provide per-task data!\n"); + return -EOPNOTSUPP; + } + + /* We must NOT create groups containing mixed PMUs */ + if (event->group_leader->pmu != event->pmu && + !is_software_event(event->group_leader)) + return -EINVAL; + + /* Set ownership of event to one CPU, same event can not be observed + * on multiple cpus at same time. + */ + event->cpu = pmu->cpu; + hwc->idx = -1; + return 0; +} + +static void cn10k_ddr_perf_counter_enable(struct cn10k_ddr_pmu *pmu, + int counter, bool enable) +{ + u32 reg; + u64 val; + + if (counter > DDRC_PERF_NUM_COUNTERS) { + pr_err("Error: unsupported counter %d\n", counter); + return; + } + + if (counter < DDRC_PERF_NUM_GEN_COUNTERS) { + reg = DDRC_PERF_CFG(counter); + val = readq_relaxed(pmu->base + reg); + + if (enable) + val |= EVENT_ENABLE; + else + val &= ~EVENT_ENABLE; + + writeq_relaxed(val, pmu->base + reg); + } else { + val = readq_relaxed(pmu->base + DDRC_PERF_CNT_FREERUN_EN); + if (enable) { + if (counter == DDRC_PERF_READ_COUNTER_IDX) + val |= DDRC_PERF_FREERUN_READ_EN; + else + val |= DDRC_PERF_FREERUN_WRITE_EN; + } else { + if (counter == DDRC_PERF_READ_COUNTER_IDX) + val &= ~DDRC_PERF_FREERUN_READ_EN; + else + val &= ~DDRC_PERF_FREERUN_WRITE_EN; + } + writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_EN); + } +} + +static u64 cn10k_ddr_perf_read_counter(struct cn10k_ddr_pmu *pmu, int counter) +{ + u64 val; + + if (counter == DDRC_PERF_READ_COUNTER_IDX) + return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_RD_OP); + + if (counter == DDRC_PERF_WRITE_COUNTER_IDX) + return readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE_WR_OP); + + val = readq_relaxed(pmu->base + DDRC_PERF_CNT_VALUE(counter)); + return val; +} + +static void cn10k_ddr_perf_event_update(struct perf_event *event) +{ + struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u64 prev_count, new_count, mask; + + do { + prev_count = local64_read(&hwc->prev_count); + new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx); + } while (local64_xchg(&hwc->prev_count, new_count) != prev_count); + + mask = DDRC_PERF_CNT_MAX_VALUE; + + local64_add((new_count - prev_count) & mask, &event->count); +} + +static void cn10k_ddr_perf_event_start(struct perf_event *event, int flags) +{ + struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + local64_set(&hwc->prev_count, 0); + + cn10k_ddr_perf_counter_enable(pmu, counter, true); + + hwc->state = 0; +} + +static int cn10k_ddr_perf_event_add(struct perf_event *event, int flags) +{ + struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u8 config = event->attr.config; + int counter, ret; + u32 reg_offset; + u64 val; + + counter = cn10k_ddr_perf_alloc_counter(pmu, event); + if (counter < 0) + return -EAGAIN; + + pmu->active_events++; + hwc->idx = counter; + + if (pmu->active_events == 1) + hrtimer_start(&pmu->hrtimer, cn10k_ddr_pmu_timer_period(), + HRTIMER_MODE_REL_PINNED); + + if (counter < DDRC_PERF_NUM_GEN_COUNTERS) { + /* Generic counters, configure event id */ + reg_offset = DDRC_PERF_CFG(counter); + ret = ddr_perf_get_event_bitmap(config, &val); + if (ret) + return ret; + + writeq_relaxed(val, pmu->base + reg_offset); + } else { + /* fixed event counter, clear counter value */ + if (counter == DDRC_PERF_READ_COUNTER_IDX) + val = DDRC_FREERUN_READ_CNT_CLR; + else + val = DDRC_FREERUN_WRITE_CNT_CLR; + + writeq_relaxed(val, pmu->base + DDRC_PERF_CNT_FREERUN_CTRL); + } + + hwc->state |= PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + cn10k_ddr_perf_event_start(event, flags); + + return 0; +} + +static void cn10k_ddr_perf_event_stop(struct perf_event *event, int flags) +{ + struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + cn10k_ddr_perf_counter_enable(pmu, counter, false); + + if (flags & PERF_EF_UPDATE) + cn10k_ddr_perf_event_update(event); + + hwc->state |= PERF_HES_STOPPED; +} + +static void cn10k_ddr_perf_event_del(struct perf_event *event, int flags) +{ + struct cn10k_ddr_pmu *pmu = to_cn10k_ddr_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + cn10k_ddr_perf_event_stop(event, PERF_EF_UPDATE); + + cn10k_ddr_perf_free_counter(pmu, counter); + pmu->active_events--; + hwc->idx = -1; + + /* Cancel timer when no events to capture */ + if (pmu->active_events == 0) + hrtimer_cancel(&pmu->hrtimer); +} + +static void cn10k_ddr_perf_pmu_enable(struct pmu *pmu) +{ + struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu); + + writeq_relaxed(START_OP_CTRL_VAL_START, ddr_pmu->base + + DDRC_PERF_CNT_START_OP_CTRL); +} + +static void cn10k_ddr_perf_pmu_disable(struct pmu *pmu) +{ + struct cn10k_ddr_pmu *ddr_pmu = to_cn10k_ddr_pmu(pmu); + + writeq_relaxed(END_OP_CTRL_VAL_END, ddr_pmu->base + + DDRC_PERF_CNT_END_OP_CTRL); +} + +static void cn10k_ddr_perf_event_update_all(struct cn10k_ddr_pmu *pmu) +{ + struct hw_perf_event *hwc; + int i; + + for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) { + if (pmu->events[i] == NULL) + continue; + + cn10k_ddr_perf_event_update(pmu->events[i]); + } + + /* Reset previous count as h/w counter are reset */ + for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) { + if (pmu->events[i] == NULL) + continue; + + hwc = &pmu->events[i]->hw; + local64_set(&hwc->prev_count, 0); + } +} + +static irqreturn_t cn10k_ddr_pmu_overflow_handler(struct cn10k_ddr_pmu *pmu) +{ + struct perf_event *event; + struct hw_perf_event *hwc; + u64 prev_count, new_count; + u64 value; + int i; + + event = pmu->events[DDRC_PERF_READ_COUNTER_IDX]; + if (event) { + hwc = &event->hw; + prev_count = local64_read(&hwc->prev_count); + new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx); + + /* Overflow condition is when new count less than + * previous count + */ + if (new_count < prev_count) + cn10k_ddr_perf_event_update(event); + } + + event = pmu->events[DDRC_PERF_WRITE_COUNTER_IDX]; + if (event) { + hwc = &event->hw; + prev_count = local64_read(&hwc->prev_count); + new_count = cn10k_ddr_perf_read_counter(pmu, hwc->idx); + + /* Overflow condition is when new count less than + * previous count + */ + if (new_count < prev_count) + cn10k_ddr_perf_event_update(event); + } + + for (i = 0; i < DDRC_PERF_NUM_GEN_COUNTERS; i++) { + if (pmu->events[i] == NULL) + continue; + + value = cn10k_ddr_perf_read_counter(pmu, i); + if (value == DDRC_PERF_CNT_MAX_VALUE) { + pr_info("Counter-(%d) reached max value\n", i); + cn10k_ddr_perf_event_update_all(pmu); + cn10k_ddr_perf_pmu_disable(&pmu->pmu); + cn10k_ddr_perf_pmu_enable(&pmu->pmu); + } + } + + return IRQ_HANDLED; +} + +static enum hrtimer_restart cn10k_ddr_pmu_timer_handler(struct hrtimer *hrtimer) +{ + struct cn10k_ddr_pmu *pmu = container_of(hrtimer, struct cn10k_ddr_pmu, + hrtimer); + unsigned long flags; + + local_irq_save(flags); + cn10k_ddr_pmu_overflow_handler(pmu); + local_irq_restore(flags); + + hrtimer_forward_now(hrtimer, cn10k_ddr_pmu_timer_period()); + return HRTIMER_RESTART; +} + +static int cn10k_ddr_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct cn10k_ddr_pmu *pmu = hlist_entry_safe(node, struct cn10k_ddr_pmu, + node); + unsigned int target; + + if (cpu != pmu->cpu) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(&pmu->pmu, cpu, target); + pmu->cpu = target; + return 0; +} + +static int cn10k_ddr_perf_probe(struct platform_device *pdev) +{ + struct cn10k_ddr_pmu *ddr_pmu; + struct resource *res; + void __iomem *base; + char *name; + int ret; + + ddr_pmu = devm_kzalloc(&pdev->dev, sizeof(*ddr_pmu), GFP_KERNEL); + if (!ddr_pmu) + return -ENOMEM; + + ddr_pmu->dev = &pdev->dev; + platform_set_drvdata(pdev, ddr_pmu); + + base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(base)) + return PTR_ERR(base); + + ddr_pmu->base = base; + + /* Setup the PMU counter to work in manual mode */ + writeq_relaxed(OP_MODE_CTRL_VAL_MANNUAL, ddr_pmu->base + + DDRC_PERF_CNT_OP_MODE_CTRL); + + ddr_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .task_ctx_nr = perf_invalid_context, + .attr_groups = cn10k_attr_groups, + .event_init = cn10k_ddr_perf_event_init, + .add = cn10k_ddr_perf_event_add, + .del = cn10k_ddr_perf_event_del, + .start = cn10k_ddr_perf_event_start, + .stop = cn10k_ddr_perf_event_stop, + .read = cn10k_ddr_perf_event_update, + .pmu_enable = cn10k_ddr_perf_pmu_enable, + .pmu_disable = cn10k_ddr_perf_pmu_disable, + }; + + /* Choose this cpu to collect perf data */ + ddr_pmu->cpu = raw_smp_processor_id(); + + name = devm_kasprintf(ddr_pmu->dev, GFP_KERNEL, "mrvl_ddr_pmu_%llx", + res->start); + if (!name) + return -ENOMEM; + + hrtimer_init(&ddr_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + ddr_pmu->hrtimer.function = cn10k_ddr_pmu_timer_handler; + + cpuhp_state_add_instance_nocalls( + CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE, + &ddr_pmu->node); + + ret = perf_pmu_register(&ddr_pmu->pmu, name, -1); + if (ret) + goto error; + + pr_info("CN10K DDR PMU Driver for ddrc@%llx\n", res->start); + return 0; +error: + cpuhp_state_remove_instance_nocalls( + CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE, + &ddr_pmu->node); + return ret; +} + +static int cn10k_ddr_perf_remove(struct platform_device *pdev) +{ + struct cn10k_ddr_pmu *ddr_pmu = platform_get_drvdata(pdev); + + cpuhp_state_remove_instance_nocalls( + CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE, + &ddr_pmu->node); + + perf_pmu_unregister(&ddr_pmu->pmu); + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id cn10k_ddr_pmu_of_match[] = { + { .compatible = "marvell,cn10k-ddr-pmu", }, + { }, +}; +MODULE_DEVICE_TABLE(of, cn10k_ddr_pmu_of_match); +#endif + +static struct platform_driver cn10k_ddr_pmu_driver = { + .driver = { + .name = "cn10k-ddr-pmu", + .of_match_table = of_match_ptr(cn10k_ddr_pmu_of_match), + .suppress_bind_attrs = true, + }, + .probe = cn10k_ddr_perf_probe, + .remove = cn10k_ddr_perf_remove, +}; + +static int __init cn10k_ddr_pmu_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi( + CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE, + "perf/marvell/cn10k/ddr:online", NULL, + cn10k_ddr_pmu_offline_cpu); + if (ret) + return ret; + + ret = platform_driver_register(&cn10k_ddr_pmu_driver); + if (ret) + cpuhp_remove_multi_state( + CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE); + return ret; +} + +static void __exit cn10k_ddr_pmu_exit(void) +{ + platform_driver_unregister(&cn10k_ddr_pmu_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE); +} + +module_init(cn10k_ddr_pmu_init); +module_exit(cn10k_ddr_pmu_exit); + +MODULE_AUTHOR("Bharat Bhushan <bbhushan2@marvell.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c index 7f4d292658e3..ee67305f822d 100644 --- a/drivers/perf/marvell_cn10k_tad_pmu.c +++ b/drivers/perf/marvell_cn10k_tad_pmu.c @@ -368,10 +368,12 @@ static int tad_pmu_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_OF static const struct of_device_id tad_pmu_of_match[] = { { .compatible = "marvell,cn10k-tad-pmu", }, {}, }; +#endif static struct platform_driver tad_pmu_driver = { .driver = { diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index 05378c0fd8f3..1edb9c03704f 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -887,13 +887,11 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev, static acpi_status tx2_uncore_pmu_add(acpi_handle handle, u32 level, void *data, void **return_value) { + struct acpi_device *adev = acpi_fetch_acpi_dev(handle); struct tx2_uncore_pmu *tx2_pmu; - struct acpi_device *adev; enum tx2_uncore_type type; - if (acpi_bus_get_device(handle, &adev)) - return AE_OK; - if (acpi_bus_get_status(adev) || !adev->status.present) + if (!adev || acpi_bus_get_status(adev) || !adev->status.present) return AE_OK; type = get_tx2_pmu_type(adev); diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 2b6d476bd213..0c32dffc7ede 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -867,7 +867,7 @@ static void xgene_perf_pmu_enable(struct pmu *pmu) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu); struct xgene_pmu *xgene_pmu = pmu_dev->parent; - int enabled = bitmap_weight(pmu_dev->cntr_assign_mask, + bool enabled = !bitmap_empty(pmu_dev->cntr_assign_mask, pmu_dev->max_counters); if (!enabled) @@ -1549,14 +1549,12 @@ static const struct acpi_device_id *xgene_pmu_acpi_match_type( static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level, void *data, void **return_value) { + struct acpi_device *adev = acpi_fetch_acpi_dev(handle); const struct acpi_device_id *acpi_id; struct xgene_pmu *xgene_pmu = data; struct xgene_pmu_dev_ctx *ctx; - struct acpi_device *adev; - if (acpi_bus_get_device(handle, &adev)) - return AE_OK; - if (acpi_bus_get_status(adev) || !adev->status.present) + if (!adev || acpi_bus_get_status(adev) || !adev->status.present) return AE_OK; acpi_id = xgene_pmu_acpi_match_type(xgene_pmu_acpi_type_match, adev); diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c index 4d81908d6725..ba536fd4d674 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c @@ -78,7 +78,6 @@ struct npcm7xx_gpio { struct gpio_chip gc; int irqbase; int irq; - void *priv; struct irq_chip irq_chip; u32 pinctrl_id; int (*direction_input)(struct gpio_chip *chip, unsigned offset); @@ -226,7 +225,7 @@ static void npcmgpio_irq_handler(struct irq_desc *desc) chained_irq_enter(chip, desc); sts = ioread32(bank->base + NPCM7XX_GP_N_EVST); en = ioread32(bank->base + NPCM7XX_GP_N_EVEN); - dev_dbg(chip->parent_device, "==> got irq sts %.8x %.8x\n", sts, + dev_dbg(bank->gc.parent, "==> got irq sts %.8x %.8x\n", sts, en); sts &= en; @@ -241,33 +240,33 @@ static int npcmgpio_set_irq_type(struct irq_data *d, unsigned int type) gpiochip_get_data(irq_data_get_irq_chip_data(d)); unsigned int gpio = BIT(d->hwirq); - dev_dbg(d->chip->parent_device, "setirqtype: %u.%u = %u\n", gpio, + dev_dbg(bank->gc.parent, "setirqtype: %u.%u = %u\n", gpio, d->irq, type); switch (type) { case IRQ_TYPE_EDGE_RISING: - dev_dbg(d->chip->parent_device, "edge.rising\n"); + dev_dbg(bank->gc.parent, "edge.rising\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_EDGE_FALLING: - dev_dbg(d->chip->parent_device, "edge.falling\n"); + dev_dbg(bank->gc.parent, "edge.falling\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_EDGE_BOTH: - dev_dbg(d->chip->parent_device, "edge.both\n"); + dev_dbg(bank->gc.parent, "edge.both\n"); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_EVBE, gpio); break; case IRQ_TYPE_LEVEL_LOW: - dev_dbg(d->chip->parent_device, "level.low\n"); + dev_dbg(bank->gc.parent, "level.low\n"); npcm_gpio_set(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; case IRQ_TYPE_LEVEL_HIGH: - dev_dbg(d->chip->parent_device, "level.high\n"); + dev_dbg(bank->gc.parent, "level.high\n"); npcm_gpio_clr(&bank->gc, bank->base + NPCM7XX_GP_N_POL, gpio); break; default: - dev_dbg(d->chip->parent_device, "invalid irq type\n"); + dev_dbg(bank->gc.parent, "invalid irq type\n"); return -EINVAL; } @@ -289,7 +288,7 @@ static void npcmgpio_irq_ack(struct irq_data *d) gpiochip_get_data(irq_data_get_irq_chip_data(d)); unsigned int gpio = d->hwirq; - dev_dbg(d->chip->parent_device, "irq_ack: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_ack: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVST); } @@ -301,7 +300,7 @@ static void npcmgpio_irq_mask(struct irq_data *d) unsigned int gpio = d->hwirq; /* Clear events */ - dev_dbg(d->chip->parent_device, "irq_mask: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_mask: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENC); } @@ -313,7 +312,7 @@ static void npcmgpio_irq_unmask(struct irq_data *d) unsigned int gpio = d->hwirq; /* Enable events */ - dev_dbg(d->chip->parent_device, "irq_unmask: %u.%u\n", gpio, d->irq); + dev_dbg(bank->gc.parent, "irq_unmask: %u.%u\n", gpio, d->irq); iowrite32(BIT(gpio), bank->base + NPCM7XX_GP_N_EVENS); } @@ -323,7 +322,7 @@ static unsigned int npcmgpio_irq_startup(struct irq_data *d) unsigned int gpio = d->hwirq; /* active-high, input, clear interrupt, enable interrupt */ - dev_dbg(d->chip->parent_device, "startup: %u.%u\n", gpio, d->irq); + dev_dbg(gc->parent, "startup: %u.%u\n", gpio, d->irq); npcmgpio_direction_input(gc, gpio); npcmgpio_irq_ack(d); npcmgpio_irq_unmask(d); diff --git a/drivers/pinctrl/pinctrl-starfive.c b/drivers/pinctrl/pinctrl-starfive.c index 266da41a6162..ab4b2ee9f217 100644 --- a/drivers/pinctrl/pinctrl-starfive.c +++ b/drivers/pinctrl/pinctrl-starfive.c @@ -1308,8 +1308,6 @@ static int starfive_probe(struct platform_device *pdev) sfp->gc.base = -1; sfp->gc.ngpio = NR_GPIOS; - starfive_irq_chip.parent_device = dev; - sfp->gc.irq.chip = &starfive_irq_chip; sfp->gc.irq.parent_handler = starfive_gpio_irq_handler; sfp->gc.irq.num_parents = 1; @@ -1330,6 +1328,8 @@ static int starfive_probe(struct platform_device *pdev) if (ret) return dev_err_probe(dev, ret, "could not register gpiochip\n"); + irq_domain_set_pm_device(sfp->gc.irq.domain, dev); + out_pinctrl_enable: return pinctrl_enable(sfp->pctl); } diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 88c549f257db..40a52feb315d 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -986,8 +986,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, CMD_SP(sc) = NULL; CMD_FLAGS(sc) |= FNIC_IO_DONE; - spin_unlock_irqrestore(io_lock, flags); - if (hdr_status != FCPIO_SUCCESS) { atomic64_inc(&fnic_stats->io_stats.io_failures); shost_printk(KERN_ERR, fnic->lport->host, "hdr status = %s\n", @@ -996,8 +994,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, fnic_release_ioreq_buf(fnic, io_req, sc); - mempool_free(io_req, fnic->io_req_pool); - cmd_trace = ((u64)hdr_status << 56) | (u64)icmnd_cmpl->scsi_status << 48 | (u64)icmnd_cmpl->flags << 40 | (u64)sc->cmnd[0] << 32 | @@ -1021,6 +1017,12 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, } else fnic->lport->host_stats.fcp_control_requests++; + /* Call SCSI completion function to complete the IO */ + scsi_done(sc); + spin_unlock_irqrestore(io_lock, flags); + + mempool_free(io_req, fnic->io_req_pool); + atomic64_dec(&fnic_stats->io_stats.active_ios); if (atomic64_read(&fnic->io_cmpl_skip)) atomic64_dec(&fnic->io_cmpl_skip); @@ -1049,9 +1051,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, if(io_duration_time > atomic64_read(&fnic_stats->io_stats.current_max_io_time)) atomic64_set(&fnic_stats->io_stats.current_max_io_time, io_duration_time); } - - /* Call SCSI completion function to complete the IO */ - scsi_done(sc); } /* fnic_fcpio_itmf_cmpl_handler diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 511726f92d9a..76229b839560 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2011,9 +2011,10 @@ mpt3sas_base_sync_reply_irqs(struct MPT3SAS_ADAPTER *ioc, u8 poll) enable_irq(reply_q->os_irq); } } + + if (poll) + _base_process_reply_queue(reply_q); } - if (poll) - _base_process_reply_queue(reply_q); } /** diff --git a/drivers/staging/greybus/gpio.c b/drivers/staging/greybus/gpio.c index 7e6347fe93f9..8a7cf1d0e968 100644 --- a/drivers/staging/greybus/gpio.c +++ b/drivers/staging/greybus/gpio.c @@ -391,10 +391,7 @@ static int gb_gpio_request_handler(struct gb_operation *op) return -EINVAL; } - local_irq_disable(); - ret = generic_handle_irq(irq); - local_irq_enable(); - + ret = generic_handle_irq_safe(irq); if (ret) dev_err(dev, "failed to invoke irq handler\n"); diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 73f419adce61..4bb6d304eb4b 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -1919,6 +1919,7 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data, struct usbtmc_ctrlrequest request; u8 *buffer = NULL; int rv; + unsigned int is_in, pipe; unsigned long res; res = copy_from_user(&request, arg, sizeof(struct usbtmc_ctrlrequest)); @@ -1928,12 +1929,14 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data, if (request.req.wLength > USBTMC_BUFSIZE) return -EMSGSIZE; + is_in = request.req.bRequestType & USB_DIR_IN; + if (request.req.wLength) { buffer = kmalloc(request.req.wLength, GFP_KERNEL); if (!buffer) return -ENOMEM; - if ((request.req.bRequestType & USB_DIR_IN) == 0) { + if (!is_in) { /* Send control data to device */ res = copy_from_user(buffer, request.data, request.req.wLength); @@ -1944,8 +1947,12 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data, } } + if (is_in) + pipe = usb_rcvctrlpipe(data->usb_dev, 0); + else + pipe = usb_sndctrlpipe(data->usb_dev, 0); rv = usb_control_msg(data->usb_dev, - usb_rcvctrlpipe(data->usb_dev, 0), + pipe, request.req.bRequest, request.req.bRequestType, request.req.wValue, @@ -1957,7 +1964,7 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data, goto exit; } - if (rv && (request.req.bRequestType & USB_DIR_IN)) { + if (rv && is_in) { /* Read control data from device */ res = copy_to_user(request.data, buffer, rv); if (res) diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c index 00b3f6b3bb31..713efd9aefde 100644 --- a/drivers/usb/gadget/function/rndis.c +++ b/drivers/usb/gadget/function/rndis.c @@ -640,6 +640,7 @@ static int rndis_set_response(struct rndis_params *params, BufLength = le32_to_cpu(buf->InformationBufferLength); BufOffset = le32_to_cpu(buf->InformationBufferOffset); if ((BufLength > RNDIS_MAX_TOTAL_SIZE) || + (BufOffset > RNDIS_MAX_TOTAL_SIZE) || (BufOffset + 8 >= RNDIS_MAX_TOTAL_SIZE)) return -EINVAL; diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 568534a0d17c..c109b069f511 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1436,7 +1436,6 @@ static void usb_gadget_remove_driver(struct usb_udc *udc) usb_gadget_udc_stop(udc); udc->driver = NULL; - udc->dev.driver = NULL; udc->gadget->dev.driver = NULL; } @@ -1498,7 +1497,6 @@ static int udc_bind_to_driver(struct usb_udc *udc, struct usb_gadget_driver *dri driver->function); udc->driver = driver; - udc->dev.driver = &driver->driver; udc->gadget->dev.driver = &driver->driver; usb_gadget_udc_set_speed(udc, driver->max_speed); @@ -1521,7 +1519,6 @@ err1: dev_err(&udc->dev, "failed to start %s: %d\n", udc->driver->function, ret); udc->driver = NULL; - udc->dev.driver = NULL; udc->gadget->dev.driver = NULL; return ret; } diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 7d4d0713f4f0..d2b7e613eb34 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -327,7 +327,6 @@ static int omap2430_probe(struct platform_device *pdev) musb->dev.parent = &pdev->dev; musb->dev.dma_mask = &omap2430_dmamask; musb->dev.coherent_dma_mask = omap2430_dmamask; - device_set_of_node_from_dev(&musb->dev, &pdev->dev); glue->dev = &pdev->dev; glue->musb = musb; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 082380c03a3e..1768362115c6 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1170,7 +1170,9 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev, goto done; } - if (msg.size == 0) { + if ((msg.type == VHOST_IOTLB_UPDATE || + msg.type == VHOST_IOTLB_INVALIDATE) && + msg.size == 0) { ret = -EINVAL; goto done; } diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 37f0b4274113..e6c9d41db1de 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -753,7 +753,8 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file) /* Iterating over all connections for all CIDs to find orphans is * inefficient. Room for improvement here. */ - vsock_for_each_connected_socket(vhost_vsock_reset_orphans); + vsock_for_each_connected_socket(&vhost_transport.transport, + vhost_vsock_reset_orphans); /* Don't check the owner, because we are in the release path, so we * need to stop the vsock device in any case. diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index 265865610edc..bebb2eea6448 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -1041,6 +1041,47 @@ stifb_copyarea(struct fb_info *info, const struct fb_copyarea *area) SETUP_FB(fb); } +#define ARTIST_VRAM_SIZE 0x000804 +#define ARTIST_VRAM_SRC 0x000808 +#define ARTIST_VRAM_SIZE_TRIGGER_WINFILL 0x000a04 +#define ARTIST_VRAM_DEST_TRIGGER_BLOCKMOVE 0x000b00 +#define ARTIST_SRC_BM_ACCESS 0x018008 +#define ARTIST_FGCOLOR 0x018010 +#define ARTIST_BGCOLOR 0x018014 +#define ARTIST_BITMAP_OP 0x01801c + +static void +stifb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) +{ + struct stifb_info *fb = container_of(info, struct stifb_info, info); + + if (rect->rop != ROP_COPY) + return cfb_fillrect(info, rect); + + SETUP_HW(fb); + + if (fb->info.var.bits_per_pixel == 32) { + WRITE_WORD(0xBBA0A000, fb, REG_10); + + NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xffffffff); + } else { + WRITE_WORD(fb->id == S9000_ID_HCRX ? 0x13a02000 : 0x13a01000, fb, REG_10); + + NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xff); + } + + WRITE_WORD(0x03000300, fb, ARTIST_BITMAP_OP); + WRITE_WORD(0x2ea01000, fb, ARTIST_SRC_BM_ACCESS); + NGLE_QUICK_SET_DST_BM_ACCESS(fb, 0x2ea01000); + NGLE_REALLY_SET_IMAGE_FG_COLOR(fb, rect->color); + WRITE_WORD(0, fb, ARTIST_BGCOLOR); + + NGLE_SET_DSTXY(fb, (rect->dx << 16) | (rect->dy)); + SET_LENXY_START_RECFILL(fb, (rect->width << 16) | (rect->height)); + + SETUP_FB(fb); +} + static void __init stifb_init_display(struct stifb_info *fb) { @@ -1105,7 +1146,7 @@ static const struct fb_ops stifb_ops = { .owner = THIS_MODULE, .fb_setcolreg = stifb_setcolreg, .fb_blank = stifb_blank, - .fb_fillrect = cfb_fillrect, + .fb_fillrect = stifb_fillrect, .fb_copyarea = stifb_copyarea, .fb_imageblit = cfb_imageblit, }; @@ -1297,7 +1338,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) goto out_err0; } info->screen_size = fix->smem_len; - info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA; + info->flags = FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT; info->pseudo_palette = &fb->pseudo_palette; /* This has to be done !!! */ diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 4d5ae61580aa..68e586283764 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -36,6 +36,9 @@ config COMPAT_BINFMT_ELF config ARCH_BINFMT_ELF_STATE bool +config ARCH_BINFMT_ELF_EXTRA_PHDRS + bool + config ARCH_HAVE_ELF_PROT bool diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 053cb449eb16..d3020abfe404 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3924,7 +3924,8 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, /* only send once per connect */ spin_lock(&cifs_tcp_ses_lock); - if (server->tcpStatus != CifsNeedSessSetup) { + if ((server->tcpStatus != CifsNeedSessSetup) && + (ses->status == CifsGood)) { spin_unlock(&cifs_tcp_ses_lock); return 0; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 2772dec9dcea..8bde30fa5387 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1105,17 +1105,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) goto read_super_error; } - root = d_make_root(inode); - if (!root) { - status = -ENOMEM; - mlog_errno(status); - goto read_super_error; - } - - sb->s_root = root; - - ocfs2_complete_mount_recovery(osb); - osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL, &ocfs2_kset->kobj); if (!osb->osb_dev_kset) { @@ -1133,6 +1122,17 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) goto read_super_error; } + root = d_make_root(inode); + if (!root) { + status = -ENOMEM; + mlog_errno(status); + goto read_super_error; + } + + sb->s_root = root; + + ocfs2_complete_mount_recovery(osb); + if (ocfs2_mount_local(osb)) snprintf(nodestr, sizeof(nodestr), "local"); else diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index e715bdb720d5..057c8964aefb 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -56,6 +56,7 @@ enum arch_timer_spi_nr { #define ARCH_TIMER_EVT_TRIGGER_MASK (0xF << ARCH_TIMER_EVT_TRIGGER_SHIFT) #define ARCH_TIMER_USR_VT_ACCESS_EN (1 << 8) /* virtual timer registers */ #define ARCH_TIMER_USR_PT_ACCESS_EN (1 << 9) /* physical timer registers */ +#define ARCH_TIMER_EVT_INTERVAL_SCALE (1 << 17) /* EVNTIS in the ARMv8 ARM */ #define ARCH_TIMER_EVT_STREAM_PERIOD_US 100 #define ARCH_TIMER_EVT_STREAM_FREQ \ diff --git a/include/crypto/asym_tpm_subtype.h b/include/crypto/asym_tpm_subtype.h deleted file mode 100644 index 48198c36d6b9..000000000000 --- a/include/crypto/asym_tpm_subtype.h +++ /dev/null @@ -1,19 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#ifndef _LINUX_ASYM_TPM_SUBTYPE_H -#define _LINUX_ASYM_TPM_SUBTYPE_H - -#include <linux/keyctl.h> - -struct tpm_key { - void *blob; - u32 blob_len; - uint16_t key_len; /* Size in bits of the key */ - const void *pub_key; /* pointer inside blob to the public key bytes */ - uint16_t pub_key_len; /* length of the public key */ -}; - -struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len); - -extern struct asymmetric_key_subtype asym_tpm_subtype; - -#endif /* _LINUX_ASYM_TPM_SUBTYPE_H */ diff --git a/include/dt-bindings/interrupt-controller/apple-aic.h b/include/dt-bindings/interrupt-controller/apple-aic.h index 604f2bb30ac0..bf3aac0e5491 100644 --- a/include/dt-bindings/interrupt-controller/apple-aic.h +++ b/include/dt-bindings/interrupt-controller/apple-aic.h @@ -11,5 +11,7 @@ #define AIC_TMR_HV_VIRT 1 #define AIC_TMR_GUEST_PHYS 2 #define AIC_TMR_GUEST_VIRT 3 +#define AIC_CPU_PMU_E 4 +#define AIC_CPU_PMU_P 5 #endif diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h index 6acd3cf13a18..2419a735420f 100644 --- a/include/keys/system_keyring.h +++ b/include/keys/system_keyring.h @@ -38,6 +38,20 @@ extern int restrict_link_by_builtin_and_secondary_trusted( #define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted #endif +#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING +extern int restrict_link_by_builtin_secondary_and_machine( + struct key *dest_keyring, + const struct key_type *type, + const union key_payload *payload, + struct key *restrict_key); +extern void __init set_machine_trusted_keys(struct key *keyring); +#else +#define restrict_link_by_builtin_secondary_and_machine restrict_link_by_builtin_trusted +static inline void __init set_machine_trusted_keys(struct key *keyring) +{ +} +#endif + extern struct pkcs7_message *pkcs7; #ifdef CONFIG_SYSTEM_BLACKLIST_KEYRING extern int mark_hash_blacklisted(const char *hash); diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 6c7f47846971..6562f543c3e0 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -117,30 +117,9 @@ void amba_device_put(struct amba_device *); int amba_device_add(struct amba_device *, struct resource *); int amba_device_register(struct amba_device *, struct resource *); void amba_device_unregister(struct amba_device *); -struct amba_device *amba_find_device(const char *, struct device *, unsigned int, unsigned int); int amba_request_regions(struct amba_device *, const char *); void amba_release_regions(struct amba_device *); -static inline int amba_pclk_enable(struct amba_device *dev) -{ - return clk_enable(dev->pclk); -} - -static inline void amba_pclk_disable(struct amba_device *dev) -{ - clk_disable(dev->pclk); -} - -static inline int amba_pclk_prepare(struct amba_device *dev) -{ - return clk_prepare(dev->pclk); -} - -static inline void amba_pclk_unprepare(struct amba_device *dev) -{ - clk_unprepare(dev->pclk); -} - /* Some drivers don't use the struct amba_device */ #define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff) #define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 411a428ace4d..2bc550ac8dc7 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -231,6 +231,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE, CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE, + CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE, CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 746e081879a5..f8e206e82476 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -114,7 +114,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg #endif } -#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64) +#ifdef CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. Dumping its @@ -149,6 +149,6 @@ static inline size_t elf_core_extra_data_size(void) { return 0; } -#endif +#endif /* CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS */ #endif /* _LINUX_ELFCORE_H */ diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index b712217f7030..1ed52441972f 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -52,6 +52,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) case ARPHRD_VOID: case ARPHRD_NONE: case ARPHRD_RAWIP: + case ARPHRD_PIMREG: return false; default: return true; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9367f1cb2e3c..f40754caaefa 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -579,7 +579,16 @@ enum NR_SOFTIRQS }; -#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) +/* + * The following vectors can be safely ignored after ksoftirqd is parked: + * + * _ RCU: + * 1) rcutree_migrate_callbacks() migrates the queue. + * 2) rcu_report_dead() reports the final quiescent states. + * + * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue + */ +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index e9dacd4b9f6b..af1c9d62e642 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -34,13 +34,16 @@ struct ioasid_allocator_ops { #if IS_ENABLED(CONFIG_IOASID) ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, void *private); -void ioasid_get(ioasid_t ioasid); -bool ioasid_put(ioasid_t ioasid); +void ioasid_free(ioasid_t ioasid); void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)); int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator); int ioasid_set_data(ioasid_t ioasid, void *data); +static inline bool pasid_valid(ioasid_t ioasid) +{ + return ioasid != INVALID_IOASID; +} #else /* !CONFIG_IOASID */ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, @@ -49,14 +52,7 @@ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, return INVALID_IOASID; } -static inline void ioasid_get(ioasid_t ioasid) -{ -} - -static inline bool ioasid_put(ioasid_t ioasid) -{ - return false; -} +static inline void ioasid_free(ioasid_t ioasid) { } static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)) @@ -78,5 +74,10 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data) return -ENOTSUPP; } +static inline bool pasid_valid(ioasid_t ioasid) +{ + return false; +} + #endif /* CONFIG_IOASID */ #endif /* __LINUX_IOASID_H */ diff --git a/include/linux/irq.h b/include/linux/irq.h index 848e1e12c5c6..f92788ccdba2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -456,7 +456,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) /** * struct irq_chip - hardware interrupt chip descriptor * - * @parent_device: pointer to parent device for irqchip * @name: name for /proc/interrupts * @irq_startup: start up the interrupt (defaults to ->enable if NULL) * @irq_shutdown: shut down the interrupt (defaults to ->disable if NULL) @@ -503,7 +502,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @flags: chip specific flags */ struct irq_chip { - struct device *parent_device; const char *name; unsigned int (*irq_startup)(struct irq_data *data); void (*irq_shutdown)(struct irq_data *data); @@ -712,10 +710,11 @@ extern struct irq_chip no_irq_chip; extern struct irq_chip dummy_irq_chip; extern void -irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, +irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip, irq_flow_handler_t handle, const char *name); -static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *chip, +static inline void irq_set_chip_and_handler(unsigned int irq, + const struct irq_chip *chip, irq_flow_handler_t handle) { irq_set_chip_and_handler_name(irq, chip, handle, NULL); @@ -805,7 +804,7 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq) } /* Set/get chip/data for an IRQ: */ -extern int irq_set_chip(unsigned int irq, struct irq_chip *chip); +extern int irq_set_chip(unsigned int irq, const struct irq_chip *chip); extern int irq_set_handler_data(unsigned int irq, void *data); extern int irq_set_chip_data(unsigned int irq, void *data); extern int irq_set_irq_type(unsigned int irq, unsigned int type); diff --git a/include/linux/irqchip/versatile-fpga.h b/include/linux/irqchip/versatile-fpga.h deleted file mode 100644 index a978fc8c7996..000000000000 --- a/include/linux/irqchip/versatile-fpga.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef PLAT_FPGA_IRQ_H -#define PLAT_FPGA_IRQ_H - -struct device_node; -struct pt_regs; - -void fpga_handle_irq(struct pt_regs *regs); -void fpga_irq_init(void __iomem *, const char *, int, int, u32, - struct device_node *node); -int fpga_irq_of_init(struct device_node *node, - struct device_node *parent); - -#endif diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 93d270ca0c56..a77584593f7d 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -160,6 +160,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc) int handle_irq_desc(struct irq_desc *desc); int generic_handle_irq(unsigned int irq); +int generic_handle_irq_safe(unsigned int irq); #ifdef CONFIG_IRQ_DOMAIN /* diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index d476405802e9..00d577f90883 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -151,6 +151,8 @@ struct irq_domain_chip_generic; * @gc: Pointer to a list of generic chips. There is a helper function for * setting up one or more generic chips for interrupt controllers * drivers using the generic chip library which uses this pointer. + * @dev: Pointer to a device that the domain represent, and that will be + * used for power management purposes. * @parent: Pointer to parent irq_domain to support hierarchy irq_domains * * Revmap data, used internally by irq_domain @@ -171,6 +173,7 @@ struct irq_domain { struct fwnode_handle *fwnode; enum irq_domain_bus_token bus_token; struct irq_domain_chip_generic *gc; + struct device *dev; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *parent; #endif @@ -226,6 +229,13 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) return to_of_node(d->fwnode); } +static inline void irq_domain_set_pm_device(struct irq_domain *d, + struct device *dev) +{ + if (d) + d->dev = dev; +} + #ifdef CONFIG_IRQ_DOMAIN struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, const char *name, phys_addr_t *pa); @@ -469,7 +479,8 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest); extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq); extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, + const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name); extern void irq_domain_reset_irq_data(struct irq_data *irq_data); @@ -512,7 +523,7 @@ extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, - struct irq_chip *chip, + const struct irq_chip *chip, void *chip_data); extern void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq, diff --git a/include/linux/kasan-enabled.h b/include/linux/kasan-enabled.h new file mode 100644 index 000000000000..6f612d69ea0c --- /dev/null +++ b/include/linux/kasan-enabled.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KASAN_ENABLED_H +#define _LINUX_KASAN_ENABLED_H + +#include <linux/static_key.h> + +#ifdef CONFIG_KASAN_HW_TAGS + +DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); + +static __always_inline bool kasan_enabled(void) +{ + return static_branch_likely(&kasan_flag_enabled); +} + +static inline bool kasan_hw_tags_enabled(void) +{ + return kasan_enabled(); +} + +#else /* CONFIG_KASAN_HW_TAGS */ + +static inline bool kasan_enabled(void) +{ + return IS_ENABLED(CONFIG_KASAN); +} + +static inline bool kasan_hw_tags_enabled(void) +{ + return false; +} + +#endif /* CONFIG_KASAN_HW_TAGS */ + +#endif /* LINUX_KASAN_ENABLED_H */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 4a45562d8893..b6a93261c92a 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -3,6 +3,7 @@ #define _LINUX_KASAN_H #include <linux/bug.h> +#include <linux/kasan-enabled.h> #include <linux/kernel.h> #include <linux/static_key.h> #include <linux/types.h> @@ -83,33 +84,11 @@ static inline void kasan_disable_current(void) {} #ifdef CONFIG_KASAN_HW_TAGS -DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); - -static __always_inline bool kasan_enabled(void) -{ - return static_branch_likely(&kasan_flag_enabled); -} - -static inline bool kasan_hw_tags_enabled(void) -{ - return kasan_enabled(); -} - void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags); void kasan_free_pages(struct page *page, unsigned int order); #else /* CONFIG_KASAN_HW_TAGS */ -static inline bool kasan_enabled(void) -{ - return IS_ENABLED(CONFIG_KASAN); -} - -static inline bool kasan_hw_tags_enabled(void) -{ - return false; -} - static __always_inline void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags) { diff --git a/include/linux/linkage.h b/include/linux/linkage.h index dbf8506decca..acb1ad2356f1 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -165,7 +165,18 @@ #ifndef SYM_END #define SYM_END(name, sym_type) \ .type name sym_type ASM_NL \ - .size name, .-name + .set .L__sym_size_##name, .-name ASM_NL \ + .size name, .L__sym_size_##name +#endif + +/* SYM_ALIAS -- use only if you have to */ +#ifndef SYM_ALIAS +#define SYM_ALIAS(alias, name, sym_type, linkage) \ + linkage(alias) ASM_NL \ + .set alias, name ASM_NL \ + .type alias sym_type ASM_NL \ + .set .L__sym_size_##alias, .L__sym_size_##name ASM_NL \ + .size alias, .L__sym_size_##alias #endif /* === code annotations === */ @@ -200,30 +211,8 @@ SYM_ENTRY(name, linkage, SYM_A_NONE) #endif -/* - * SYM_FUNC_START_LOCAL_ALIAS -- use where there are two local names for one - * function - */ -#ifndef SYM_FUNC_START_LOCAL_ALIAS -#define SYM_FUNC_START_LOCAL_ALIAS(name) \ - SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) -#endif - -/* - * SYM_FUNC_START_ALIAS -- use where there are two global names for one - * function - */ -#ifndef SYM_FUNC_START_ALIAS -#define SYM_FUNC_START_ALIAS(name) \ - SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) -#endif - /* SYM_FUNC_START -- use for global functions */ #ifndef SYM_FUNC_START -/* - * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two - * later. - */ #define SYM_FUNC_START(name) \ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif @@ -236,7 +225,6 @@ /* SYM_FUNC_START_LOCAL -- use for local functions */ #ifndef SYM_FUNC_START_LOCAL -/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_START_LOCAL(name) \ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) #endif @@ -259,22 +247,39 @@ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) #endif -/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */ -#ifndef SYM_FUNC_END_ALIAS -#define SYM_FUNC_END_ALIAS(name) \ - SYM_END(name, SYM_T_FUNC) -#endif - /* * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START, * SYM_FUNC_START_WEAK, ... */ #ifndef SYM_FUNC_END -/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_END(name) \ SYM_END(name, SYM_T_FUNC) #endif +/* + * SYM_FUNC_ALIAS -- define a global alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS +#define SYM_FUNC_ALIAS(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL) +#endif + +/* + * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS_LOCAL +#define SYM_FUNC_ALIAS_LOCAL(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL) +#endif + +/* + * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS_WEAK +#define SYM_FUNC_ALIAS_WEAK(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK) +#endif + /* SYM_CODE_START -- use for non-C (special) functions */ #ifndef SYM_CODE_START #define SYM_CODE_START(name) \ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0f549870da6a..5f7a33890b0f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -634,7 +634,7 @@ struct mm_struct { #endif struct work_struct async_put_work; -#ifdef CONFIG_IOMMU_SUPPORT +#ifdef CONFIG_IOMMU_SVA u32 pasid; #endif } __randomize_layout; diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 2512e2f9cd4e..0407a38b470a 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -26,6 +26,8 @@ */ /* Event uses a 64bit counter */ #define ARMPMU_EVT_64BIT 1 +/* Event uses a 47bit counter */ +#define ARMPMU_EVT_47BIT 2 #define HW_OP_UNSUPPORTED 0xFFFF #define C(_x) PERF_COUNT_HW_CACHE_##_x diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 88b42eb46406..e7c39c200e2b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -84,7 +84,7 @@ static inline int rcu_preempt_depth(void) /* Internal to kernel */ void rcu_init(void); -extern int rcu_scheduler_active __read_mostly; +extern int rcu_scheduler_active; void rcu_sched_clock_irq(int user); void rcu_report_dead(unsigned int cpu); void rcutree_migrate_callbacks(int cpu); @@ -924,7 +924,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * * kvfree_rcu(ptr); * - * where @ptr is a pointer to kvfree(). + * where @ptr is the pointer to be freed by kvfree(). * * Please note, head-less way of freeing is permitted to * use from a context that has to follow might_sleep() diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 858f4d429946..5fed476f977f 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -64,9 +64,8 @@ static inline void rcu_softirq_qs(void) rcu_tasks_qs(current, (preempt)); \ } while (0) -static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) +static inline int rcu_needs_cpu(void) { - *nextevt = KTIME_MAX; return 0; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 53209d669400..9c6cfb742504 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -19,7 +19,7 @@ void rcu_softirq_qs(void); void rcu_note_context_switch(bool preempt); -int rcu_needs_cpu(u64 basem, u64 *nextevt); +int rcu_needs_cpu(void); void rcu_cpu_stall_reset(void); /* @@ -62,7 +62,7 @@ static inline void rcu_irq_exit_check_preempt(void) { } void exit_rcu(void); void rcu_scheduler_starting(void); -extern int rcu_scheduler_active __read_mostly; +extern int rcu_scheduler_active; void rcu_end_inkernel_boot(void); bool rcu_inkernel_boot_has_ended(void); bool rcu_is_watching(void); diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h index 61c56cca95c4..8052d34da782 100644 --- a/include/linux/rcuwait.h +++ b/include/linux/rcuwait.h @@ -47,11 +47,7 @@ static inline void prepare_to_rcuwait(struct rcuwait *w) rcu_assign_pointer(w->task, current); } -static inline void finish_rcuwait(struct rcuwait *w) -{ - rcu_assign_pointer(w->task, NULL); - __set_current_state(TASK_RUNNING); -} +extern void finish_rcuwait(struct rcuwait *w); #define rcuwait_wait_event(w, condition, state) \ ({ \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 75ba8aa60248..aab75966396f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -938,6 +938,9 @@ struct task_struct { /* Recursion prevention for eventfd_signal() */ unsigned in_eventfd_signal:1; #endif +#ifdef CONFIG_IOMMU_SVA + unsigned pasid_activated:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ @@ -1087,6 +1090,9 @@ struct task_struct { /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; +#ifdef CONFIG_RT_DELAYED_SIGNALS + struct kernel_siginfo forced_info; +#endif unsigned long sas_ss_sp; size_t sas_ss_size; unsigned int sas_ss_flags; diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index aa5f09ca5bcf..a80356e9dc69 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -8,6 +8,7 @@ #include <linux/mm_types.h> #include <linux/gfp.h> #include <linux/sync_core.h> +#include <linux/ioasid.h> /* * Routines for handling mm_structs @@ -433,4 +434,29 @@ static inline void membarrier_update_current_mm(struct mm_struct *next_mm) } #endif +#ifdef CONFIG_IOMMU_SVA +static inline void mm_pasid_init(struct mm_struct *mm) +{ + mm->pasid = INVALID_IOASID; +} + +/* Associate a PASID with an mm_struct: */ +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) +{ + mm->pasid = pasid; +} + +static inline void mm_pasid_drop(struct mm_struct *mm) +{ + if (pasid_valid(mm->pasid)) { + ioasid_free(mm->pasid); + mm->pasid = INVALID_IOASID; + } +} +#else +static inline void mm_pasid_init(struct mm_struct *mm) {} +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {} +static inline void mm_pasid_drop(struct mm_struct *mm) {} +#endif + #endif /* _LINUX_SCHED_MM_H */ diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index d10150587d81..892562ebbd3a 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -79,6 +79,8 @@ static inline void *try_get_task_stack(struct task_struct *tsk) static inline void put_task_stack(struct task_struct *tsk) {} #endif +void exit_task_stack_account(struct task_struct *tsk); + #define task_stack_end_corrupted(task) \ (*(end_of_stack(task)) != STACK_END_MAGIC) diff --git a/include/linux/topology.h b/include/linux/topology.h index a6e201758ae9..f19bc3626297 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -211,6 +211,9 @@ static inline int cpu_to_mem(int cpu) #ifndef topology_drawer_id #define topology_drawer_id(cpu) ((void)(cpu), -1) #endif +#ifndef topology_ppin +#define topology_ppin(cpu) ((void)(cpu), 0ull) +#endif #ifndef topology_sibling_cpumask #define topology_sibling_cpumask(cpu) cpumask_of(cpu) #endif diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index ab207677e0a8..f742e50207fb 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -205,7 +205,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); void vsock_remove_sock(struct vsock_sock *vsk); -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); +void vsock_for_each_connected_socket(struct vsock_transport *transport, + void (*fn)(struct sock *sk)); int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); bool vsock_find_cid(unsigned int cid); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8731d5bcb47d..b08b70989d2c 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -97,7 +97,6 @@ struct nf_conn { unsigned long status; u16 cpu; - u16 local_origin:1; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 670e41783edd..90b2fb0292cb 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -794,16 +794,15 @@ TRACE_EVENT_RCU(rcu_torture_read, * Tracepoint for rcu_barrier() execution. The string "s" describes * the rcu_barrier phase: * "Begin": rcu_barrier() started. + * "CB": An rcu_barrier_callback() invoked a callback, not the last. * "EarlyExit": rcu_barrier() piggybacked, thus early exit. * "Inc1": rcu_barrier() piggyback check counter incremented. - * "OfflineNoCBQ": rcu_barrier() found offline no-CBs CPU with callbacks. - * "OnlineQ": rcu_barrier() found online CPU with callbacks. - * "OnlineNQ": rcu_barrier() found online CPU, no callbacks. + * "Inc2": rcu_barrier() piggyback check counter incremented. * "IRQ": An rcu_barrier_callback() callback posted on remote CPU. * "IRQNQ": An rcu_barrier_callback() callback found no callbacks. - * "CB": An rcu_barrier_callback() invoked a callback, not the last. * "LastCB": An rcu_barrier_callback() invoked the last callback. - * "Inc2": rcu_barrier() piggyback check counter incremented. + * "NQ": rcu_barrier() found a CPU with no callbacks. + * "OnlineQ": rcu_barrier() found online CPU with callbacks. * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument * is the count of remaining callbacks, and "done" is the piggybacking count. */ diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 61bf4774b8f2..fe8e5b74cb39 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -40,6 +40,9 @@ typedef __s64 Elf64_Sxword; #define PT_GNU_STACK (PT_LOOS + 0x474e551) +/* ARM MTE memory tag segment type */ +#define PT_ARM_MEMTAG_MTE (PT_LOPROC + 0x1) + /* * Extended Numbering * diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index ce77f0265660..5644abd5f8a8 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -132,4 +132,14 @@ config SCHED_CORE which is the likely usage by Linux distributions, there should be no measurable impact on performance. - +config ARCH_WANTS_RT_DELAYED_SIGNALS + bool + help + This option is selected by architectures where raising signals + can happen in atomic contexts on PREEMPT_RT enabled kernels. This + option delays raising the signal until the return to user space + loop where it is also delivered. X86 requires this to deliver + signals from trap handlers which run on IST stacks. + +config RT_DELAYED_SIGNALS + def_bool PREEMPT_RT && ARCH_WANTS_RT_DELAYED_SIGNALS diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config index 07df6d93c4df..e8db8d938661 100644 --- a/kernel/configs/debug.config +++ b/kernel/configs/debug.config @@ -16,6 +16,7 @@ CONFIG_SYMBOLIC_ERRNAME=y # # Compile-time checks and compiler options # +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_FRAME_WARN=2048 diff --git a/kernel/entry/common.c b/kernel/entry/common.c index bad713684c2e..0543a2c92f20 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -148,6 +148,18 @@ static void handle_signal_work(struct pt_regs *regs, unsigned long ti_work) arch_do_signal_or_restart(regs, ti_work & _TIF_SIGPENDING); } +#ifdef CONFIG_RT_DELAYED_SIGNALS +static inline void raise_delayed_signal(void) +{ + if (unlikely(current->forced_info.si_signo)) { + force_sig_info(¤t->forced_info); + current->forced_info.si_signo = 0; + } +} +#else +static inline void raise_delayed_signal(void) { } +#endif + static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work) { @@ -162,6 +174,8 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, if (ti_work & _TIF_NEED_RESCHED) schedule(); + raise_delayed_signal(); + if (ti_work & _TIF_UPROBE) uprobe_notify_resume(regs); diff --git a/kernel/exit.c b/kernel/exit.c index b00a25bb4ab9..c303cffe7fdb 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -845,6 +845,7 @@ void __noreturn do_exit(long code) put_page(tsk->task_frag.page); validate_creds_for_do_exit(tsk); + exit_task_stack_account(tsk); check_stack_usage(); preempt_disable(); diff --git a/kernel/fork.c b/kernel/fork.c index f1e89007f228..d74c30b6733b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -97,6 +97,7 @@ #include <linux/scs.h> #include <linux/io_uring.h> #include <linux/bpf.h> +#include <linux/sched/mm.h> #include <asm/pgalloc.h> #include <linux/uaccess.h> @@ -185,7 +186,7 @@ static inline void free_task_struct(struct task_struct *tsk) */ # if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) -#ifdef CONFIG_VMAP_STACK +# ifdef CONFIG_VMAP_STACK /* * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB * flush. Try to minimize the number of calls by caching stacks. @@ -193,6 +194,41 @@ static inline void free_task_struct(struct task_struct *tsk) #define NR_CACHED_STACKS 2 static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]); +struct vm_stack { + struct rcu_head rcu; + struct vm_struct *stack_vm_area; +}; + +static bool try_release_thread_stack_to_cache(struct vm_struct *vm) +{ + unsigned int i; + + for (i = 0; i < NR_CACHED_STACKS; i++) { + if (this_cpu_cmpxchg(cached_stacks[i], NULL, vm) != NULL) + continue; + return true; + } + return false; +} + +static void thread_stack_free_rcu(struct rcu_head *rh) +{ + struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu); + + if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area)) + return; + + vfree(vm_stack); +} + +static void thread_stack_delayed_free(struct task_struct *tsk) +{ + struct vm_stack *vm_stack = tsk->stack; + + vm_stack->stack_vm_area = tsk->stack_vm_area; + call_rcu(&vm_stack->rcu, thread_stack_free_rcu); +} + static int free_vm_stack_cache(unsigned int cpu) { struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu); @@ -210,11 +246,35 @@ static int free_vm_stack_cache(unsigned int cpu) return 0; } -#endif -static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) +static int memcg_charge_kernel_stack(struct vm_struct *vm) { -#ifdef CONFIG_VMAP_STACK + int i; + int ret; + + BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); + BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); + + for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { + ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0); + if (ret) + goto err; + } + return 0; +err: + /* + * If memcg_kmem_charge_page() fails, page's memory cgroup pointer is + * NULL, and memcg_kmem_uncharge_page() in free_thread_stack() will + * ignore this page. + */ + for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) + memcg_kmem_uncharge_page(vm->pages[i], 0); + return ret; +} + +static int alloc_thread_stack_node(struct task_struct *tsk, int node) +{ + struct vm_struct *vm; void *stack; int i; @@ -232,9 +292,14 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) /* Clear stale pointers from reused stack. */ memset(s->addr, 0, THREAD_SIZE); + if (memcg_charge_kernel_stack(s)) { + vfree(s->addr); + return -ENOMEM; + } + tsk->stack_vm_area = s; tsk->stack = s->addr; - return s->addr; + return 0; } /* @@ -247,71 +312,95 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) THREADINFO_GFP & ~__GFP_ACCOUNT, PAGE_KERNEL, 0, node, __builtin_return_address(0)); + if (!stack) + return -ENOMEM; + vm = find_vm_area(stack); + if (memcg_charge_kernel_stack(vm)) { + vfree(stack); + return -ENOMEM; + } /* * We can't call find_vm_area() in interrupt context, and * free_thread_stack() can be called in interrupt context, * so cache the vm_struct. */ - if (stack) { - tsk->stack_vm_area = find_vm_area(stack); - tsk->stack = stack; - } - return stack; -#else + tsk->stack_vm_area = vm; + tsk->stack = stack; + return 0; +} + +static void free_thread_stack(struct task_struct *tsk) +{ + if (!try_release_thread_stack_to_cache(tsk->stack_vm_area)) + thread_stack_delayed_free(tsk); + + tsk->stack = NULL; + tsk->stack_vm_area = NULL; +} + +# else /* !CONFIG_VMAP_STACK */ + +static void thread_stack_free_rcu(struct rcu_head *rh) +{ + __free_pages(virt_to_page(rh), THREAD_SIZE_ORDER); +} + +static void thread_stack_delayed_free(struct task_struct *tsk) +{ + struct rcu_head *rh = tsk->stack; + + call_rcu(rh, thread_stack_free_rcu); +} + +static int alloc_thread_stack_node(struct task_struct *tsk, int node) +{ struct page *page = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); if (likely(page)) { tsk->stack = kasan_reset_tag(page_address(page)); - return tsk->stack; + return 0; } - return NULL; -#endif + return -ENOMEM; } -static inline void free_thread_stack(struct task_struct *tsk) +static void free_thread_stack(struct task_struct *tsk) { -#ifdef CONFIG_VMAP_STACK - struct vm_struct *vm = task_stack_vm_area(tsk); + thread_stack_delayed_free(tsk); + tsk->stack = NULL; +} - if (vm) { - int i; +# endif /* CONFIG_VMAP_STACK */ +# else /* !(THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)) */ - for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) - memcg_kmem_uncharge_page(vm->pages[i], 0); - - for (i = 0; i < NR_CACHED_STACKS; i++) { - if (this_cpu_cmpxchg(cached_stacks[i], - NULL, tsk->stack_vm_area) != NULL) - continue; +static struct kmem_cache *thread_stack_cache; - return; - } +static void thread_stack_free_rcu(struct rcu_head *rh) +{ + kmem_cache_free(thread_stack_cache, rh); +} - vfree_atomic(tsk->stack); - return; - } -#endif +static void thread_stack_delayed_free(struct task_struct *tsk) +{ + struct rcu_head *rh = tsk->stack; - __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER); + call_rcu(rh, thread_stack_free_rcu); } -# else -static struct kmem_cache *thread_stack_cache; -static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, - int node) +static int alloc_thread_stack_node(struct task_struct *tsk, int node) { unsigned long *stack; stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); stack = kasan_reset_tag(stack); tsk->stack = stack; - return stack; + return stack ? 0 : -ENOMEM; } static void free_thread_stack(struct task_struct *tsk) { - kmem_cache_free(thread_stack_cache, tsk->stack); + thread_stack_delayed_free(tsk); + tsk->stack = NULL; } void thread_stack_cache_init(void) @@ -321,8 +410,26 @@ void thread_stack_cache_init(void) THREAD_SIZE, NULL); BUG_ON(thread_stack_cache == NULL); } -# endif -#endif + +# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */ +#else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */ + +static int alloc_thread_stack_node(struct task_struct *tsk, int node) +{ + unsigned long *stack; + + stack = arch_alloc_thread_stack_node(tsk, node); + tsk->stack = stack; + return stack ? 0 : -ENOMEM; +} + +static void free_thread_stack(struct task_struct *tsk) +{ + arch_free_thread_stack(tsk); + tsk->stack = NULL; +} + +#endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */ /* SLAB cache for signal_struct structures (tsk->signal) */ static struct kmem_cache *signal_cachep; @@ -379,50 +486,34 @@ void vm_area_free(struct vm_area_struct *vma) static void account_kernel_stack(struct task_struct *tsk, int account) { - void *stack = task_stack_page(tsk); - struct vm_struct *vm = task_stack_vm_area(tsk); - - if (vm) { + if (IS_ENABLED(CONFIG_VMAP_STACK)) { + struct vm_struct *vm = task_stack_vm_area(tsk); int i; for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB, account * (PAGE_SIZE / 1024)); } else { + void *stack = task_stack_page(tsk); + /* All stack pages are in the same node. */ mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB, account * (THREAD_SIZE / 1024)); } } -static int memcg_charge_kernel_stack(struct task_struct *tsk) +void exit_task_stack_account(struct task_struct *tsk) { -#ifdef CONFIG_VMAP_STACK - struct vm_struct *vm = task_stack_vm_area(tsk); - int ret; - - BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); + account_kernel_stack(tsk, -1); - if (vm) { + if (IS_ENABLED(CONFIG_VMAP_STACK)) { + struct vm_struct *vm; int i; - BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); - - for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { - /* - * If memcg_kmem_charge_page() fails, page's - * memory cgroup pointer is NULL, and - * memcg_kmem_uncharge_page() in free_thread_stack() - * will ignore this page. - */ - ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, - 0); - if (ret) - return ret; - } + vm = task_stack_vm_area(tsk); + for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) + memcg_kmem_uncharge_page(vm->pages[i], 0); } -#endif - return 0; } static void release_task_stack(struct task_struct *tsk) @@ -430,12 +521,7 @@ static void release_task_stack(struct task_struct *tsk) if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD)) return; /* Better to leak the stack than to free prematurely */ - account_kernel_stack(tsk, -1); free_thread_stack(tsk); - tsk->stack = NULL; -#ifdef CONFIG_VMAP_STACK - tsk->stack_vm_area = NULL; -#endif } #ifdef CONFIG_THREAD_INFO_IN_TASK @@ -874,8 +960,6 @@ void set_task_stack_end_magic(struct task_struct *tsk) static struct task_struct *dup_task_struct(struct task_struct *orig, int node) { struct task_struct *tsk; - unsigned long *stack; - struct vm_struct *stack_vm_area __maybe_unused; int err; if (node == NUMA_NO_NODE) @@ -884,32 +968,18 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (!tsk) return NULL; - stack = alloc_thread_stack_node(tsk, node); - if (!stack) + err = arch_dup_task_struct(tsk, orig); + if (err) goto free_tsk; - if (memcg_charge_kernel_stack(tsk)) - goto free_stack; - - stack_vm_area = task_stack_vm_area(tsk); - - err = arch_dup_task_struct(tsk, orig); + err = alloc_thread_stack_node(tsk, node); + if (err) + goto free_tsk; - /* - * arch_dup_task_struct() clobbers the stack-related fields. Make - * sure they're properly initialized before using any stack-related - * functions again. - */ - tsk->stack = stack; -#ifdef CONFIG_VMAP_STACK - tsk->stack_vm_area = stack_vm_area; -#endif #ifdef CONFIG_THREAD_INFO_IN_TASK refcount_set(&tsk->stack_refcount, 1); #endif - - if (err) - goto free_stack; + account_kernel_stack(tsk, 1); err = scs_prepare(tsk, node); if (err) @@ -953,8 +1023,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->wake_q.next = NULL; tsk->worker_private = NULL; - account_kernel_stack(tsk, 1); - kcov_task_init(tsk); kmap_local_fork(tsk); @@ -967,12 +1035,17 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->use_memdelay = 0; #endif +#ifdef CONFIG_IOMMU_SVA + tsk->pasid_activated = 0; +#endif + #ifdef CONFIG_MEMCG tsk->active_memcg = NULL; #endif return tsk; free_stack: + exit_task_stack_account(tsk); free_thread_stack(tsk); free_tsk: free_task_struct(tsk); @@ -1019,13 +1092,6 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } -static void mm_init_pasid(struct mm_struct *mm) -{ -#ifdef CONFIG_IOMMU_SUPPORT - mm->pasid = INIT_PASID; -#endif -} - static void mm_init_uprobes_state(struct mm_struct *mm) { #ifdef CONFIG_UPROBES @@ -1054,7 +1120,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); - mm_init_pasid(mm); + mm_pasid_init(mm); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_subscriptions_init(mm); init_tlb_flush_pending(mm); @@ -1121,6 +1187,7 @@ static inline void __mmput(struct mm_struct *mm) } if (mm->binfmt) module_put(mm->binfmt->module); + mm_pasid_drop(mm); mmdrop(mm); } @@ -2451,6 +2518,7 @@ bad_fork_cleanup_count: exit_creds(p); bad_fork_free: WRITE_ONCE(p->__state, TASK_DEAD); + exit_task_stack_account(p); put_task_stack(p); delayed_free_task(p); fork_out: diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index c09324663088..54af0deb239b 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -38,7 +38,7 @@ struct irqaction chained_action = { * @irq: irq number * @chip: pointer to irq chip description structure */ -int irq_set_chip(unsigned int irq, struct irq_chip *chip) +int irq_set_chip(unsigned int irq, const struct irq_chip *chip) { unsigned long flags; struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); @@ -46,10 +46,7 @@ int irq_set_chip(unsigned int irq, struct irq_chip *chip) if (!desc) return -EINVAL; - if (!chip) - chip = &no_irq_chip; - - desc->irq_data.chip = chip; + desc->irq_data.chip = (struct irq_chip *)(chip ?: &no_irq_chip); irq_put_desc_unlock(desc, flags); /* * For !CONFIG_SPARSE_IRQ make the irq show up in @@ -1073,7 +1070,7 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, EXPORT_SYMBOL_GPL(irq_set_chained_handler_and_data); void -irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, +irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip, irq_flow_handler_t handle, const char *name) { irq_set_chip(irq, chip); @@ -1558,6 +1555,14 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return 0; } +static struct device *irq_get_parent_device(struct irq_data *data) +{ + if (data->domain) + return data->domain->dev; + + return NULL; +} + /** * irq_chip_pm_get - Enable power for an IRQ chip * @data: Pointer to interrupt specific data @@ -1567,12 +1572,13 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) */ int irq_chip_pm_get(struct irq_data *data) { + struct device *dev = irq_get_parent_device(data); int retval; - if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) { - retval = pm_runtime_get_sync(data->chip->parent_device); + if (IS_ENABLED(CONFIG_PM) && dev) { + retval = pm_runtime_get_sync(dev); if (retval < 0) { - pm_runtime_put_noidle(data->chip->parent_device); + pm_runtime_put_noidle(dev); return retval; } } @@ -1590,10 +1596,11 @@ int irq_chip_pm_get(struct irq_data *data) */ int irq_chip_pm_put(struct irq_data *data) { + struct device *dev = irq_get_parent_device(data); int retval = 0; - if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) - retval = pm_runtime_put(data->chip->parent_device); + if (IS_ENABLED(CONFIG_PM) && dev) + retval = pm_runtime_put(dev); return (retval < 0) ? retval : 0; } diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index e4cff358b437..2b43f5f5033d 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -69,8 +69,12 @@ irq_debug_show_chip(struct seq_file *m, struct irq_data *data, int ind) seq_printf(m, "chip: None\n"); return; } - seq_printf(m, "%*schip: %s\n", ind, "", chip->name); - seq_printf(m, "%*sflags: 0x%lx\n", ind + 1, "", chip->flags); + seq_printf(m, "%*schip: ", ind, ""); + if (chip->irq_print_chip) + chip->irq_print_chip(data, m); + else + seq_printf(m, "%s", chip->name); + seq_printf(m, "\n%*sflags: 0x%lx\n", ind + 1, "", chip->flags); irq_debug_show_bits(m, ind, chip->flags, irqchip_flags, ARRAY_SIZE(irqchip_flags)); } diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 2267e6527db3..939d21cd55c3 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -640,7 +640,7 @@ int handle_irq_desc(struct irq_desc *desc) return -EINVAL; data = irq_desc_get_irq_data(desc); - if (WARN_ON_ONCE(!in_irq() && handle_enforce_irqctx(data))) + if (WARN_ON_ONCE(!in_hardirq() && handle_enforce_irqctx(data))) return -EPERM; generic_handle_irq_desc(desc); @@ -662,6 +662,29 @@ int generic_handle_irq(unsigned int irq) } EXPORT_SYMBOL_GPL(generic_handle_irq); +/** + * generic_handle_irq_safe - Invoke the handler for a particular irq from any + * context. + * @irq: The irq number to handle + * + * Returns: 0 on success, a negative value on error. + * + * This function can be called from any context (IRQ or process context). It + * will report an error if not invoked from IRQ context and the irq has been + * marked to enforce IRQ-context only. + */ +int generic_handle_irq_safe(unsigned int irq) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = handle_irq_desc(irq_to_desc(irq)); + local_irq_restore(flags); + return ret; +} +EXPORT_SYMBOL_GPL(generic_handle_irq_safe); + #ifdef CONFIG_IRQ_DOMAIN /** * generic_handle_domain_irq - Invoke the handler for a HW irq belonging @@ -676,7 +699,7 @@ EXPORT_SYMBOL_GPL(generic_handle_irq); */ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq) { - WARN_ON_ONCE(!in_irq()); + WARN_ON_ONCE(!in_hardirq()); return handle_irq_desc(irq_resolve_mapping(domain, hwirq)); } EXPORT_SYMBOL_GPL(generic_handle_domain_irq); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index bf38c546aa25..d5ce96510549 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1319,7 +1319,8 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data); * @chip_data: The associated chip data */ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, + const struct irq_chip *chip, void *chip_data) { struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); @@ -1328,7 +1329,7 @@ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, return -ENOENT; irq_data->hwirq = hwirq; - irq_data->chip = chip ? chip : &no_irq_chip; + irq_data->chip = (struct irq_chip *)(chip ? chip : &no_irq_chip); irq_data->chip_data = chip_data; return 0; @@ -1347,7 +1348,7 @@ EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip); * @handler_name: The interrupt handler name */ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name) { @@ -1853,7 +1854,7 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data); * @handler_name: The interrupt handler name */ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name) { diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h index e373fbe44da5..431cee212467 100644 --- a/kernel/rcu/rcu_segcblist.h +++ b/kernel/rcu/rcu_segcblist.h @@ -56,13 +56,13 @@ static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp) static inline void rcu_segcblist_set_flags(struct rcu_segcblist *rsclp, int flags) { - rsclp->flags |= flags; + WRITE_ONCE(rsclp->flags, rsclp->flags | flags); } static inline void rcu_segcblist_clear_flags(struct rcu_segcblist *rsclp, int flags) { - rsclp->flags &= ~flags; + WRITE_ONCE(rsclp->flags, rsclp->flags & ~flags); } static inline bool rcu_segcblist_test_flags(struct rcu_segcblist *rsclp, diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 422f7e4cc08d..55d049c39608 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -284,7 +284,7 @@ static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); -static bool rcu_fwd_cb_nodelay; /* Short rcu_torture_delay() delays. */ +static atomic_t rcu_fwd_cb_nodelay; /* Short rcu_torture_delay() delays. */ /* * Allocate an element from the rcu_tortures pool. @@ -387,7 +387,7 @@ rcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp) * period, and we want a long delay occasionally to trigger * force_quiescent_state. */ - if (!READ_ONCE(rcu_fwd_cb_nodelay) && + if (!atomic_read(&rcu_fwd_cb_nodelay) && !(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) { started = cur_ops->get_gp_seq(); ts = rcu_trace_clock_local(); @@ -674,6 +674,7 @@ static struct rcu_torture_ops srcu_ops = { .call = srcu_torture_call, .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, + .cbflood_max = 50000, .irq_capable = 1, .no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU), .name = "srcu" @@ -708,6 +709,7 @@ static struct rcu_torture_ops srcud_ops = { .call = srcu_torture_call, .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, + .cbflood_max = 50000, .irq_capable = 1, .no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU), .name = "srcud" @@ -997,7 +999,7 @@ static int rcu_torture_boost(void *arg) goto checkwait; /* Wait for the next test interval. */ - oldstarttime = boost_starttime; + oldstarttime = READ_ONCE(boost_starttime); while (time_before(jiffies, oldstarttime)) { schedule_timeout_interruptible(oldstarttime - jiffies); if (stutter_wait("rcu_torture_boost")) @@ -1041,10 +1043,11 @@ static int rcu_torture_boost(void *arg) * interval. Besides, we are running at RT priority, * so delays should be relatively rare. */ - while (oldstarttime == boost_starttime && !kthread_should_stop()) { + while (oldstarttime == READ_ONCE(boost_starttime) && !kthread_should_stop()) { if (mutex_trylock(&boost_mutex)) { if (oldstarttime == boost_starttime) { - boost_starttime = jiffies + test_boost_interval * HZ; + WRITE_ONCE(boost_starttime, + jiffies + test_boost_interval * HZ); n_rcu_torture_boosts++; } mutex_unlock(&boost_mutex); @@ -1276,7 +1279,7 @@ rcu_torture_writer(void *arg) boot_ended = rcu_inkernel_boot_has_ended(); stutter_waited = stutter_wait("rcu_torture_writer"); if (stutter_waited && - !READ_ONCE(rcu_fwd_cb_nodelay) && + !atomic_read(&rcu_fwd_cb_nodelay) && !cur_ops->slow_gps && !torture_must_stop() && boot_ended) @@ -2180,7 +2183,6 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp) for (i = ARRAY_SIZE(rfp->n_launders_hist) - 1; i > 0; i--) if (rfp->n_launders_hist[i].n_launders > 0) break; - mutex_lock(&rcu_fwd_mutex); // Serialize histograms. pr_alert("%s: Callback-invocation histogram %d (duration %lu jiffies):", __func__, rfp->rcu_fwd_id, jiffies - rfp->rcu_fwd_startat); gps_old = rfp->rcu_launder_gp_seq_start; @@ -2193,7 +2195,6 @@ static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp) gps_old = gps; } pr_cont("\n"); - mutex_unlock(&rcu_fwd_mutex); } /* Callback function for continuous-flood RCU callbacks. */ @@ -2281,6 +2282,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp, unsigned long stopat; static DEFINE_TORTURE_RANDOM(trs); + pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id); if (!cur_ops->sync) return; // Cannot do need_resched() forward progress testing without ->sync. if (cur_ops->call && cur_ops->cb_barrier) { @@ -2289,7 +2291,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp, } /* Tight loop containing cond_resched(). */ - WRITE_ONCE(rcu_fwd_cb_nodelay, true); + atomic_inc(&rcu_fwd_cb_nodelay); cur_ops->sync(); /* Later readers see above write. */ if (selfpropcb) { WRITE_ONCE(fcs.stop, 0); @@ -2325,6 +2327,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp, if (selfpropcb) { WRITE_ONCE(fcs.stop, 1); cur_ops->sync(); /* Wait for running CB to complete. */ + pr_alert("%s: Waiting for CBs: %pS() %d\n", __func__, cur_ops->cb_barrier, rfp->rcu_fwd_id); cur_ops->cb_barrier(); /* Wait for queued callbacks. */ } @@ -2333,7 +2336,7 @@ static void rcu_torture_fwd_prog_nr(struct rcu_fwd *rfp, destroy_rcu_head_on_stack(&fcs.rh); } schedule_timeout_uninterruptible(HZ / 10); /* Let kthreads recover. */ - WRITE_ONCE(rcu_fwd_cb_nodelay, false); + atomic_dec(&rcu_fwd_cb_nodelay); } /* Carry out call_rcu() forward-progress testing. */ @@ -2353,13 +2356,14 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp) unsigned long stopat; unsigned long stoppedat; + pr_alert("%s: Starting forward-progress test %d\n", __func__, rfp->rcu_fwd_id); if (READ_ONCE(rcu_fwd_emergency_stop)) return; /* Get out of the way quickly, no GP wait! */ if (!cur_ops->call) return; /* Can't do call_rcu() fwd prog without ->call. */ /* Loop continuously posting RCU callbacks. */ - WRITE_ONCE(rcu_fwd_cb_nodelay, true); + atomic_inc(&rcu_fwd_cb_nodelay); cur_ops->sync(); /* Later readers see above write. */ WRITE_ONCE(rfp->rcu_fwd_startat, jiffies); stopat = rfp->rcu_fwd_startat + MAX_FWD_CB_JIFFIES; @@ -2414,6 +2418,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp) n_launders_cb_snap = READ_ONCE(rfp->n_launders_cb); cver = READ_ONCE(rcu_torture_current_version) - cver; gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps); + pr_alert("%s: Waiting for CBs: %pS() %d\n", __func__, cur_ops->cb_barrier, rfp->rcu_fwd_id); cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. */ (void)rcu_torture_fwd_prog_cbfree(rfp); @@ -2427,11 +2432,13 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp) n_launders, n_launders_sa, n_max_gps, n_max_cbs, cver, gps); atomic_long_add(n_max_cbs, &rcu_fwd_max_cbs); + mutex_lock(&rcu_fwd_mutex); // Serialize histograms. rcu_torture_fwd_cb_hist(rfp); + mutex_unlock(&rcu_fwd_mutex); } schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */ tick_dep_clear_task(current, TICK_DEP_BIT_RCU); - WRITE_ONCE(rcu_fwd_cb_nodelay, false); + atomic_dec(&rcu_fwd_cb_nodelay); } @@ -2511,7 +2518,7 @@ static int rcu_torture_fwd_prog(void *args) firsttime = false; WRITE_ONCE(rcu_fwd_seq, rcu_fwd_seq + 1); } else { - while (READ_ONCE(rcu_fwd_seq) == oldseq) + while (READ_ONCE(rcu_fwd_seq) == oldseq && !torture_must_stop()) schedule_timeout_interruptible(1); oldseq = READ_ONCE(rcu_fwd_seq); } @@ -2905,8 +2912,10 @@ rcu_torture_cleanup(void) int i; if (torture_cleanup_begin()) { - if (cur_ops->cb_barrier != NULL) + if (cur_ops->cb_barrier != NULL) { + pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier); cur_ops->cb_barrier(); + } return; } if (!cur_ops) { @@ -2961,8 +2970,10 @@ rcu_torture_cleanup(void) * Wait for all RCU callbacks to fire, then do torture-type-specific * cleanup operations. */ - if (cur_ops->cb_barrier != NULL) + if (cur_ops->cb_barrier != NULL) { + pr_info("%s: Invoking %pS().\n", __func__, cur_ops->cb_barrier); cur_ops->cb_barrier(); + } if (cur_ops->cleanup != NULL) cur_ops->cleanup(); diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index d64f0b1d8cd3..ac17348187e4 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -123,7 +123,7 @@ static struct rcu_tasks rt_name = \ .call_func = call, \ .rtpcpu = &rt_name ## __percpu, \ .name = n, \ - .percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS) + 1, \ + .percpu_enqueue_shift = order_base_2(CONFIG_NR_CPUS), \ .percpu_enqueue_lim = 1, \ .percpu_dequeue_lim = 1, \ .barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex), \ @@ -302,7 +302,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, if (unlikely(needadjust)) { raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); if (rtp->percpu_enqueue_lim != nr_cpu_ids) { - WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1); + WRITE_ONCE(rtp->percpu_enqueue_shift, 0); WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids); smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids); pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name); @@ -417,7 +417,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp) if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) { raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); if (rtp->percpu_enqueue_lim > 1) { - WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1); + WRITE_ONCE(rtp->percpu_enqueue_shift, order_base_2(nr_cpu_ids)); smp_store_release(&rtp->percpu_enqueue_lim, 1); rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu(); pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a4c25a6283b0..a4b8189455d5 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -87,11 +87,12 @@ static struct rcu_state rcu_state = { .gp_state = RCU_GP_IDLE, .gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT, .barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex), + .barrier_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.barrier_lock), .name = RCU_NAME, .abbr = RCU_ABBR, .exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex), .exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex), - .ofl_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock), + .ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED, }; /* Dump rcu_node combining tree at boot to verify correct setup. */ @@ -153,7 +154,7 @@ static void sync_sched_exp_online_cleanup(int cpu); static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp); static bool rcu_rdp_is_offloaded(struct rcu_data *rdp); -/* rcuc/rcub kthread realtime priority */ +/* rcuc/rcub/rcuop kthread realtime priority */ static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0; module_param(kthread_prio, int, 0444); @@ -222,6 +223,16 @@ static unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp) } /* + * Is the CPU corresponding to the specified rcu_data structure online + * from RCU's perspective? This perspective is given by that structure's + * ->qsmaskinitnext field rather than by the global cpu_online_mask. + */ +static bool rcu_rdp_cpu_online(struct rcu_data *rdp) +{ + return !!(rdp->grpmask & rcu_rnp_online_cpus(rdp->mynode)); +} + +/* * Return true if an RCU grace period is in progress. The READ_ONCE()s * permit this function to be invoked without holding the root rcu_node * structure's ->lock, but of course results can be subject to change. @@ -1086,9 +1097,8 @@ void rcu_irq_enter_irqson(void) * Just check whether or not this CPU has non-offloaded RCU callbacks * queued. */ -int rcu_needs_cpu(u64 basemono, u64 *nextevt) +int rcu_needs_cpu(void) { - *nextevt = KTIME_MAX; return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) && !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data)); } @@ -1167,15 +1177,20 @@ void rcu_request_urgent_qs_task(struct task_struct *t) bool rcu_lockdep_current_cpu_online(void) { struct rcu_data *rdp; - struct rcu_node *rnp; bool ret = false; if (in_nmi() || !rcu_scheduler_fully_active) return true; preempt_disable_notrace(); rdp = this_cpu_ptr(&rcu_data); - rnp = rdp->mynode; - if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1) + /* + * Strictly, we care here about the case where the current CPU is + * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask + * not being up to date. So arch_spin_is_locked() might have a + * false positive if it's held by some *other* CPU, but that's + * OK because that just means a false *negative* on the warning. + */ + if (rcu_rdp_cpu_online(rdp) || arch_spin_is_locked(&rcu_state.ofl_lock)) ret = true; preempt_enable_notrace(); return ret; @@ -1260,8 +1275,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) * For more detail, please refer to the "Hotplug CPU" section * of RCU's Requirements documentation. */ - if (WARN_ON_ONCE(!(rdp->grpmask & rcu_rnp_online_cpus(rnp)))) { - bool onl; + if (WARN_ON_ONCE(!rcu_rdp_cpu_online(rdp))) { struct rcu_node *rnp1; pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n", @@ -1270,9 +1284,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent) pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx ->rcu_gp_init_mask %#lx\n", __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rcu_gp_init_mask); - onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp)); pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n", - __func__, rdp->cpu, ".o"[onl], + __func__, rdp->cpu, ".o"[rcu_rdp_cpu_online(rdp)], (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags, (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags); return 1; /* Break things loose after complaining. */ @@ -1739,7 +1752,6 @@ static void rcu_strict_gp_boundary(void *unused) */ static noinline_for_stack bool rcu_gp_init(void) { - unsigned long firstseq; unsigned long flags; unsigned long oldmask; unsigned long mask; @@ -1782,22 +1794,17 @@ static noinline_for_stack bool rcu_gp_init(void) * of RCU's Requirements documentation. */ WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF); + /* Exclude CPU hotplug operations. */ rcu_for_each_leaf_node(rnp) { - // Wait for CPU-hotplug operations that might have - // started before this grace period did. - smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values. - firstseq = READ_ONCE(rnp->ofl_seq); - if (firstseq & 0x1) - while (firstseq == READ_ONCE(rnp->ofl_seq)) - schedule_timeout_idle(1); // Can't wake unless RCU is watching. - smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values. - raw_spin_lock(&rcu_state.ofl_lock); - raw_spin_lock_irq_rcu_node(rnp); + local_irq_save(flags); + arch_spin_lock(&rcu_state.ofl_lock); + raw_spin_lock_rcu_node(rnp); if (rnp->qsmaskinit == rnp->qsmaskinitnext && !rnp->wait_blkd_tasks) { /* Nothing to do on this leaf rcu_node structure. */ - raw_spin_unlock_irq_rcu_node(rnp); - raw_spin_unlock(&rcu_state.ofl_lock); + raw_spin_unlock_rcu_node(rnp); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(flags); continue; } @@ -1832,8 +1839,9 @@ static noinline_for_stack bool rcu_gp_init(void) rcu_cleanup_dead_rnp(rnp); } - raw_spin_unlock_irq_rcu_node(rnp); - raw_spin_unlock(&rcu_state.ofl_lock); + raw_spin_unlock_rcu_node(rnp); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(flags); } rcu_gp_slow(gp_preinit_delay); /* Races with CPU hotplug. */ @@ -2850,10 +2858,12 @@ static void rcu_cpu_kthread(unsigned int cpu) { unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status); char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work); + unsigned long *j = this_cpu_ptr(&rcu_data.rcuc_activity); int spincnt; trace_rcu_utilization(TPS("Start CPU kthread@rcu_run")); for (spincnt = 0; spincnt < 10; spincnt++) { + WRITE_ONCE(*j, jiffies); local_bh_disable(); *statusp = RCU_KTHREAD_RUNNING; local_irq_disable(); @@ -2874,6 +2884,7 @@ static void rcu_cpu_kthread(unsigned int cpu) schedule_timeout_idle(2); trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); *statusp = RCU_KTHREAD_WAITING; + WRITE_ONCE(*j, jiffies); } static struct smp_hotplug_thread rcu_cpu_thread_spec = { @@ -2894,7 +2905,7 @@ static int __init rcu_spawn_core_kthreads(void) for_each_possible_cpu(cpu) per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0; - if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq) + if (use_softirq) return 0; WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__); @@ -2995,9 +3006,47 @@ static void check_cb_ovld(struct rcu_data *rdp) raw_spin_unlock_rcu_node(rnp); } -/* Helper function for call_rcu() and friends. */ -static void -__call_rcu(struct rcu_head *head, rcu_callback_t func) +/** + * call_rcu() - Queue an RCU callback for invocation after a grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. However, the callback function + * might well execute concurrently with RCU read-side critical sections + * that started after call_rcu() was invoked. + * + * RCU read-side critical sections are delimited by rcu_read_lock() + * and rcu_read_unlock(), and may be nested. In addition, but only in + * v5.0 and later, regions of code across which interrupts, preemption, + * or softirqs have been disabled also serve as RCU read-side critical + * sections. This includes hardware interrupt handlers, softirq handlers, + * and NMI handlers. + * + * Note that all CPUs must agree that the grace period extended beyond + * all pre-existing RCU read-side critical section. On systems with more + * than one CPU, this means that when "func()" is invoked, each CPU is + * guaranteed to have executed a full memory barrier since the end of its + * last RCU read-side critical section whose beginning preceded the call + * to call_rcu(). It also means that each CPU executing an RCU read-side + * critical section that continues beyond the start of "func()" must have + * executed a memory barrier after the call_rcu() but before the beginning + * of that RCU read-side critical section. Note that these guarantees + * include CPUs that are offline, idle, or executing in user mode, as + * well as CPUs that are executing in the kernel. + * + * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the + * resulting RCU callback function "func()", then both CPU A and CPU B are + * guaranteed to execute a full memory barrier during the time interval + * between the call to call_rcu() and the invocation of "func()" -- even + * if CPU A and CPU B are the same CPU (but again only if the system has + * more than one CPU). + * + * Implementation of these memory-ordering guarantees is described here: + * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst. + */ +void call_rcu(struct rcu_head *head, rcu_callback_t func) { static atomic_t doublefrees; unsigned long flags; @@ -3011,7 +3060,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func) /* * Probable double call_rcu(), so leak the callback. * Use rcu:rcu_callback trace event to find the previous - * time callback was passed to __call_rcu(). + * time callback was passed to call_rcu(). */ if (atomic_inc_return(&doublefrees) < 4) { pr_err("%s(): Double-freed CB %p->%pS()!!! ", __func__, head, head->func); @@ -3022,8 +3071,8 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func) } head->func = func; head->next = NULL; - local_irq_save(flags); kasan_record_aux_stack_noalloc(head); + local_irq_save(flags); rdp = this_cpu_ptr(&rcu_data); /* Add the callback to our list. */ @@ -3060,51 +3109,6 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func) local_irq_restore(flags); } } - -/** - * call_rcu() - Queue an RCU callback for invocation after a grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all pre-existing RCU read-side - * critical sections have completed. However, the callback function - * might well execute concurrently with RCU read-side critical sections - * that started after call_rcu() was invoked. - * - * RCU read-side critical sections are delimited by rcu_read_lock() - * and rcu_read_unlock(), and may be nested. In addition, but only in - * v5.0 and later, regions of code across which interrupts, preemption, - * or softirqs have been disabled also serve as RCU read-side critical - * sections. This includes hardware interrupt handlers, softirq handlers, - * and NMI handlers. - * - * Note that all CPUs must agree that the grace period extended beyond - * all pre-existing RCU read-side critical section. On systems with more - * than one CPU, this means that when "func()" is invoked, each CPU is - * guaranteed to have executed a full memory barrier since the end of its - * last RCU read-side critical section whose beginning preceded the call - * to call_rcu(). It also means that each CPU executing an RCU read-side - * critical section that continues beyond the start of "func()" must have - * executed a memory barrier after the call_rcu() but before the beginning - * of that RCU read-side critical section. Note that these guarantees - * include CPUs that are offline, idle, or executing in user mode, as - * well as CPUs that are executing in the kernel. - * - * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the - * resulting RCU callback function "func()", then both CPU A and CPU B are - * guaranteed to execute a full memory barrier during the time interval - * between the call to call_rcu() and the invocation of "func()" -- even - * if CPU A and CPU B are the same CPU (but again only if the system has - * more than one CPU). - * - * Implementation of these memory-ordering guarantees is described here: - * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst. - */ -void call_rcu(struct rcu_head *head, rcu_callback_t func) -{ - __call_rcu(head, func); -} EXPORT_SYMBOL_GPL(call_rcu); @@ -3984,13 +3988,16 @@ static void rcu_barrier_callback(struct rcu_head *rhp) } /* - * Called with preemption disabled, and from cross-cpu IRQ context. + * If needed, entrain an rcu_barrier() callback on rdp->cblist. */ -static void rcu_barrier_func(void *cpu_in) +static void rcu_barrier_entrain(struct rcu_data *rdp) { - uintptr_t cpu = (uintptr_t)cpu_in; - struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence); + unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap); + lockdep_assert_held(&rcu_state.barrier_lock); + if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq)) + return; rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence); rdp->barrier_head.func = rcu_barrier_callback; debug_rcu_head_queue(&rdp->barrier_head); @@ -4000,10 +4007,26 @@ static void rcu_barrier_func(void *cpu_in) atomic_inc(&rcu_state.barrier_cpu_count); } else { debug_rcu_head_unqueue(&rdp->barrier_head); - rcu_barrier_trace(TPS("IRQNQ"), -1, - rcu_state.barrier_sequence); + rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence); } rcu_nocb_unlock(rdp); + smp_store_release(&rdp->barrier_seq_snap, gseq); +} + +/* + * Called with preemption disabled, and from cross-cpu IRQ context. + */ +static void rcu_barrier_handler(void *cpu_in) +{ + uintptr_t cpu = (uintptr_t)cpu_in; + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + + lockdep_assert_irqs_disabled(); + WARN_ON_ONCE(cpu != rdp->cpu); + WARN_ON_ONCE(cpu != smp_processor_id()); + raw_spin_lock(&rcu_state.barrier_lock); + rcu_barrier_entrain(rdp); + raw_spin_unlock(&rcu_state.barrier_lock); } /** @@ -4017,6 +4040,8 @@ static void rcu_barrier_func(void *cpu_in) void rcu_barrier(void) { uintptr_t cpu; + unsigned long flags; + unsigned long gseq; struct rcu_data *rdp; unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence); @@ -4027,15 +4052,16 @@ void rcu_barrier(void) /* Did someone else do our work for us? */ if (rcu_seq_done(&rcu_state.barrier_sequence, s)) { - rcu_barrier_trace(TPS("EarlyExit"), -1, - rcu_state.barrier_sequence); + rcu_barrier_trace(TPS("EarlyExit"), -1, rcu_state.barrier_sequence); smp_mb(); /* caller's subsequent code after above check. */ mutex_unlock(&rcu_state.barrier_mutex); return; } /* Mark the start of the barrier operation. */ + raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags); rcu_seq_start(&rcu_state.barrier_sequence); + gseq = rcu_state.barrier_sequence; rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence); /* @@ -4047,7 +4073,7 @@ void rcu_barrier(void) */ init_completion(&rcu_state.barrier_completion); atomic_set(&rcu_state.barrier_cpu_count, 2); - cpus_read_lock(); + raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags); /* * Force each CPU with callbacks to register a new callback. @@ -4056,29 +4082,31 @@ void rcu_barrier(void) */ for_each_possible_cpu(cpu) { rdp = per_cpu_ptr(&rcu_data, cpu); - if (cpu_is_offline(cpu) && - !rcu_rdp_is_offloaded(rdp)) +retry: + if (smp_load_acquire(&rdp->barrier_seq_snap) == gseq) continue; - if (rcu_segcblist_n_cbs(&rdp->cblist) && cpu_online(cpu)) { - rcu_barrier_trace(TPS("OnlineQ"), cpu, - rcu_state.barrier_sequence); - smp_call_function_single(cpu, rcu_barrier_func, (void *)cpu, 1); - } else if (rcu_segcblist_n_cbs(&rdp->cblist) && - cpu_is_offline(cpu)) { - rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, - rcu_state.barrier_sequence); - local_irq_disable(); - rcu_barrier_func((void *)cpu); - local_irq_enable(); - } else if (cpu_is_offline(cpu)) { - rcu_barrier_trace(TPS("OfflineNoCBNoQ"), cpu, - rcu_state.barrier_sequence); - } else { - rcu_barrier_trace(TPS("OnlineNQ"), cpu, - rcu_state.barrier_sequence); + raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags); + if (!rcu_segcblist_n_cbs(&rdp->cblist)) { + WRITE_ONCE(rdp->barrier_seq_snap, gseq); + raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags); + rcu_barrier_trace(TPS("NQ"), cpu, rcu_state.barrier_sequence); + continue; + } + if (!rcu_rdp_cpu_online(rdp)) { + rcu_barrier_entrain(rdp); + WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq); + raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags); + rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, rcu_state.barrier_sequence); + continue; + } + raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags); + if (smp_call_function_single(cpu, rcu_barrier_handler, (void *)cpu, 1)) { + schedule_timeout_uninterruptible(1); + goto retry; } + WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq); + rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence); } - cpus_read_unlock(); /* * Now that we have an rcu_barrier_callback() callback on each @@ -4093,6 +4121,12 @@ void rcu_barrier(void) /* Mark the end of the barrier operation. */ rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence); rcu_seq_end(&rcu_state.barrier_sequence); + gseq = rcu_state.barrier_sequence; + for_each_possible_cpu(cpu) { + rdp = per_cpu_ptr(&rcu_data, cpu); + + WRITE_ONCE(rdp->barrier_seq_snap, gseq); + } /* Other rcu_barrier() invocations can now safely proceed. */ mutex_unlock(&rcu_state.barrier_mutex); @@ -4140,6 +4174,7 @@ rcu_boot_init_percpu_data(int cpu) INIT_WORK(&rdp->strict_work, strict_work_handler); WARN_ON_ONCE(rdp->dynticks_nesting != 1); WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp))); + rdp->barrier_seq_snap = rcu_state.barrier_sequence; rdp->rcu_ofl_gp_seq = rcu_state.gp_seq; rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED; rdp->rcu_onl_gp_seq = rcu_state.gp_seq; @@ -4287,12 +4322,13 @@ void rcu_cpu_starting(unsigned int cpu) rnp = rdp->mynode; mask = rdp->grpmask; - WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(!(rnp->ofl_seq & 0x1)); + local_irq_save(flags); + arch_spin_lock(&rcu_state.ofl_lock); rcu_dynticks_eqs_online(); - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). - raw_spin_lock_irqsave_rcu_node(rnp, flags); + raw_spin_lock(&rcu_state.barrier_lock); + raw_spin_lock_rcu_node(rnp); WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask); + raw_spin_unlock(&rcu_state.barrier_lock); newcpu = !(rnp->expmaskinitnext & mask); rnp->expmaskinitnext |= mask; /* Allow lockless access for expedited grace periods. */ @@ -4304,15 +4340,18 @@ void rcu_cpu_starting(unsigned int cpu) /* An incoming CPU should never be blocking a grace period. */ if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */ + /* rcu_report_qs_rnp() *really* wants some flags to restore */ + unsigned long flags2; + + local_irq_save(flags2); rcu_disable_urgency_upon_qs(rdp); /* Report QS -after- changing ->qsmaskinitnext! */ - rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); + rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags2); } else { - raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + raw_spin_unlock_rcu_node(rnp); } - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). - WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(rnp->ofl_seq & 0x1); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(flags); smp_mb(); /* Ensure RCU read-side usage follows above initialization. */ } @@ -4326,7 +4365,7 @@ void rcu_cpu_starting(unsigned int cpu) */ void rcu_report_dead(unsigned int cpu) { - unsigned long flags; + unsigned long flags, seq_flags; unsigned long mask; struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ @@ -4340,10 +4379,8 @@ void rcu_report_dead(unsigned int cpu) /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ mask = rdp->grpmask; - WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(!(rnp->ofl_seq & 0x1)); - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). - raw_spin_lock(&rcu_state.ofl_lock); + local_irq_save(seq_flags); + arch_spin_lock(&rcu_state.ofl_lock); raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */ rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq); rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags); @@ -4354,10 +4391,8 @@ void rcu_report_dead(unsigned int cpu) } WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - raw_spin_unlock(&rcu_state.ofl_lock); - smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier(). - WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1); - WARN_ON_ONCE(rnp->ofl_seq & 0x1); + arch_spin_unlock(&rcu_state.ofl_lock); + local_irq_restore(seq_flags); rdp->cpu_started = false; } @@ -4380,7 +4415,9 @@ void rcutree_migrate_callbacks(int cpu) rcu_segcblist_empty(&rdp->cblist)) return; /* No callbacks to migrate. */ - local_irq_save(flags); + raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags); + WARN_ON_ONCE(rcu_rdp_cpu_online(rdp)); + rcu_barrier_entrain(rdp); my_rdp = this_cpu_ptr(&rcu_data); my_rnp = my_rdp->mynode; rcu_nocb_lock(my_rdp); /* irqs already disabled. */ @@ -4390,10 +4427,10 @@ void rcutree_migrate_callbacks(int cpu) needwake = rcu_advance_cbs(my_rnp, rdp) || rcu_advance_cbs(my_rnp, my_rdp); rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist); + raw_spin_unlock(&rcu_state.barrier_lock); /* irqs remain disabled. */ needwake = needwake || rcu_advance_cbs(my_rnp, my_rdp); rcu_segcblist_disable(&rdp->cblist); - WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != - !rcu_segcblist_n_cbs(&my_rdp->cblist)); + WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != !rcu_segcblist_n_cbs(&my_rdp->cblist)); if (rcu_rdp_is_offloaded(my_rdp)) { raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */ __call_rcu_nocb_wake(my_rdp, true, flags); @@ -4440,26 +4477,10 @@ static int rcu_pm_notify(struct notifier_block *self, static int __init rcu_spawn_gp_kthread(void) { unsigned long flags; - int kthread_prio_in = kthread_prio; struct rcu_node *rnp; struct sched_param sp; struct task_struct *t; - /* Force priority into range. */ - if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2 - && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) - kthread_prio = 2; - else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1) - kthread_prio = 1; - else if (kthread_prio < 0) - kthread_prio = 0; - else if (kthread_prio > 99) - kthread_prio = 99; - - if (kthread_prio != kthread_prio_in) - pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n", - kthread_prio, kthread_prio_in); - rcu_scheduler_fully_active = 1; t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name); if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n", __func__)) @@ -4570,6 +4591,7 @@ static void __init rcu_init_one(void) init_waitqueue_head(&rnp->exp_wq[2]); init_waitqueue_head(&rnp->exp_wq[3]); spin_lock_init(&rnp->exp_lock); + mutex_init(&rnp->boost_kthread_mutex); } } @@ -4585,6 +4607,28 @@ static void __init rcu_init_one(void) } /* + * Force priority from the kernel command-line into range. + */ +static void __init sanitize_kthread_prio(void) +{ + int kthread_prio_in = kthread_prio; + + if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2 + && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) + kthread_prio = 2; + else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1) + kthread_prio = 1; + else if (kthread_prio < 0) + kthread_prio = 0; + else if (kthread_prio > 99) + kthread_prio = 99; + + if (kthread_prio != kthread_prio_in) + pr_alert("%s: Limited prio to %d from %d\n", + __func__, kthread_prio, kthread_prio_in); +} + +/* * Compute the rcu_node tree geometry from kernel parameters. This cannot * replace the definitions in tree.h because those are needed to size * the ->node array in the rcu_state structure. @@ -4744,6 +4788,7 @@ void __init rcu_init(void) kfree_rcu_batch_init(); rcu_bootup_announce(); + sanitize_kthread_prio(); rcu_init_geometry(); rcu_init_one(); if (dump_tree) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 486fc901bd08..926673ebe355 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -56,8 +56,6 @@ struct rcu_node { /* Initialized from ->qsmaskinitnext at the */ /* beginning of each grace period. */ unsigned long qsmaskinitnext; - unsigned long ofl_seq; /* CPU-hotplug operation sequence count. */ - /* Online CPUs for next grace period. */ unsigned long expmask; /* CPUs or groups that need to check in */ /* to allow the current expedited GP */ /* to complete. */ @@ -110,6 +108,9 @@ struct rcu_node { /* side effect, not as a lock. */ unsigned long boost_time; /* When to start boosting (jiffies). */ + struct mutex boost_kthread_mutex; + /* Exclusion for thread spawning and affinity */ + /* manipulation. */ struct task_struct *boost_kthread_task; /* kthread that takes care of priority */ /* boosting for this rcu_node structure. */ @@ -190,6 +191,7 @@ struct rcu_data { bool rcu_forced_tick_exp; /* ... provide QS to expedited GP. */ /* 4) rcu_barrier(), OOM callbacks, and expediting. */ + unsigned long barrier_seq_snap; /* Snap of rcu_state.barrier_sequence. */ struct rcu_head barrier_head; int exp_dynticks_snap; /* Double-check need for IPI. */ @@ -203,6 +205,8 @@ struct rcu_data { int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ struct timer_list nocb_timer; /* Enforce finite deferral. */ unsigned long nocb_gp_adv_time; /* Last call_rcu() CB adv (jiffies). */ + struct mutex nocb_gp_kthread_mutex; /* Exclusion for nocb gp kthread */ + /* spawning */ /* The following fields are used by call_rcu, hence own cacheline. */ raw_spinlock_t nocb_bypass_lock ____cacheline_internodealigned_in_smp; @@ -237,6 +241,7 @@ struct rcu_data { /* rcuc per-CPU kthread or NULL. */ unsigned int rcu_cpu_kthread_status; char rcu_cpu_has_work; + unsigned long rcuc_activity; /* 7) Diagnostic data, including RCU CPU stall warnings. */ unsigned int softirq_snap; /* Snapshot of softirq activity. */ @@ -302,9 +307,8 @@ struct rcu_state { /* The following fields are guarded by the root rcu_node's lock. */ - u8 boost ____cacheline_internodealigned_in_smp; - /* Subject to priority boost. */ - unsigned long gp_seq; /* Grace-period sequence #. */ + unsigned long gp_seq ____cacheline_internodealigned_in_smp; + /* Grace-period sequence #. */ unsigned long gp_max; /* Maximum GP duration in */ /* jiffies. */ struct task_struct *gp_kthread; /* Task for grace periods. */ @@ -323,6 +327,8 @@ struct rcu_state { /* rcu_barrier(). */ /* End of fields guarded by barrier_mutex. */ + raw_spinlock_t barrier_lock; /* Protects ->barrier_seq_snap. */ + struct mutex exp_mutex; /* Serialize expedited GP. */ struct mutex exp_wake_mutex; /* Serialize wakeup. */ unsigned long expedited_sequence; /* Take a ticket. */ @@ -355,7 +361,7 @@ struct rcu_state { const char *name; /* Name of structure. */ char abbr; /* Abbreviated name. */ - raw_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp; + arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp; /* Synchronize offline with */ /* GP pre-initialization. */ }; diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 237a79989aba..60197ea24ceb 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -502,7 +502,8 @@ static void synchronize_rcu_expedited_wait(void) if (synchronize_rcu_expedited_wait_once(1)) return; rcu_for_each_leaf_node(rnp) { - for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { + mask = READ_ONCE(rnp->expmask); + for_each_leaf_node_cpu_mask(rnp, cpu, mask) { rdp = per_cpu_ptr(&rcu_data, cpu); if (rdp->rcu_forced_tick_exp) continue; @@ -656,7 +657,7 @@ static void rcu_exp_handler(void *unused) */ if (!depth) { if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) || - rcu_dynticks_curr_cpu_in_eqs()) { + rcu_is_cpu_rrupt_from_idle()) { rcu_report_exp_rdp(rdp); } else { WRITE_ONCE(rdp->cpu_no_qs.b.exp, true); diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index eeafb546a7a0..636d0546a4e9 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1169,7 +1169,7 @@ void __init rcu_init_nohz(void) struct rcu_data *rdp; #if defined(CONFIG_NO_HZ_FULL) - if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask)) + if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) need_rcu_nocb_mask = true; #endif /* #if defined(CONFIG_NO_HZ_FULL) */ @@ -1226,6 +1226,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) raw_spin_lock_init(&rdp->nocb_gp_lock); timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0); rcu_cblist_init(&rdp->nocb_bypass); + mutex_init(&rdp->nocb_gp_kthread_mutex); } /* @@ -1238,6 +1239,7 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu) struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); struct rcu_data *rdp_gp; struct task_struct *t; + struct sched_param sp; if (!rcu_scheduler_fully_active || !rcu_nocb_is_setup) return; @@ -1247,20 +1249,30 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu) return; /* If we didn't spawn the GP kthread first, reorganize! */ + sp.sched_priority = kthread_prio; rdp_gp = rdp->nocb_gp_rdp; + mutex_lock(&rdp_gp->nocb_gp_kthread_mutex); if (!rdp_gp->nocb_gp_kthread) { t = kthread_run(rcu_nocb_gp_kthread, rdp_gp, "rcuog/%d", rdp_gp->cpu); - if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__)) + if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__)) { + mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex); return; + } WRITE_ONCE(rdp_gp->nocb_gp_kthread, t); + if (kthread_prio) + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); } + mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex); /* Spawn the kthread for this CPU. */ t = kthread_run(rcu_nocb_cb_kthread, rdp, "rcuo%c/%d", rcu_state.abbr, cpu); if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__)) return; + + if (kthread_prio) + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); WRITE_ONCE(rdp->nocb_cb_kthread, t); WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread); } @@ -1348,7 +1360,7 @@ static void __init rcu_organize_nocb_kthreads(void) */ void rcu_bind_current_to_nocb(void) { - if (cpumask_available(rcu_nocb_mask) && cpumask_weight(rcu_nocb_mask)) + if (cpumask_available(rcu_nocb_mask) && !cpumask_empty(rcu_nocb_mask)) WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask)); } EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c5b45c2f68a1..6082dd23408f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -330,7 +330,7 @@ void rcu_note_context_switch(bool preempt) * then queue the task as required based on the states * of any ongoing and expedited grace periods. */ - WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0); + WARN_ON_ONCE(!rcu_rdp_cpu_online(rdp)); WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); trace_rcu_preempt_task(rcu_state.name, t->pid, @@ -556,16 +556,16 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } - /* Unboost if we were boosted. */ - if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) - rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex); - /* * If this was the last task on the expedited lists, * then we need to report up the rcu_node hierarchy. */ if (!empty_exp && empty_exp_now) rcu_report_exp_rnp(rnp, true); + + /* Unboost if we were boosted. */ + if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) + rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex); } else { local_irq_restore(flags); } @@ -773,7 +773,6 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) int cpu; int i; struct list_head *lhp; - bool onl; struct rcu_data *rdp; struct rcu_node *rnp1; @@ -797,9 +796,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) pr_cont("\n"); for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) { rdp = per_cpu_ptr(&rcu_data, cpu); - onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp)); pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n", - cpu, ".o"[onl], + cpu, ".o"[rcu_rdp_cpu_online(rdp)], (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags, (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags); } @@ -996,12 +994,15 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) */ static void rcu_cpu_kthread_setup(unsigned int cpu) { + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); #ifdef CONFIG_RCU_BOOST struct sched_param sp; sp.sched_priority = kthread_prio; sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); #endif /* #ifdef CONFIG_RCU_BOOST */ + + WRITE_ONCE(rdp->rcuc_activity, jiffies); } #ifdef CONFIG_RCU_BOOST @@ -1172,15 +1173,14 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) struct sched_param sp; struct task_struct *t; + mutex_lock(&rnp->boost_kthread_mutex); if (rnp->boost_kthread_task || !rcu_scheduler_fully_active) - return; - - rcu_state.boost = 1; + goto out; t = kthread_create(rcu_boost_kthread, (void *)rnp, "rcub/%d", rnp_index); if (WARN_ON_ONCE(IS_ERR(t))) - return; + goto out; raw_spin_lock_irqsave_rcu_node(rnp, flags); rnp->boost_kthread_task = t; @@ -1188,6 +1188,9 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) sp.sched_priority = kthread_prio; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ + + out: + mutex_unlock(&rnp->boost_kthread_mutex); } /* @@ -1210,14 +1213,16 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) return; if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) return; + mutex_lock(&rnp->boost_kthread_mutex); for_each_leaf_node_possible_cpu(rnp, cpu) if ((mask & leaf_node_cpu_bit(rnp, cpu)) && cpu != outgoingcpu) cpumask_set_cpu(cpu, cm); cpumask_and(cm, cm, housekeeping_cpumask(HK_FLAG_RCU)); - if (cpumask_weight(cm) == 0) + if (cpumask_empty(cm)) cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU)); set_cpus_allowed_ptr(t, cm); + mutex_unlock(&rnp->boost_kthread_mutex); free_cpumask_var(cm); } diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 21bebf7c9030..0c5d8516516a 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -379,6 +379,15 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp) return j > 2 * HZ; } +static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp) +{ + unsigned long j = jiffies - READ_ONCE(rdp->rcuc_activity); + + if (jp) + *jp = j; + return j > 2 * HZ; +} + /* * Print out diagnostic information for the specified stalled CPU. * @@ -430,6 +439,29 @@ static void print_cpu_stall_info(int cpu) falsepositive ? " (false positive?)" : ""); } +static void rcuc_kthread_dump(struct rcu_data *rdp) +{ + int cpu; + unsigned long j; + struct task_struct *rcuc; + + rcuc = rdp->rcu_cpu_kthread_task; + if (!rcuc) + return; + + cpu = task_cpu(rcuc); + if (cpu_is_offline(cpu) || idle_cpu(cpu)) + return; + + if (!rcu_is_rcuc_kthread_starving(rdp, &j)) + return; + + pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j); + sched_show_task(rcuc); + if (!trigger_single_cpu_backtrace(cpu)) + dump_cpu_task(cpu); +} + /* Complain about starvation of grace-period kthread. */ static void rcu_check_gp_kthread_starvation(void) { @@ -601,6 +633,9 @@ static void print_cpu_stall(unsigned long gps) rcu_check_gp_kthread_expired_fqs_timer(); rcu_check_gp_kthread_starvation(); + if (!use_softirq) + rcuc_kthread_dump(rdp); + rcu_dump_cpu_stacks(); raw_spin_lock_irqsave_rcu_node(rnp, flags); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 156892c22bb5..180ff9c41fa8 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -407,6 +407,13 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, } EXPORT_SYMBOL_GPL(__wait_rcu_gp); +void finish_rcuwait(struct rcuwait *w) +{ + rcu_assign_pointer(w->task, NULL); + __set_current_state(TASK_RUNNING); +} +EXPORT_SYMBOL_GPL(finish_rcuwait); + #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD void init_rcu_head(struct rcu_head *head) { diff --git a/kernel/signal.c b/kernel/signal.c index 9b04631acde8..e93de6daa188 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1308,6 +1308,43 @@ enum sig_handler { }; /* + * On some archictectures, PREEMPT_RT has to delay sending a signal from a + * trap since it cannot enable preemption, and the signal code's + * spin_locks turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME + * which will send the signal on exit of the trap. + */ +#ifdef CONFIG_RT_DELAYED_SIGNALS +static inline bool force_sig_delayed(struct kernel_siginfo *info, + struct task_struct *t) +{ + if (!in_atomic()) + return false; + + if (WARN_ON_ONCE(t->forced_info.si_signo)) + return true; + + if (is_si_special(info)) { + WARN_ON_ONCE(info != SEND_SIG_PRIV); + t->forced_info.si_signo = info->si_signo; + t->forced_info.si_errno = 0; + t->forced_info.si_code = SI_KERNEL; + t->forced_info.si_pid = 0; + t->forced_info.si_uid = 0; + } else { + t->forced_info = *info; + } + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); + return true; +} +#else +static inline bool force_sig_delayed(struct kernel_siginfo *info, + struct task_struct *t) +{ + return false; +} +#endif + +/* * Force a signal that the process can't ignore: if necessary * we unblock the signal and change any SIG_IGN to SIG_DFL. * @@ -1327,6 +1364,9 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, struct k_sigaction *action; int sig = info->si_signo; + if (force_sig_delayed(info, t)) + return 0; + spin_lock_irqsave(&t->sighand->siglock, flags); action = &t->sighand->action[sig-1]; ignored = action->sa.sa_handler == SIG_IGN; diff --git a/kernel/softirq.c b/kernel/softirq.c index 41f470929e99..fac801815554 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -222,7 +222,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) u32 pending; int curcnt; - WARN_ON_ONCE(in_irq()); + WARN_ON_ONCE(in_hardirq()); lockdep_assert_irqs_enabled(); local_irq_save(flags); @@ -305,7 +305,7 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) { unsigned long flags; - WARN_ON_ONCE(in_irq()); + WARN_ON_ONCE(in_hardirq()); raw_local_irq_save(flags); /* @@ -352,14 +352,14 @@ static void __local_bh_enable(unsigned int cnt) */ void _local_bh_enable(void) { - WARN_ON_ONCE(in_irq()); + WARN_ON_ONCE(in_hardirq()); __local_bh_enable(SOFTIRQ_DISABLE_OFFSET); } EXPORT_SYMBOL(_local_bh_enable); void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) { - WARN_ON_ONCE(in_irq()); + WARN_ON_ONCE(in_hardirq()); lockdep_assert_irqs_enabled(); #ifdef CONFIG_TRACE_IRQFLAGS local_irq_disable(); @@ -618,7 +618,7 @@ static inline void tick_irq_exit(void) /* Make sure that timer wheel updates are propagated */ if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) { - if (!in_irq()) + if (!in_hardirq()) tick_nohz_irq_exit(); } #endif diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 04bfd62f5e5c..27b7868b5c30 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -181,5 +181,14 @@ config HIGH_RES_TIMERS hardware is not capable then this option only increases the size of the kernel image. +config CLOCKSOURCE_WATCHDOG_MAX_SKEW_US + int "Clocksource watchdog maximum allowable skew (in μs)" + depends on CLOCKSOURCE_WATCHDOG + range 50 1000 + default 100 + help + Specify the maximum amount of allowable watchdog skew in + microseconds before reporting the clocksource to be unstable. + endmenu endif diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1cf73807b450..95d7ca35bdf2 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -107,7 +107,13 @@ static u64 suspend_start; * This delay could be due to SMIs, NMIs, or to VCPU preemptions. Used as * a lower bound for cs->uncertainty_margin values when registering clocks. */ -#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC) +#ifdef CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US +#define MAX_SKEW_USEC CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US +#else +#define MAX_SKEW_USEC 100 +#endif + +#define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC) #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static void clocksource_watchdog_work(struct work_struct *work); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 17a283ce2b20..2d76c91b85de 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -169,6 +169,8 @@ static ktime_t tick_init_jiffy_update(void) return period; } +#define MAX_STALLED_JIFFIES 5 + static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) { int cpu = smp_processor_id(); @@ -196,6 +198,21 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) if (tick_do_timer_cpu == cpu) tick_do_update_jiffies64(now); + /* + * If jiffies update stalled for too long (timekeeper in stop_machine() + * or VMEXIT'ed for several msecs), force an update. + */ + if (ts->last_tick_jiffies != jiffies) { + ts->stalled_jiffies = 0; + ts->last_tick_jiffies = READ_ONCE(jiffies); + } else { + if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) { + tick_do_update_jiffies64(now); + ts->stalled_jiffies = 0; + ts->last_tick_jiffies = READ_ONCE(jiffies); + } + } + if (ts->inidle) ts->got_idle_tick = 1; } @@ -768,7 +785,7 @@ static inline bool local_timer_softirq_pending(void) static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) { - u64 basemono, next_tick, next_tmr, next_rcu, delta, expires; + u64 basemono, next_tick, delta, expires; unsigned long basejiff; unsigned int seq; @@ -791,7 +808,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) * minimal delta which brings us back to this place * immediately. Lather, rinse and repeat... */ - if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() || + if (rcu_needs_cpu() || arch_needs_cpu() || irq_work_needs_cpu() || local_timer_softirq_pending()) { next_tick = basemono + TICK_NSEC; } else { @@ -802,10 +819,8 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) * disabled this also looks at the next expiring * hrtimer. */ - next_tmr = get_next_timer_interrupt(basejiff, basemono); - ts->next_timer = next_tmr; - /* Take the next rcu event into account */ - next_tick = next_rcu < next_tmr ? next_rcu : next_tmr; + next_tick = get_next_timer_interrupt(basejiff, basemono); + ts->next_timer = next_tick; } /* @@ -984,6 +999,45 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) __tick_nohz_full_update_tick(ts, ktime_get()); } +/* + * A pending softirq outside an IRQ (or softirq disabled section) context + * should be waiting for ksoftirqd to handle it. Therefore we shouldn't + * reach here due to the need_resched() early check in can_stop_idle_tick(). + * + * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the + * cpu_down() process, softirqs can still be raised while ksoftirqd is parked, + * triggering the below since wakep_softirqd() is ignored. + * + */ +static bool report_idle_softirq(void) +{ + static int ratelimit; + unsigned int pending = local_softirq_pending(); + + if (likely(!pending)) + return false; + + /* Some softirqs claim to be safe against hotplug and ksoftirqd parking */ + if (!cpu_active(smp_processor_id())) { + pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK; + if (!pending) + return false; + } + + if (ratelimit < 10) + return false; + + /* On RT, softirqs handling may be waiting on some lock */ + if (!local_bh_blocked()) + return false; + + pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n", + pending); + ratelimit++; + + return true; +} + static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) { /* @@ -1010,17 +1064,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) if (need_resched()) return false; - if (unlikely(local_softirq_pending())) { - static int ratelimit; - - if (ratelimit < 10 && !local_bh_blocked() && - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { - pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", - (unsigned int) local_softirq_pending()); - ratelimit++; - } + if (unlikely(report_idle_softirq())) return false; - } if (tick_nohz_full_enabled()) { /* diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index d952ae393423..504649513399 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -49,6 +49,8 @@ enum tick_nohz_mode { * @timer_expires_base: Base time clock monotonic for @timer_expires * @next_timer: Expiry time of next expiring timer for debugging purpose only * @tick_dep_mask: Tick dependency mask - is set, if someone needs the tick + * @last_tick_jiffies: Value of jiffies seen on last tick + * @stalled_jiffies: Number of stalled jiffies detected across ticks */ struct tick_sched { struct hrtimer sched_timer; @@ -77,6 +79,8 @@ struct tick_sched { u64 next_timer; ktime_t idle_expires; atomic_t tick_dep_mask; + unsigned long last_tick_jiffies; + unsigned int stalled_jiffies; }; extern struct tick_sched *tick_get_tick_sched(int cpu); diff --git a/kernel/torture.c b/kernel/torture.c index ef27a6c82451..789aeb0e1159 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -911,7 +911,7 @@ void torture_kthread_stopping(char *title) { char buf[128]; - snprintf(buf, sizeof(buf), "Stopping %s", title); + snprintf(buf, sizeof(buf), "%s is stopping", title); VERBOSE_TOROUT_STRING(buf); while (!kthread_should_stop()) { torture_shutdown_absorb(title); @@ -931,12 +931,14 @@ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m, int ret = 0; VERBOSE_TOROUT_STRING(m); - *tp = kthread_run(fn, arg, "%s", s); + *tp = kthread_create(fn, arg, "%s", s); if (IS_ERR(*tp)) { ret = PTR_ERR(*tp); TOROUT_ERRSTRING(f); *tp = NULL; + return ret; } + wake_up_process(*tp); // Process is sleeping, so ordering provided. torture_shuffle_task_register(*tp); return ret; } diff --git a/mm/init-mm.c b/mm/init-mm.c index b4a6f38fb51d..fbe7844d0912 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -10,6 +10,7 @@ #include <linux/atomic.h> #include <linux/user_namespace.h> +#include <linux/ioasid.h> #include <asm/mmu.h> #ifndef INIT_MM_CONTEXT @@ -38,6 +39,9 @@ struct mm_struct init_mm = { .mmlist = LIST_HEAD_INIT(init_mm.mmlist), .user_ns = &init_user_ns, .cpu_bitmap = CPU_BITS_NONE, +#ifdef CONFIG_IOMMU_SVA + .pasid = INVALID_IOASID, +#endif INIT_MM_CONTEXT(init_mm) }; diff --git a/mm/swap_state.c b/mm/swap_state.c index 8d4104242100..ee67164531c0 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -478,7 +478,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, * __read_swap_cache_async(), which has set SWAP_HAS_CACHE * in swap_map, but not yet added its page to swap cache. */ - cond_resched(); + schedule_timeout_uninterruptible(1); } /* diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 074e4a69a728..88e2808019b4 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1436,6 +1436,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) const char *user_protocol; master = of_find_net_device_by_node(ethernet); + of_node_put(ethernet); if (!master) return -EPROBE_DEFER; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index b0ffbcd5432d..55d604c9b3b3 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -812,8 +812,7 @@ int esp6_input_done2(struct sk_buff *skb, int err) struct tcphdr *th; offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); - - if (offset < 0) { + if (offset == -1) { err = -EINVAL; goto out; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4788f6b37053..194832663d85 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1476,8 +1476,8 @@ static int __ip6_append_data(struct sock *sk, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; - if (mtu < fragheaderlen || - ((mtu - fragheaderlen) & ~7) + fragheaderlen < sizeof(struct frag_hdr)) + if (mtu <= fragheaderlen || + ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr)) goto emsgsize; maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - diff --git a/net/key/af_key.c b/net/key/af_key.c index 9bf52a09b5ff..fd51db3be91c 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1699,7 +1699,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad xfrm_probe_algs(); - supp_skb = compose_sadb_supported(hdr, GFP_KERNEL); + supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO); if (!supp_skb) { if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) pfk->registered &= ~(1<<hdr->sadb_msg_satype); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index d6aa5b47031e..bf1e17c678f1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1748,9 +1748,6 @@ resolve_normal_ct(struct nf_conn *tmpl, return 0; if (IS_ERR(h)) return PTR_ERR(h); - - ct = nf_ct_tuplehash_to_ctrack(h); - ct->local_origin = state->hook == NF_INET_LOCAL_OUT; } ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 2d06a66899b2..ffcf6529afc3 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -494,38 +494,6 @@ another_round: goto another_round; } -static bool tuple_force_port_remap(const struct nf_conntrack_tuple *tuple) -{ - u16 sp, dp; - - switch (tuple->dst.protonum) { - case IPPROTO_TCP: - sp = ntohs(tuple->src.u.tcp.port); - dp = ntohs(tuple->dst.u.tcp.port); - break; - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - sp = ntohs(tuple->src.u.udp.port); - dp = ntohs(tuple->dst.u.udp.port); - break; - default: - return false; - } - - /* IANA: System port range: 1-1023, - * user port range: 1024-49151, - * private port range: 49152-65535. - * - * Linux default ephemeral port range is 32768-60999. - * - * Enforce port remapping if sport is significantly lower - * than dport to prevent NAT port shadowing, i.e. - * accidental match of 'new' inbound connection vs. - * existing outbound one. - */ - return sp < 16384 && dp >= 32768; -} - /* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, * we change the source to map into the range. For NF_INET_PRE_ROUTING * and NF_INET_LOCAL_OUT, we change the destination to map into the @@ -539,17 +507,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { - bool random_port = range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL; const struct nf_conntrack_zone *zone; struct net *net = nf_ct_net(ct); zone = nf_ct_zone(ct); - if (maniptype == NF_NAT_MANIP_SRC && - !random_port && - !ct->local_origin) - random_port = tuple_force_port_remap(orig_tuple); - /* 1) If this srcip/proto/src-proto-part is currently mapped, * and that same mapping gives a unique tuple within the given * range, use that. @@ -558,7 +520,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, * So far, we don't do local source mappings, so multiple * manips not an issue. */ - if (maniptype == NF_NAT_MANIP_SRC && !random_port) { + if (maniptype == NF_NAT_MANIP_SRC && + !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { /* try the original tuple first */ if (in_range(orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { @@ -582,7 +545,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, */ /* Only bother mapping if it's not already in range and unique */ - if (!random_port) { + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) && l4proto_in_range(tuple, maniptype, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c86748b3873b..d71a33ae39b3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8260,6 +8260,12 @@ void nf_tables_trans_destroy_flush_work(void) } EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); +static bool nft_expr_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + return false; +} + static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) { const struct nft_expr *expr, *last; @@ -8307,8 +8313,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha nft_rule_for_each_expr(expr, last, rule) { track.cur = expr; - if (expr->ops->reduce && - expr->ops->reduce(&track, expr)) { + if (nft_expr_reduce(&track, expr)) { expr = track.cur; continue; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ab87f22cc7ec..a7273af2d900 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2317,8 +2317,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, copy_skb = skb_get(skb); skb_head = skb->data; } - if (copy_skb) + if (copy_skb) { + memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0, + sizeof(PACKET_SKB_CB(copy_skb)->sa.ll)); skb_set_owner_r(copy_skb, sk); + } } snaplen = po->rx_ring.frame_size - macoff; if ((int)snaplen < 0) { @@ -3462,6 +3465,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + const size_t max_len = min(sizeof(skb->cb), + sizeof(struct sockaddr_storage)); int copy_len; /* If the address length field is there to be filled @@ -3484,6 +3489,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = sizeof(struct sockaddr_ll); } } + if (WARN_ON_ONCE(copy_len > max_len)) { + copy_len = max_len; + msg->msg_namelen = copy_len; + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 38baeb189d4e..f04abf662ec6 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -334,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(vsock_remove_sock); -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) +void vsock_for_each_connected_socket(struct vsock_transport *transport, + void (*fn)(struct sock *sk)) { int i; @@ -343,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { struct vsock_sock *vsk; list_for_each_entry(vsk, &vsock_connected_table[i], - connected_table) + connected_table) { + if (vsk->transport != transport) + continue; + fn(sk_vsock(vsk)); + } } spin_unlock_bh(&vsock_table_lock); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index fb3302fff627..5afc194a58bb 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -24,6 +24,7 @@ static struct workqueue_struct *virtio_vsock_workqueue; static struct virtio_vsock __rcu *the_virtio_vsock; static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ +static struct virtio_transport virtio_transport; /* forward declaration */ struct virtio_vsock { struct virtio_device *vdev; @@ -384,7 +385,8 @@ static void virtio_vsock_event_handle(struct virtio_vsock *vsock, switch (le32_to_cpu(event->id)) { case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: virtio_vsock_update_guest_cid(vsock); - vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vsock_for_each_connected_socket(&virtio_transport.transport, + virtio_vsock_reset_sock); break; } } @@ -662,7 +664,8 @@ static void virtio_vsock_remove(struct virtio_device *vdev) synchronize_rcu(); /* Reset all connected sockets when the device disappear */ - vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vsock_for_each_connected_socket(&virtio_transport.transport, + virtio_vsock_reset_sock); /* Stop all work handlers to make sure no one is accessing the device, * so we can safely call virtio_reset_device(). diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 7aef34e32bdf..b17dc9745188 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -75,6 +75,8 @@ static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; static int PROTOCOL_OVERRIDE = -1; +static struct vsock_transport vmci_transport; /* forward declaration */ + /* Helper function to convert from a VMCI error code to a VSock error code. */ static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) @@ -882,7 +884,8 @@ static void vmci_transport_qp_resumed_cb(u32 sub_id, const struct vmci_event_data *e_data, void *client_data) { - vsock_for_each_connected_socket(vmci_transport_handle_detach); + vsock_for_each_connected_socket(&vmci_transport, + vmci_transport_handle_detach); } static void vmci_transport_recv_pkt_work(struct work_struct *work) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 3106b7536b89..9c084a2ba3b0 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -12,202 +12,46 @@ use strict; ## ## ## #define enhancements by Armin Kuster <akuster@mvista.com> ## ## Copyright (c) 2000 MontaVista Software, Inc. ## -## ## -## This software falls under the GNU General Public License. ## -## Please read the COPYING file for more information ## +# +# Copyright (C) 2022 Tomasz Warniełło (POD) -# 18/01/2001 - Cleanups -# Functions prototyped as foo(void) same as foo() -# Stop eval'ing where we don't need to. -# -- huggie@earth.li +use Pod::Usage qw/pod2usage/; -# 27/06/2001 - Allowed whitespace after initial "/**" and -# allowed comments before function declarations. -# -- Christian Kreibich <ck@whoop.org> +=head1 NAME -# Still to do: -# - add perldoc documentation -# - Look more closely at some of the scarier bits :) +kernel-doc - Print formatted kernel documentation to stdout -# 26/05/2001 - Support for separate source and object trees. -# Return error code. -# Keith Owens <kaos@ocs.com.au> +=head1 SYNOPSIS -# 23/09/2001 - Added support for typedefs, structs, enums and unions -# Support for Context section; can be terminated using empty line -# Small fixes (like spaces vs. \s in regex) -# -- Tim Jansen <tim@tjansen.de> + kernel-doc [-h] [-v] [-Werror] + [ -man | + -rst [-sphinx-version VERSION] [-enable-lineno] | + -none + ] + [ + -export | + -internal | + [-function NAME] ... | + [-nosymbol NAME] ... + ] + [-no-doc-sections] + [-export-file FILE] ... + FILE ... -# 25/07/2012 - Added support for HTML5 -# -- Dan Luedtke <mail@danrl.de> +Run `kernel-doc -h` for details. -sub usage { - my $message = <<"EOF"; -Usage: $0 [OPTION ...] FILE ... +=head1 DESCRIPTION Read C language source or header FILEs, extract embedded documentation comments, and print formatted documentation to standard output. -The documentation comments are identified by "/**" opening comment mark. See -Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. - -Output format selection (mutually exclusive): - -man Output troff manual page format. This is the default. - -rst Output reStructuredText format. - -none Do not output documentation, only warnings. - -Output format selection modifier (affects only ReST output): - - -sphinx-version Use the ReST C domain dialect compatible with an - specific Sphinx Version. - If not specified, kernel-doc will auto-detect using - the sphinx-build version found on PATH. - -Output selection (mutually exclusive): - -export Only output documentation for symbols that have been - exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() - in any input FILE or -export-file FILE. - -internal Only output documentation for symbols that have NOT been - exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() - in any input FILE or -export-file FILE. - -function NAME Only output documentation for the given function(s) - or DOC: section title(s). All other functions and DOC: - sections are ignored. May be specified multiple times. - -nosymbol NAME Exclude the specified symbols from the output - documentation. May be specified multiple times. - -Output selection modifiers: - -no-doc-sections Do not output DOC: sections. - -enable-lineno Enable output of #define LINENO lines. Only works with - reStructuredText format. - -export-file FILE Specify an additional FILE in which to look for - EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(). To be used with - -export or -internal. May be specified multiple times. - -Other parameters: - -v Verbose output, more warnings and other information. - -h Print this help. - -Werror Treat warnings as errors. - -EOF - print $message; - exit 1; -} +The documentation comments are identified by the "/**" opening comment mark. -# -# format of comments. -# In the following table, (...)? signifies optional structure. -# (...)* signifies 0 or more structure elements -# /** -# * function_name(:)? (- short description)? -# (* @parameterx: (description of parameter x)?)* -# (* a blank line)? -# * (Description:)? (Description of function)? -# * (section header: (section description)? )* -# (*)?*/ -# -# So .. the trivial example would be: -# -# /** -# * my_function -# */ -# -# If the Description: header tag is omitted, then there must be a blank line -# after the last parameter specification. -# e.g. -# /** -# * my_function - does my stuff -# * @my_arg: its mine damnit -# * -# * Does my stuff explained. -# */ -# -# or, could also use: -# /** -# * my_function - does my stuff -# * @my_arg: its mine damnit -# * Description: Does my stuff explained. -# */ -# etc. -# -# Besides functions you can also write documentation for structs, unions, -# enums and typedefs. Instead of the function name you must write the name -# of the declaration; the struct/union/enum/typedef must always precede -# the name. Nesting of declarations is not supported. -# Use the argument mechanism to document members or constants. -# e.g. -# /** -# * struct my_struct - short description -# * @a: first member -# * @b: second member -# * -# * Longer description -# */ -# struct my_struct { -# int a; -# int b; -# /* private: */ -# int c; -# }; -# -# All descriptions can be multiline, except the short function description. -# -# For really longs structs, you can also describe arguments inside the -# body of the struct. -# eg. -# /** -# * struct my_struct - short description -# * @a: first member -# * @b: second member -# * -# * Longer description -# */ -# struct my_struct { -# int a; -# int b; -# /** -# * @c: This is longer description of C -# * -# * You can use paragraphs to describe arguments -# * using this method. -# */ -# int c; -# }; -# -# This should be use only for struct/enum members. -# -# You can also add additional sections. When documenting kernel functions you -# should document the "Context:" of the function, e.g. whether the functions -# can be called form interrupts. Unlike other sections you can end it with an -# empty line. -# A non-void function should have a "Return:" section describing the return -# value(s). -# Example-sections should contain the string EXAMPLE so that they are marked -# appropriately in DocBook. -# -# Example: -# /** -# * user_function - function that can only be called in user context -# * @a: some argument -# * Context: !in_interrupt() -# * -# * Some description -# * Example: -# * user_function(22); -# */ -# ... -# -# -# All descriptive text is further processed, scanning for the following special -# patterns, which are highlighted appropriately. -# -# 'funcname()' - function -# '$ENVVAR' - environmental variable -# '&struct_name' - name of a structure (up to two words including 'struct') -# '&struct_name.member' - name of a structure member -# '@parameter' - name of a parameter -# '%CONST' - name of a constant. -# '``LITERAL``' - literal string without any spaces on it. +See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. + +=cut + +# more perldoc at the end of the file ## init lots of data @@ -273,7 +117,13 @@ my $blankline_rst = "\n"; # read arguments if ($#ARGV == -1) { - usage(); + pod2usage( + -message => "No arguments!\n", + -exitval => 1, + -verbose => 99, + -sections => 'SYNOPSIS', + -output => \*STDERR, + ); } my $kernelversion; @@ -468,7 +318,7 @@ while ($ARGV[0] =~ m/^--?(.*)/) { } elsif ($cmd eq "Werror") { $Werror = 1; } elsif (($cmd eq "h") || ($cmd eq "help")) { - usage(); + pod2usage(-exitval => 0, -verbose => 2); } elsif ($cmd eq 'no-doc-sections') { $no_doc_sections = 1; } elsif ($cmd eq 'enable-lineno') { @@ -494,7 +344,22 @@ while ($ARGV[0] =~ m/^--?(.*)/) { } } else { # Unknown argument - usage(); + pod2usage( + -message => "Argument unknown!\n", + -exitval => 1, + -verbose => 99, + -sections => 'SYNOPSIS', + -output => \*STDERR, + ); + } + if ($#ARGV < 0){ + pod2usage( + -message => "FILE argument missing\n", + -exitval => 1, + -verbose => 99, + -sections => 'SYNOPSIS', + -output => \*STDERR, + ); } } @@ -2521,3 +2386,118 @@ if ($Werror && $warnings) { } else { exit($output_mode eq "none" ? 0 : $errors) } + +__END__ + +=head1 OPTIONS + +=head2 Output format selection (mutually exclusive): + +=over 8 + +=item -man + +Output troff manual page format. + +=item -rst + +Output reStructuredText format. This is the default. + +=item -none + +Do not output documentation, only warnings. + +=back + +=head2 Output format modifiers + +=head3 reStructuredText only + +=over 8 + +=item -sphinx-version VERSION + +Use the ReST C domain dialect compatible with a specific Sphinx Version. + +If not specified, kernel-doc will auto-detect using the sphinx-build version +found on PATH. + +=back + +=head2 Output selection (mutually exclusive): + +=over 8 + +=item -export + +Only output documentation for the symbols that have been exported using +EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE. + +=item -internal + +Only output documentation for the symbols that have NOT been exported using +EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE. + +=item -function NAME + +Only output documentation for the given function or DOC: section title. +All other functions and DOC: sections are ignored. + +May be specified multiple times. + +=item -nosymbol NAME + +Exclude the specified symbol from the output documentation. + +May be specified multiple times. + +=back + +=head2 Output selection modifiers: + +=over 8 + +=item -no-doc-sections + +Do not output DOC: sections. + +=item -export-file FILE + +Specify an additional FILE in which to look for EXPORT_SYMBOL() and +EXPORT_SYMBOL_GPL(). + +To be used with -export or -internal. + +May be specified multiple times. + +=back + +=head3 reStructuredText only + +=over 8 + +=item -enable-lineno + +Enable output of #define LINENO lines. + +=back + +=head2 Other parameters: + +=over 8 + +=item -h, -help + +Print this help. + +=item -v + +Verbose output, more warnings and other information. + +=item -Werror + +Treat warnings as errors. + +=back + +=cut diff --git a/security/integrity/Kconfig b/security/integrity/Kconfig index 71f0177e8716..599429f99f99 100644 --- a/security/integrity/Kconfig +++ b/security/integrity/Kconfig @@ -62,6 +62,19 @@ config INTEGRITY_PLATFORM_KEYRING provided by the platform for verifying the kexec'ed kerned image and, possibly, the initramfs signature. +config INTEGRITY_MACHINE_KEYRING + bool "Provide a keyring to which Machine Owner Keys may be added" + depends on SECONDARY_TRUSTED_KEYRING + depends on INTEGRITY_ASYMMETRIC_KEYS + depends on SYSTEM_BLACKLIST_KEYRING + depends on LOAD_UEFI_KEYS + depends on !IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY + help + If set, provide a keyring to which Machine Owner Keys (MOK) may + be added. This keyring shall contain just MOK keys. Unlike keys + in the platform keyring, keys contained in the .machine keyring will + be trusted within the kernel. + config LOAD_UEFI_KEYS depends on INTEGRITY_PLATFORM_KEYRING depends on EFI diff --git a/security/integrity/Makefile b/security/integrity/Makefile index 7ee39d66cf16..d0ffe37dc1d6 100644 --- a/security/integrity/Makefile +++ b/security/integrity/Makefile @@ -10,6 +10,7 @@ integrity-$(CONFIG_INTEGRITY_AUDIT) += integrity_audit.o integrity-$(CONFIG_INTEGRITY_SIGNATURE) += digsig.o integrity-$(CONFIG_INTEGRITY_ASYMMETRIC_KEYS) += digsig_asymmetric.o integrity-$(CONFIG_INTEGRITY_PLATFORM_KEYRING) += platform_certs/platform_keyring.o +integrity-$(CONFIG_INTEGRITY_MACHINE_KEYRING) += platform_certs/machine_keyring.o integrity-$(CONFIG_LOAD_UEFI_KEYS) += platform_certs/efi_parser.o \ platform_certs/load_uefi.o \ platform_certs/keyring_handler.o diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c index 3b06a01bd0fd..c8c8a4a4e7a0 100644 --- a/security/integrity/digsig.c +++ b/security/integrity/digsig.c @@ -30,6 +30,7 @@ static const char * const keyring_name[INTEGRITY_KEYRING_MAX] = { ".ima", #endif ".platform", + ".machine", }; #ifdef CONFIG_IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY @@ -111,6 +112,8 @@ static int __init __integrity_init_keyring(const unsigned int id, } else { if (id == INTEGRITY_KEYRING_PLATFORM) set_platform_trusted_keys(keyring[id]); + if (id == INTEGRITY_KEYRING_MACHINE && trust_moklist()) + set_machine_trusted_keys(keyring[id]); if (id == INTEGRITY_KEYRING_IMA) load_module_cert(keyring[id]); } @@ -126,7 +129,8 @@ int __init integrity_init_keyring(const unsigned int id) perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH; - if (id == INTEGRITY_KEYRING_PLATFORM) { + if (id == INTEGRITY_KEYRING_PLATFORM || + id == INTEGRITY_KEYRING_MACHINE) { restriction = NULL; goto out; } @@ -139,7 +143,14 @@ int __init integrity_init_keyring(const unsigned int id) return -ENOMEM; restriction->check = restrict_link_to_ima; - perm |= KEY_USR_WRITE; + + /* + * MOK keys can only be added through a read-only runtime services + * UEFI variable during boot. No additional keys shall be allowed to + * load into the machine keyring following init from userspace. + */ + if (id != INTEGRITY_KEYRING_MACHINE) + perm |= KEY_USR_WRITE; out: return __integrity_init_keyring(id, perm, restriction); diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index 547425c20e11..2e214c761158 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -151,7 +151,8 @@ int integrity_kernel_read(struct file *file, loff_t offset, #define INTEGRITY_KEYRING_EVM 0 #define INTEGRITY_KEYRING_IMA 1 #define INTEGRITY_KEYRING_PLATFORM 2 -#define INTEGRITY_KEYRING_MAX 3 +#define INTEGRITY_KEYRING_MACHINE 3 +#define INTEGRITY_KEYRING_MAX 4 extern struct dentry *integrity_dir; @@ -283,3 +284,17 @@ static inline void __init add_to_platform_keyring(const char *source, { } #endif + +#ifdef CONFIG_INTEGRITY_MACHINE_KEYRING +void __init add_to_machine_keyring(const char *source, const void *data, size_t len); +bool __init trust_moklist(void); +#else +static inline void __init add_to_machine_keyring(const char *source, + const void *data, size_t len) +{ +} +static inline bool __init trust_moklist(void) +{ + return false; +} +#endif diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c index 5604bd57c990..1db4d3b4356d 100644 --- a/security/integrity/platform_certs/keyring_handler.c +++ b/security/integrity/platform_certs/keyring_handler.c @@ -9,6 +9,7 @@ #include <keys/asymmetric-type.h> #include <keys/system_keyring.h> #include "../integrity.h" +#include "keyring_handler.h" static efi_guid_t efi_cert_x509_guid __initdata = EFI_CERT_X509_GUID; static efi_guid_t efi_cert_x509_sha256_guid __initdata = @@ -66,7 +67,7 @@ static __init void uefi_revocation_list_x509(const char *source, /* * Return the appropriate handler for particular signature list types found in - * the UEFI db and MokListRT tables. + * the UEFI db tables. */ __init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type) { @@ -77,6 +78,21 @@ __init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type) /* * Return the appropriate handler for particular signature list types found in + * the MokListRT tables. + */ +__init efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type) +{ + if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0) { + if (IS_ENABLED(CONFIG_INTEGRITY_MACHINE_KEYRING) && trust_moklist()) + return add_to_machine_keyring; + else + return add_to_platform_keyring; + } + return 0; +} + +/* + * Return the appropriate handler for particular signature list types found in * the UEFI dbx and MokListXRT tables. */ __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type) diff --git a/security/integrity/platform_certs/keyring_handler.h b/security/integrity/platform_certs/keyring_handler.h index 2462bfa08fe3..284558f30411 100644 --- a/security/integrity/platform_certs/keyring_handler.h +++ b/security/integrity/platform_certs/keyring_handler.h @@ -25,6 +25,11 @@ void blacklist_binary(const char *source, const void *data, size_t len); efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type); /* + * Return the handler for particular signature list types found in the mok. + */ +efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type); + +/* * Return the handler for particular signature list types found in the dbx. */ efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type); diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index 08b6d12f99b4..5f45c3c07dbd 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -95,7 +95,7 @@ static int __init load_moklist_certs(void) rc = parse_efi_signature_list("UEFI:MokListRT (MOKvar table)", mokvar_entry->data, mokvar_entry->data_size, - get_handler_for_db); + get_handler_for_mok); /* All done if that worked. */ if (!rc) return rc; @@ -110,7 +110,7 @@ static int __init load_moklist_certs(void) mok = get_cert_list(L"MokListRT", &mok_var, &moksize, &status); if (mok) { rc = parse_efi_signature_list("UEFI:MokListRT", - mok, moksize, get_handler_for_db); + mok, moksize, get_handler_for_mok); kfree(mok); if (rc) pr_err("Couldn't parse MokListRT signatures: %d\n", rc); diff --git a/security/integrity/platform_certs/machine_keyring.c b/security/integrity/platform_certs/machine_keyring.c new file mode 100644 index 000000000000..7aaed7950b6e --- /dev/null +++ b/security/integrity/platform_certs/machine_keyring.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Machine keyring routines. + * + * Copyright (c) 2021, Oracle and/or its affiliates. + */ + +#include <linux/efi.h> +#include "../integrity.h" + +static bool trust_mok; + +static __init int machine_keyring_init(void) +{ + int rc; + + rc = integrity_init_keyring(INTEGRITY_KEYRING_MACHINE); + if (rc) + return rc; + + pr_notice("Machine keyring initialized\n"); + return 0; +} +device_initcall(machine_keyring_init); + +void __init add_to_machine_keyring(const char *source, const void *data, size_t len) +{ + key_perm_t perm; + int rc; + + perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW; + rc = integrity_load_cert(INTEGRITY_KEYRING_MACHINE, source, data, len, perm); + + /* + * Some MOKList keys may not pass the machine keyring restrictions. + * If the restriction check does not pass and the platform keyring + * is configured, try to add it into that keyring instead. + */ + if (rc && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) + rc = integrity_load_cert(INTEGRITY_KEYRING_PLATFORM, source, + data, len, perm); + + if (rc) + pr_info("Error adding keys to machine keyring %s\n", source); +} + +/* + * Try to load the MokListTrustedRT MOK variable to see if we should trust + * the MOK keys within the kernel. It is not an error if this variable + * does not exist. If it does not exist, MOK keys should not be trusted + * within the machine keyring. + */ +static __init bool uefi_check_trust_mok_keys(void) +{ + struct efi_mokvar_table_entry *mokvar_entry; + + mokvar_entry = efi_mokvar_entry_find("MokListTrustedRT"); + + if (mokvar_entry) + return true; + + return false; +} + +bool __init trust_moklist(void) +{ + static bool initialized; + + if (!initialized) { + initialized = true; + + if (uefi_check_trust_mok_keys()) + trust_mok = true; + } + + return trust_mok; +} diff --git a/security/keys/keyctl_pkey.c b/security/keys/keyctl_pkey.c index 5de0d599a274..97bc27bbf079 100644 --- a/security/keys/keyctl_pkey.c +++ b/security/keys/keyctl_pkey.c @@ -135,15 +135,23 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par switch (op) { case KEYCTL_PKEY_ENCRYPT: + if (uparams.in_len > info.max_dec_size || + uparams.out_len > info.max_enc_size) + return -EINVAL; + break; case KEYCTL_PKEY_DECRYPT: if (uparams.in_len > info.max_enc_size || uparams.out_len > info.max_dec_size) return -EINVAL; break; case KEYCTL_PKEY_SIGN: + if (uparams.in_len > info.max_data_size || + uparams.out_len > info.max_sig_size) + return -EINVAL; + break; case KEYCTL_PKEY_VERIFY: - if (uparams.in_len > info.max_sig_size || - uparams.out_len > info.max_data_size) + if (uparams.in_len > info.max_data_size || + uparams.in2_len > info.max_sig_size) return -EINVAL; break; default: @@ -151,7 +159,7 @@ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_par } params->in_len = uparams.in_len; - params->out_len = uparams.out_len; + params->out_len = uparams.out_len; /* Note: same as in2_len */ return 0; } diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c index d5c891d8d353..9b9d3ef79cbe 100644 --- a/security/keys/trusted-keys/trusted_core.c +++ b/security/keys/trusted-keys/trusted_core.c @@ -27,10 +27,10 @@ module_param_named(source, trusted_key_source, charp, 0); MODULE_PARM_DESC(source, "Select trusted keys source (tpm or tee)"); static const struct trusted_key_source trusted_key_sources[] = { -#if defined(CONFIG_TCG_TPM) +#if IS_REACHABLE(CONFIG_TCG_TPM) { "tpm", &trusted_key_tpm_ops }, #endif -#if defined(CONFIG_TEE) +#if IS_REACHABLE(CONFIG_TEE) { "tee", &trusted_key_tee_ops }, #endif }; @@ -351,7 +351,7 @@ static int __init init_trusted(void) static void __exit cleanup_trusted(void) { - static_call(trusted_key_exit)(); + static_call_cond(trusted_key_exit)(); } late_initcall(init_trusted); diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h index dc632b41f135..65c0d9ce1e29 100644 --- a/tools/arch/x86/include/asm/insn.h +++ b/tools/arch/x86/include/asm/insn.h @@ -124,7 +124,7 @@ struct insn { #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ /* VEX bit fields */ -#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ +#define X86_EVEX_M(vex) ((vex) & 0x07) /* EVEX Byte1 */ #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ #define X86_VEX2_M 1 /* VEX2.M always 1 */ #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 59cf2343f3d9..d0d7b9bc6cad 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -27,8 +27,7 @@ * Output: * rax original destination */ -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK(memcpy) +SYM_FUNC_START(__memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS @@ -40,11 +39,12 @@ SYM_FUNC_START_WEAK(memcpy) movl %edx, %ecx rep movsb RET -SYM_FUNC_END(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) -EXPORT_SYMBOL(memcpy) +SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +EXPORT_SYMBOL(memcpy) + /* * memcpy_erms() - enhanced fast string memcpy. This is faster and * simpler than memcpy. Use memcpy_erms when possible. diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index d624f2bc42f1..fc9ffd3ff3b2 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -17,7 +17,6 @@ * * rax original destination */ -SYM_FUNC_START_WEAK(memset) SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended @@ -42,10 +41,11 @@ SYM_FUNC_START(__memset) movq %r9,%rax RET SYM_FUNC_END(__memset) -SYM_FUNC_END_ALIAS(memset) -EXPORT_SYMBOL(memset) EXPORT_SYMBOL(__memset) +SYM_FUNC_ALIAS_WEAK(memset, __memset) +EXPORT_SYMBOL(memset) + /* * ISO C memset - set a memory block to a byte value. This function uses * enhanced rep stosb to override the fast string function. diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index ec31f5b60323..d12d1358f96d 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -690,7 +690,10 @@ AVXcode: 2 45: vpsrlvd/q Vx,Hx,Wx (66),(v) 46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) 47: vpsllvd/q Vx,Hx,Wx (66),(v) -# Skip 0x48-0x4b +# Skip 0x48 +49: TILERELEASE (v1),(000),(11B) | LDTILECFG Mtc (v1)(000) | STTILECFG Mtc (66),(v1),(000) | TILEZERO Vt (F2),(v1),(11B) +# Skip 0x4a +4b: TILELOADD Vt,Wsm (F2),(v1) | TILELOADDT1 Vt,Wsm (66),(v1) | TILESTORED Wsm,Vt (F3),(v) 4c: vrcp14ps/d Vpd,Wpd (66),(ev) 4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) 4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) @@ -705,7 +708,10 @@ AVXcode: 2 59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) 5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) 5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) -# Skip 0x5c-0x61 +5c: TDPBF16PS Vt,Wt,Ht (F3),(v1) +# Skip 0x5d +5e: TDPBSSD Vt,Wt,Ht (F2),(v1) | TDPBSUD Vt,Wt,Ht (F3),(v1) | TDPBUSD Vt,Wt,Ht (66),(v1) | TDPBUUD Vt,Wt,Ht (v1) +# Skip 0x5f-0x61 62: vpexpandb/w Vx,Wx (66),(ev) 63: vpcompressb/w Wx,Vx (66),(ev) 64: vpblendmd/q Vx,Hx,Wx (66),(ev) @@ -822,9 +828,9 @@ AVXcode: 3 05: vpermilpd Vx,Wx,Ib (66),(v) 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) 07: -08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) +08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) | vrndscaleph Vx,Wx,Ib (evo) 09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) -0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) +0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) | vrndscalesh Vx,Hx,Wx,Ib (evo) 0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) 0c: vblendps Vx,Hx,Wx,Ib (66) 0d: vblendpd Vx,Hx,Wx,Ib (66) @@ -846,8 +852,8 @@ AVXcode: 3 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) 23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) 25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) -26: vgetmantps/d Vx,Wx,Ib (66),(ev) -27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) +26: vgetmantps/d Vx,Wx,Ib (66),(ev) | vgetmantph Vx,Wx,Ib (ev) +27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) | vgetmantsh Vx,Hx,Wx,Ib (ev) 30: kshiftrb/w Vk,Uk,Ib (66),(v) 31: kshiftrd/q Vk,Uk,Ib (66),(v) 32: kshiftlb/w Vk,Uk,Ib (66),(v) @@ -871,23 +877,102 @@ AVXcode: 3 51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) 54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) 55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) -56: vreduceps/d Vx,Wx,Ib (66),(ev) -57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) +56: vreduceps/d Vx,Wx,Ib (66),(ev) | vreduceph Vx,Wx,Ib (ev) +57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) | vreducesh Vx,Hx,Wx,Ib (ev) 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) -66: vfpclassps/d Vk,Wx,Ib (66),(ev) -67: vfpclassss/d Vk,Wx,Ib (66),(ev) +66: vfpclassps/d Vk,Wx,Ib (66),(ev) | vfpclassph Vx,Wx,Ib (ev) +67: vfpclassss/d Vk,Wx,Ib (66),(ev) | vfpclasssh Vx,Wx,Ib (ev) 70: vpshldw Vx,Hx,Wx,Ib (66),(ev) 71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev) 72: vpshrdw Vx,Hx,Wx,Ib (66),(ev) 73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev) +c2: vcmpph Vx,Hx,Wx,Ib (ev) | vcmpsh Vx,Hx,Wx,Ib (F3),(ev) cc: sha1rnds4 Vdq,Wdq,Ib ce: vgf2p8affineqb Vx,Wx,Ib (66) cf: vgf2p8affineinvqb Vx,Wx,Ib (66) df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) -f0: RORX Gy,Ey,Ib (F2),(v) +f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B) +EndTable + +Table: EVEX map 5 +Referrer: +AVXcode: 5 +10: vmovsh Vx,Hx,Wx (F3),(ev) | vmovsh Vx,Wx (F3),(ev) +11: vmovsh Wx,Hx,Vx (F3),(ev) | vmovsh Wx,Vx (F3),(ev) +1d: vcvtps2phx Vx,Wx (66),(ev) | vcvtss2sh Vx,Hx,Wx (ev) +2a: vcvtsi2sh Vx,Hx,Wx (F3),(ev) +2c: vcvttsh2si Vx,Wx (F3),(ev) +2d: vcvtsh2si Vx,Wx (F3),(ev) +2e: vucomish Vx,Wx (ev) +2f: vcomish Vx,Wx (ev) +51: vsqrtph Vx,Wx (ev) | vsqrtsh Vx,Hx,Wx (F3),(ev) +58: vaddph Vx,Hx,Wx (ev) | vaddsh Vx,Hx,Wx (F3),(ev) +59: vmulph Vx,Hx,Wx (ev) | vmulsh Vx,Hx,Wx (F3),(ev) +5a: vcvtpd2ph Vx,Wx (66),(ev) | vcvtph2pd Vx,Wx (ev) | vcvtsd2sh Vx,Hx,Wx (F2),(ev) | vcvtsh2sd Vx,Hx,Wx (F3),(ev) +5b: vcvtdq2ph Vx,Wx (ev) | vcvtph2dq Vx,Wx (66),(ev) | vcvtqq2ph Vx,Wx (ev) | vcvttph2dq Vx,Wx (F3),(ev) +5c: vsubph Vx,Hx,Wx (ev) | vsubsh Vx,Hx,Wx (F3),(ev) +5d: vminph Vx,Hx,Wx (ev) | vminsh Vx,Hx,Wx (F3),(ev) +5e: vdivph Vx,Hx,Wx (ev) | vdivsh Vx,Hx,Wx (F3),(ev) +5f: vmaxph Vx,Hx,Wx (ev) | vmaxsh Vx,Hx,Wx (F3),(ev) +6e: vmovw Vx,Wx (66),(ev) +78: vcvttph2udq Vx,Wx (ev) | vcvttph2uqq Vx,Wx (66),(ev) | vcvttsh2usi Vx,Wx (F3),(ev) +79: vcvtph2udq Vx,Wx (ev) | vcvtph2uqq Vx,Wx (66),(ev) | vcvtsh2usi Vx,Wx (F3),(ev) +7a: vcvttph2qq Vx,Wx (66),(ev) | vcvtudq2ph Vx,Wx (F2),(ev) | vcvtuqq2ph Vx,Wx (F2),(ev) +7b: vcvtph2qq Vx,Wx (66),(ev) | vcvtusi2sh Vx,Hx,Wx (F3),(ev) +7c: vcvttph2uw Vx,Wx (ev) | vcvttph2w Vx,Wx (66),(ev) +7d: vcvtph2uw Vx,Wx (ev) | vcvtph2w Vx,Wx (66),(ev) | vcvtuw2ph Vx,Wx (F2),(ev) | vcvtw2ph Vx,Wx (F3),(ev) +7e: vmovw Wx,Vx (66),(ev) +EndTable + +Table: EVEX map 6 +Referrer: +AVXcode: 6 +13: vcvtph2psx Vx,Wx (66),(ev) | vcvtsh2ss Vx,Hx,Wx (ev) +2c: vscalefph Vx,Hx,Wx (66),(ev) +2d: vscalefsh Vx,Hx,Wx (66),(ev) +42: vgetexpph Vx,Wx (66),(ev) +43: vgetexpsh Vx,Hx,Wx (66),(ev) +4c: vrcpph Vx,Wx (66),(ev) +4d: vrcpsh Vx,Hx,Wx (66),(ev) +4e: vrsqrtph Vx,Wx (66),(ev) +4f: vrsqrtsh Vx,Hx,Wx (66),(ev) +56: vfcmaddcph Vx,Hx,Wx (F2),(ev) | vfmaddcph Vx,Hx,Wx (F3),(ev) +57: vfcmaddcsh Vx,Hx,Wx (F2),(ev) | vfmaddcsh Vx,Hx,Wx (F3),(ev) +96: vfmaddsub132ph Vx,Hx,Wx (66),(ev) +97: vfmsubadd132ph Vx,Hx,Wx (66),(ev) +98: vfmadd132ph Vx,Hx,Wx (66),(ev) +99: vfmadd132sh Vx,Hx,Wx (66),(ev) +9a: vfmsub132ph Vx,Hx,Wx (66),(ev) +9b: vfmsub132sh Vx,Hx,Wx (66),(ev) +9c: vfnmadd132ph Vx,Hx,Wx (66),(ev) +9d: vfnmadd132sh Vx,Hx,Wx (66),(ev) +9e: vfnmsub132ph Vx,Hx,Wx (66),(ev) +9f: vfnmsub132sh Vx,Hx,Wx (66),(ev) +a6: vfmaddsub213ph Vx,Hx,Wx (66),(ev) +a7: vfmsubadd213ph Vx,Hx,Wx (66),(ev) +a8: vfmadd213ph Vx,Hx,Wx (66),(ev) +a9: vfmadd213sh Vx,Hx,Wx (66),(ev) +aa: vfmsub213ph Vx,Hx,Wx (66),(ev) +ab: vfmsub213sh Vx,Hx,Wx (66),(ev) +ac: vfnmadd213ph Vx,Hx,Wx (66),(ev) +ad: vfnmadd213sh Vx,Hx,Wx (66),(ev) +ae: vfnmsub213ph Vx,Hx,Wx (66),(ev) +af: vfnmsub213sh Vx,Hx,Wx (66),(ev) +b6: vfmaddsub231ph Vx,Hx,Wx (66),(ev) +b7: vfmsubadd231ph Vx,Hx,Wx (66),(ev) +b8: vfmadd231ph Vx,Hx,Wx (66),(ev) +b9: vfmadd231sh Vx,Hx,Wx (66),(ev) +ba: vfmsub231ph Vx,Hx,Wx (66),(ev) +bb: vfmsub231sh Vx,Hx,Wx (66),(ev) +bc: vfnmadd231ph Vx,Hx,Wx (66),(ev) +bd: vfnmadd231sh Vx,Hx,Wx (66),(ev) +be: vfnmsub231ph Vx,Hx,Wx (66),(ev) +bf: vfnmsub231sh Vx,Hx,Wx (66),(ev) +d6: vfcmulcph Vx,Hx,Wx (F2),(ev) | vfmulcph Vx,Hx,Wx (F3),(ev) +d7: vfcmulcsh Vx,Hx,Wx (F2),(ev) | vfmulcsh Vx,Hx,Wx (F3),(ev) EndTable GrpTable: Grp1 @@ -970,7 +1055,7 @@ GrpTable: Grp7 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) +5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) | CLUI (F3),(110),(11B) | SERIALIZE (000),(11B) | STUI (F3),(111),(11B) | TESTUI (F3)(101)(11B) | UIRET (F3),(100),(11B) | XRESLDTRK (F2),(000),(11B) | XSUSLDTRK (F2),(001),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -987,7 +1072,7 @@ GrpTable: Grp9 3: xrstors 4: xsavec 5: xsaves -6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | SENDUIPI Gq (F3) 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) EndTable diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt index 394ee57d58f2..ee819a402b69 100644 --- a/tools/memory-model/Documentation/explanation.txt +++ b/tools/memory-model/Documentation/explanation.txt @@ -485,6 +485,57 @@ have R ->po X. It wouldn't make sense for a computation to depend somehow on a value that doesn't get loaded from shared memory until later in the code! +Here's a trick question: When is a dependency not a dependency? Answer: +When it is purely syntactic rather than semantic. We say a dependency +between two accesses is purely syntactic if the second access doesn't +actually depend on the result of the first. Here is a trivial example: + + r1 = READ_ONCE(x); + WRITE_ONCE(y, r1 * 0); + +There appears to be a data dependency from the load of x to the store +of y, since the value to be stored is computed from the value that was +loaded. But in fact, the value stored does not really depend on +anything since it will always be 0. Thus the data dependency is only +syntactic (it appears to exist in the code) but not semantic (the +second access will always be the same, regardless of the value of the +first access). Given code like this, a compiler could simply discard +the value returned by the load from x, which would certainly destroy +any dependency. (The compiler is not permitted to eliminate entirely +the load generated for a READ_ONCE() -- that's one of the nice +properties of READ_ONCE() -- but it is allowed to ignore the load's +value.) + +It's natural to object that no one in their right mind would write +code like the above. However, macro expansions can easily give rise +to this sort of thing, in ways that often are not apparent to the +programmer. + +Another mechanism that can lead to purely syntactic dependencies is +related to the notion of "undefined behavior". Certain program +behaviors are called "undefined" in the C language specification, +which means that when they occur there are no guarantees at all about +the outcome. Consider the following example: + + int a[1]; + int i; + + r1 = READ_ONCE(i); + r2 = READ_ONCE(a[r1]); + +Access beyond the end or before the beginning of an array is one kind +of undefined behavior. Therefore the compiler doesn't have to worry +about what will happen if r1 is nonzero, and it can assume that r1 +will always be zero regardless of the value actually loaded from i. +(If the assumption turns out to be wrong the resulting behavior will +be undefined anyway, so the compiler doesn't care!) Thus the value +from the load can be discarded, breaking the address dependency. + +The LKMM is unaware that purely syntactic dependencies are different +from semantic dependencies and therefore mistakenly predicts that the +accesses in the two examples above will be ordered. This is another +example of how the compiler can undermine the memory model. Be warned. + THE READS-FROM RELATION: rf, rfi, and rfe ----------------------------------------- diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index c10ef78df050..479e769ca324 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -112,7 +112,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec const struct elf *elf = file->elf; struct insn insn; int x86_64, ret; - unsigned char op1, op2, + unsigned char op1, op2, op3, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0; @@ -139,6 +139,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec op1 = insn.opcode.bytes[0]; op2 = insn.opcode.bytes[1]; + op3 = insn.opcode.bytes[2]; if (insn.rex_prefix.nbytes) { rex = insn.rex_prefix.bytes[0]; @@ -491,6 +492,14 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec /* nopl/nopw */ *type = INSN_NOP; + } else if (op2 == 0x38 && op3 == 0xf8) { + if (insn.prefixes.nbytes == 1 && + insn.prefixes.bytes[0] == 0xf2) { + /* ENQCMD cannot be used in the kernel. */ + WARN("ENQCMD instruction at %s:%lx", sec->name, + offset); + } + } else if (op2 == 0xa0 || op2 == 0xa8) { /* push fs/gs */ diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c index 9708ae892061..ba429cadb18f 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -2197,6 +2197,924 @@ "3e f2 ff 25 78 56 34 12 \tnotrack bnd jmp *0x12345678",}, {{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", "3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmp *0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 58 cb \tvaddph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 58 cb \tvaddph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 58 cb \tvaddph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 58 cb \tvaddsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 48 c2 eb 12 \tvcmple_oqph %zmm3,%zmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%zmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 c2 eb 12 \tvcmple_oqph %xmm3,%xmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%xmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 28 c2 eb 12 \tvcmple_oqph %ymm3,%ymm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%ymm2,%k5",}, +{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6e 08 c2 eb 12 \tvcmple_oqsh %xmm3,%xmm2,%k5",}, +{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%eax,%ecx,8),%xmm2,%k5",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 2f ca \tvcomish %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 5b ca \tvcvtdq2ph %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 5b ca \tvcvtdq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 5b ca \tvcvtdq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x48, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 48 5a ca \tvcvtpd2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x08, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 08 5a ca \tvcvtpd2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x28, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 28 5a ca \tvcvtpd2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 5b ca \tvcvtph2dq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 5b ca \tvcvtph2dq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 5b ca \tvcvtph2dq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 5a ca \tvcvtph2pd %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 5a ca \tvcvtph2pd %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 5a ca \tvcvtph2pd %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "", +"62 f2 7d 48 13 ca \tvcvtph2ps %ymm2,%zmm1",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 13 ca \tvcvtph2psx %ymm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 13 ca \tvcvtph2psx %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 13 ca \tvcvtph2psx %xmm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7b ca \tvcvtph2qq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7b ca \tvcvtph2qq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7b ca \tvcvtph2qq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 79 ca \tvcvtph2udq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 79 ca \tvcvtph2udq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 79 ca \tvcvtph2udq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 79 ca \tvcvtph2uqq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 79 ca \tvcvtph2uqq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 79 ca \tvcvtph2uqq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 7d ca \tvcvtph2uw %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 7d ca \tvcvtph2uw %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 7d ca \tvcvtph2uw %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7d ca \tvcvtph2w %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7d ca \tvcvtph2w %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7d ca \tvcvtph2w %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0xd1, 0x12, }, 7, 0, "", "", +"62 f3 7d 48 1d d1 12 \tvcvtps2ph $0x12,%zmm2,%ymm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 7d 1d d1 12 \tvcvtps2ph $0x12,%ymm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 7d 1d d1 12 \tvcvtps2ph $0x12,%ymm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 1d ca \tvcvtps2phx %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 1d ca \tvcvtps2phx %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 1d ca \tvcvtps2phx %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 48 5b ca \tvcvtqq2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 08 5b ca \tvcvtqq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 28 5b ca \tvcvtqq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xef, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 ef 08 5a 8c c8 78 56 34 12 \tvcvtsd2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5a 8c c8 78 56 34 12 \tvcvtsh2sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf6, 0x6c, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6c 08 13 8c c8 78 56 34 12 \tvcvtsh2ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 79 c1 \tvcvtsh2usi %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0xc8, }, 6, 0, "", "", +"62 f5 6e 08 2a c8 \tvcvtsi2sh %eax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 1d cb \tvcvtss2sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 48 5b ca \tvcvttph2dq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 08 5b ca \tvcvttph2dq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 28 5b ca \tvcvttph2dq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7a ca \tvcvttph2qq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7a ca \tvcvttph2qq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7a ca \tvcvttph2qq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 78 ca \tvcvttph2udq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 78 ca \tvcvttph2udq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 78 ca \tvcvttph2udq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 78 ca \tvcvttph2uqq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 78 ca \tvcvttph2uqq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 78 ca \tvcvttph2uqq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 7c ca \tvcvttph2uw %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 7c ca \tvcvttph2uw %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 7c ca \tvcvttph2uw %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7c ca \tvcvttph2w %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7c ca \tvcvttph2w %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7c ca \tvcvttph2w %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 2c c1 \tvcvttsh2si %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 78 c1 \tvcvttsh2usi %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 48 7a ca \tvcvtudq2ph %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 08 7a ca \tvcvtudq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 28 7a ca \tvcvtudq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 48 7a ca \tvcvtuqq2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 08 7a ca \tvcvtuqq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 28 7a ca \tvcvtuqq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0xc8, }, 6, 0, "", "", +"62 f5 6e 08 7b c8 \tvcvtusi2sh %eax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 48 7d ca \tvcvtuw2ph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 08 7d ca \tvcvtuw2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 28 7d ca \tvcvtuw2ph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 48 7d ca \tvcvtw2ph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 08 7d ca \tvcvtw2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 28 7d ca \tvcvtw2ph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5e cb \tvdivph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5e cb \tvdivph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5e cb \tvdivph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5e cb \tvdivsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 48 56 cb \tvfcmaddcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 56 cb \tvfcmaddcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 28 56 cb \tvfcmaddcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 57 cb \tvfcmaddcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 48 d6 cb \tvfcmulcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 d6 cb \tvfcmulcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 28 d6 cb \tvfcmulcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 d7 cb \tvfcmulcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 98 cb \tvfmadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 98 cb \tvfmadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 98 cb \tvfmadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 99 cb \tvfmadd132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a8 cb \tvfmadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a8 cb \tvfmadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a8 cb \tvfmadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a9 cb \tvfmadd213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b8 cb \tvfmadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b8 cb \tvfmadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b8 cb \tvfmadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b9 cb \tvfmadd231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 48 56 cb \tvfmaddcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 56 cb \tvfmaddcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 28 56 cb \tvfmaddcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 57 cb \tvfmaddcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 96 cb \tvfmaddsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 96 cb \tvfmaddsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 96 cb \tvfmaddsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a6 cb \tvfmaddsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a6 cb \tvfmaddsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a6 cb \tvfmaddsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b6 cb \tvfmaddsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b6 cb \tvfmaddsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b6 cb \tvfmaddsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9a cb \tvfmsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9a cb \tvfmsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9a cb \tvfmsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9b cb \tvfmsub132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 aa cb \tvfmsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 aa cb \tvfmsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 aa cb \tvfmsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ab cb \tvfmsub213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ba cb \tvfmsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ba cb \tvfmsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ba cb \tvfmsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bb cb \tvfmsub231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 97 cb \tvfmsubadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 97 cb \tvfmsubadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 97 cb \tvfmsubadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a7 cb \tvfmsubadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a7 cb \tvfmsubadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a7 cb \tvfmsubadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b7 cb \tvfmsubadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b7 cb \tvfmsubadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b7 cb \tvfmsubadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 48 d6 cb \tvfmulcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 d6 cb \tvfmulcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 28 d6 cb \tvfmulcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 d7 cb \tvfmulcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9c cb \tvfnmadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9c cb \tvfnmadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9c cb \tvfnmadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9d cb \tvfnmadd132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ac cb \tvfnmadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ac cb \tvfnmadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ac cb \tvfnmadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ad cb \tvfnmadd213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 bc cb \tvfnmadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bc cb \tvfnmadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 bc cb \tvfnmadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bd cb \tvfnmadd231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9e cb \tvfnmsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9e cb \tvfnmsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9e cb \tvfnmsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9f cb \tvfnmsub132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ae cb \tvfnmsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ae cb \tvfnmsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ae cb \tvfnmsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 af cb \tvfnmsub213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 be cb \tvfnmsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 be cb \tvfnmsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 be cb \tvfnmsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bf cb \tvfnmsub231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 66 e9 12 \tvfpclassph $0x12,%zmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 66 e9 12 \tvfpclassph $0x12,%xmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 66 e9 12 \tvfpclassph $0x12,%ymm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 67 e9 12 \tvfpclasssh $0x12,%xmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%eax,%ecx,8),%k5",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 42 ca \tvgetexpph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 42 ca \tvgetexpph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 42 ca \tvgetexpph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 43 cb \tvgetexpsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 26 ca 12 \tvgetmantph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 26 ca 12 \tvgetmantph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 26 ca 12 \tvgetmantph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 27 cb 12 \tvgetmantsh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5f cb \tvmaxph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5f cb \tvmaxph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5f cb \tvmaxph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5f cb \tvmaxsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5d cb \tvminph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5d cb \tvminph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5d cb \tvminph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5d cb \tvminsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x10, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 10 cb \tvmovsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8, }, 6, 0, "", "", +"62 f5 7d 08 7e c8 \tvmovw %xmm1,%eax",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw %xmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8, }, 6, 0, "", "", +"62 f5 7d 08 6e c8 \tvmovw %eax,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 59 cb \tvmulph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 59 cb \tvmulph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 59 cb \tvmulph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 59 cb \tvmulsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 4c ca \tvrcpph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 4c ca \tvrcpph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 4c ca \tvrcpph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 4d cb \tvrcpsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 56 ca 12 \tvreduceph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 56 ca 12 \tvreduceph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 56 ca 12 \tvreduceph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 57 cb 12 \tvreducesh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 08 ca 12 \tvrndscaleph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 08 ca 12 \tvrndscaleph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 08 ca 12 \tvrndscaleph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 0a cb 12 \tvrndscalesh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 4e ca \tvrsqrtph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 4e ca \tvrsqrtph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 4e ca \tvrsqrtph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 4f cb \tvrsqrtsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 2c cb \tvscalefph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 2c cb \tvscalefph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 2c cb \tvscalefph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 2d cb \tvscalefsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 51 ca \tvsqrtph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 51 ca \tvsqrtph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 51 ca \tvsqrtph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 51 cb \tvsqrtsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5c cb \tvsubph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5c cb \tvsubph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5c cb \tvsubph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5c cb \tvsubsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 2e ca \tvucomish %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "", +"f3 0f 3a f0 c0 00 \threset $0x0",}, +{{0x0f, 0x01, 0xe8, }, 3, 0, "", "", +"0f 01 e8 \tserialize ",}, +{{0xf2, 0x0f, 0x01, 0xe9, }, 4, 0, "", "", +"f2 0f 01 e9 \txresldtrk ",}, +{{0xf2, 0x0f, 0x01, 0xe8, }, 4, 0, "", "", +"f2 0f 01 e8 \txsusldtrk ",}, {{0x0f, 0x01, 0xcf, }, 3, 0, "", "", "0f 01 cf \tencls ",}, {{0x0f, 0x01, 0xd7, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c index 5da17d41d302..3a47e98fec33 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -2459,6 +2459,1432 @@ "3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%rax,%rcx,8)",}, {{0x3e, 0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "jmp", "indirect", "3e f2 41 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%r8,%rcx,8)",}, +{{0xc4, 0xe2, 0x78, 0x49, 0x04, 0xc8, }, 6, 0, "", "", +"c4 e2 78 49 04 c8 \tldtilecfg (%rax,%rcx,8)",}, +{{0xc4, 0xc2, 0x78, 0x49, 0x04, 0xc8, }, 6, 0, "", "", +"c4 c2 78 49 04 c8 \tldtilecfg (%r8,%rcx,8)",}, +{{0xc4, 0xe2, 0x79, 0x49, 0x04, 0xc8, }, 6, 0, "", "", +"c4 e2 79 49 04 c8 \tsttilecfg (%rax,%rcx,8)",}, +{{0xc4, 0xc2, 0x79, 0x49, 0x04, 0xc8, }, 6, 0, "", "", +"c4 c2 79 49 04 c8 \tsttilecfg (%r8,%rcx,8)",}, +{{0xc4, 0xe2, 0x7a, 0x5c, 0xd1, }, 5, 0, "", "", +"c4 e2 7a 5c d1 \ttdpbf16ps %tmm0,%tmm1,%tmm2",}, +{{0xc4, 0xe2, 0x7b, 0x5e, 0xd1, }, 5, 0, "", "", +"c4 e2 7b 5e d1 \ttdpbssd %tmm0,%tmm1,%tmm2",}, +{{0xc4, 0xe2, 0x7a, 0x5e, 0xd1, }, 5, 0, "", "", +"c4 e2 7a 5e d1 \ttdpbsud %tmm0,%tmm1,%tmm2",}, +{{0xc4, 0xe2, 0x79, 0x5e, 0xd1, }, 5, 0, "", "", +"c4 e2 79 5e d1 \ttdpbusd %tmm0,%tmm1,%tmm2",}, +{{0xc4, 0xe2, 0x78, 0x5e, 0xd1, }, 5, 0, "", "", +"c4 e2 78 5e d1 \ttdpbuud %tmm0,%tmm1,%tmm2",}, +{{0xc4, 0xe2, 0x7b, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "", +"c4 e2 7b 4b 0c c8 \ttileloadd (%rax,%rcx,8),%tmm1",}, +{{0xc4, 0xc2, 0x7b, 0x4b, 0x14, 0xc8, }, 6, 0, "", "", +"c4 c2 7b 4b 14 c8 \ttileloadd (%r8,%rcx,8),%tmm2",}, +{{0xc4, 0xe2, 0x79, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "", +"c4 e2 79 4b 0c c8 \ttileloaddt1 (%rax,%rcx,8),%tmm1",}, +{{0xc4, 0xc2, 0x79, 0x4b, 0x14, 0xc8, }, 6, 0, "", "", +"c4 c2 79 4b 14 c8 \ttileloaddt1 (%r8,%rcx,8),%tmm2",}, +{{0xc4, 0xe2, 0x78, 0x49, 0xc0, }, 5, 0, "", "", +"c4 e2 78 49 c0 \ttilerelease ",}, +{{0xc4, 0xe2, 0x7a, 0x4b, 0x0c, 0xc8, }, 6, 0, "", "", +"c4 e2 7a 4b 0c c8 \ttilestored %tmm1,(%rax,%rcx,8)",}, +{{0xc4, 0xc2, 0x7a, 0x4b, 0x14, 0xc8, }, 6, 0, "", "", +"c4 c2 7a 4b 14 c8 \ttilestored %tmm2,(%r8,%rcx,8)",}, +{{0xc4, 0xe2, 0x7b, 0x49, 0xc0, }, 5, 0, "", "", +"c4 e2 7b 49 c0 \ttilezero %tmm0",}, +{{0xc4, 0xe2, 0x7b, 0x49, 0xf8, }, 5, 0, "", "", +"c4 e2 7b 49 f8 \ttilezero %tmm7",}, +{{0xf3, 0x0f, 0x01, 0xee, }, 4, 0, "", "", +"f3 0f 01 ee \tclui ",}, +{{0xf3, 0x0f, 0xc7, 0xf0, }, 4, 0, "", "", +"f3 0f c7 f0 \tsenduipi %rax",}, +{{0xf3, 0x41, 0x0f, 0xc7, 0xf0, }, 5, 0, "", "", +"f3 41 0f c7 f0 \tsenduipi %r8",}, +{{0xf3, 0x0f, 0x01, 0xef, }, 4, 0, "", "", +"f3 0f 01 ef \tstui ",}, +{{0xf3, 0x0f, 0x01, 0xed, }, 4, 0, "", "", +"f3 0f 01 ed \ttestui ",}, +{{0xf3, 0x0f, 0x01, 0xec, }, 4, 0, "", "", +"f3 0f 01 ec \tuiret ",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 58 cb \tvaddph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 58 cb \tvaddph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 58 cb \tvaddph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 58 8c c8 78 56 34 12 \tvaddph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 58 cb \tvaddsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x58, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 58 8c c8 78 56 34 12 \tvaddsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 48 c2 eb 12 \tvcmple_oqph %zmm3,%zmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%zmm2,%k5",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x48, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 48 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%zmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 c2 eb 12 \tvcmple_oqph %xmm3,%xmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%xmm2,%k5",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%xmm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6c 28 c2 eb 12 \tvcmple_oqph %ymm3,%ymm2,%k5",}, +{{0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%rax,%rcx,8),%ymm2,%k5",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x28, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 28 c2 ac c8 78 56 34 12 12 \tvcmple_oqph 0x12345678(%eax,%ecx,8),%ymm2,%k5",}, +{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xeb, 0x12, }, 7, 0, "", "", +"62 f3 6e 08 c2 eb 12 \tvcmple_oqsh %xmm3,%xmm2,%k5",}, +{{0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%rax,%rcx,8),%xmm2,%k5",}, +{{0x67, 0x62, 0xf3, 0x6e, 0x08, 0xc2, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6e 08 c2 ac c8 78 56 34 12 12 \tvcmple_oqsh 0x12345678(%eax,%ecx,8),%xmm2,%k5",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 2f ca \tvcomish %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x2f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 2f 8c c8 78 56 34 12 \tvcomish 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 5b ca \tvcvtdq2ph %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 5b 8c c8 78 56 34 12 \tvcvtdq2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 5b ca \tvcvtdq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 5b ca \tvcvtdq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x48, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 48 5a ca \tvcvtpd2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x08, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 08 5a ca \tvcvtpd2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xfd, 0x28, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 fd 28 5a ca \tvcvtpd2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 5b ca \tvcvtph2dq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 5b ca \tvcvtph2dq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 5b ca \tvcvtph2dq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 5b 8c c8 78 56 34 12 \tvcvtph2dq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 5a ca \tvcvtph2pd %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 5a ca \tvcvtph2pd %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 5a ca \tvcvtph2pd %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 5a 8c c8 78 56 34 12 \tvcvtph2pd 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "", +"62 f2 7d 48 13 ca \tvcvtph2ps %ymm2,%zmm1",}, +{{0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf2, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f2 7d 48 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 79 13 ca \tvcvtph2ps %xmm2,%xmm1",}, +{{0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0xc4, 0xe2, 0x79, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 79 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xca, }, 5, 0, "", "", +"c4 e2 7d 13 ca \tvcvtph2ps %xmm2,%ymm1",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0xc4, 0xe2, 0x7d, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"67 c4 e2 7d 13 8c c8 78 56 34 12 \tvcvtph2ps 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 13 ca \tvcvtph2psx %ymm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 48 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 13 ca \tvcvtph2psx %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 08 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 13 ca \tvcvtph2psx %xmm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 28 13 8c c8 78 56 34 12 \tvcvtph2psx 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7b ca \tvcvtph2qq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7b ca \tvcvtph2qq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7b ca \tvcvtph2qq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 7b 8c c8 78 56 34 12 \tvcvtph2qq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 79 ca \tvcvtph2udq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 79 ca \tvcvtph2udq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 79 ca \tvcvtph2udq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 79 8c c8 78 56 34 12 \tvcvtph2udq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 79 ca \tvcvtph2uqq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 79 ca \tvcvtph2uqq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 79 ca \tvcvtph2uqq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x79, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 79 8c c8 78 56 34 12 \tvcvtph2uqq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 7d ca \tvcvtph2uw %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 7d ca \tvcvtph2uw %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 7d ca \tvcvtph2uw %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 7d 8c c8 78 56 34 12 \tvcvtph2uw 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7d ca \tvcvtph2w %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7d ca \tvcvtph2w %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7d ca \tvcvtph2w %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 7d 8c c8 78 56 34 12 \tvcvtph2w 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0x62, 0xf3, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7d 48 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%zmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf3, 0x7d, 0x48, 0x1d, 0xd1, 0x12, }, 7, 0, "", "", +"62 f3 7d 48 1d d1 12 \tvcvtps2ph $0x12,%zmm2,%ymm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0xc4, 0xe3, 0x7d, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"67 c4 e3 7d 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm1,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0xc4, 0xe3, 0x79, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"67 c4 e3 79 1d 8c c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm1,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 7d 1d d1 12 \tvcvtps2ph $0x12,%ymm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 7d 1d d1 12 \tvcvtps2ph $0x12,%ymm2,%xmm1",}, +{{0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0xc4, 0xe3, 0x7d, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"67 c4 e3 7d 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%ymm2,0x12345678(%eax,%ecx,8)",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0xd1, 0x12, }, 6, 0, "", "", +"c4 e3 79 1d d1 12 \tvcvtps2ph $0x12,%xmm2,%xmm1",}, +{{0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 11, 0, "", "", +"c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0xc4, 0xe3, 0x79, 0x1d, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"67 c4 e3 79 1d 94 c8 78 56 34 12 12 \tvcvtps2ph $0x12,%xmm2,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 1d ca \tvcvtps2phx %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 1d 8c c8 78 56 34 12 \tvcvtps2phx 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 1d ca \tvcvtps2phx %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x1d, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 1d ca \tvcvtps2phx %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 48 5b ca \tvcvtqq2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 08 5b ca \tvcvtqq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xfc, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 fc 28 5b ca \tvcvtqq2ph %ymm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0xef, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 ef 08 5a 8c c8 78 56 34 12 \tvcvtsd2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 5a 8c c8 78 56 34 12 \tvcvtsh2sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%eax",}, +{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x2d, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 fe 08 2d 84 c8 78 56 34 12 \tvcvtsh2si 0x12345678(%eax,%ecx,8),%rax",}, +{{0x67, 0x62, 0xf6, 0x6c, 0x08, 0x13, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6c 08 13 8c c8 78 56 34 12 \tvcvtsh2ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 79 c1 \tvcvtsh2usi %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%rax,%rcx,8),%eax",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x79, 0xc1, }, 6, 0, "", "", +"62 f5 fe 08 79 c1 \tvcvtsh2usi %xmm1,%rax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 fe 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%rax,%rcx,8),%rax",}, +{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x79, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 fe 08 79 84 c8 78 56 34 12 \tvcvtsh2usi 0x12345678(%eax,%ecx,8),%rax",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0xc8, }, 6, 0, "", "", +"62 f5 6e 08 2a c8 \tvcvtsi2sh %eax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0xee, 0x08, 0x2a, 0xc8, }, 6, 0, "", "", +"62 f5 ee 08 2a c8 \tvcvtsi2sh %rax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x2a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 2a 8c c8 78 56 34 12 \tvcvtsi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 1d cb \tvcvtss2sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x1d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 1d 8c c8 78 56 34 12 \tvcvtss2sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 48 5b ca \tvcvttph2dq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x48, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 48 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 08 5b ca \tvcvttph2dq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0xca, }, 6, 0, "", "", +"62 f5 7e 28 5b ca \tvcvttph2dq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x28, 0x5b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 28 5b 8c c8 78 56 34 12 \tvcvttph2dq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7a ca \tvcvttph2qq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7a ca \tvcvttph2qq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7a ca \tvcvttph2qq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 7a 8c c8 78 56 34 12 \tvcvttph2qq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 78 ca \tvcvttph2udq %ymm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 78 ca \tvcvttph2udq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 78 ca \tvcvttph2udq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 78 8c c8 78 56 34 12 \tvcvttph2udq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 78 ca \tvcvttph2uqq %xmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 78 ca \tvcvttph2uqq %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 78 ca \tvcvttph2uqq %xmm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x78, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 78 8c c8 78 56 34 12 \tvcvttph2uqq 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 7c ca \tvcvttph2uw %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 7c ca \tvcvttph2uw %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 7c ca \tvcvttph2uw %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 7c 8c c8 78 56 34 12 \tvcvttph2uw 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 48 7c ca \tvcvttph2w %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x48, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 48 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 08 7c ca \tvcvttph2w %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0xca, }, 6, 0, "", "", +"62 f5 7d 28 7c ca \tvcvttph2w %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x28, 0x7c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 28 7c 8c c8 78 56 34 12 \tvcvttph2w 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 2c c1 \tvcvttsh2si %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%rax,%rcx,8),%eax",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x2c, 0xc1, }, 6, 0, "", "", +"62 f5 fe 08 2c c1 \tvcvttsh2si %xmm1,%rax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 fe 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%rax,%rcx,8),%rax",}, +{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x2c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 fe 08 2c 84 c8 78 56 34 12 \tvcvttsh2si 0x12345678(%eax,%ecx,8),%rax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0xc1, }, 6, 0, "", "", +"62 f5 7e 08 78 c1 \tvcvttsh2usi %xmm1,%eax",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%rax,%rcx,8),%eax",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%eax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x78, 0xc1, }, 6, 0, "", "", +"62 f5 fe 08 78 c1 \tvcvttsh2usi %xmm1,%rax",}, +{{0x62, 0xf5, 0xfe, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 fe 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%rax,%rcx,8),%rax",}, +{{0x67, 0x62, 0xf5, 0xfe, 0x08, 0x78, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 fe 08 78 84 c8 78 56 34 12 \tvcvttsh2usi 0x12345678(%eax,%ecx,8),%rax",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 48 7a ca \tvcvtudq2ph %zmm2,%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7f, 0x48, 0x7a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7f 48 7a 8c c8 78 56 34 12 \tvcvtudq2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 08 7a ca \tvcvtudq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 7f 28 7a ca \tvcvtudq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x48, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 48 7a ca \tvcvtuqq2ph %zmm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x08, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 08 7a ca \tvcvtuqq2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0xff, 0x28, 0x7a, 0xca, }, 6, 0, "", "", +"62 f5 ff 28 7a ca \tvcvtuqq2ph %ymm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0xc8, }, 6, 0, "", "", +"62 f5 6e 08 7b c8 \tvcvtusi2sh %eax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0xee, 0x08, 0x7b, 0xc8, }, 6, 0, "", "", +"62 f5 ee 08 7b c8 \tvcvtusi2sh %rax,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x7b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 7b 8c c8 78 56 34 12 \tvcvtusi2shl 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 48 7d ca \tvcvtuw2ph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7f, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7f 48 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 08 7d ca \tvcvtuw2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7f, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7f 08 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7f 28 7d ca \tvcvtuw2ph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7f, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7f 28 7d 8c c8 78 56 34 12 \tvcvtuw2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 48 7d ca \tvcvtw2ph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x48, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 48 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 08 7d ca \tvcvtw2ph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0xca, }, 6, 0, "", "", +"62 f5 7e 28 7d ca \tvcvtw2ph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x28, 0x7d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 28 7d 8c c8 78 56 34 12 \tvcvtw2ph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5e cb \tvdivph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5e cb \tvdivph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5e cb \tvdivph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 5e 8c c8 78 56 34 12 \tvdivph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5e cb \tvdivsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 5e 8c c8 78 56 34 12 \tvdivsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 48 56 cb \tvfcmaddcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 48 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 56 cb \tvfcmaddcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 08 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6f 28 56 cb \tvfcmaddcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 28 56 8c c8 78 56 34 12 \tvfcmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 57 cb \tvfcmaddcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 08 57 8c c8 78 56 34 12 \tvfcmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 48 d6 cb \tvfcmulcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 48 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 d6 cb \tvfcmulcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 08 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6f 28 d6 cb \tvfcmulcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 28 d6 8c c8 78 56 34 12 \tvfcmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0xcb, }, 6, 0, "", "", +"62 f6 6f 08 d7 cb \tvfcmulcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6f, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6f 08 d7 8c c8 78 56 34 12 \tvfcmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 98 cb \tvfmadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 98 cb \tvfmadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 98 cb \tvfmadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x98, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 98 8c c8 78 56 34 12 \tvfmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 99 cb \tvfmadd132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x99, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 99 8c c8 78 56 34 12 \tvfmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a8 cb \tvfmadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a8 cb \tvfmadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a8 cb \tvfmadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 a8 8c c8 78 56 34 12 \tvfmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a9 cb \tvfmadd213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 a9 8c c8 78 56 34 12 \tvfmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b8 cb \tvfmadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b8 cb \tvfmadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b8 cb \tvfmadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb8, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 b8 8c c8 78 56 34 12 \tvfmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b9 cb \tvfmadd231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb9, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 b9 8c c8 78 56 34 12 \tvfmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 48 56 cb \tvfmaddcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 48 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 56 cb \tvfmaddcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 08 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0xcb, }, 6, 0, "", "", +"62 f6 6e 28 56 cb \tvfmaddcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 28 56 8c c8 78 56 34 12 \tvfmaddcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 57 cb \tvfmaddcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 08 57 8c c8 78 56 34 12 \tvfmaddcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 96 cb \tvfmaddsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 96 cb \tvfmaddsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 96 cb \tvfmaddsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x96, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 96 8c c8 78 56 34 12 \tvfmaddsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a6 cb \tvfmaddsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a6 cb \tvfmaddsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a6 cb \tvfmaddsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 a6 8c c8 78 56 34 12 \tvfmaddsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b6 cb \tvfmaddsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b6 cb \tvfmaddsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b6 cb \tvfmaddsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 b6 8c c8 78 56 34 12 \tvfmaddsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9a cb \tvfmsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9a cb \tvfmsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9a cb \tvfmsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 9a 8c c8 78 56 34 12 \tvfmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9b cb \tvfmsub132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9b, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9b 8c c8 78 56 34 12 \tvfmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 aa cb \tvfmsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 aa cb \tvfmsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 aa cb \tvfmsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xaa, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 aa 8c c8 78 56 34 12 \tvfmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ab cb \tvfmsub213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xab, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 ab 8c c8 78 56 34 12 \tvfmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ba cb \tvfmsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ba cb \tvfmsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ba cb \tvfmsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xba, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 ba 8c c8 78 56 34 12 \tvfmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bb cb \tvfmsub231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 bb 8c c8 78 56 34 12 \tvfmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 97 cb \tvfmsubadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 97 cb \tvfmsubadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 97 cb \tvfmsubadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x97, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 97 8c c8 78 56 34 12 \tvfmsubadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 a7 cb \tvfmsubadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 a7 cb \tvfmsubadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 a7 cb \tvfmsubadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xa7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 a7 8c c8 78 56 34 12 \tvfmsubadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 b7 cb \tvfmsubadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 b7 cb \tvfmsubadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 b7 cb \tvfmsubadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xb7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 b7 8c c8 78 56 34 12 \tvfmsubadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 48 d6 cb \tvfmulcph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x48, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 48 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 d6 cb \tvfmulcph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 08 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0xcb, }, 6, 0, "", "", +"62 f6 6e 28 d6 cb \tvfmulcph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x28, 0xd6, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 28 d6 8c c8 78 56 34 12 \tvfmulcph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0xcb, }, 6, 0, "", "", +"62 f6 6e 08 d7 cb \tvfmulcsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6e, 0x08, 0xd7, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6e 08 d7 8c c8 78 56 34 12 \tvfmulcsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9c cb \tvfnmadd132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9c cb \tvfnmadd132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9c cb \tvfnmadd132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 9c 8c c8 78 56 34 12 \tvfnmadd132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9d cb \tvfnmadd132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9d 8c c8 78 56 34 12 \tvfnmadd132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ac cb \tvfnmadd213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ac cb \tvfnmadd213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ac cb \tvfnmadd213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xac, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 ac 8c c8 78 56 34 12 \tvfnmadd213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ad cb \tvfnmadd213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xad, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 ad 8c c8 78 56 34 12 \tvfnmadd213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 bc cb \tvfnmadd231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bc cb \tvfnmadd231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 bc cb \tvfnmadd231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xbc, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 bc 8c c8 78 56 34 12 \tvfnmadd231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bd cb \tvfnmadd231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbd, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 bd 8c c8 78 56 34 12 \tvfnmadd231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 9e cb \tvfnmsub132ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9e cb \tvfnmsub132ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 9e cb \tvfnmsub132ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x9e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 9e 8c c8 78 56 34 12 \tvfnmsub132ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 9f cb \tvfnmsub132sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x9f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 9f 8c c8 78 56 34 12 \tvfnmsub132sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 ae cb \tvfnmsub213ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 ae cb \tvfnmsub213ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 ae cb \tvfnmsub213ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xae, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 ae 8c c8 78 56 34 12 \tvfnmsub213ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 af cb \tvfnmsub213sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xaf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 af 8c c8 78 56 34 12 \tvfnmsub213sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 be cb \tvfnmsub231ph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 be cb \tvfnmsub231ph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 be cb \tvfnmsub231ph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0xbe, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 be 8c c8 78 56 34 12 \tvfnmsub231ph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 bf cb \tvfnmsub231sh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0xbf, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 bf 8c c8 78 56 34 12 \tvfnmsub231sh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 66 e9 12 \tvfpclassph $0x12,%zmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 66 e9 12 \tvfpclassph $0x12,%xmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x66, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 66 e9 12 \tvfpclassph $0x12,%ymm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xe9, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 67 e9 12 \tvfpclasssh $0x12,%xmm1,%k5",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%rax,%rcx,8),%k5",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x67, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 08 67 ac c8 78 56 34 12 12 \tvfpclasssh $0x12,0x12345678(%eax,%ecx,8),%k5",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 42 ca \tvgetexpph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 48 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 42 ca \tvgetexpph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 08 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 42 ca \tvgetexpph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x42, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 28 42 8c c8 78 56 34 12 \tvgetexpph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 43 cb \tvgetexpsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x43, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 43 8c c8 78 56 34 12 \tvgetexpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 26 ca 12 \tvgetmantph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 48 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 26 ca 12 \tvgetmantph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 08 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 26 ca 12 \tvgetmantph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x26, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 28 26 8c c8 78 56 34 12 12 \tvgetmantph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 27 cb 12 \tvgetmantsh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x27, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 08 27 8c c8 78 56 34 12 12 \tvgetmantsh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5f cb \tvmaxph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5f cb \tvmaxph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5f cb \tvmaxph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 5f 8c c8 78 56 34 12 \tvmaxph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5f cb \tvmaxsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 5f 8c c8 78 56 34 12 \tvmaxsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5d cb \tvminph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5d cb \tvminph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5d cb \tvminph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 5d 8c c8 78 56 34 12 \tvminph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5d cb \tvminsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 5d 8c c8 78 56 34 12 \tvminsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x11, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 11 8c c8 78 56 34 12 \tvmovsh %xmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7e, 0x08, 0x10, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7e 08 10 8c c8 78 56 34 12 \tvmovsh 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x10, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 10 cb \tvmovsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0xc8, }, 6, 0, "", "", +"62 f5 7d 08 7e c8 \tvmovw %xmm1,%eax",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw %xmm1,0x12345678(%rax,%rcx,8)",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x7e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 7e 8c c8 78 56 34 12 \tvmovw %xmm1,0x12345678(%eax,%ecx,8)",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0xc8, }, 6, 0, "", "", +"62 f5 7d 08 6e c8 \tvmovw %eax,%xmm1",}, +{{0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7d, 0x08, 0x6e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7d 08 6e 8c c8 78 56 34 12 \tvmovw 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 59 cb \tvmulph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 59 cb \tvmulph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 59 cb \tvmulph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 59 8c c8 78 56 34 12 \tvmulph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 59 cb \tvmulsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x59, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 59 8c c8 78 56 34 12 \tvmulsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 4c ca \tvrcpph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 48 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 4c ca \tvrcpph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 08 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 4c ca \tvrcpph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x4c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 28 4c 8c c8 78 56 34 12 \tvrcpph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 4d cb \tvrcpsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x4d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 4d 8c c8 78 56 34 12 \tvrcpsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 56 ca 12 \tvreduceph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 48 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 56 ca 12 \tvreduceph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 08 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 56 ca 12 \tvreduceph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x56, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 28 56 8c c8 78 56 34 12 12 \tvreduceph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 57 cb 12 \tvreducesh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x57, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 08 57 8c c8 78 56 34 12 12 \tvreducesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 48 08 ca 12 \tvrndscaleph $0x12,%zmm2,%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x48, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 48 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 08 08 ca 12 \tvrndscaleph $0x12,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x08, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 08 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0xca, 0x12, }, 7, 0, "", "", +"62 f3 7c 28 08 ca 12 \tvrndscaleph $0x12,%ymm2,%ymm1",}, +{{0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf3, 0x7c, 0x28, 0x08, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 7c 28 08 8c c8 78 56 34 12 12 \tvrndscaleph $0x12,0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0xcb, 0x12, }, 7, 0, "", "", +"62 f3 6c 08 0a cb 12 \tvrndscalesh $0x12,%xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 12, 0, "", "", +"62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf3, 0x6c, 0x08, 0x0a, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x12, }, 13, 0, "", "", +"67 62 f3 6c 08 0a 8c c8 78 56 34 12 12 \tvrndscalesh $0x12,0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 48 4e ca \tvrsqrtph %zmm2,%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x48, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 48 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 08 4e ca \tvrsqrtph %xmm2,%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x08, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 08 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0xca, }, 6, 0, "", "", +"62 f6 7d 28 4e ca \tvrsqrtph %ymm2,%ymm1",}, +{{0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf6, 0x7d, 0x28, 0x4e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 7d 28 4e 8c c8 78 56 34 12 \tvrsqrtph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 4f cb \tvrsqrtsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x4f, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 4f 8c c8 78 56 34 12 \tvrsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 48 2c cb \tvscalefph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x48, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 48 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 2c cb \tvscalefph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0xcb, }, 6, 0, "", "", +"62 f6 6d 28 2c cb \tvscalefph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x28, 0x2c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 28 2c 8c c8 78 56 34 12 \tvscalefph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0xcb, }, 6, 0, "", "", +"62 f6 6d 08 2d cb \tvscalefsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf6, 0x6d, 0x08, 0x2d, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f6 6d 08 2d 8c c8 78 56 34 12 \tvscalefsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 48 51 ca \tvsqrtph %zmm2,%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%zmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x48, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 48 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%zmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 51 ca \tvsqrtph %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0xca, }, 6, 0, "", "", +"62 f5 7c 28 51 ca \tvsqrtph %ymm2,%ymm1",}, +{{0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%rax,%rcx,8),%ymm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x28, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 28 51 8c c8 78 56 34 12 \tvsqrtph 0x12345678(%eax,%ecx,8),%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 51 cb \tvsqrtsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x51, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 51 8c c8 78 56 34 12 \tvsqrtsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 48 5c cb \tvsubph %zmm3,%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%zmm2,%zmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x48, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 48 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%zmm2,%zmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 08 5c cb \tvsubph %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 08 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6c 28 5c cb \tvsubph %ymm3,%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%rax,%rcx,8),%ymm2,%ymm1",}, +{{0x67, 0x62, 0xf5, 0x6c, 0x28, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6c 28 5c 8c c8 78 56 34 12 \tvsubph 0x12345678(%eax,%ecx,8),%ymm2,%ymm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0xcb, }, 6, 0, "", "", +"62 f5 6e 08 5c cb \tvsubsh %xmm3,%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%rax,%rcx,8),%xmm2,%xmm1",}, +{{0x67, 0x62, 0xf5, 0x6e, 0x08, 0x5c, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 6e 08 5c 8c c8 78 56 34 12 \tvsubsh 0x12345678(%eax,%ecx,8),%xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0xca, }, 6, 0, "", "", +"62 f5 7c 08 2e ca \tvucomish %xmm2,%xmm1",}, +{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%rax,%rcx,8),%xmm1",}, +{{0x67, 0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 12, 0, "", "", +"67 62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%eax,%ecx,8),%xmm1",}, +{{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "", +"f3 0f 3a f0 c0 00 \threset $0x0",}, +{{0x0f, 0x01, 0xe8, }, 3, 0, "", "", +"0f 01 e8 \tserialize ",}, +{{0xf2, 0x0f, 0x01, 0xe9, }, 4, 0, "", "", +"f2 0f 01 e9 \txresldtrk ",}, +{{0xf2, 0x0f, 0x01, 0xe8, }, 4, 0, "", "", +"f2 0f 01 e8 \txsusldtrk ",}, {{0x0f, 0x01, 0xcf, }, 3, 0, "", "", "0f 01 cf \tencls ",}, {{0x0f, 0x01, 0xd7, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c index c3808e94c46e..a391464c8dee 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ -1910,6 +1910,724 @@ int main(void) asm volatile("notrack bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ asm volatile("notrack bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + /* AMX */ + + asm volatile("ldtilecfg (%rax,%rcx,8)"); + asm volatile("ldtilecfg (%r8,%rcx,8)"); + asm volatile("sttilecfg (%rax,%rcx,8)"); + asm volatile("sttilecfg (%r8,%rcx,8)"); + asm volatile("tdpbf16ps %tmm0, %tmm1, %tmm2"); + asm volatile("tdpbssd %tmm0, %tmm1, %tmm2"); + asm volatile("tdpbsud %tmm0, %tmm1, %tmm2"); + asm volatile("tdpbusd %tmm0, %tmm1, %tmm2"); + asm volatile("tdpbuud %tmm0, %tmm1, %tmm2"); + asm volatile("tileloadd (%rax,%rcx,8), %tmm1"); + asm volatile("tileloadd (%r8,%rcx,8), %tmm2"); + asm volatile("tileloaddt1 (%rax,%rcx,8), %tmm1"); + asm volatile("tileloaddt1 (%r8,%rcx,8), %tmm2"); + asm volatile("tilerelease"); + asm volatile("tilestored %tmm1, (%rax,%rcx,8)"); + asm volatile("tilestored %tmm2, (%r8,%rcx,8)"); + asm volatile("tilezero %tmm0"); + asm volatile("tilezero %tmm7"); + + /* User Interrupt */ + + asm volatile("clui"); + asm volatile("senduipi %rax"); + asm volatile("senduipi %r8"); + asm volatile("stui"); + asm volatile("testui"); + asm volatile("uiret"); + + /* AVX512-FP16 */ + + asm volatile("vaddph %zmm3, %zmm2, %zmm1"); + asm volatile("vaddph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vaddph %xmm3, %xmm2, %xmm1"); + asm volatile("vaddph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vaddph %ymm3, %ymm2, %ymm1"); + asm volatile("vaddph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vaddsh %xmm3, %xmm2, %xmm1"); + asm volatile("vaddsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vaddsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcmpph $0x12, %zmm3, %zmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %zmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %zmm2, %k5"); + asm volatile("vcmpph $0x12, %xmm3, %xmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5"); + asm volatile("vcmpph $0x12, %ymm3, %ymm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%rax,%rcx,8), %ymm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %ymm2, %k5"); + asm volatile("vcmpsh $0x12, %xmm3, %xmm2, %k5"); + asm volatile("vcmpsh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %k5"); + asm volatile("vcmpsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5"); + asm volatile("vcomish %xmm2, %xmm1"); + asm volatile("vcomish 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcomish 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtdq2ph %zmm2, %ymm1"); + asm volatile("vcvtdq2ph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtdq2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtdq2ph %xmm2, %xmm1"); + asm volatile("vcvtdq2ph %ymm2, %xmm1"); + asm volatile("vcvtpd2ph %zmm2, %xmm1"); + asm volatile("vcvtpd2ph %xmm2, %xmm1"); + asm volatile("vcvtpd2ph %ymm2, %xmm1"); + asm volatile("vcvtph2dq %ymm2, %zmm1"); + asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2dq %xmm2, %xmm1"); + asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2dq %xmm2, %ymm1"); + asm volatile("vcvtph2dq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2pd %xmm2, %zmm1"); + asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2pd %xmm2, %xmm1"); + asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2pd %xmm2, %ymm1"); + asm volatile("vcvtph2pd 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2ps %ymm2, %zmm1"); + asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2ps %xmm2, %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2ps %xmm2, %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2ps %xmm2, %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2ps %xmm2, %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2psx %ymm2, %zmm1"); + asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2psx %xmm2, %xmm1"); + asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2psx %xmm2, %ymm1"); + asm volatile("vcvtph2psx 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2qq %xmm2, %zmm1"); + asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2qq %xmm2, %xmm1"); + asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2qq %xmm2, %ymm1"); + asm volatile("vcvtph2qq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2udq %ymm2, %zmm1"); + asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2udq %xmm2, %xmm1"); + asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2udq %xmm2, %ymm1"); + asm volatile("vcvtph2udq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2uqq %xmm2, %zmm1"); + asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2uqq %xmm2, %xmm1"); + asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2uqq %xmm2, %ymm1"); + asm volatile("vcvtph2uqq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2uw %zmm2, %zmm1"); + asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2uw %xmm2, %xmm1"); + asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2uw %ymm2, %ymm1"); + asm volatile("vcvtph2uw 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2w %zmm2, %zmm1"); + asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2w %xmm2, %xmm1"); + asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2w %ymm2, %ymm1"); + asm volatile("vcvtph2w 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%rax,%rcx,8)"); + asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %zmm2, %ymm1"); + asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%rax,%rcx,8)"); + asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%rax,%rcx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%rax,%rcx,8)"); + asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%rax,%rcx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2phx %zmm2, %ymm1"); + asm volatile("vcvtps2phx 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtps2phx 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtps2phx %xmm2, %xmm1"); + asm volatile("vcvtps2phx %ymm2, %xmm1"); + asm volatile("vcvtqq2ph %zmm2, %xmm1"); + asm volatile("vcvtqq2ph %xmm2, %xmm1"); + asm volatile("vcvtqq2ph %ymm2, %xmm1"); + asm volatile("vcvtsd2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2sd 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %rax"); + asm volatile("vcvtsh2ss 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2usi %xmm1, %eax"); + asm volatile("vcvtsh2usi 0x12345678(%rax,%rcx,8), %eax"); + asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvtsh2usi %xmm1, %rax"); + asm volatile("vcvtsh2usi 0x12345678(%rax,%rcx,8), %rax"); + asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %rax"); + asm volatile("vcvtsi2sh %eax, %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsi2sh %rax, %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtss2sh %xmm3, %xmm2, %xmm1"); + asm volatile("vcvtss2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vcvtss2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvttph2dq %ymm2, %zmm1"); + asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2dq %xmm2, %xmm1"); + asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2dq %xmm2, %ymm1"); + asm volatile("vcvttph2dq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2qq %xmm2, %zmm1"); + asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2qq %xmm2, %xmm1"); + asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2qq %xmm2, %ymm1"); + asm volatile("vcvttph2qq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2udq %ymm2, %zmm1"); + asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2udq %xmm2, %xmm1"); + asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2udq %xmm2, %ymm1"); + asm volatile("vcvttph2udq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2uqq %xmm2, %zmm1"); + asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2uqq %xmm2, %xmm1"); + asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2uqq %xmm2, %ymm1"); + asm volatile("vcvttph2uqq 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2uw %zmm2, %zmm1"); + asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2uw %xmm2, %xmm1"); + asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2uw %ymm2, %ymm1"); + asm volatile("vcvttph2uw 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2w %zmm2, %zmm1"); + asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2w %xmm2, %xmm1"); + asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2w %ymm2, %ymm1"); + asm volatile("vcvttph2w 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttsh2si %xmm1, %eax"); + asm volatile("vcvttsh2si 0x12345678(%rax,%rcx,8), %eax"); + asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvttsh2si %xmm1, %rax"); + asm volatile("vcvttsh2si 0x12345678(%rax,%rcx,8), %rax"); + asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %rax"); + asm volatile("vcvttsh2usi %xmm1, %eax"); + asm volatile("vcvttsh2usi 0x12345678(%rax,%rcx,8), %eax"); + asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvttsh2usi %xmm1, %rax"); + asm volatile("vcvttsh2usi 0x12345678(%rax,%rcx,8), %rax"); + asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %rax"); + asm volatile("vcvtudq2ph %zmm2, %ymm1"); + asm volatile("vcvtudq2ph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtudq2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtudq2ph %xmm2, %xmm1"); + asm volatile("vcvtudq2ph %ymm2, %xmm1"); + asm volatile("vcvtuqq2ph %zmm2, %xmm1"); + asm volatile("vcvtuqq2ph %xmm2, %xmm1"); + asm volatile("vcvtuqq2ph %ymm2, %xmm1"); + asm volatile("vcvtusi2sh %eax, %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtusi2sh %rax, %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtuw2ph %zmm2, %zmm1"); + asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtuw2ph %xmm2, %xmm1"); + asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtuw2ph %ymm2, %ymm1"); + asm volatile("vcvtuw2ph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtw2ph %zmm2, %zmm1"); + asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtw2ph %xmm2, %xmm1"); + asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtw2ph %ymm2, %ymm1"); + asm volatile("vcvtw2ph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vdivph %zmm3, %zmm2, %zmm1"); + asm volatile("vdivph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vdivph %xmm3, %xmm2, %xmm1"); + asm volatile("vdivph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vdivph %ymm3, %ymm2, %ymm1"); + asm volatile("vdivph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vdivsh %xmm3, %xmm2, %xmm1"); + asm volatile("vdivsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vdivsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfcmaddcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfcmaddcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfcmaddcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmaddcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfcmulcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfcmulcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfcmulcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmulcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmulcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmulcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmulcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmulcph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmulcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmulcsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub132ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub132sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub213ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub213sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub231ph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub231sh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfpclassph $0x12, %zmm1, %k5"); + asm volatile("vfpclassph $0x12, %xmm1, %k5"); + asm volatile("vfpclassph $0x12, %ymm1, %k5"); + asm volatile("vfpclasssh $0x12, %xmm1, %k5"); + asm volatile("vfpclasssh $0x12, 0x12345678(%rax,%rcx,8), %k5"); + asm volatile("vfpclasssh $0x12, 0x12345678(%eax,%ecx,8), %k5"); + asm volatile("vgetexpph %zmm2, %zmm1"); + asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vgetexpph %xmm2, %xmm1"); + asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vgetexpph %ymm2, %ymm1"); + asm volatile("vgetexpph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vgetexpsh %xmm3, %xmm2, %xmm1"); + asm volatile("vgetexpsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vgetexpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vgetmantph $0x12, %zmm2, %zmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vgetmantph $0x12, %xmm2, %xmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vgetmantph $0x12, %ymm2, %ymm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vgetmantsh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vgetmantsh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vgetmantsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmaxph %zmm3, %zmm2, %zmm1"); + asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vmaxph %xmm3, %xmm2, %xmm1"); + asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmaxph %ymm3, %ymm2, %ymm1"); + asm volatile("vmaxph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vmaxsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmaxsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vmaxsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vminph %zmm3, %zmm2, %zmm1"); + asm volatile("vminph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vminph %xmm3, %xmm2, %xmm1"); + asm volatile("vminph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vminph %ymm3, %ymm2, %ymm1"); + asm volatile("vminph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vminsh %xmm3, %xmm2, %xmm1"); + asm volatile("vminsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vminsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmovsh %xmm1, 0x12345678(%rax,%rcx,8)"); + asm volatile("vmovsh %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vmovsh 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vmovsh 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vmovsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmovw %xmm1, %eax"); + asm volatile("vmovw %xmm1, 0x12345678(%rax,%rcx,8)"); + asm volatile("vmovw %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vmovw %eax, %xmm1"); + asm volatile("vmovw 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vmovw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vmulph %zmm3, %zmm2, %zmm1"); + asm volatile("vmulph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vmulph %xmm3, %xmm2, %xmm1"); + asm volatile("vmulph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmulph %ymm3, %ymm2, %ymm1"); + asm volatile("vmulph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vmulsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmulsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vmulsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrcpph %zmm2, %zmm1"); + asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrcpph %xmm2, %xmm1"); + asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrcpph %ymm2, %ymm1"); + asm volatile("vrcpph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrcpsh %xmm3, %xmm2, %xmm1"); + asm volatile("vrcpsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vrcpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vreduceph $0x12, %zmm2, %zmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vreduceph $0x12, %xmm2, %xmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vreduceph $0x12, %ymm2, %ymm1"); + asm volatile("vreduceph $0x12, 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vreducesh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vreducesh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vreducesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrndscaleph $0x12, %zmm2, %zmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrndscaleph $0x12, %xmm2, %xmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrndscaleph $0x12, %ymm2, %ymm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrndscalesh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vrndscalesh $0x12, 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vrndscalesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrsqrtph %zmm2, %zmm1"); + asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrsqrtph %xmm2, %xmm1"); + asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrsqrtph %ymm2, %ymm1"); + asm volatile("vrsqrtph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrsqrtsh %xmm3, %xmm2, %xmm1"); + asm volatile("vrsqrtsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vrsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vscalefph %zmm3, %zmm2, %zmm1"); + asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vscalefph %xmm3, %xmm2, %xmm1"); + asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vscalefph %ymm3, %ymm2, %ymm1"); + asm volatile("vscalefph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vscalefsh %xmm3, %xmm2, %xmm1"); + asm volatile("vscalefsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vscalefsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsqrtph %zmm2, %zmm1"); + asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %zmm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vsqrtph %xmm2, %xmm1"); + asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vsqrtph %ymm2, %ymm1"); + asm volatile("vsqrtph 0x12345678(%rax,%rcx,8), %ymm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vsqrtsh %xmm3, %xmm2, %xmm1"); + asm volatile("vsqrtsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsubph %zmm3, %zmm2, %zmm1"); + asm volatile("vsubph 0x12345678(%rax,%rcx,8), %zmm2, %zmm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vsubph %xmm3, %xmm2, %xmm1"); + asm volatile("vsubph 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsubph %ymm3, %ymm2, %ymm1"); + asm volatile("vsubph 0x12345678(%rax,%rcx,8), %ymm2, %ymm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vsubsh %xmm3, %xmm2, %xmm1"); + asm volatile("vsubsh 0x12345678(%rax,%rcx,8), %xmm2, %xmm1"); + asm volatile("vsubsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vucomish %xmm2, %xmm1"); + asm volatile("vucomish 0x12345678(%rax,%rcx,8), %xmm1"); + asm volatile("vucomish 0x12345678(%eax,%ecx,8), %xmm1"); + #else /* #ifdef __x86_64__ */ /* bound r32, mem (same op code as EVEX prefix) */ @@ -3670,8 +4388,479 @@ int main(void) asm volatile("notrack bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ asm volatile("notrack bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + /* AVX512-FP16 */ + + asm volatile("vaddph %zmm3, %zmm2, %zmm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vaddph %xmm3, %xmm2, %xmm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vaddph %ymm3, %ymm2, %ymm1"); + asm volatile("vaddph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vaddsh %xmm3, %xmm2, %xmm1"); + asm volatile("vaddsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcmpph $0x12, %zmm3, %zmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %zmm2, %k5"); + asm volatile("vcmpph $0x12, %xmm3, %xmm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5"); + asm volatile("vcmpph $0x12, %ymm3, %ymm2, %k5"); + asm volatile("vcmpph $0x12, 0x12345678(%eax,%ecx,8), %ymm2, %k5"); + asm volatile("vcmpsh $0x12, %xmm3, %xmm2, %k5"); + asm volatile("vcmpsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %k5"); + asm volatile("vcomish %xmm2, %xmm1"); + asm volatile("vcomish 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtdq2ph %zmm2, %ymm1"); + asm volatile("vcvtdq2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtdq2ph %xmm2, %xmm1"); + asm volatile("vcvtdq2ph %ymm2, %xmm1"); + asm volatile("vcvtpd2ph %zmm2, %xmm1"); + asm volatile("vcvtpd2ph %xmm2, %xmm1"); + asm volatile("vcvtpd2ph %ymm2, %xmm1"); + asm volatile("vcvtph2dq %ymm2, %zmm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2dq %xmm2, %xmm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2dq %xmm2, %ymm1"); + asm volatile("vcvtph2dq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2pd %xmm2, %zmm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2pd %xmm2, %xmm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2pd %xmm2, %ymm1"); + asm volatile("vcvtph2pd 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2ps %ymm2, %zmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2ps %xmm2, %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2ps %xmm2, %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2ps %xmm2, %xmm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2ps %xmm2, %ymm1"); + asm volatile("vcvtph2ps 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2psx %ymm2, %zmm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2psx %xmm2, %xmm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2psx %xmm2, %ymm1"); + asm volatile("vcvtph2psx 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2qq %xmm2, %zmm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2qq %xmm2, %xmm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2qq %xmm2, %ymm1"); + asm volatile("vcvtph2qq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2udq %ymm2, %zmm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2udq %xmm2, %xmm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2udq %xmm2, %ymm1"); + asm volatile("vcvtph2udq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2uqq %xmm2, %zmm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2uqq %xmm2, %xmm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2uqq %xmm2, %ymm1"); + asm volatile("vcvtph2uqq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2uw %zmm2, %zmm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2uw %xmm2, %xmm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2uw %ymm2, %ymm1"); + asm volatile("vcvtph2uw 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtph2w %zmm2, %zmm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtph2w %xmm2, %xmm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtph2w %ymm2, %ymm1"); + asm volatile("vcvtph2w 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtps2ph $0x12, %zmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %zmm2, %ymm1"); + asm volatile("vcvtps2ph $0x12, %ymm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %ymm2, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2ph $0x12, %xmm2, %xmm1"); + asm volatile("vcvtps2ph $0x12, %xmm2, 0x12345678(%eax,%ecx,8)"); + asm volatile("vcvtps2phx %zmm2, %ymm1"); + asm volatile("vcvtps2phx 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtps2phx %xmm2, %xmm1"); + asm volatile("vcvtps2phx %ymm2, %xmm1"); + asm volatile("vcvtqq2ph %zmm2, %xmm1"); + asm volatile("vcvtqq2ph %xmm2, %xmm1"); + asm volatile("vcvtqq2ph %ymm2, %xmm1"); + asm volatile("vcvtsd2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2sd 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2si 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvtsh2ss 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsh2usi %xmm1, %eax"); + asm volatile("vcvtsh2usi 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvtsi2sh %eax, %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtsi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtss2sh %xmm3, %xmm2, %xmm1"); + asm volatile("vcvtss2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvttph2dq %ymm2, %zmm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2dq %xmm2, %xmm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2dq %xmm2, %ymm1"); + asm volatile("vcvttph2dq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2qq %xmm2, %zmm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2qq %xmm2, %xmm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2qq %xmm2, %ymm1"); + asm volatile("vcvttph2qq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2udq %ymm2, %zmm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2udq %xmm2, %xmm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2udq %xmm2, %ymm1"); + asm volatile("vcvttph2udq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2uqq %xmm2, %zmm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2uqq %xmm2, %xmm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2uqq %xmm2, %ymm1"); + asm volatile("vcvttph2uqq 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2uw %zmm2, %zmm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2uw %xmm2, %xmm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2uw %ymm2, %ymm1"); + asm volatile("vcvttph2uw 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttph2w %zmm2, %zmm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvttph2w %xmm2, %xmm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvttph2w %ymm2, %ymm1"); + asm volatile("vcvttph2w 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvttsh2si %xmm1, %eax"); + asm volatile("vcvttsh2si 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvttsh2usi %xmm1, %eax"); + asm volatile("vcvttsh2usi 0x12345678(%eax,%ecx,8), %eax"); + asm volatile("vcvtudq2ph %zmm2, %ymm1"); + asm volatile("vcvtudq2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtudq2ph %xmm2, %xmm1"); + asm volatile("vcvtudq2ph %ymm2, %xmm1"); + asm volatile("vcvtuqq2ph %zmm2, %xmm1"); + asm volatile("vcvtuqq2ph %xmm2, %xmm1"); + asm volatile("vcvtuqq2ph %ymm2, %xmm1"); + asm volatile("vcvtusi2sh %eax, %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtusi2sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vcvtuw2ph %zmm2, %zmm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtuw2ph %xmm2, %xmm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtuw2ph %ymm2, %ymm1"); + asm volatile("vcvtuw2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vcvtw2ph %zmm2, %zmm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vcvtw2ph %xmm2, %xmm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vcvtw2ph %ymm2, %ymm1"); + asm volatile("vcvtw2ph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vdivph %zmm3, %zmm2, %zmm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vdivph %xmm3, %xmm2, %xmm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vdivph %ymm3, %ymm2, %ymm1"); + asm volatile("vdivph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vdivsh %xmm3, %xmm2, %xmm1"); + asm volatile("vdivsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfcmaddcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmaddcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfcmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfcmaddcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfcmulcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfcmulcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfcmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfcmulcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfcmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmadd231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmaddsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmaddsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmaddsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmaddsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsub231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmsubadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmsubadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmsubadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmsubadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmulcph %zmm3, %zmm2, %zmm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfmulcph %xmm3, %xmm2, %xmm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfmulcph %ymm3, %ymm2, %ymm1"); + asm volatile("vfmulcph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfmulcsh %xmm3, %xmm2, %xmm1"); + asm volatile("vfmulcsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmadd231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmadd231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmadd231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmadd231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmadd231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub132ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub132ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub132ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub132sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub132sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub213ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub213ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub213ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub213sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub213sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231ph %zmm3, %zmm2, %zmm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vfnmsub231ph %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfnmsub231ph %ymm3, %ymm2, %ymm1"); + asm volatile("vfnmsub231ph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vfnmsub231sh %xmm3, %xmm2, %xmm1"); + asm volatile("vfnmsub231sh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vfpclassph $0x12, %zmm1, %k5"); + asm volatile("vfpclassph $0x12, %xmm1, %k5"); + asm volatile("vfpclassph $0x12, %ymm1, %k5"); + asm volatile("vfpclasssh $0x12, %xmm1, %k5"); + asm volatile("vfpclasssh $0x12, 0x12345678(%eax,%ecx,8), %k5"); + asm volatile("vgetexpph %zmm2, %zmm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vgetexpph %xmm2, %xmm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vgetexpph %ymm2, %ymm1"); + asm volatile("vgetexpph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vgetexpsh %xmm3, %xmm2, %xmm1"); + asm volatile("vgetexpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vgetmantph $0x12, %zmm2, %zmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vgetmantph $0x12, %xmm2, %xmm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vgetmantph $0x12, %ymm2, %ymm1"); + asm volatile("vgetmantph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vgetmantsh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vgetmantsh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmaxph %zmm3, %zmm2, %zmm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vmaxph %xmm3, %xmm2, %xmm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmaxph %ymm3, %ymm2, %ymm1"); + asm volatile("vmaxph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vmaxsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmaxsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vminph %zmm3, %zmm2, %zmm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vminph %xmm3, %xmm2, %xmm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vminph %ymm3, %ymm2, %ymm1"); + asm volatile("vminph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vminsh %xmm3, %xmm2, %xmm1"); + asm volatile("vminsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmovsh %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vmovsh 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vmovsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmovw %xmm1, %eax"); + asm volatile("vmovw %xmm1, 0x12345678(%eax,%ecx,8)"); + asm volatile("vmovw %eax, %xmm1"); + asm volatile("vmovw 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vmulph %zmm3, %zmm2, %zmm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vmulph %xmm3, %xmm2, %xmm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vmulph %ymm3, %ymm2, %ymm1"); + asm volatile("vmulph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vmulsh %xmm3, %xmm2, %xmm1"); + asm volatile("vmulsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrcpph %zmm2, %zmm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrcpph %xmm2, %xmm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrcpph %ymm2, %ymm1"); + asm volatile("vrcpph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrcpsh %xmm3, %xmm2, %xmm1"); + asm volatile("vrcpsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vreduceph $0x12, %zmm2, %zmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vreduceph $0x12, %xmm2, %xmm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vreduceph $0x12, %ymm2, %ymm1"); + asm volatile("vreduceph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vreducesh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vreducesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrndscaleph $0x12, %zmm2, %zmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrndscaleph $0x12, %xmm2, %xmm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrndscaleph $0x12, %ymm2, %ymm1"); + asm volatile("vrndscaleph $0x12, 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrndscalesh $0x12, %xmm3, %xmm2, %xmm1"); + asm volatile("vrndscalesh $0x12, 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vrsqrtph %zmm2, %zmm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vrsqrtph %xmm2, %xmm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vrsqrtph %ymm2, %ymm1"); + asm volatile("vrsqrtph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vrsqrtsh %xmm3, %xmm2, %xmm1"); + asm volatile("vrsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vscalefph %zmm3, %zmm2, %zmm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vscalefph %xmm3, %xmm2, %xmm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vscalefph %ymm3, %ymm2, %ymm1"); + asm volatile("vscalefph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vscalefsh %xmm3, %xmm2, %xmm1"); + asm volatile("vscalefsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsqrtph %zmm2, %zmm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %zmm1"); + asm volatile("vsqrtph %xmm2, %xmm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %xmm1"); + asm volatile("vsqrtph %ymm2, %ymm1"); + asm volatile("vsqrtph 0x12345678(%eax,%ecx,8), %ymm1"); + asm volatile("vsqrtsh %xmm3, %xmm2, %xmm1"); + asm volatile("vsqrtsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsubph %zmm3, %zmm2, %zmm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %zmm2, %zmm1"); + asm volatile("vsubph %xmm3, %xmm2, %xmm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vsubph %ymm3, %ymm2, %ymm1"); + asm volatile("vsubph 0x12345678(%eax,%ecx,8), %ymm2, %ymm1"); + asm volatile("vsubsh %xmm3, %xmm2, %xmm1"); + asm volatile("vsubsh 0x12345678(%eax,%ecx,8), %xmm2, %xmm1"); + asm volatile("vucomish %xmm2, %xmm1"); + asm volatile("vucomish 0x12345678(%eax,%ecx,8), %xmm1"); + #endif /* #ifndef __x86_64__ */ + /* Prediction history reset */ + + asm volatile("hreset $0"); + + /* Serialize instruction execution */ + + asm volatile("serialize"); + + /* TSX suspend load address tracking */ + + asm volatile("xresldtrk"); + asm volatile("xsusldtrk"); + /* SGX */ asm volatile("encls"); diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index f924246eff78..8d9b55959256 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -29,7 +29,7 @@ struct evsel *arch_evlist__leader(struct list_head *list) __evlist__for_each_entry(list, evsel) { if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") && - evsel->name && strstr(evsel->name, "slots")) + evsel->name && strcasestr(evsel->name, "slots")) return evsel; } return first; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index eaad04e1672a..41a66a48cbdf 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -346,7 +346,7 @@ struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affin { struct evlist_cpu_iterator itr = { .container = evlist, - .evsel = evlist__first(evlist), + .evsel = NULL, .cpu_map_idx = 0, .evlist_cpu_map_idx = 0, .evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus), @@ -354,16 +354,22 @@ struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affin .affinity = affinity, }; - if (itr.affinity) { - itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0); - affinity__set(itr.affinity, itr.cpu.cpu); - itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu); - /* - * If this CPU isn't in the evsel's cpu map then advance through - * the list. - */ - if (itr.cpu_map_idx == -1) - evlist_cpu_iterator__next(&itr); + if (evlist__empty(evlist)) { + /* Ensure the empty list doesn't iterate. */ + itr.evlist_cpu_map_idx = itr.evlist_cpu_map_nr; + } else { + itr.evsel = evlist__first(evlist); + if (itr.affinity) { + itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0); + affinity__set(itr.affinity, itr.cpu.cpu); + itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu); + /* + * If this CPU isn't in the evsel's cpu map then advance + * through the list. + */ + if (itr.cpu_map_idx == -1) + evlist_cpu_iterator__next(&itr); + } } return itr; } diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index 5acf053fca7d..aa0c5179836d 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -50,41 +50,32 @@ #ifndef SYM_END #define SYM_END(name, sym_type) \ .type name sym_type ASM_NL \ + .set .L__sym_size_##name, .-name ASM_NL \ .size name, .-name #endif -/* - * SYM_FUNC_START_ALIAS -- use where there are two global names for one - * function - */ -#ifndef SYM_FUNC_START_ALIAS -#define SYM_FUNC_START_ALIAS(name) \ - SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +/* SYM_ALIAS -- use only if you have to */ +#ifndef SYM_ALIAS +#define SYM_ALIAS(alias, name, sym_type, linkage) \ + linkage(alias) ASM_NL \ + .set alias, name ASM_NL \ + .type alias sym_type ASM_NL \ + .set .L__sym_size_##alias, .L__sym_size_##name ASM_NL \ + .size alias, .L__sym_size_##alias #endif /* SYM_FUNC_START -- use for global functions */ #ifndef SYM_FUNC_START -/* - * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two - * later. - */ #define SYM_FUNC_START(name) \ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif /* SYM_FUNC_START_LOCAL -- use for local functions */ #ifndef SYM_FUNC_START_LOCAL -/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_START_LOCAL(name) \ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) #endif -/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */ -#ifndef SYM_FUNC_END_ALIAS -#define SYM_FUNC_END_ALIAS(name) \ - SYM_END(name, SYM_T_FUNC) -#endif - /* SYM_FUNC_START_WEAK -- use for weak functions */ #ifndef SYM_FUNC_START_WEAK #define SYM_FUNC_START_WEAK(name) \ @@ -96,9 +87,32 @@ * SYM_FUNC_START_WEAK, ... */ #ifndef SYM_FUNC_END -/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */ #define SYM_FUNC_END(name) \ SYM_END(name, SYM_T_FUNC) #endif +/* + * SYM_FUNC_ALIAS -- define a global alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS +#define SYM_FUNC_ALIAS(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_GLOBAL) +#endif + +/* + * SYM_FUNC_ALIAS_LOCAL -- define a local alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS_LOCAL +#define SYM_FUNC_ALIAS_LOCAL(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_LOCAL) +#endif + +/* + * SYM_FUNC_ALIAS_WEAK -- define a weak global alias for an existing function + */ +#ifndef SYM_FUNC_ALIAS_WEAK +#define SYM_FUNC_ALIAS_WEAK(alias, name) \ + SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK) +#endif + #endif /* PERF_LINUX_LINKAGE_H_ */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index b2ed3140a1fa..dfde9eada224 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -231,7 +231,7 @@ void symbols__fixup_end(struct rb_root_cached *symbols) prev = curr; curr = rb_entry(nd, struct symbol, rb_node); - if (prev->end == prev->start && prev->end != curr->start) + if (prev->end == prev->start || prev->end != curr->start) arch__symbols__fixup_end(prev, curr); } diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c index d8eeeafb50dc..1e13b7523918 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi.c +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -18,7 +18,6 @@ #include "../../kselftest.h" -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) #define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1) extern void do_syscall(int sve_vl); diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index a3c1e67441f9..4c418b2021e0 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -21,8 +21,6 @@ #include "../../kselftest.h" -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) - /* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */ #ifndef NT_ARM_SVE #define NT_ARM_SVE 0x405 @@ -489,6 +487,8 @@ static int do_parent(pid_t child) unsigned int vq, vl; bool vl_supported; + ksft_print_msg("Parent is %d, child is %d\n", getpid(), child); + /* Attach to the child */ while (1) { int sig; diff --git a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c index a876db1f096a..325bca0de0f6 100644 --- a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c +++ b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c @@ -19,17 +19,6 @@ #include "kselftest.h" #include "mte_common_util.h" -#define PR_SET_TAGGED_ADDR_CTRL 55 -#define PR_GET_TAGGED_ADDR_CTRL 56 -# define PR_TAGGED_ADDR_ENABLE (1UL << 0) -# define PR_MTE_TCF_SHIFT 1 -# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TAG_SHIFT 3 -# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) - #include "mte_def.h" #define NUM_ITERATIONS 1024 diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index 1de7a0abd0ae..f4ae5f87a3b7 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -3,6 +3,7 @@ #define _GNU_SOURCE +#include <assert.h> #include <errno.h> #include <fcntl.h> #include <signal.h> @@ -11,6 +12,7 @@ #include <string.h> #include <ucontext.h> #include <unistd.h> +#include <sys/uio.h> #include <sys/mman.h> #include "kselftest.h" @@ -19,14 +21,28 @@ static size_t page_sz; -static int check_usermem_access_fault(int mem_type, int mode, int mapping) +#define TEST_NAME_MAX 100 + +enum test_type { + READ_TEST, + WRITE_TEST, + READV_TEST, + WRITEV_TEST, + LAST_TEST, +}; + +static int check_usermem_access_fault(int mem_type, int mode, int mapping, + int tag_offset, int tag_len, + enum test_type test_type) { int fd, i, err; char val = 'A'; - size_t len, read_len; + ssize_t len, syscall_len; void *ptr, *ptr_next; + int fileoff, ptroff, size; + int sizes[] = {1, 2, 3, 8, 16, 32, 4096, page_sz}; - err = KSFT_FAIL; + err = KSFT_PASS; len = 2 * page_sz; mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); fd = create_temp_file(); @@ -43,9 +59,9 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping) } mte_initialize_current_context(mode, (uintptr_t)ptr, len); /* Copy from file into buffer with valid tag */ - read_len = read(fd, ptr, len); + syscall_len = read(fd, ptr, len); mte_wait_after_trig(); - if (cur_mte_cxt.fault_valid || read_len < len) + if (cur_mte_cxt.fault_valid || syscall_len < len) goto usermem_acc_err; /* Verify same pattern is read */ for (i = 0; i < len; i++) @@ -54,36 +70,136 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping) if (i < len) goto usermem_acc_err; - /* Tag the next half of memory with different value */ - ptr_next = (void *)((unsigned long)ptr + page_sz); + if (!tag_len) + tag_len = len - tag_offset; + /* Tag a part of memory with different value */ + ptr_next = (void *)((unsigned long)ptr + tag_offset); ptr_next = mte_insert_new_tag(ptr_next); - mte_set_tag_address_range(ptr_next, page_sz); + mte_set_tag_address_range(ptr_next, tag_len); - lseek(fd, 0, 0); - /* Copy from file into buffer with invalid tag */ - read_len = read(fd, ptr, len); - mte_wait_after_trig(); - /* - * Accessing user memory in kernel with invalid tag should fail in sync - * mode without fault but may not fail in async mode as per the - * implemented MTE userspace support in Arm64 kernel. - */ - if (mode == MTE_SYNC_ERR && - !cur_mte_cxt.fault_valid && read_len < len) { - err = KSFT_PASS; - } else if (mode == MTE_ASYNC_ERR && - !cur_mte_cxt.fault_valid && read_len == len) { - err = KSFT_PASS; + for (fileoff = 0; fileoff < 16; fileoff++) { + for (ptroff = 0; ptroff < 16; ptroff++) { + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + size = sizes[i]; + lseek(fd, 0, 0); + + /* perform file operation on buffer with invalid tag */ + switch (test_type) { + case READ_TEST: + syscall_len = read(fd, ptr + ptroff, size); + break; + case WRITE_TEST: + syscall_len = write(fd, ptr + ptroff, size); + break; + case READV_TEST: { + struct iovec iov[1]; + iov[0].iov_base = ptr + ptroff; + iov[0].iov_len = size; + syscall_len = readv(fd, iov, 1); + break; + } + case WRITEV_TEST: { + struct iovec iov[1]; + iov[0].iov_base = ptr + ptroff; + iov[0].iov_len = size; + syscall_len = writev(fd, iov, 1); + break; + } + case LAST_TEST: + goto usermem_acc_err; + } + + mte_wait_after_trig(); + /* + * Accessing user memory in kernel with invalid tag should fail in sync + * mode without fault but may not fail in async mode as per the + * implemented MTE userspace support in Arm64 kernel. + */ + if (cur_mte_cxt.fault_valid) { + goto usermem_acc_err; + } + if (mode == MTE_SYNC_ERR && syscall_len < len) { + /* test passed */ + } else if (mode == MTE_ASYNC_ERR && syscall_len == size) { + /* test passed */ + } else { + goto usermem_acc_err; + } + } + } } + + goto exit; + usermem_acc_err: + err = KSFT_FAIL; +exit: mte_free_memory((void *)ptr, len, mem_type, true); close(fd); return err; } +void format_test_name(char* name, int name_len, int type, int sync, int map, int len, int offset) { + const char* test_type; + const char* mte_type; + const char* map_type; + + switch (type) { + case READ_TEST: + test_type = "read"; + break; + case WRITE_TEST: + test_type = "write"; + break; + case READV_TEST: + test_type = "readv"; + break; + case WRITEV_TEST: + test_type = "writev"; + break; + default: + assert(0); + break; + } + + switch (sync) { + case MTE_SYNC_ERR: + mte_type = "MTE_SYNC_ERR"; + break; + case MTE_ASYNC_ERR: + mte_type = "MTE_ASYNC_ERR"; + break; + default: + assert(0); + break; + } + + switch (map) { + case MAP_SHARED: + map_type = "MAP_SHARED"; + break; + case MAP_PRIVATE: + map_type = "MAP_PRIVATE"; + break; + default: + assert(0); + break; + } + + snprintf(name, name_len, + "test type: %s, %s, %s, tag len: %d, tag offset: %d\n", + test_type, mte_type, map_type, len, offset); +} + int main(int argc, char *argv[]) { int err; + int t, s, m, l, o; + int mte_sync[] = {MTE_SYNC_ERR, MTE_ASYNC_ERR}; + int maps[] = {MAP_SHARED, MAP_PRIVATE}; + int tag_lens[] = {0, MT_GRANULE_SIZE}; + int tag_offsets[] = {page_sz, MT_GRANULE_SIZE}; + char test_name[TEST_NAME_MAX]; page_sz = getpagesize(); if (!page_sz) { @@ -98,17 +214,28 @@ int main(int argc, char *argv[]) mte_register_signal(SIGSEGV, mte_default_handler); /* Set test plan */ - ksft_set_plan(4); + ksft_set_plan(64); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), - "Check memory access from kernel in sync mode, private mapping and mmap memory\n"); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), - "Check memory access from kernel in sync mode, shared mapping and mmap memory\n"); - - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), - "Check memory access from kernel in async mode, private mapping and mmap memory\n"); - evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), - "Check memory access from kernel in async mode, shared mapping and mmap memory\n"); + for (t = 0; t < LAST_TEST; t++) { + for (s = 0; s < ARRAY_SIZE(mte_sync); s++) { + for (m = 0; m < ARRAY_SIZE(maps); m++) { + for (l = 0; l < ARRAY_SIZE(tag_lens); l++) { + for (o = 0; o < ARRAY_SIZE(tag_offsets); o++) { + int sync = mte_sync[s]; + int map = maps[m]; + int offset = tag_offsets[o]; + int tag_len = tag_lens[l]; + int res = check_usermem_access_fault(USE_MMAP, sync, + map, offset, + tag_len, t); + format_test_name(test_name, TEST_NAME_MAX, + t, sync, map, tag_len, offset); + evaluate_test(res, test_name); + } + } + } + } + } mte_restore_setup(); ksft_print_cnts(); diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h index ebe8694dbef0..f909b70d9e98 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.h +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -53,6 +53,7 @@ struct tdescr { char *name; char *descr; unsigned long feats_required; + unsigned long feats_incompatible; /* bitmask of effectively supported feats: populated at run-time */ unsigned long feats_supported; bool initialized; diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 2f8c23af3b5e..5743897984b0 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -36,6 +36,8 @@ static inline char *feats_to_string(unsigned long feats) { size_t flen = MAX_FEATS_SZ - 1; + feats_string[0] = '\0'; + for (int i = 0; i < FMAX_END; i++) { if (feats & (1UL << i)) { size_t tlen = strlen(feats_names[i]); @@ -256,7 +258,7 @@ int test_init(struct tdescr *td) td->minsigstksz = MINSIGSTKSZ; fprintf(stderr, "Detected MINSTKSIGSZ:%d\n", td->minsigstksz); - if (td->feats_required) { + if (td->feats_required || td->feats_incompatible) { td->feats_supported = 0; /* * Checking for CPU required features using both the @@ -267,15 +269,29 @@ int test_init(struct tdescr *td) if (getauxval(AT_HWCAP) & HWCAP_SVE) td->feats_supported |= FEAT_SVE; if (feats_ok(td)) { - fprintf(stderr, - "Required Features: [%s] supported\n", - feats_to_string(td->feats_required & - td->feats_supported)); + if (td->feats_required & td->feats_supported) + fprintf(stderr, + "Required Features: [%s] supported\n", + feats_to_string(td->feats_required & + td->feats_supported)); + if (!(td->feats_incompatible & td->feats_supported)) + fprintf(stderr, + "Incompatible Features: [%s] absent\n", + feats_to_string(td->feats_incompatible)); } else { - fprintf(stderr, - "Required Features: [%s] NOT supported\n", - feats_to_string(td->feats_required & - ~td->feats_supported)); + if ((td->feats_required & td->feats_supported) != + td->feats_supported) + fprintf(stderr, + "Required Features: [%s] NOT supported\n", + feats_to_string(td->feats_required & + ~td->feats_supported)); + if (td->feats_incompatible & td->feats_supported) + fprintf(stderr, + "Incompatible Features: [%s] supported\n", + feats_to_string(td->feats_incompatible & + ~td->feats_supported)); + + td->result = KSFT_SKIP; return 0; } diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index 6772b5c8d274..f3aa99ba67bb 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -18,6 +18,8 @@ void test_result(struct tdescr *td); static inline bool feats_ok(struct tdescr *td) { + if (td->feats_incompatible & td->feats_supported) + return false; return (td->feats_required & td->feats_supported) == td->feats_required; } diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 79fe627b9e81..eb8543b9a5c4 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -880,9 +880,8 @@ EOF return $ksft_skip fi - # test default behaviour. Packet from ns1 to ns0 is not redirected - # due to automatic port translation. - test_port_shadow "default" "ROUTER" + # test default behaviour. Packet from ns1 to ns0 is redirected to ns2. + test_port_shadow "default" "CLIENT" # test packet filter based mitigation: prevent forwarding of # packets claiming to come from the service port. diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh index e6a132df6172..69f8a5958cef 100755 --- a/tools/testing/selftests/rcutorture/bin/console-badness.sh +++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh @@ -10,7 +10,7 @@ # # Authors: Paul E. McKenney <paulmck@kernel.org> -egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | +egrep 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | grep -v 'ODEBUG: ' | grep -v 'This means that this is a DEBUG kernel and it is' | grep -v 'Warning: unable to open an initial console' | diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh index 5a0023d183da..0941f1ddab65 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh @@ -47,8 +47,8 @@ else exit 1 fi -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh dryrun= diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh index 370406bbfeed..f17000a2ccf1 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh @@ -49,8 +49,8 @@ fi mkdir $resdir/$ds echo Results directory: $resdir/$ds -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh echo Using all `identify_qemu_vcpus` CPUs. diff --git a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh index e4a00779b8c6..ee886b40a5d2 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh @@ -22,8 +22,8 @@ T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$ trap 'rm -rf $T' 0 mkdir $T -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh default_starttime="`get_starttime`" starttime="${2-default_starttime}" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh index 2e9e9e2eedb6..5f682fc892dd 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh @@ -30,10 +30,16 @@ editor=${EDITOR-vi} files= for i in ${rundir}/*/Make.out do + scenariodir="`dirname $i`" + scenariobasedir="`echo ${scenariodir} | sed -e 's/\.[0-9]*$//'`" if egrep -q "error:|warning:|^ld: .*undefined reference to" < $i then egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags files="$files $i.diags $i" + elif ! test -f ${scenariobasedir}/vmlinux + then + echo No ${scenariobasedir}/vmlinux file > $i.diags + files="$files $i.diags $i" fi done if test -n "$files" diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index 1c4c2c727dad..43e1387234d1 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -25,7 +25,7 @@ stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null | tail -1 | sed -e 's/^\[[ 0-9.]*] //' | awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' | tr -d '\012\015'`" -fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }'`" +fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }' | tr -d '\015'`" if test -z "$ngps" then echo "$configfile ------- " $stopstate diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index e09b1bc78708..8c4c1e4792d0 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -19,8 +19,8 @@ then exit 1 fi -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh starttime="`get_starttime`" @@ -108,8 +108,8 @@ else cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log" exit 2 fi - cp -a "$rundir" "$KVM/res/" - oldrun="$KVM/res/$ds" + cp -a "$rundir" "$RCUTORTURE/res/" + oldrun="$RCUTORTURE/res/$ds" fi echo | tee -a "$oldrun/remote-log" echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log" @@ -155,18 +155,23 @@ do echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log" cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -" ret=$? - if test "$ret" -ne 0 - then - echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. | tee -a "$oldrun/remote-log" + tries=0 + while test "$ret" -ne 0 + do + echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. $tries prior retries. | tee -a "$oldrun/remote-log" sleep 60 cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -" ret=$? if test "$ret" -ne 0 then - echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log" - exit 10 + if test "$tries" > 5 + then + echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log" + exit 10 + fi fi - fi + tries=$((tries+1)) + done done # Function to check for presence of a file on the specified system. diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 6de0c183db5b..55b2c1533282 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -25,15 +25,15 @@ LANG=en_US.UTF-8; export LANG dur=$((30*60)) dryrun="" -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`" TORTURE_DEFCONFIG=defconfig TORTURE_BOOT_IMAGE="" TORTURE_BUILDONLY= -TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD +TORTURE_INITRD="$RCUTORTURE/initrd"; export TORTURE_INITRD TORTURE_KCONFIG_ARG="" TORTURE_KCONFIG_GDB_ARG="" TORTURE_BOOT_GDB_ARG="" @@ -262,7 +262,7 @@ else exit 1 fi -CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG +CONFIGFRAG=${RCUTORTURE}/configs/${TORTURE_SUITE}; export CONFIGFRAG defaultconfigs="`tr '\012' ' ' < $CONFIGFRAG/CFLIST`" if test -z "$configs" @@ -272,7 +272,7 @@ fi if test -z "$resdir" then - resdir=$KVM/res + resdir=$RCUTORTURE/res fi # Create a file of test-name/#cpus pairs, sorted by decreasing #cpus. @@ -280,7 +280,7 @@ configs_derep= for CF in $configs do case $CF in - [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**) + [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**|[0-9][0-9][0-9][0-9]\**) config_reps=`echo $CF | sed -e 's/\*.*$//'` CF1=`echo $CF | sed -e 's/^[^*]*\*//'` ;; @@ -386,7 +386,7 @@ END { # Generate a script to execute the tests in appropriate batches. cat << ___EOF___ > $T/script CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG -KVM="$KVM"; export KVM +RCUTORTURE="$RCUTORTURE"; export RCUTORTURE PATH="$PATH"; export PATH TORTURE_ALLOTED_CPUS="$TORTURE_ALLOTED_CPUS"; export TORTURE_ALLOTED_CPUS TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE @@ -569,7 +569,7 @@ ___EOF___ awk < $T/cfgcpu.pack \ -v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \ -v CONFIGDIR="$CONFIGFRAG/" \ - -v KVM="$KVM" \ + -v RCUTORTURE="$RCUTORTURE" \ -v ncpus=$cpus \ -v jitter="$jitter" \ -v rd=$resdir/$ds/ \ diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 9f624bd53c27..822eb037a057 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -138,6 +138,16 @@ then then summary="$summary Bugs: $n_bugs" fi + n_kcsan=`egrep -c 'BUG: KCSAN: ' $file` + if test "$n_kcsan" -ne 0 + then + if test "$n_bugs" = "$n_kcsan" + then + summary="$summary (all bugs kcsan)" + else + summary="$summary KCSAN: $n_kcsan" + fi + fi n_calltrace=`grep -c 'Call Trace:' $file` if test "$n_calltrace" -ne 0 then diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index eae88aacca2a..bfe09e2829c8 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -13,8 +13,8 @@ scriptname=$0 args="$*" -KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -PATH=${KVM}/bin:$PATH; export PATH +RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE +PATH=${RCUTORTURE}/bin:$PATH; export PATH . functions.sh TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`" @@ -37,7 +37,7 @@ configs_scftorture= kcsan_kmake_args= # Default compression, duration, and apportionment. -compress_kasan_vmlinux="`identify_qemu_vcpus`" +compress_concurrency="`identify_qemu_vcpus`" duration_base=10 duration_rcutorture_frac=7 duration_locktorture_frac=1 @@ -67,12 +67,12 @@ function doyesno () { usage () { echo "Usage: $scriptname optional arguments:" - echo " --compress-kasan-vmlinux concurrency" + echo " --compress-concurrency concurrency" echo " --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\"" echo " --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\"" echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\"" - echo " --doall" - echo " --doallmodconfig / --do-no-allmodconfig" + echo " --do-all" + echo " --do-allmodconfig / --do-no-allmodconfig" echo " --do-clocksourcewd / --do-no-clocksourcewd" echo " --do-kasan / --do-no-kasan" echo " --do-kcsan / --do-no-kcsan" @@ -91,9 +91,9 @@ usage () { while test $# -gt 0 do case "$1" in - --compress-kasan-vmlinux) - checkarg --compress-kasan-vmlinux "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error' - compress_kasan_vmlinux=$2 + --compress-concurrency) + checkarg --compress-concurrency "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error' + compress_concurrency=$2 shift ;; --config-rcutorture|--configs-rcutorture) @@ -414,8 +414,14 @@ nfailures=0 echo FAILURES: | tee -a $T/log if test -s "$T/failures" then - cat "$T/failures" | tee -a $T/log + awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/ /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum" nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`" + grep "^ Summary: " "$T/failuresum" | + grep -v '^ Summary: Bugs: [0-9]* (all bugs kcsan)$' > "$T/nonkcsan" + if test -s "$T/nonkcsan" + then + nonkcsanbug="yes" + fi ret=2 fi if test "$do_kcsan" = "yes" @@ -424,12 +430,16 @@ then fi echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log +if test -z "$nonkcsanbug" && test -s "$T/failuresum" +then + echo " All bugs were KCSAN failures." +fi tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`" -if test -n "$tdir" && test $compress_kasan_vmlinux -gt 0 +if test -n "$tdir" && test $compress_concurrency -gt 0 then # KASAN vmlinux files can approach 1GB in size, so compress them. - echo Looking for KASAN files to compress: `date` > "$tdir/log-xz" 2>&1 - find "$tdir" -type d -name '*-kasan' -print > $T/xz-todo + echo Looking for K[AC]SAN files to compress: `date` > "$tdir/log-xz" 2>&1 + find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo ncompresses=0 batchno=1 if test -s $T/xz-todo @@ -447,7 +457,7 @@ then do xz "$j" >> "$tdir/log-xz" 2>&1 & ncompresses=$((ncompresses+1)) - if test $ncompresses -ge $compress_kasan_vmlinux + if test $ncompresses -ge $compress_concurrency then echo Waiting for batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log wait diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 index 3ca112444ce7..7093422050f6 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=4 +CONFIG_NR_CPUS=3 CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot index 238bfe3bd0cc..ce0694fd9b92 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot @@ -1 +1,2 @@ rcutorture.torture_type=srcu +rcutorture.fwd_progress=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot index ce48c7b82673..2db39f298d18 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot @@ -1,2 +1,4 @@ rcutorture.torture_type=srcud rcupdate.rcu_self_test=1 +rcutorture.fwd_progress=3 +srcutree.big_cpu_lim=5 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 index 34c8ff5a12f2..e4d74e5fc1d0 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=4 +CONFIG_NR_CPUS=5 CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index 2956584e1e37..75af864e07b6 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -4,7 +4,7 @@ include ../lib.mk .PHONY: all clean -CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \ +CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh "$(CC)" \ ../x86/trivial_64bit_program.c) ifndef OBJCOPY diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c index 9d4322c946e2..006b464c8fc9 100644 --- a/tools/testing/selftests/sgx/load.c +++ b/tools/testing/selftests/sgx/load.c @@ -21,7 +21,7 @@ void encl_delete(struct encl *encl) { - struct encl_segment *heap_seg = &encl->segment_tbl[encl->nr_segments - 1]; + struct encl_segment *heap_seg; if (encl->encl_base) munmap((void *)encl->encl_base, encl->encl_size); @@ -32,10 +32,11 @@ void encl_delete(struct encl *encl) if (encl->fd) close(encl->fd); - munmap(heap_seg->src, heap_seg->size); - - if (encl->segment_tbl) + if (encl->segment_tbl) { + heap_seg = &encl->segment_tbl[encl->nr_segments - 1]; + munmap(heap_seg->src, heap_seg->size); free(encl->segment_tbl); + } memset(encl, 0, sizeof(*encl)); } diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index 370c4995f7c4..dd74fa42302e 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -146,7 +146,8 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl, if (!encl_load("test_encl.elf", encl, heap_size)) { encl_delete(encl); - TH_LOG("Failed to load the test enclave.\n"); + TH_LOG("Failed to load the test enclave."); + return false; } if (!encl_measure(encl)) @@ -185,8 +186,6 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl, return true; err: - encl_delete(encl); - for (i = 0; i < encl->nr_segments; i++) { seg = &encl->segment_tbl[i]; @@ -205,7 +204,9 @@ err: fclose(maps_file); } - TH_LOG("Failed to initialize the test enclave.\n"); + TH_LOG("Failed to initialize the test enclave."); + + encl_delete(encl); return false; } diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py index f34486cd7342..057a4f49c79d 100644 --- a/tools/testing/selftests/tpm2/tpm2.py +++ b/tools/testing/selftests/tpm2/tpm2.py @@ -56,6 +56,7 @@ TSS2_RESMGR_TPM_RC_LAYER = (11 << TSS2_RC_LAYER_SHIFT) TPM2_CAP_HANDLES = 0x00000001 TPM2_CAP_COMMANDS = 0x00000002 +TPM2_CAP_PCRS = 0x00000005 TPM2_CAP_TPM_PROPERTIES = 0x00000006 TPM2_PT_FIXED = 0x100 @@ -712,3 +713,33 @@ class Client: pt += 1 return handles + + def get_cap_pcrs(self): + pcr_banks = {} + + fmt = '>HII III' + + cmd = struct.pack(fmt, + TPM2_ST_NO_SESSIONS, + struct.calcsize(fmt), + TPM2_CC_GET_CAPABILITY, + TPM2_CAP_PCRS, 0, 1) + rsp = self.send_cmd(cmd)[10:] + _, _, cnt = struct.unpack('>BII', rsp[:9]) + rsp = rsp[9:] + + # items are TPMS_PCR_SELECTION's + for i in range(0, cnt): + hash, sizeOfSelect = struct.unpack('>HB', rsp[:3]) + rsp = rsp[3:] + + pcrSelect = 0 + if sizeOfSelect > 0: + pcrSelect, = struct.unpack('%ds' % sizeOfSelect, + rsp[:sizeOfSelect]) + rsp = rsp[sizeOfSelect:] + pcrSelect = int.from_bytes(pcrSelect, byteorder='big') + + pcr_banks[hash] = pcrSelect + + return pcr_banks diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py index 9d764306887b..ffe98b5c8d22 100644 --- a/tools/testing/selftests/tpm2/tpm2_tests.py +++ b/tools/testing/selftests/tpm2/tpm2_tests.py @@ -27,7 +27,17 @@ class SmokeTest(unittest.TestCase): result = self.client.unseal(self.root_key, blob, auth, None) self.assertEqual(data, result) + def determine_bank_alg(self, mask): + pcr_banks = self.client.get_cap_pcrs() + for bank_alg, pcrSelection in pcr_banks.items(): + if pcrSelection & mask == mask: + return bank_alg + return None + def test_seal_with_policy(self): + bank_alg = self.determine_bank_alg(1 << 16) + self.assertIsNotNone(bank_alg) + handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL) data = ('X' * 64).encode() @@ -35,7 +45,7 @@ class SmokeTest(unittest.TestCase): pcrs = [16] try: - self.client.policy_pcr(handle, pcrs) + self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg) self.client.policy_password(handle) policy_dig = self.client.get_policy_digest(handle) @@ -47,7 +57,7 @@ class SmokeTest(unittest.TestCase): handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY) try: - self.client.policy_pcr(handle, pcrs) + self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg) self.client.policy_password(handle) result = self.client.unseal(self.root_key, blob, auth, handle) @@ -72,6 +82,9 @@ class SmokeTest(unittest.TestCase): self.assertEqual(rc, tpm2.TPM2_RC_AUTH_FAIL) def test_unseal_with_wrong_policy(self): + bank_alg = self.determine_bank_alg(1 << 16 | 1 << 1) + self.assertIsNotNone(bank_alg) + handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL) data = ('X' * 64).encode() @@ -79,7 +92,7 @@ class SmokeTest(unittest.TestCase): pcrs = [16] try: - self.client.policy_pcr(handle, pcrs) + self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg) self.client.policy_password(handle) policy_dig = self.client.get_policy_digest(handle) @@ -91,13 +104,13 @@ class SmokeTest(unittest.TestCase): # Extend first a PCR that is not part of the policy and try to unseal. # This should succeed. - ds = tpm2.get_digest_size(tpm2.TPM2_ALG_SHA1) - self.client.extend_pcr(1, ('X' * ds).encode()) + ds = tpm2.get_digest_size(bank_alg) + self.client.extend_pcr(1, ('X' * ds).encode(), bank_alg=bank_alg) handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY) try: - self.client.policy_pcr(handle, pcrs) + self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg) self.client.policy_password(handle) result = self.client.unseal(self.root_key, blob, auth, handle) @@ -109,14 +122,14 @@ class SmokeTest(unittest.TestCase): # Then, extend a PCR that is part of the policy and try to unseal. # This should fail. - self.client.extend_pcr(16, ('X' * ds).encode()) + self.client.extend_pcr(16, ('X' * ds).encode(), bank_alg=bank_alg) handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY) rc = 0 try: - self.client.policy_pcr(handle, pcrs) + self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg) self.client.policy_password(handle) result = self.client.unseal(self.root_key, blob, auth, handle) @@ -302,3 +315,19 @@ class AsyncTest(unittest.TestCase): log.debug("Calling get_cap in a NON_BLOCKING mode") async_client.get_cap(tpm2.TPM2_CAP_HANDLES, tpm2.HR_LOADED_SESSION) async_client.close() + + def test_flush_invalid_context(self): + log = logging.getLogger(__name__) + log.debug(sys._getframe().f_code.co_name) + + async_client = tpm2.Client(tpm2.Client.FLAG_SPACE | tpm2.Client.FLAG_NONBLOCK) + log.debug("Calling flush_context passing in an invalid handle ") + handle = 0x80123456 + rc = 0 + try: + async_client.flush_context(handle) + except OSError as e: + rc = e.errno + + self.assertEqual(rc, 22) + async_client.close() diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 1607322a112c..a14b5b800897 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm selftests +LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h + include local_config.mk uname_M := $(shell uname -m 2>/dev/null || echo not) @@ -140,10 +142,6 @@ endif $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap -$(OUTPUT)/gup_test: ../../../../mm/gup_test.h - -$(OUTPUT)/hmm-tests: local_config.h - # HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty. $(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS) diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh index 3e2089c8cf54..8c669c0d662e 100755 --- a/tools/testing/selftests/x86/check_cc.sh +++ b/tools/testing/selftests/x86/check_cc.sh @@ -7,7 +7,7 @@ CC="$1" TESTPROG="$2" shift 2 -if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then +if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then echo 1 else echo 0 |