From 00d1c5e05736f947687be27706bda01cec104e57 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 17 Apr 2008 17:40:45 +0200 Subject: x86: add gbpages switches These new controls toggle experimental support for a new CPU feature, the straightforward extension of largepages from the pmd level to the pud level, which allows 1GB (kernel) TLBs instead of 2MB TLBs. Turn it off by default, as this code has not been tested well enough yet. Either the CONFIG_DIRECT_GBPAGES=y .config option or gbpages on the boot line can be used to enable it. If enabled in the .config then the nogbpages boot option disables it. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/x86_64/boot-options.txt | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'Documentation') diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index 34abae4e9442..b0c7b6c4abda 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -307,3 +307,8 @@ Debugging stuck (default) Miscellaneous + + nogbpages + Do not use GB pages for kernel direct mappings. + gbpages + Use GB pages for kernel direct mappings. -- cgit v1.2.3 From 099e1377269a47ed30a00ee131001988e5bcaa9c Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 13 Feb 2008 20:54:58 +0000 Subject: x86: use ELF format in compressed images. Signed-off-by: Ian Campbell Cc: Ian Campbell Cc: Jeremy Fitzhardinge Cc: virtualization@lists.linux-foundation.org Cc: H. 
Peter Anvin Cc: Jeremy Fitzhardinge Cc: virtualization@lists.linux-foundation.org Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/i386/boot.txt | 18 +++++++++++++ arch/x86/boot/Makefile | 14 ++++++++++ arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/misc.c | 56 +++++++++++++++++++++++++++++++++++++++ arch/x86/boot/header.S | 6 +++++ 5 files changed, 95 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt index fc49b79bc1ab..b5f5ba1ea668 100644 --- a/Documentation/i386/boot.txt +++ b/Documentation/i386/boot.txt @@ -170,6 +170,8 @@ Offset Proto Name Meaning 0238/4 2.06+ cmdline_size Maximum size of the kernel command line 023C/4 2.07+ hardware_subarch Hardware subarchitecture 0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data +0248/4 2.08+ compressed_payload_offset +024C/4 2.08+ compressed_payload_length (1) For backwards compatibility, if the setup_sects field contains 0, the real value is 4. @@ -512,6 +514,22 @@ Protocol: 2.07+ A pointer to data that is specific to hardware subarch +Field name: compressed_payload_offset +Type: read +Offset/size: 0x248/4 +Protocol: 2.08+ + + If non-zero then this field contains the offset from the end of the + real-mode code to the compressed payload. The compression format + should be determined using the standard magic number, currently only + gzip is used. + +Field name: compressed_payload_length +Type: read +Offset/size: 0x24c/4 +Protocol: 2.08+ + + The length of the compressed payload. **** THE KERNEL COMMAND LINE diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index f88458e83ef0..9695affeb584 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -94,6 +94,20 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) +sed-offsets := -e 's/^00*/0/' \ + -e 's/^\([0-9a-fA-F]*\) . 
\(input_data\|input_data_end\)$$/\#define \2 0x\1/p' + +quiet_cmd_offsets = OFFSETS $@ + cmd_offsets = $(NM) $< | sed -n $(sed-offsets) > $@ + +$(obj)/offsets.h: $(obj)/compressed/vmlinux FORCE + $(call if_changed,offsets) + +targets += offsets.h + +AFLAGS_header.o += -I$(obj) +$(obj)/header.o: $(obj)/offsets.h + LDFLAGS_setup.elf := -T $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE $(call if_changed,ld) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index d2b9f3bb87c0..92fdd35bd93e 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -22,7 +22,7 @@ $(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $ $(call if_changed,ld) @: -OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S +OBJCOPYFLAGS_vmlinux.bin := -R .comment -S $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 8182e32c1b42..69aec2f4155d 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -15,6 +15,10 @@ * we just keep it from happening */ #undef CONFIG_PARAVIRT +#ifdef CONFIG_X86_32 +#define _ASM_DESC_H_ 1 +#endif + #ifdef CONFIG_X86_64 #define _LINUX_STRING_H_ 1 #define __LINUX_BITMAP_H 1 @@ -22,6 +26,7 @@ #include #include +#include #include #include #include @@ -365,6 +370,56 @@ static void error(char *x) asm("hlt"); } +static void parse_elf(void *output) +{ +#ifdef CONFIG_X86_64 + Elf64_Ehdr ehdr; + Elf64_Phdr *phdrs, *phdr; +#else + Elf32_Ehdr ehdr; + Elf32_Phdr *phdrs, *phdr; +#endif + void *dest; + int i; + + memcpy(&ehdr, output, sizeof(ehdr)); + if(ehdr.e_ident[EI_MAG0] != ELFMAG0 || + ehdr.e_ident[EI_MAG1] != ELFMAG1 || + ehdr.e_ident[EI_MAG2] != ELFMAG2 || + ehdr.e_ident[EI_MAG3] != ELFMAG3) + { + error("Kernel is not a valid ELF file"); + return; + } + + putstr("Parsing ELF... 
"); + + phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum); + if (!phdrs) + error("Failed to allocate space for phdrs"); + + memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum); + + for (i=0; i<ehdr.e_phnum; i++) { + phdr = &phdrs[i]; + + switch (phdr->p_type) { + case PT_LOAD: +#ifdef CONFIG_RELOCATABLE + dest = output; + dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); +#else + dest = (void*)(phdr->p_paddr); +#endif + memcpy(dest, + output + phdr->p_offset, + phdr->p_filesz); + break; + default: /* Ignore other PT_* */ break; + } + } +} + asmlinkage void decompress_kernel(void *rmode, memptr heap, uch *input_data, unsigned long input_len, uch *output) @@ -408,6 +463,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, makecrc(); putstr("\nDecompressing Linux... "); gunzip(); + parse_elf(output); putstr("done.\nBooting the kernel.\n"); return; } diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 64ad9016585a..8471658d5534 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -22,6 +22,7 @@ #include #include #include "boot.h" +#include "offsets.h" SETUPSECTS = 4 /* default nr of setup-sectors */ BOOTSEG = 0x07C0 /* original address of boot-sector */ @@ -223,6 +224,11 @@ hardware_subarch: .long 0 # subarchitecture, added with 2.07 hardware_subarch_data: .quad 0 +compressed_payload_offset: + .long input_data +compressed_payload_length: + .long input_data_end-input_data + # End of setup header ##################################################### .section ".inittext", "ax" -- cgit v1.2.3 From 7d6e737c8d2698b63ad10fd75cc6793380395d0e Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Sun, 17 Feb 2008 20:06:35 +0100 Subject: x86: add a crc32 checksum to the kernel image. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/i386/boot.txt | 8 +++++ arch/x86/boot/tools/build.c | 88 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 95 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt index b5f5ba1ea668..05c24dfd7ecf 100644 --- a/Documentation/i386/boot.txt +++ b/Documentation/i386/boot.txt @@ -531,6 +531,14 @@ Protocol: 2.08+ The length of the compressed payload. +**** THE IMAGE CHECKSUM + +From boot protocol version 2.08 onwards the CRC-32 is calculated over +the entire file using the characteristic polynomial 0x04C11DB7 and an +initial remainder of 0xffffffff. The checksum is appended to the +file; therefore the CRC of the file up to the limit specified in the +syssize field of the header is always 0. + **** THE KERNEL COMMAND LINE The kernel command line has become an important way for the boot diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index b4248740ff0d..44dc1923c0e3 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -50,6 +50,75 @@ typedef unsigned long u32; u8 buf[SETUP_SECT_MAX*512]; int is_big_kernel; +/*----------------------------------------------------------------------*/ + +static const u32 crctab32[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 
0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 
0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d +}; + +static u32 partial_crc32_one(u8 c, u32 crc) +{ + return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8); +} + +static u32 partial_crc32(const u8 *s, int len, u32 crc) +{ + while (len--) + crc = partial_crc32_one(*s++, crc); + return crc; +} + static void die(const char * str, ...) { va_list args; @@ -74,6 +143,7 @@ int main(int argc, char ** argv) FILE *file; int fd; void *kernel; + u32 crc = 0xffffffffUL; if (argc > 2 && !strcmp(argv[1], "-b")) { @@ -144,7 +214,8 @@ int main(int argc, char ** argv) kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0); if (kernel == MAP_FAILED) die("Unable to mmap '%s': %m", argv[2]); - sys_size = (sz + 15) / 16; + /* Number of 16-byte paragraphs, including space for a 4-byte CRC */ + sys_size = (sz + 15 + 4) / 16; if (!is_big_kernel && sys_size > DEF_SYSSIZE) die("System is too big. 
Try using bzImage or modules."); @@ -155,12 +226,27 @@ int main(int argc, char ** argv) buf[0x1f6] = sys_size >> 16; buf[0x1f7] = sys_size >> 24; + crc = partial_crc32(buf, i, crc); if (fwrite(buf, 1, i, stdout) != i) die("Writing setup failed"); /* Copy the kernel code */ + crc = partial_crc32(kernel, sz, crc); if (fwrite(kernel, 1, sz, stdout) != sz) die("Writing kernel failed"); + + /* Add padding leaving 4 bytes for the checksum */ + while (sz++ < (sys_size*16) - 4) { + crc = partial_crc32_one('\0', crc); + if (fwrite("\0", 1, 1, stdout) != 1) + die("Writing padding failed"); + } + + /* Write the CRC */ + fprintf(stderr, "CRC %lx\n", crc); + if (fwrite(&crc, 1, 4, stdout) != 4) + die("Writing CRC failed"); + close(fd); /* Everything is OK */ -- cgit v1.2.3 From 87253d1b4f2b5a29bdfc6275b9fb52a47d72df64 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 19 Feb 2008 11:12:30 +0000 Subject: x86: boot protocol updates Also update field names to simply payload_{offset,length} so as to not rule out uncompressed images. Signed-off-by: Ian Campbell Cc: H. 
Peter Anvin Cc: Jeremy Fitzhardinge Cc: virtualization@lists.linux-foundation.org Signed-off-by: Ingo Molnar --- Documentation/i386/boot.txt | 18 ++++++++++-------- arch/x86/boot/header.S | 6 ++---- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'Documentation') diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt index 05c24dfd7ecf..2eb16100bb3f 100644 --- a/Documentation/i386/boot.txt +++ b/Documentation/i386/boot.txt @@ -170,8 +170,8 @@ Offset Proto Name Meaning 0238/4 2.06+ cmdline_size Maximum size of the kernel command line 023C/4 2.07+ hardware_subarch Hardware subarchitecture 0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data -0248/4 2.08+ compressed_payload_offset -024C/4 2.08+ compressed_payload_length +0248/4 2.08+ payload_offset Offset of kernel payload +024C/4 2.08+ payload_length Length of kernel payload (1) For backwards compatibility, if the setup_sects field contains 0, the real value is 4. @@ -514,22 +514,24 @@ Protocol: 2.07+ A pointer to data that is specific to hardware subarch -Field name: compressed_payload_offset +Field name: payload_offset Type: read Offset/size: 0x248/4 Protocol: 2.08+ If non-zero then this field contains the offset from the end of the - real-mode code to the compressed payload. The compression format - should be determined using the standard magic number, currently only - gzip is used. + real-mode code to the payload. + + The payload may be compressed. The format of both the compressed and + uncompressed data should be determined using the standard magic + numbers. Currently only gzip compressed ELF is used. -Field name: compressed_payload_length +Field name: payload_length Type: read Offset/size: 0x24c/4 Protocol: 2.08+ - The length of the compressed payload. + The length of the payload. 
**** THE IMAGE CHECKSUM diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 40c91bb483e1..6d2df8d61c54 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -224,10 +224,8 @@ hardware_subarch: .long 0 # subarchitecture, added with 2.07 hardware_subarch_data: .quad 0 -compressed_payload_offset: - .long input_data -compressed_payload_length: - .long input_data_end-input_data +payload_offset: .long input_data +payload_length: .long input_data_end-input_data # End of setup header ##################################################### -- cgit v1.2.3 From b552da8740222c35bcd83c9be7b27185bfb6d53c Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 19 Mar 2008 15:58:11 +0100 Subject: x86 iommu: add more documentation Fix coding style in pci-dma_64.c and add stubs for documentation. I hope someone fills the rest, I understand maybe off and soft... Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 13 +++++++++++++ arch/x86/kernel/pci-dma_64.c | 20 ++++++++++---------- 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index dafd001bf833..43c527d72f2f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -812,6 +812,19 @@ and is between 256 and 4096 characters. It is defined in the file inttest= [IA64] + iommu= [x86] + off + force + noforce + biomerge + panic + nopanic + merge + nomerge + forcesac + soft + + intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option off Disable intel iommu driver. 
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 375cb2bc45be..ada5a0604992 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -232,32 +232,32 @@ static __init int iommu_setup(char *p) return -EINVAL; while (*p) { - if (!strncmp(p,"off",3)) + if (!strncmp(p, "off", 3)) no_iommu = 1; /* gart_parse_options has more force support */ - if (!strncmp(p,"force",5)) + if (!strncmp(p, "force", 5)) force_iommu = 1; - if (!strncmp(p,"noforce",7)) { + if (!strncmp(p, "noforce", 7)) { iommu_merge = 0; force_iommu = 0; } - if (!strncmp(p, "biomerge",8)) { + if (!strncmp(p, "biomerge", 8)) { iommu_bio_merge = 4096; iommu_merge = 1; force_iommu = 1; } - if (!strncmp(p, "panic",5)) + if (!strncmp(p, "panic", 5)) panic_on_overflow = 1; - if (!strncmp(p, "nopanic",7)) + if (!strncmp(p, "nopanic", 7)) panic_on_overflow = 0; - if (!strncmp(p, "merge",5)) { + if (!strncmp(p, "merge", 5)) { iommu_merge = 1; force_iommu = 1; } - if (!strncmp(p, "nomerge",7)) + if (!strncmp(p, "nomerge", 7)) iommu_merge = 0; - if (!strncmp(p, "forcesac",8)) + if (!strncmp(p, "forcesac", 8)) iommu_sac_force = 1; if (!strncmp(p, "allowdac", 8)) forbid_dac = 0; @@ -265,7 +265,7 @@ static __init int iommu_setup(char *p) forbid_dac = -1; #ifdef CONFIG_SWIOTLB - if (!strncmp(p, "soft",4)) + if (!strncmp(p, "soft", 4)) swiotlb = 1; #endif -- cgit v1.2.3 From d27554d874c7eeb14c8bfecdc39c3a8618cd8d32 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Tue, 18 Mar 2008 17:00:13 -0700 Subject: x86: PAT documentation Documentation about PAT related interfaces, intended usage and memory attribute relationship. 
Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- Documentation/x86/pat.txt | 100 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 Documentation/x86/pat.txt (limited to 'Documentation') diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt new file mode 100644 index 000000000000..17965f927c15 --- /dev/null +++ b/Documentation/x86/pat.txt @@ -0,0 +1,100 @@ + +PAT (Page Attribute Table) + +x86 Page Attribute Table (PAT) allows for setting the memory attribute at the +page level granularity. PAT is complementary to the MTRR settings which allow +for setting of memory types over physical address ranges. However, PAT is +more flexible than MTRR due to its capability to set attributes at page level +and also due to the fact that there are no hardware limitations on number of +such attribute settings allowed. Added flexibility comes with guidelines for +not having memory type aliasing for the same physical memory with multiple +virtual addresses. + +PAT allows for different types of memory attributes. The most commonly used +ones that will be supported at this time are Write-back, Uncached, +Write-combined and Uncached Minus. + +There are many different APIs in the kernel that allow setting of memory +attributes at the page level. In order to avoid aliasing, these interfaces +should be used thoughtfully. Below is a table of interfaces available, +their intended usage and their memory attribute relationships. Internally, +these APIs use a reserve_memtype()/free_memtype() interface on the physical +address range to avoid any aliasing. + + +------------------------------------------------------------------- +API | RAM | ACPI,... 
| Reserved/Holes | +-----------------------|----------|------------|------------------| + | | | | +ioremap | -- | UC | UC | + | | | | +ioremap_cache | -- | WB | WB | + | | | | +ioremap_nocache | -- | UC | UC | + | | | | +ioremap_wc | -- | -- | WC | + | | | | +set_memory_uc | UC | -- | -- | + set_memory_wb | | | | + | | | | +set_memory_wc | WC | -- | -- | + set_memory_wb | | | | + | | | | +pci sysfs resource | -- | -- | UC | + | | | | +pci sysfs resource_wc | -- | -- | WC | + is IORESOURCE_PREFETCH| | | | + | | | | +pci proc | -- | -- | UC | + !PCIIOC_WRITE_COMBINE | | | | + | | | | +pci proc | -- | -- | WC | + PCIIOC_WRITE_COMBINE | | | | + | | | | +/dev/mem | -- | UC | UC | + read-write | | | | + | | | | +/dev/mem | -- | UC | UC | + mmap SYNC flag | | | | + | | | | +/dev/mem | -- | WB/WC/UC | WB/WC/UC | + mmap !SYNC flag | |(from exist-| (from exist- | + and | | ing alias)| ing alias) | + any alias to this area| | | | + | | | | +/dev/mem | -- | WB | WB | + mmap !SYNC flag | | | | + no alias to this area | | | | + and | | | | + MTRR says WB | | | | + | | | | +/dev/mem | -- | -- | UC_MINUS | + mmap !SYNC flag | | | | + no alias to this area | | | | + and | | | | + MTRR says !WB | | | | + | | | | +------------------------------------------------------------------- + +Notes: + +-- in the above table mean "Not suggested usage for the API". Some of the --'s +are strictly enforced by the kernel. Some others are not really enforced +today, but may be enforced in future. + +For ioremap and pci access through /sys or /proc - The actual type returned +can be more restrictive, in case of any existing aliasing for that address. +For example: If there is an existing uncached mapping, a new ioremap_wc can +return uncached mapping in place of write-combine requested. + +set_memory_[uc|wc] and set_memory_wb should be used in pairs, where driver will +first make a region uc or wc and switch it back to wb after use. 
+ +Over time writes to /proc/mtrr will be deprecated in favor of using PAT based +interfaces. Users writing to /proc/mtrr are advised to use the above interfaces. + +Drivers should use ioremap_[uc|wc] to access PCI BARs with [uc|wc] access +types. + +Drivers should use set_memory_[uc|wc] to set access type for RAM ranges. + -- cgit v1.2.3 From c64df70793a9c344874eb4af19f85e0662d2d3ee Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 21 Mar 2008 18:56:19 -0700 Subject: x86: memtest bootparam Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 5 +++++ arch/x86/Kconfig | 29 +++++++++++++++++++++++++++++ arch/x86/kernel/e820_64.c | 10 +++++----- arch/x86/mm/init_64.c | 24 +++++++++++++++++------- 4 files changed, 56 insertions(+), 12 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 43c527d72f2f..f9ea0803d5d6 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1147,6 +1147,11 @@ and is between 256 and 4096 characters. It is defined in the file or memmap=0x10000$0x18690000 + memtest= [KNL,X86_64] Enable memtest + Format: <integer> + range: 0,4 : pattern number + default : 0 + meye.*= [HW] Set MotionEye Camera parameters See Documentation/video4linux/meye.txt. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e50e38e76d9f..a0d7406e8b37 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -382,6 +382,35 @@ config PARAVIRT endif +config MEMTEST_BOOTPARAM + bool "Memtest boot parameter" + depends on X86_64 + default y + help + This option adds a kernel parameter 'memtest', which allows memtest + to be disabled at boot. If this option is selected, memtest + functionality can be disabled with memtest=0 on the kernel + command line. The purpose of this option is to allow a single + kernel image to be distributed with memtest built in, but not + necessarily enabled. 
+ + If you are unsure how to answer this question, answer Y. + +config MEMTEST_BOOTPARAM_VALUE + int "Memtest boot parameter default value (0-4)" + depends on MEMTEST_BOOTPARAM + range 0 4 + default 0 + help + This option sets the default value for the kernel parameter + 'memtest', which allows memtest to be disabled at boot. If this + option is set to 0 (zero), the memtest kernel parameter will + default to 0, disabling memtest at bootup. If this option is + set to 4, the memtest kernel parameter will default to 4, + enabling memtest at bootup, and use that as pattern number. + + If you are unsure how to answer this question, answer 0. + config ACPI_SRAT def_bool y depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 9184e6437c4f..d6ada0833876 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -241,7 +241,9 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end, /* * Find next free range after *start */ -unsigned long __init find_e820_area_size(unsigned long start, unsigned long *sizep, unsigned long align) +unsigned long __init find_e820_area_size(unsigned long start, + unsigned long *sizep, + unsigned long align) { int i; @@ -254,17 +256,15 @@ unsigned long __init find_e820_area_size(unsigned long start, unsigned long *siz continue; addr = round_up(ei->addr, align); ei_last = ei->addr + ei->size; -// printk(KERN_DEBUG "find_e820_area_size : e820 %d [%llx, %lx]\n", i, ei->addr, ei_last); if (addr < start) addr = round_up(start, align); -// printk(KERN_DEBUG "find_e820_area_size : 0 [%lx, %lx]\n", addr, ei_last); if (addr >= ei_last) continue; *sizep = ei_last - addr; - while (bad_addr_size(&addr, sizep, align) && addr+ *sizep <= ei_last) + while (bad_addr_size(&addr, sizep, align) && + addr + *sizep <= ei_last) ; last = addr + *sizep; -// printk(KERN_DEBUG "find_e820_area_size : 1 [%lx, %lx]\n", addr, last); if (last > ei_last) 
continue; return addr; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 52f54ee4559f..ae225c3ae9a8 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -427,7 +427,10 @@ static void __init init_gbpages(void) direct_gbpages = 0; } -static void __init memtest(unsigned long start_phys, unsigned long size, unsigned pattern) +#ifdef CONFIG_MEMTEST_BOOTPARAM + +static void __init memtest(unsigned long start_phys, unsigned long size, + unsigned pattern) { unsigned long i; unsigned long *start; @@ -486,11 +489,12 @@ static void __init memtest(unsigned long start_phys, unsigned long size, unsigne } -static int __initdata memtest_pattern; +static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE; + static int __init parse_memtest(char *arg) { if (arg) - memtest_pattern = simple_strtoul(arg, NULL, 0) + 1; + memtest_pattern = simple_strtoul(arg, NULL, 0); return 0; } @@ -501,8 +505,10 @@ static void __init early_memtest(unsigned long start, unsigned long end) unsigned long t_start, t_size; unsigned pattern; - if (memtest_pattern) - printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern); + if (!memtest_pattern) + return; + + printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern); for (pattern = 0; pattern < memtest_pattern; pattern++) { t_start = start; t_size = 0; @@ -523,9 +529,13 @@ static void __init early_memtest(unsigned long start, unsigned long end) t_start += t_size; } } - if (memtest_pattern) - printk(KERN_CONT "\n"); + printk(KERN_CONT "\n"); } +#else +static void __init early_memtest(unsigned long start, unsigned long end) +{ +} +#endif /* * Setup the direct mapping of the physical memory at PAGE_OFFSET. 
-- cgit v1.2.3 From 19b4e7f4e9b1c88459cf2c9b9ccaa09cb8bf854d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 10 Apr 2008 10:12:27 +0200 Subject: x86: extend the scheduled bzImage symlinks removal Use of the bzImage symlinks in developer scripts is still widespread, so let's extend the removal period by 2 years. These symlinks cost us next to nothing. Signed-off-by: Ingo Molnar --- Documentation/feature-removal-schedule.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index bf0e3df8e7a1..164c89394cff 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -212,7 +212,7 @@ Who: Stephen Hemminger --------------------------- What: i386/x86_64 bzImage symlinks -When: April 2008 +When: April 2010 Why: The i386/x86_64 merge provides a symlink to the old bzImage location so not yet updated user space tools, e.g. package -- cgit v1.2.3