diff options
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/Kconfig | 4 | ||||
-rw-r--r-- | arch/x86_64/defconfig | 8 | ||||
-rw-r--r-- | arch/x86_64/kernel/mpparse.c | 37 | ||||
-rw-r--r-- | arch/x86_64/kernel/nmi.c | 4 | ||||
-rw-r--r-- | arch/x86_64/kernel/pci-dma.c | 93 | ||||
-rw-r--r-- | arch/x86_64/kernel/pci-swiotlb.c | 3 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup.c | 11 | ||||
-rw-r--r-- | arch/x86_64/kernel/time.c | 20 | ||||
-rw-r--r-- | arch/x86_64/kernel/vmlinux.lds.S | 17 | ||||
-rw-r--r-- | arch/x86_64/kernel/vsyscall.c | 11 | ||||
-rw-r--r-- | arch/x86_64/mm/fault.c | 6 | ||||
-rw-r--r-- | arch/x86_64/mm/init.c | 38 | ||||
-rw-r--r-- | arch/x86_64/mm/ioremap.c | 111 | ||||
-rw-r--r-- | arch/x86_64/mm/srat.c | 66 |
14 files changed, 187 insertions, 242 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 32ae1378f35c..b87a19f0d584 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -367,6 +367,10 @@ config ARCH_FLATMEM_ENABLE source "mm/Kconfig" +config MEMORY_HOTPLUG_RESERVE + def_bool y + depends on (MEMORY_HOTPLUG && DISCONTIGMEM) + config HAVE_ARCH_EARLY_PFN_TO_NID def_bool y depends on NUMA diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 647610ecb580..4844b543bed0 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -1,11 +1,12 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.18-git5 -# Tue Sep 26 09:30:47 2006 +# Linux kernel version: 2.6.18-git7 +# Wed Sep 27 21:53:10 2006 # CONFIG_X86_64=y CONFIG_64BIT=y CONFIG_X86=y +CONFIG_ZONE_DMA32=y CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_SEMAPHORE_SLEEPERS=y @@ -179,6 +180,7 @@ CONFIG_GENERIC_PENDING_IRQ=y CONFIG_PM=y # CONFIG_PM_LEGACY is not set # CONFIG_PM_DEBUG is not set +# CONFIG_PM_SYSFS_DEPRECATED is not set CONFIG_SOFTWARE_SUSPEND=y CONFIG_PM_STD_PARTITION="" CONFIG_SUSPEND_SMP=y @@ -251,6 +253,7 @@ CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_MSI=y +# CONFIG_PCI_MULTITHREAD_PROBE is not set # CONFIG_PCI_DEBUG is not set # @@ -1458,6 +1461,7 @@ CONFIG_KPROBES=y # CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index 20e88f4b564b..b8d53dfa9931 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -152,6 +152,21 @@ static void __init MP_bus_info (struct mpc_config_bus *m) } } +static int bad_ioapic(unsigned long address) +{ + if (nr_ioapics >= MAX_IO_APICS) { + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " + "(found %d)\n", MAX_IO_APICS, nr_ioapics); + panic("Recompile kernel with bigger MAX_IO_APICS!\n"); + } + if (!address) { + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" + " found in table, skipping!\n"); + return 1; + } + return 0; +} + static void __init MP_ioapic_info (struct mpc_config_ioapic *m) { if (!(m->mpc_flags & MPC_APIC_USABLE)) @@ -159,16 +174,10 @@ static void __init MP_ioapic_info (struct mpc_config_ioapic *m) printk("I/O APIC #%d at 0x%X.\n", m->mpc_apicid, m->mpc_apicaddr); - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n", - MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); - } - if (!m->mpc_apicaddr) { - printk(KERN_ERR "WARNING: bogus zero I/O APIC address" - " found in MP table, skipping!\n"); + + if (bad_ioapic(m->mpc_apicaddr)) return; - } + mp_ioapics[nr_ioapics] = *m; nr_ioapics++; } @@ -647,16 +656,8 @@ void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) { int idx = 0; - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " - "(found %d)\n", MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!\n"); - } - if (!address) { - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" - " found in MADT table, skipping!\n"); + if (bad_ioapic(address)) return; - } idx = nr_ioapics++; diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 4d6fb047952e..7af9cb3e2d99 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -28,6 +28,10 @@ #include <asm/mce.h> #include <asm/intel_arch_perfmon.h> +int unknown_nmi_panic; +int nmi_watchdog_enabled; +int panic_on_unrecovered_nmi; + /* perfctr_nmi_owner tracks the ownership of the perfctr registers: * evtsel_nmi_owner tracks the ownership of the event selection * - different performance counters/ event selection may be reserved for diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index 4dcb671bd19f..f8d857453f8a 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -170,8 +170,20 @@ void dma_free_coherent(struct device *dev, size_t size, } EXPORT_SYMBOL(dma_free_coherent); +static int forbid_dac __read_mostly; + int dma_supported(struct device *dev, u64 mask) { +#ifdef CONFIG_PCI + if (mask > 0xffffffff && forbid_dac > 0) { + + + + printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", dev->bus_id); + return 0; + } +#endif + if (dma_ops->dma_supported) return dma_ops->dma_supported(dev, mask); @@ -231,57 +243,64 @@ EXPORT_SYMBOL(dma_set_mask); allowed overwrite iommu off workarounds for specific chipsets. soft Use software bounce buffering (default for Intel machines) noaperture Don't touch the aperture for AGP. + allowdac Allow DMA >4GB + nodac Forbid DMA >4GB + panic Force panic when IOMMU overflows */ __init int iommu_setup(char *p) { - iommu_merge = 1; + iommu_merge = 1; if (!p) return -EINVAL; - while (*p) { - if (!strncmp(p,"off",3)) - no_iommu = 1; - /* gart_parse_options has more force support */ - if (!strncmp(p,"force",5)) - force_iommu = 1; - if (!strncmp(p,"noforce",7)) { - iommu_merge = 0; - force_iommu = 0; - } - - if (!strncmp(p, "biomerge",8)) { - iommu_bio_merge = 4096; - iommu_merge = 1; - force_iommu = 1; - } - if (!strncmp(p, "panic",5)) - panic_on_overflow = 1; - if (!strncmp(p, "nopanic",7)) - panic_on_overflow = 0; - if (!strncmp(p, "merge",5)) { - iommu_merge = 1; - force_iommu = 1; - } - if (!strncmp(p, "nomerge",7)) - iommu_merge = 0; - if (!strncmp(p, "forcesac",8)) - iommu_sac_force = 1; + while (*p) { + if (!strncmp(p,"off",3)) + no_iommu = 1; + /* gart_parse_options has more force support */ + if (!strncmp(p,"force",5)) + force_iommu = 1; + if (!strncmp(p,"noforce",7)) { + iommu_merge = 0; + force_iommu = 0; + } + + if (!strncmp(p, "biomerge",8)) { + iommu_bio_merge = 4096; + iommu_merge = 1; + force_iommu = 1; + } + if (!strncmp(p, "panic",5)) + panic_on_overflow = 1; + if (!strncmp(p, "nopanic",7)) + panic_on_overflow = 0; + if (!strncmp(p, "merge",5)) { + iommu_merge = 1; + force_iommu = 1; + } + if (!strncmp(p, "nomerge",7)) + iommu_merge = 0; + if (!strncmp(p, "forcesac",8)) + iommu_sac_force = 1; + if (!strncmp(p, "allowdac", 8)) + forbid_dac = 0; + if (!strncmp(p, "nodac", 5)) + forbid_dac = -1; #ifdef CONFIG_SWIOTLB - if (!strncmp(p, "soft",4)) - swiotlb = 1; + if (!strncmp(p, "soft",4)) + swiotlb = 1; #endif #ifdef CONFIG_IOMMU - gart_parse_options(p); + gart_parse_options(p); #endif - p += strcspn(p, ","); - if (*p == ',') - ++p; - } - return 0; + p += strcspn(p, ","); + if (*p == ',') + ++p; + } + return 0; } early_param("iommu", iommu_setup); diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c index 6a55f87ba97f..697f0aa794b9 100644 --- a/arch/x86_64/kernel/pci-swiotlb.c +++ b/arch/x86_64/kernel/pci-swiotlb.c @@ -3,7 +3,8 @@ #include <linux/pci.h> #include <linux/cache.h> #include <linux/module.h> -#include <asm/dma-mapping.h> +#include <linux/dma-mapping.h> + #include <asm/proto.h> #include <asm/swiotlb.h> #include <asm/dma.h> diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 0b00bb2ea576..fc944b5e8f4a 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -123,9 +123,6 @@ struct resource standard_io_resources[] = { .flags = IORESOURCE_BUSY | IORESOURCE_IO } }; -#define STANDARD_IO_RESOURCES \ - (sizeof standard_io_resources / sizeof standard_io_resources[0]) - #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM) struct resource data_resource = { @@ -172,9 +169,6 @@ static struct resource adapter_rom_resources[] = { .flags = IORESOURCE_ROM } }; -#define ADAPTER_ROM_RESOURCES \ - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0]) - static struct resource video_rom_resource = { .name = "Video ROM", .start = 0xc0000, @@ -245,7 +239,8 @@ static void __init probe_roms(void) } /* check for adapter roms on 2k boundaries */ - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) { + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; + start += 2048) { rom = isa_bus_to_virt(start); if (!romsignature(rom)) continue; @@ -537,7 +532,7 @@ void __init setup_arch(char **cmdline_p) { unsigned i; /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < STANDARD_IO_RESOURCES; i++) + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) request_resource(&ioport_resource, &standard_io_resources[i]); } diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 1c255ee76e7c..557e92af7bea 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -77,7 +77,6 @@ unsigned long long monotonic_base; struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */ volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; -unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES; struct timespec __xtime __section_xtime; struct timezone __sys_tz __section_sys_tz; @@ -119,7 +118,7 @@ unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; void do_gettimeofday(struct timeval *tv) { - unsigned long seq, t; + unsigned long seq; unsigned int sec, usec; do { @@ -136,10 +135,7 @@ void do_gettimeofday(struct timeval *tv) be found. Note when you fix it here you need to do the same in arch/x86_64/kernel/vsyscall.c and export all needed variables in vmlinux.lds. -AK */ - - t = (jiffies - wall_jiffies) * USEC_PER_TICK + - do_gettimeoffset(); - usec += t; + usec += do_gettimeoffset(); } while (read_seqretry(&xtime_lock, seq)); @@ -165,8 +161,7 @@ int do_settimeofday(struct timespec *tv) write_seqlock_irq(&xtime_lock); - nsec -= do_gettimeoffset() * NSEC_PER_USEC + - (jiffies - wall_jiffies) * NSEC_PER_TICK; + nsec -= do_gettimeoffset() * NSEC_PER_USEC; wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); @@ -415,16 +410,16 @@ void main_timer_handler(struct pt_regs *regs) (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1; } - if (lost > 0) { + if (lost > 0) handle_lost_ticks(lost, regs); - jiffies += lost; - } + else + lost = 0; /* * Do the timer stuff. */ - do_timer(regs); + do_timer(lost + 1); #ifndef CONFIG_SMP update_process_times(user_mode(regs)); #endif @@ -1071,7 +1066,6 @@ static int timer_resume(struct sys_device *dev) vxtime.last_tsc = get_cycles_sync(); write_sequnlock_irqrestore(&xtime_lock,flags); jiffies += sleep_length; - wall_jiffies += sleep_length; monotonic_base += sleep_length * (NSEC_PER_SEC/HZ); touch_softlockup_watchdog(); return 0; diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index d0564f1bcb0b..b9df2ab6529f 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -67,13 +67,6 @@ SECTIONS _edata = .; /* End of data section */ - __bss_start = .; /* BSS */ - .bss : AT(ADDR(.bss) - LOAD_OFFSET) { - *(.bss.page_aligned) - *(.bss) - } - __bss_stop = .; - . = ALIGN(PAGE_SIZE); . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { @@ -108,9 +101,6 @@ SECTIONS .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } vgetcpu_mode = VVIRT(.vgetcpu_mode); - .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) } - wall_jiffies = VVIRT(.wall_jiffies); - .sys_tz : AT(VLOAD(.sys_tz)) { *(.sys_tz) } sys_tz = VVIRT(.sys_tz); @@ -229,6 +219,13 @@ SECTIONS . = ALIGN(4096); __nosave_end = .; + __bss_start = .; /* BSS */ + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + *(.bss.page_aligned) + *(.bss) + } + __bss_stop = .; + _end = . ; /* Sections to be discarded */ diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index ac48c3857ddb..a98b460af6a1 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -66,8 +66,7 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) sequence = read_seqbegin(&__xtime_lock); sec = __xtime.tv_sec; - usec = (__xtime.tv_nsec / 1000) + - (__jiffies - __wall_jiffies) * (1000000 / HZ); + usec = __xtime.tv_nsec / 1000; if (__vxtime.mode != VXTIME_HPET) { t = get_cycles_sync(); @@ -155,8 +154,8 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) We do this here because otherwise user space would do it on its own in a likely inferior way (no access to jiffies). If you don't like it pass NULL. */ - if (tcache && tcache->t0 == (j = __jiffies)) { - p = tcache->t1; + if (tcache && tcache->blob[0] == (j = __jiffies)) { + p = tcache->blob[1]; } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { /* Load per CPU data from RDTSCP */ rdtscp(dummy, dummy, p); @@ -165,8 +164,8 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); } if (tcache) { - tcache->t0 = j; - tcache->t1 = p; + tcache->blob[0] = j; + tcache->blob[1] = p; } if (cpu) *cpu = p & 0xfff; diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 1a17b0733ab5..3751b4788e28 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -244,7 +244,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) int unhandled_signal(struct task_struct *tsk, int sig) { - if (tsk->pid == 1) + if (is_init(tsk)) return 1; if (tsk->ptrace & PT_PTRACED) return 0; @@ -464,7 +464,7 @@ good_area: case PF_PROT: /* read, present */ goto bad_area; case 0: /* read, not present */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) goto bad_area; } @@ -580,7 +580,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); goto again; } diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 3e16fe08150e..19c72520a868 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -463,19 +463,6 @@ void online_page(struct page *page) #ifdef CONFIG_MEMORY_HOTPLUG /* - * XXX: memory_add_physaddr_to_nid() is to find node id from physical address - * via probe interface of sysfs. If acpi notifies hot-add event, then it - * can tell node id by searching dsdt. But, probe interface doesn't have - * node id. So, return 0 as node id at this time. - */ -#ifdef CONFIG_NUMA -int memory_add_physaddr_to_nid(u64 start) -{ - return 0; -} -#endif - -/* * Memory is added always to NORMAL zone. This means you will never get * additional DMA/DMA32 memory. */ @@ -487,12 +474,12 @@ int arch_add_memory(int nid, u64 start, u64 size) unsigned long nr_pages = size >> PAGE_SHIFT; int ret; + init_memory_mapping(start, (start + size -1)); + ret = __add_pages(zone, start_pfn, nr_pages); if (ret) goto error; - init_memory_mapping(start, (start + size -1)); - return ret; error: printk("%s: Problem encountered in __add_pages!\n", __func__); @@ -506,7 +493,24 @@ int remove_memory(u64 start, u64 size) } EXPORT_SYMBOL_GPL(remove_memory); -#else /* CONFIG_MEMORY_HOTPLUG */ +#ifndef CONFIG_ACPI_NUMA +int memory_add_physaddr_to_nid(u64 start) +{ + return 0; +} +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); +#endif + +#ifndef CONFIG_ACPI_NUMA +int memory_add_physaddr_to_nid(u64 start) +{ + return 0; +} +#endif + +#endif /* CONFIG_MEMORY_HOTPLUG */ + +#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE /* * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance, * just online the pages. @@ -532,7 +536,7 @@ int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages) } return err; } -#endif /* CONFIG_MEMORY_HOTPLUG */ +#endif static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, kcore_vsyscall; diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c index 45d7d823c3b8..c6e5e8d401a4 100644 --- a/arch/x86_64/mm/ioremap.c +++ b/arch/x86_64/mm/ioremap.c @@ -12,117 +12,16 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/module.h> -#include <asm/io.h> +#include <linux/io.h> #include <asm/pgalloc.h> #include <asm/fixmap.h> -#include <asm/cacheflush.h> #include <asm/tlbflush.h> +#include <asm/cacheflush.h> #include <asm/proto.h> #define ISA_START_ADDRESS 0xa0000 #define ISA_END_ADDRESS 0x100000 -static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) -{ - unsigned long end; - unsigned long pfn; - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - if (address >= end) - BUG(); - pfn = phys_addr >> PAGE_SHIFT; - do { - if (!pte_none(*pte)) { - printk("remap_area_pte: page already exists\n"); - BUG(); - } - set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW | - _PAGE_GLOBAL | _PAGE_DIRTY | _PAGE_ACCESSED | flags))); - address += PAGE_SIZE; - pfn++; - pte++; - } while (address && (address < end)); -} - -static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) -{ - unsigned long end; - - address &= ~PUD_MASK; - end = address + size; - if (end > PUD_SIZE) - end = PUD_SIZE; - phys_addr -= address; - if (address >= end) - BUG(); - do { - pte_t * pte = pte_alloc_kernel(pmd, address); - if (!pte) - return -ENOMEM; - remap_area_pte(pte, address, end - address, address + phys_addr, flags); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} - -static inline int remap_area_pud(pud_t * pud, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) -{ - unsigned long end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - phys_addr -= address; - if (address >= end) - BUG(); - do { - pmd_t * pmd = pmd_alloc(&init_mm, pud, address); - if (!pmd) - return -ENOMEM; - remap_area_pmd(pmd, address, end - address, address + phys_addr, flags); - address = (address + PUD_SIZE) & PUD_MASK; - pud++; - } while (address && (address < end)); - return 0; -} - -static int remap_area_pages(unsigned long address, unsigned long phys_addr, - unsigned long size, unsigned long flags) -{ - int error; - pgd_t *pgd; - unsigned long end = address + size; - - phys_addr -= address; - pgd = pgd_offset_k(address); - flush_cache_all(); - if (address >= end) - BUG(); - do { - pud_t *pud; - pud = pud_alloc(&init_mm, pgd, address); - error = -ENOMEM; - if (!pud) - break; - if (remap_area_pud(pud, address, end - address, - phys_addr + address, flags)) - break; - error = 0; - address = (address + PGDIR_SIZE) & PGDIR_MASK; - pgd++; - } while (address && (address < end)); - flush_tlb_all(); - return error; -} - /* * Fix up the linear direct mapping of the kernel to avoid cache attribute * conflicts. @@ -165,6 +64,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l void * addr; struct vm_struct * area; unsigned long offset, last_addr; + pgprot_t pgprot; /* Don't allow wraparound or zero size */ last_addr = phys_addr + size - 1; @@ -194,6 +94,8 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l } #endif + pgprot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_GLOBAL + | _PAGE_DIRTY | _PAGE_ACCESSED | flags); /* * Mappings have to be page-aligned */ @@ -209,7 +111,8 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l return NULL; area->phys_addr = phys_addr; addr = area->addr; - if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { + if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size, + phys_addr, pgprot)) { remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr)); return NULL; } diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index f8c04d6935c9..3cc0544e25f5 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c @@ -23,22 +23,13 @@ int acpi_numa __initdata; -#if (defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \ - defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)) \ - && !defined(CONFIG_MEMORY_HOTPLUG) -#define RESERVE_HOTADD 1 -#endif - static struct acpi_table_slit *acpi_slit; static nodemask_t nodes_parsed __initdata; static struct bootnode nodes[MAX_NUMNODES] __initdata; -static struct bootnode nodes_add[MAX_NUMNODES] __initdata; +static struct bootnode nodes_add[MAX_NUMNODES]; static int found_add_area __initdata; int hotadd_percent __initdata = 0; -#ifndef RESERVE_HOTADD -#define hotadd_percent 0 /* Ignore all settings */ -#endif /* Too small nodes confuse the VM badly. Usually they result from BIOS bugs. */ @@ -160,7 +151,7 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa) pxm, pa->apic_id, node); } -#ifdef RESERVE_HOTADD +#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE /* * Protect against too large hotadd areas that would fill up memory. */ @@ -203,15 +194,37 @@ static int hotadd_enough_memory(struct bootnode *nd) return 1; } +static int update_end_of_memory(unsigned long end) +{ + found_add_area = 1; + if ((end >> PAGE_SHIFT) > end_pfn) + end_pfn = end >> PAGE_SHIFT; + return 1; +} + +static inline int save_add_info(void) +{ + return hotadd_percent > 0; +} +#else +int update_end_of_memory(unsigned long end) {return 0;} +static int hotadd_enough_memory(struct bootnode *nd) {return 1;} +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +static inline int save_add_info(void) {return 1;} +#else +static inline int save_add_info(void) {return 0;} +#endif +#endif /* - * It is fine to add this area to the nodes data it will be used later + * Update nodes_add and decide if to include add are in the zone. + * Both SPARSE and RESERVE need nodes_add infomation. * This code supports one contigious hot add area per node. */ static int reserve_hotadd(int node, unsigned long start, unsigned long end) { unsigned long s_pfn = start >> PAGE_SHIFT; unsigned long e_pfn = end >> PAGE_SHIFT; - int changed = 0; + int ret = 0, changed = 0; struct bootnode *nd = &nodes_add[node]; /* I had some trouble with strange memory hotadd regions breaking @@ -240,7 +253,6 @@ static int reserve_hotadd(int node, unsigned long start, unsigned long end) /* Looks good */ - found_add_area = 1; if (nd->start == nd->end) { nd->start = start; nd->end = end; @@ -258,14 +270,12 @@ static int reserve_hotadd(int node, unsigned long start, unsigned long end) printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); } - if ((nd->end >> PAGE_SHIFT) > end_pfn) - end_pfn = nd->end >> PAGE_SHIFT; + ret = update_end_of_memory(nd->end); if (changed) printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); - return 0; + return ret; } -#endif /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ void __init @@ -284,7 +294,7 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) } if (ma->flags.enabled == 0) return; - if (ma->flags.hot_pluggable && hotadd_percent == 0) + if (ma->flags.hot_pluggable && !save_add_info()) return; start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32); end = start + (ma->length_lo | ((u64)ma->length_hi << 32)); @@ -327,15 +337,13 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) push_node_boundaries(node, nd->start >> PAGE_SHIFT, nd->end >> PAGE_SHIFT); -#ifdef RESERVE_HOTADD - if (ma->flags.hot_pluggable && reserve_hotadd(node, start, end) < 0) { + if (ma->flags.hot_pluggable && !reserve_hotadd(node, start, end) < 0) { /* Ignore hotadd region. Undo damage */ printk(KERN_NOTICE "SRAT: Hotplug region ignored\n"); *nd = oldnode; if ((nd->start | nd->end) == 0) node_clear(node, nodes_parsed); } -#endif } /* Sanity check to catch more bad SRATs (they are amazingly common). @@ -351,7 +359,6 @@ static int nodes_cover_memory(void) unsigned long e = nodes[i].end >> PAGE_SHIFT; pxmram += e - s; pxmram -= absent_pages_in_range(s, e); - pxmram -= nodes_add[i].end - nodes_add[i].start; if ((long)pxmram < 0) pxmram = 0; } @@ -459,3 +466,16 @@ int __node_distance(int a, int b) } EXPORT_SYMBOL(__node_distance); + +int memory_add_physaddr_to_nid(u64 start) +{ + int i, ret = 0; + + for_each_node(i) + if (nodes_add[i].start <= start && nodes_add[i].end > start) + ret = i; + + return ret; +} +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); + |